From b6c9e49bb83666a70458ff6e6a98df730f13753e Mon Sep 17 00:00:00 2001 From: Lee Stott Date: Mon, 23 Mar 2026 16:29:48 -0700 Subject: [PATCH 01/13] Improve samples with cache-awareness, add 4 new samples, fix SDK versions, and prepare repo for public sharing --- SUPPORT.md | 25 +- .../src/AudioTranscriptionExample/Program.cs | 29 +- .../src/FoundryLocalWebServer/Program.cs | 30 +- .../src/HelloFoundryLocalSdk/Program.cs | 29 +- .../src/ModelManagementExample/Program.cs | 29 +- .../src/ToolCallingFoundryLocalSdk/Program.cs | 30 +- .../Program.cs | 30 +- .../Health/FoundryHealthCheck.cs | 28 + .../Middleware/ErrorHandlingMiddleware.cs | 32 + samples/cs/whisper-transcription/Program.cs | 98 +++ samples/cs/whisper-transcription/README.md | 112 +++ .../Services/FoundryModelService.cs | 89 +++ .../Services/FoundryOptions.cs | 8 + .../Services/TranscriptionService.cs | 54 ++ .../WhisperTranscription.csproj | 22 + .../cs/whisper-transcription/appsettings.json | 14 + samples/cs/whisper-transcription/nuget.config | 16 + .../cs/whisper-transcription/wwwroot/app.js | 124 +++ .../whisper-transcription/wwwroot/index.html | 73 ++ .../whisper-transcription/wwwroot/styles.css | 126 +++ samples/js/audio-transcription-example/app.js | 30 +- .../chat-and-audio-foundry-local/package.json | 2 +- .../js/copilot-sdk-foundry-local/package.json | 2 +- .../js/copilot-sdk-foundry-local/src/app.ts | 13 +- .../src/tool-calling.ts | 13 +- .../js/electron-chat-application/package.json | 1 + .../js/langchain-integration-example/app.js | 28 +- samples/js/local-cag/README.md | 125 +++ .../js/local-cag/docs/emergency-shutdown.md | 40 + .../js/local-cag/docs/gas-leak-detection.md | 44 ++ samples/js/local-cag/docs/ppe-requirements.md | 54 ++ samples/js/local-cag/docs/pressure-testing.md | 51 ++ samples/js/local-cag/docs/valve-inspection.md | 44 ++ samples/js/local-cag/package.json | 27 + samples/js/local-cag/public/index.html | 724 ++++++++++++++++++ samples/js/local-cag/src/chatEngine.js | 222 ++++++ 
samples/js/local-cag/src/config.js | 35 + samples/js/local-cag/src/context.js | 301 ++++++++ samples/js/local-cag/src/modelSelector.js | 115 +++ samples/js/local-cag/src/prompts.js | 44 ++ samples/js/local-cag/src/server.js | 186 +++++ samples/js/local-rag/README.md | 143 ++++ .../js/local-rag/docs/emergency-shutdown.md | 53 ++ .../js/local-rag/docs/gas-leak-detection.md | 46 ++ samples/js/local-rag/docs/ppe-requirements.md | 50 ++ samples/js/local-rag/docs/pressure-testing.md | 52 ++ samples/js/local-rag/docs/valve-inspection.md | 59 ++ samples/js/local-rag/package.json | 31 + samples/js/local-rag/public/index.html | 698 +++++++++++++++++ samples/js/local-rag/src/chatEngine.js | 228 ++++++ samples/js/local-rag/src/chunker.js | 74 ++ samples/js/local-rag/src/config.js | 25 + samples/js/local-rag/src/ingest.js | 65 ++ samples/js/local-rag/src/prompts.js | 44 ++ samples/js/local-rag/src/server.js | 230 ++++++ samples/js/local-rag/src/vectorStore.js | 145 ++++ samples/js/native-chat-completions/app.js | 34 +- .../js/tool-calling-foundry-local/src/app.js | 13 +- samples/js/web-server-example/app.js | 28 +- samples/python/agent-framework/.env.example | 8 + samples/python/agent-framework/README.md | 132 ++++ .../data/agent_framework_guide.md | 54 ++ .../data/foundry_local_overview.md | 42 + .../data/orchestration_patterns.md | 67 ++ samples/python/agent-framework/pyproject.toml | 24 + .../python/agent-framework/requirements.txt | 6 + .../agent-framework/src/app/__init__.py | 1 + .../agent-framework/src/app/__main__.py | 85 ++ .../python/agent-framework/src/app/agents.py | 116 +++ .../agent-framework/src/app/demos/__init__.py | 20 + .../src/app/demos/code_reviewer.py | 228 ++++++ .../src/app/demos/math_agent.py | 201 +++++ .../src/app/demos/multi_agent_debate.py | 189 +++++ .../agent-framework/src/app/demos/registry.py | 42 + .../src/app/demos/sentiment_analyzer.py | 246 ++++++ .../src/app/demos/weather_tools.py | 177 +++++ .../agent-framework/src/app/documents.py | 89 
+++ .../agent-framework/src/app/foundry_boot.py | 76 ++ .../agent-framework/src/app/orchestrator.py | 182 +++++ .../src/app/templates/index.html | 628 +++++++++++++++ .../agent-framework/src/app/tool_demo.py | 97 +++ samples/python/agent-framework/src/app/web.py | 179 +++++ .../agent-framework/tests/test_smoke.py | 88 +++ samples/python/functioncalling/README.md | 16 +- samples/python/functioncalling/fl_tools.ipynb | 51 +- samples/python/hello-foundry-local/README.md | 9 +- .../hello-foundry-local/requirements.txt | 2 + samples/python/hello-foundry-local/src/app.py | 38 +- samples/python/summarize/README.md | 6 + samples/python/summarize/requirements.txt | 2 +- samples/python/summarize/summarize.py | 48 +- samples/rag/README.md | 47 +- samples/rag/rag_foundrylocal_demo.ipynb | 204 +++-- 93 files changed, 8290 insertions(+), 227 deletions(-) create mode 100644 samples/cs/whisper-transcription/Health/FoundryHealthCheck.cs create mode 100644 samples/cs/whisper-transcription/Middleware/ErrorHandlingMiddleware.cs create mode 100644 samples/cs/whisper-transcription/Program.cs create mode 100644 samples/cs/whisper-transcription/README.md create mode 100644 samples/cs/whisper-transcription/Services/FoundryModelService.cs create mode 100644 samples/cs/whisper-transcription/Services/FoundryOptions.cs create mode 100644 samples/cs/whisper-transcription/Services/TranscriptionService.cs create mode 100644 samples/cs/whisper-transcription/WhisperTranscription.csproj create mode 100644 samples/cs/whisper-transcription/appsettings.json create mode 100644 samples/cs/whisper-transcription/nuget.config create mode 100644 samples/cs/whisper-transcription/wwwroot/app.js create mode 100644 samples/cs/whisper-transcription/wwwroot/index.html create mode 100644 samples/cs/whisper-transcription/wwwroot/styles.css create mode 100644 samples/js/local-cag/README.md create mode 100644 samples/js/local-cag/docs/emergency-shutdown.md create mode 100644 
samples/js/local-cag/docs/gas-leak-detection.md create mode 100644 samples/js/local-cag/docs/ppe-requirements.md create mode 100644 samples/js/local-cag/docs/pressure-testing.md create mode 100644 samples/js/local-cag/docs/valve-inspection.md create mode 100644 samples/js/local-cag/package.json create mode 100644 samples/js/local-cag/public/index.html create mode 100644 samples/js/local-cag/src/chatEngine.js create mode 100644 samples/js/local-cag/src/config.js create mode 100644 samples/js/local-cag/src/context.js create mode 100644 samples/js/local-cag/src/modelSelector.js create mode 100644 samples/js/local-cag/src/prompts.js create mode 100644 samples/js/local-cag/src/server.js create mode 100644 samples/js/local-rag/README.md create mode 100644 samples/js/local-rag/docs/emergency-shutdown.md create mode 100644 samples/js/local-rag/docs/gas-leak-detection.md create mode 100644 samples/js/local-rag/docs/ppe-requirements.md create mode 100644 samples/js/local-rag/docs/pressure-testing.md create mode 100644 samples/js/local-rag/docs/valve-inspection.md create mode 100644 samples/js/local-rag/package.json create mode 100644 samples/js/local-rag/public/index.html create mode 100644 samples/js/local-rag/src/chatEngine.js create mode 100644 samples/js/local-rag/src/chunker.js create mode 100644 samples/js/local-rag/src/config.js create mode 100644 samples/js/local-rag/src/ingest.js create mode 100644 samples/js/local-rag/src/prompts.js create mode 100644 samples/js/local-rag/src/server.js create mode 100644 samples/js/local-rag/src/vectorStore.js create mode 100644 samples/python/agent-framework/.env.example create mode 100644 samples/python/agent-framework/README.md create mode 100644 samples/python/agent-framework/data/agent_framework_guide.md create mode 100644 samples/python/agent-framework/data/foundry_local_overview.md create mode 100644 samples/python/agent-framework/data/orchestration_patterns.md create mode 100644 samples/python/agent-framework/pyproject.toml 
create mode 100644 samples/python/agent-framework/requirements.txt create mode 100644 samples/python/agent-framework/src/app/__init__.py create mode 100644 samples/python/agent-framework/src/app/__main__.py create mode 100644 samples/python/agent-framework/src/app/agents.py create mode 100644 samples/python/agent-framework/src/app/demos/__init__.py create mode 100644 samples/python/agent-framework/src/app/demos/code_reviewer.py create mode 100644 samples/python/agent-framework/src/app/demos/math_agent.py create mode 100644 samples/python/agent-framework/src/app/demos/multi_agent_debate.py create mode 100644 samples/python/agent-framework/src/app/demos/registry.py create mode 100644 samples/python/agent-framework/src/app/demos/sentiment_analyzer.py create mode 100644 samples/python/agent-framework/src/app/demos/weather_tools.py create mode 100644 samples/python/agent-framework/src/app/documents.py create mode 100644 samples/python/agent-framework/src/app/foundry_boot.py create mode 100644 samples/python/agent-framework/src/app/orchestrator.py create mode 100644 samples/python/agent-framework/src/app/templates/index.html create mode 100644 samples/python/agent-framework/src/app/tool_demo.py create mode 100644 samples/python/agent-framework/src/app/web.py create mode 100644 samples/python/agent-framework/tests/test_smoke.py create mode 100644 samples/python/hello-foundry-local/requirements.txt diff --git a/SUPPORT.md b/SUPPORT.md index eaf439ae..0b1323a9 100644 --- a/SUPPORT.md +++ b/SUPPORT.md @@ -1,25 +1,14 @@ -# TODO: The maintainer of this repo has not yet edited this file - -**REPO OWNER**: Do you want Customer Service & Support (CSS) support for this product/project? - -- **No CSS support:** Fill out this template with information about how to file issues and get help. -- **Yes CSS support:** Fill out an intake form at [aka.ms/onboardsupport](https://aka.ms/onboardsupport). CSS will work with/help you to determine next steps. 
-- **Not sure?** Fill out an intake as though the answer were "Yes". CSS will help you decide. - -*Then remove this first heading from this SUPPORT.MD file before publishing your repo.* - # Support -## How to file issues and get help +## How to file issues and get help -This project uses GitHub Issues to track bugs and feature requests. Please search the existing -issues before filing new issues to avoid duplicates. For new issues, file your bug or +This project uses GitHub Issues to track bugs and feature requests. Please search the existing +issues before filing new issues to avoid duplicates. For new issues, file your bug or feature request as a new Issue. -For help and questions about using this project, please **REPO MAINTAINER: INSERT INSTRUCTIONS HERE -FOR HOW TO ENGAGE REPO OWNERS OR COMMUNITY FOR HELP. COULD BE A STACK OVERFLOW TAG OR OTHER -CHANNEL. WHERE WILL YOU HELP PEOPLE?**. +For help and questions about using Foundry Local, please refer to the [documentation](docs/README.md) +and the [samples](samples/) in this repository. -## Microsoft Support Policy +## Microsoft Support Policy -Support for this **PROJECT or PRODUCT** is limited to the resources listed above. +Support for Foundry Local is limited to the resources listed above. diff --git a/samples/cs/GettingStarted/src/AudioTranscriptionExample/Program.cs b/samples/cs/GettingStarted/src/AudioTranscriptionExample/Program.cs index be1db5db..0049f999 100644 --- a/samples/cs/GettingStarted/src/AudioTranscriptionExample/Program.cs +++ b/samples/cs/GettingStarted/src/AudioTranscriptionExample/Program.cs @@ -29,21 +29,32 @@ model.SelectVariant(modelVariant); -// Download the model (the method skips download if already cached) -await model.DownloadAsync(progress => +// Download the model (check cache first) +if (!await model.IsCachedAsync()) { - Console.Write($"\rDownloading model: {progress:F2}%"); - if (progress >= 100f) + Console.WriteLine($"Model \"{model.Id}\" not found in cache. 
Downloading..."); + await model.DownloadAsync(progress => { - Console.WriteLine(); - } -}); + var filled = (int)Math.Round(progress / 100.0 * 30); + var bar = new string('\u2588', filled) + new string('\u2591', 30 - filled); + Console.Write($"\rDownloading: [{bar}] {progress:F1}%"); + if (progress >= 100f) + { + Console.WriteLine(); + } + }); + Console.WriteLine("\u2713 Model downloaded"); +} +else +{ + Console.WriteLine($"\u2713 Model \"{model.Id}\" already cached \u2014 skipping download"); +} -// Load the model +// Load the model into memory Console.Write($"Loading model {model.Id}..."); await model.LoadAsync(); -Console.WriteLine("done."); +Console.WriteLine("done. \u2713 Model ready"); // Get a chat client diff --git a/samples/cs/GettingStarted/src/FoundryLocalWebServer/Program.cs b/samples/cs/GettingStarted/src/FoundryLocalWebServer/Program.cs index f50ac1b0..187feaf3 100644 --- a/samples/cs/GettingStarted/src/FoundryLocalWebServer/Program.cs +++ b/samples/cs/GettingStarted/src/FoundryLocalWebServer/Program.cs @@ -31,21 +31,33 @@ // Get a model using an alias var model = await catalog.GetModelAsync("qwen2.5-0.5b") ?? throw new Exception("Model not found"); -// Download the model (the method skips download if already cached) -await model.DownloadAsync(progress => + +// Check cache before downloading — skip download if model is already cached +if (!await model.IsCachedAsync()) { - Console.Write($"\rDownloading model: {progress:F2}%"); - if (progress >= 100f) + Console.WriteLine($"Model \"{model.Id}\" not found in cache. 
Downloading..."); + await model.DownloadAsync(progress => { - Console.WriteLine(); - } -}); + var filled = (int)Math.Round(progress / 100.0 * 30); + var bar = new string('\u2588', filled) + new string('\u2591', 30 - filled); + Console.Write($"\rDownloading: [{bar}] {progress:F1}%"); + if (progress >= 100f) + { + Console.WriteLine(); + } + }); + Console.WriteLine("\u2713 Model downloaded"); +} +else +{ + Console.WriteLine($"\u2713 Model \"{model.Id}\" already cached \u2014 skipping download"); +} -// Load the model +// Load the model into memory Console.Write($"Loading model {model.Id}..."); await model.LoadAsync(); -Console.WriteLine("done."); +Console.WriteLine("done. \u2713 Model ready"); // Start the web service diff --git a/samples/cs/GettingStarted/src/HelloFoundryLocalSdk/Program.cs b/samples/cs/GettingStarted/src/HelloFoundryLocalSdk/Program.cs index 52efe410..a94a8514 100644 --- a/samples/cs/GettingStarted/src/HelloFoundryLocalSdk/Program.cs +++ b/samples/cs/GettingStarted/src/HelloFoundryLocalSdk/Program.cs @@ -29,20 +29,31 @@ // Get a model using an alias. var model = await catalog.GetModelAsync("qwen2.5-0.5b") ?? throw new Exception("Model not found"); -// Download the model (the method skips download if already cached) -await model.DownloadAsync(progress => +// Check cache before downloading — skip download if model is already cached +if (!await model.IsCachedAsync()) { - Console.Write($"\rDownloading model: {progress:F2}%"); - if (progress >= 100f) + Console.WriteLine($"Model \"{model.Id}\" not found in cache. 
Downloading..."); + await model.DownloadAsync(progress => { - Console.WriteLine(); - } -}); + var filled = (int)Math.Round(progress / 100.0 * 30); + var bar = new string('\u2588', filled) + new string('\u2591', 30 - filled); + Console.Write($"\rDownloading: [{bar}] {progress:F1}%"); + if (progress >= 100f) + { + Console.WriteLine(); + } + }); + Console.WriteLine("\u2713 Model downloaded"); +} +else +{ + Console.WriteLine($"\u2713 Model \"{model.Id}\" already cached \u2014 skipping download"); +} -// Load the model +// Load the model into memory Console.Write($"Loading model {model.Id}..."); await model.LoadAsync(); -Console.WriteLine("done."); +Console.WriteLine("done. \u2713 Model ready"); // Get a chat client var chatClient = await model.GetChatClientAsync(); diff --git a/samples/cs/GettingStarted/src/ModelManagementExample/Program.cs b/samples/cs/GettingStarted/src/ModelManagementExample/Program.cs index 2b6fe2e8..cca66c13 100644 --- a/samples/cs/GettingStarted/src/ModelManagementExample/Program.cs +++ b/samples/cs/GettingStarted/src/ModelManagementExample/Program.cs @@ -89,18 +89,31 @@ model.SelectVariant(modelVariant); -// Download the model (the method skips download if already cached) -await model.DownloadAsync(progress => +// Download the model (check cache first) +if (!await model.IsCachedAsync()) { - Console.Write($"\rDownloading model: {progress:F2}%"); - if (progress >= 100f) + Console.WriteLine($"Model \"{model.Id}\" not found in cache. 
Downloading..."); + await model.DownloadAsync(progress => { - Console.WriteLine(); - } -}); + var filled = (int)Math.Round(progress / 100.0 * 30); + var bar = new string('\u2588', filled) + new string('\u2591', 30 - filled); + Console.Write($"\rDownloading: [{bar}] {progress:F1}%"); + if (progress >= 100f) + { + Console.WriteLine(); + } + }); + Console.WriteLine("\u2713 Model downloaded"); +} +else +{ + Console.WriteLine($"\u2713 Model \"{model.Id}\" already cached \u2014 skipping download"); +} -// Load the model +// Load the model into memory +Console.Write($"Loading model {model.Id}..."); await model.LoadAsync(); +Console.WriteLine("done. \u2713 Model ready"); // List loaded models (i.e. in memory) from the catalog diff --git a/samples/cs/GettingStarted/src/ToolCallingFoundryLocalSdk/Program.cs b/samples/cs/GettingStarted/src/ToolCallingFoundryLocalSdk/Program.cs index 3cdf3d38..eca12824 100644 --- a/samples/cs/GettingStarted/src/ToolCallingFoundryLocalSdk/Program.cs +++ b/samples/cs/GettingStarted/src/ToolCallingFoundryLocalSdk/Program.cs @@ -32,22 +32,32 @@ // Get a model using an alias. var model = await catalog.GetModelAsync("qwen2.5-0.5b") ?? throw new Exception("Model not found"); - -// Download the model (the method skips download if already cached) -await model.DownloadAsync(progress => +// Check cache before downloading — skip download if model is already cached +if (!await model.IsCachedAsync()) { - Console.Write($"\rDownloading model: {progress:F2}%"); - if (progress >= 100f) + Console.WriteLine($"Model \"{model.Id}\" not found in cache. 
Downloading..."); + await model.DownloadAsync(progress => { - Console.WriteLine(); - } -}); + var filled = (int)Math.Round(progress / 100.0 * 30); + var bar = new string('\u2588', filled) + new string('\u2591', 30 - filled); + Console.Write($"\rDownloading: [{bar}] {progress:F1}%"); + if (progress >= 100f) + { + Console.WriteLine(); + } + }); + Console.WriteLine("\u2713 Model downloaded"); +} +else +{ + Console.WriteLine($"\u2713 Model \"{model.Id}\" already cached \u2014 skipping download"); +} -// Load the model +// Load the model into memory Console.Write($"Loading model {model.Id}..."); await model.LoadAsync(); -Console.WriteLine("done."); +Console.WriteLine("done. \u2713 Model ready"); // Get a chat client diff --git a/samples/cs/GettingStarted/src/ToolCallingFoundryLocalWebServer/Program.cs b/samples/cs/GettingStarted/src/ToolCallingFoundryLocalWebServer/Program.cs index 6d6937fd..8882847a 100644 --- a/samples/cs/GettingStarted/src/ToolCallingFoundryLocalWebServer/Program.cs +++ b/samples/cs/GettingStarted/src/ToolCallingFoundryLocalWebServer/Program.cs @@ -33,21 +33,33 @@ // Get a model using an alias var model = await catalog.GetModelAsync("qwen2.5-0.5b") ?? throw new Exception("Model not found"); -// Download the model (the method skips download if already cached) -await model.DownloadAsync(progress => + +// Check cache before downloading — skip download if model is already cached +if (!await model.IsCachedAsync()) { - Console.Write($"\rDownloading model: {progress:F2}%"); - if (progress >= 100f) + Console.WriteLine($"Model \"{model.Id}\" not found in cache. 
Downloading..."); + await model.DownloadAsync(progress => { - Console.WriteLine(); - } -}); + var filled = (int)Math.Round(progress / 100.0 * 30); + var bar = new string('\u2588', filled) + new string('\u2591', 30 - filled); + Console.Write($"\rDownloading: [{bar}] {progress:F1}%"); + if (progress >= 100f) + { + Console.WriteLine(); + } + }); + Console.WriteLine("\u2713 Model downloaded"); +} +else +{ + Console.WriteLine($"\u2713 Model \"{model.Id}\" already cached \u2014 skipping download"); +} -// Load the model +// Load the model into memory Console.Write($"Loading model {model.Id}..."); await model.LoadAsync(); -Console.WriteLine("done."); +Console.WriteLine("done. \u2713 Model ready"); // Start the web service diff --git a/samples/cs/whisper-transcription/Health/FoundryHealthCheck.cs b/samples/cs/whisper-transcription/Health/FoundryHealthCheck.cs new file mode 100644 index 00000000..dc53f5ec --- /dev/null +++ b/samples/cs/whisper-transcription/Health/FoundryHealthCheck.cs @@ -0,0 +1,28 @@ +using Microsoft.Extensions.Diagnostics.HealthChecks; + +namespace WhisperTranscription; + +public class FoundryHealthCheck : IHealthCheck +{ + private readonly FoundryModelService _modelService; + + public FoundryHealthCheck(FoundryModelService modelService) + { + _modelService = modelService; + } + + public async Task CheckHealthAsync( + HealthCheckContext context, + CancellationToken cancellationToken = default) + { + try + { + var model = await _modelService.GetModelAsync(); + return HealthCheckResult.Healthy($"Model available: {model.Id}"); + } + catch (Exception ex) + { + return HealthCheckResult.Unhealthy("Foundry Local unavailable", ex); + } + } +} diff --git a/samples/cs/whisper-transcription/Middleware/ErrorHandlingMiddleware.cs b/samples/cs/whisper-transcription/Middleware/ErrorHandlingMiddleware.cs new file mode 100644 index 00000000..e44fd2e9 --- /dev/null +++ b/samples/cs/whisper-transcription/Middleware/ErrorHandlingMiddleware.cs @@ -0,0 +1,32 @@ +using 
System.Net; +using System.Text.Json; + +namespace WhisperTranscription; + +public class ErrorHandlingMiddleware +{ + private readonly RequestDelegate _next; + private readonly ILogger _logger; + + public ErrorHandlingMiddleware(RequestDelegate next, ILogger logger) + { + _next = next; + _logger = logger; + } + + public async Task InvokeAsync(HttpContext context) + { + try + { + await _next(context); + } + catch (Exception ex) + { + _logger.LogError(ex, "Unhandled exception"); + context.Response.StatusCode = (int)HttpStatusCode.InternalServerError; + context.Response.ContentType = "application/json"; + var payload = JsonSerializer.Serialize(new { error = ex.Message }); + await context.Response.WriteAsync(payload); + } + } +} diff --git a/samples/cs/whisper-transcription/Program.cs b/samples/cs/whisper-transcription/Program.cs new file mode 100644 index 00000000..f985f26c --- /dev/null +++ b/samples/cs/whisper-transcription/Program.cs @@ -0,0 +1,98 @@ +using Microsoft.AspNetCore.Mvc; +using WhisperTranscription; + +var builder = WebApplication.CreateBuilder(args); + +builder.Services.Configure( + builder.Configuration.GetSection(FoundryOptions.SectionName)); +builder.Services.AddSingleton(); +builder.Services.AddSingleton(); +builder.Services.AddHealthChecks() + .AddCheck("foundry"); +builder.Services.AddEndpointsApiExplorer(); +builder.Services.AddSwaggerGen(); + +builder.Services.ConfigureHttpJsonOptions(options => +{ + options.SerializerOptions.WriteIndented = true; + options.SerializerOptions.PropertyNamingPolicy = System.Text.Json.JsonNamingPolicy.CamelCase; +}); + +var app = builder.Build(); + +app.UseMiddleware(); +app.UseDefaultFiles(); +app.UseStaticFiles(); + +if (app.Environment.IsDevelopment()) +{ + app.UseSwagger(); + app.UseSwaggerUI(); +} + +app.MapHealthChecks("/health"); + +app.MapGet("/api/health/status", async ([FromServices] FoundryModelService modelService) => +{ + try + { + var model = await modelService.GetModelAsync(); + var isCached = await 
model.IsCachedAsync(); + return Results.Ok(new + { + status = "Healthy", + model = model.Id, + cached = isCached, + }); + } + catch (Exception ex) + { + return Results.Ok(new + { + status = "Degraded", + error = ex.Message, + }); + } +}).WithName("GetHealthStatus"); + +app.MapPost("/v1/audio/transcriptions", async ( + [FromServices] TranscriptionService svc, + [FromForm] IFormFile file, + [FromForm] string? model, + [FromForm] string? format) => +{ + if (file is null || file.Length == 0) + { + return Results.BadRequest(new { error = "No audio file provided" }); + } + + // Save upload to temp file + var tmp = Path.Combine(Path.GetTempPath(), Guid.NewGuid() + Path.GetExtension(file.FileName)); + await using (var fs = File.Create(tmp)) + { + await file.CopyToAsync(fs); + } + + try + { + var result = await svc.TranscribeAsync(tmp, model); + var outputFormat = format?.ToLowerInvariant() ?? "text"; + return outputFormat switch + { + "json" => Results.Ok(new { text = result.Text, model = result.ModelId }), + _ => Results.Text(result.Text, "text/plain"), + }; + } + finally + { + try { File.Delete(tmp); } catch { /* cleanup best-effort */ } + } +}).WithName("TranscribeAudio") + .DisableAntiforgery() + .Produces(200) + .ProducesProblem(400) + .ProducesProblem(500); + +app.MapFallbackToFile("index.html"); + +app.Run(); diff --git a/samples/cs/whisper-transcription/README.md b/samples/cs/whisper-transcription/README.md new file mode 100644 index 00000000..3d13b434 --- /dev/null +++ b/samples/cs/whisper-transcription/README.md @@ -0,0 +1,112 @@ +# Whisper Transcription — Foundry Local + +An on-device audio transcription web application powered by [Foundry Local](https://foundrylocal.ai) and OpenAI Whisper models. All processing runs locally — no audio data leaves your machine. + +Based on the [FLWhisper](https://github.com/leestott/FLWhisper) project. 
+ +## Features + +- **100% local processing** — audio never leaves your device +- **Streaming transcription** — uses the Foundry Local SDK streaming API for real-time output +- **Web UI** — drag-and-drop or file picker with audio preview +- **REST API** — OpenAI-compatible `/v1/audio/transcriptions` endpoint +- **Health checks** — built-in health endpoint for monitoring +- **Cache-aware** — skips download when the model is already cached + +## Prerequisites + +- **Windows 10/11** (ARM64 or x64) +- **.NET 9 SDK** — [Download here](https://dotnet.microsoft.com/download/dotnet/9.0) +- **Foundry Local** — installed and on PATH + +## Quick Start + +```bash +cd samples/cs/whisper-transcription + +# Restore and run +dotnet restore +dotnet run +``` + +Open **http://localhost:5000** (or the port shown in console output). + +On first launch, Foundry Local will download the Whisper model if it is not already cached. Subsequent launches will be near-instant. + +## Project Structure + +``` +whisper-transcription/ +├── Program.cs # ASP.NET Core Minimal API entry point +├── Health/ +│ └── FoundryHealthCheck.cs # Health check implementation +├── Middleware/ +│ └── ErrorHandlingMiddleware.cs # Global error handler +├── Services/ +│ ├── FoundryOptions.cs # Configuration options +│ ├── FoundryModelService.cs # Model management (cache check, download, load) +│ └── TranscriptionService.cs # Audio transcription via streaming API +├── wwwroot/ +│ ├── index.html # Web UI +│ ├── app.js # Client-side logic +│ └── styles.css # Styling +├── appsettings.json # Configuration +├── nuget.config # NuGet package sources +├── WhisperTranscription.csproj # Project file +└── README.md +``` + +## API Endpoints + +| Method | Path | Description | +|--------|------|-------------| +| GET | `/health` | ASP.NET Core health check | +| GET | `/api/health/status` | Model status with cache info | +| POST | `/v1/audio/transcriptions` | Transcribe audio (OpenAI compatible) | +| GET | `/swagger` | Interactive API 
docs (dev mode) | + +### Transcription Request + +``` +POST /v1/audio/transcriptions +Content-Type: multipart/form-data +``` + +Parameters: +- `file` (required) — audio file (WAV, MP3, M4A, etc.) +- `model` (optional) — model alias (default: from config) +- `format` (optional) — `text` (default) or `json` + +## Configuration + +Edit `appsettings.json`: + +```json +{ + "Foundry": { + "ModelAlias": "whisper-tiny", + "LogLevel": "Information" + } +} +``` + +Override via environment variable: `Foundry__ModelAlias=whisper-medium` + +## How It Works + +1. **Bootstrap** — `FoundryModelService` initializes the Foundry Local runtime and registers execution providers. +2. **Model resolution** — the configured model alias is resolved from the catalog. +3. **Cache check** — `IsCachedAsync()` skips download when the model is already on disk. +4. **Download** — if not cached, the model is downloaded with progress logging. +5. **Load** — the CPU variant is selected and loaded into the inference engine. +6. **Transcription** — audio is transcribed using `TranscribeAudioStreamingAsync()` for streaming output. +7. **Response** — the full transcription text is returned as plain text or JSON. + +## Related Samples + +- [AudioTranscriptionExample](../GettingStarted/src/AudioTranscriptionExample/) — console-based Whisper transcription +- [FLWhisper](https://github.com/leestott/FLWhisper) — full-featured medical transcription app + +## License + +This sample is provided under the [MIT License](../../../LICENSE). 
diff --git a/samples/cs/whisper-transcription/Services/FoundryModelService.cs b/samples/cs/whisper-transcription/Services/FoundryModelService.cs new file mode 100644 index 00000000..97f34bbe --- /dev/null +++ b/samples/cs/whisper-transcription/Services/FoundryModelService.cs @@ -0,0 +1,89 @@ +using Microsoft.AI.Foundry.Local; +using Microsoft.Extensions.Options; + +namespace WhisperTranscription; + +public class FoundryModelService +{ + private readonly ILogger _logger; + private readonly ILoggerFactory _loggerFactory; + private readonly FoundryOptions _options; + private bool _initialized; + + public FoundryModelService( + IOptions options, + ILogger logger, + ILoggerFactory loggerFactory) + { + _logger = logger; + _loggerFactory = loggerFactory; + _options = options.Value; + } + + public async Task InitializeAsync() + { + if (_initialized) return; + + _logger.LogInformation("Initializing Foundry Local Manager"); + var config = new Configuration + { + AppName = "WhisperTranscription", + LogLevel = Enum.TryParse( + _options.LogLevel, true, out var lvl) + ? lvl + : Microsoft.AI.Foundry.Local.LogLevel.Information, + }; + + await FoundryLocalManager.CreateAsync(config, _loggerFactory.CreateLogger("FoundryLocal")); + var mgr = FoundryLocalManager.Instance; + await mgr.EnsureEpsDownloadedAsync(); + _initialized = true; + } + + public async Task GetModelAsync(string? aliasOrId = null) + { + await InitializeAsync(); + var mgr = FoundryLocalManager.Instance; + var catalog = await mgr.GetCatalogAsync() + ?? throw new InvalidOperationException("Failed to get model catalog"); + + var alias = string.IsNullOrWhiteSpace(aliasOrId) ? _options.ModelAlias : aliasOrId; + var model = await catalog.GetModelAsync(alias) + ?? 
throw new InvalidOperationException($"Model '{alias}' not found in catalog"); + + return model; + } + + public async Task EnsureModelReadyAsync(Model model) + { + // Prefer CPU variant + var cpuVariant = model.Variants.FirstOrDefault( + v => v.Info.Runtime?.DeviceType == DeviceType.CPU); + if (cpuVariant != null) + { + model.SelectVariant(cpuVariant); + } + + // Check cache and download if needed + if (!await model.IsCachedAsync()) + { + _logger.LogInformation("Model \"{ModelId}\" not cached — downloading...", model.Id); + await model.DownloadAsync(progress => + { + if (progress % 10 == 0) + { + _logger.LogInformation("Download progress: {Progress:F0}%", progress); + } + }); + _logger.LogInformation("Model downloaded"); + } + else + { + _logger.LogInformation("Model \"{ModelId}\" already cached", model.Id); + } + + _logger.LogInformation("Loading model \"{ModelId}\"...", model.Id); + await model.LoadAsync(); + _logger.LogInformation("Model loaded and ready"); + } +} diff --git a/samples/cs/whisper-transcription/Services/FoundryOptions.cs b/samples/cs/whisper-transcription/Services/FoundryOptions.cs new file mode 100644 index 00000000..9fb875fb --- /dev/null +++ b/samples/cs/whisper-transcription/Services/FoundryOptions.cs @@ -0,0 +1,8 @@ +namespace WhisperTranscription; + +public class FoundryOptions +{ + public const string SectionName = "Foundry"; + public string ModelAlias { get; set; } = "whisper-tiny"; + public string LogLevel { get; set; } = "Information"; +} diff --git a/samples/cs/whisper-transcription/Services/TranscriptionService.cs b/samples/cs/whisper-transcription/Services/TranscriptionService.cs new file mode 100644 index 00000000..bef7001e --- /dev/null +++ b/samples/cs/whisper-transcription/Services/TranscriptionService.cs @@ -0,0 +1,54 @@ +using Microsoft.AI.Foundry.Local; + +namespace WhisperTranscription; + +public class TranscriptionService +{ + private readonly FoundryModelService _modelService; + private readonly ILogger _logger; + + public 
TranscriptionService( + FoundryModelService modelService, + ILogger logger) + { + _modelService = modelService; + _logger = logger; + } + + public async Task TranscribeAsync(string filePath, string? modelAlias = null) + { + var model = await _modelService.GetModelAsync(modelAlias); + await _modelService.EnsureModelReadyAsync(model); + + var audioClient = await model.GetAudioClientAsync() + ?? throw new InvalidOperationException("Failed to get audio client"); + + _logger.LogInformation("Transcribing \"{FilePath}\" with model {ModelId}", filePath, model.Id); + + // Use streaming transcription for real-time output + var textParts = new List(); + var response = audioClient.TranscribeAudioStreamingAsync(filePath, CancellationToken.None); + await foreach (var chunk in response) + { + if (!string.IsNullOrEmpty(chunk.Text)) + { + textParts.Add(chunk.Text); + } + } + + var fullText = string.Join("", textParts); + _logger.LogInformation("Transcription complete: {Length} characters", fullText.Length); + + return new TranscriptionResult + { + Text = fullText, + ModelId = model.Id, + }; + } +} + +public class TranscriptionResult +{ + public string Text { get; set; } = ""; + public string ModelId { get; set; } = ""; +} diff --git a/samples/cs/whisper-transcription/WhisperTranscription.csproj b/samples/cs/whisper-transcription/WhisperTranscription.csproj new file mode 100644 index 00000000..f6d1d553 --- /dev/null +++ b/samples/cs/whisper-transcription/WhisperTranscription.csproj @@ -0,0 +1,22 @@ + + + + net9.0-windows10.0.26100 + enable + enable + ARM64;x64 + false + None + false + + + + $(NETCoreSdkRuntimeIdentifier) + + + + + + + + diff --git a/samples/cs/whisper-transcription/appsettings.json b/samples/cs/whisper-transcription/appsettings.json new file mode 100644 index 00000000..63b756a1 --- /dev/null +++ b/samples/cs/whisper-transcription/appsettings.json @@ -0,0 +1,14 @@ +{ + "Foundry": { + "ModelAlias": "whisper-tiny", + "LogLevel": "Information" + }, + "Logging": { + 
"LogLevel": { + "Default": "Information", + "Microsoft": "Warning", + "Microsoft.Hosting.Lifetime": "Information" + } + }, + "AllowedHosts": "*" +} diff --git a/samples/cs/whisper-transcription/nuget.config b/samples/cs/whisper-transcription/nuget.config new file mode 100644 index 00000000..5ae1c6b2 --- /dev/null +++ b/samples/cs/whisper-transcription/nuget.config @@ -0,0 +1,16 @@ + + + + + + + + + + + + + + + + diff --git a/samples/cs/whisper-transcription/wwwroot/app.js b/samples/cs/whisper-transcription/wwwroot/app.js new file mode 100644 index 00000000..7c2bbf81 --- /dev/null +++ b/samples/cs/whisper-transcription/wwwroot/app.js @@ -0,0 +1,124 @@ +const state = { file: null, fileName: null }; + +function bindUpload() { + const fileInput = document.getElementById('fileInput'); + const chooseBtn = document.getElementById('chooseFileBtn'); + const dropzone = document.getElementById('dropzone'); + + chooseBtn.addEventListener('click', () => fileInput.click()); + + fileInput.addEventListener('change', (e) => { + const file = e.target.files[0]; + if (file) loadFile(file); + }); + + dropzone.addEventListener('dragover', (e) => { e.preventDefault(); dropzone.classList.add('drag-over'); }); + dropzone.addEventListener('dragleave', () => dropzone.classList.remove('drag-over')); + dropzone.addEventListener('drop', (e) => { + e.preventDefault(); + dropzone.classList.remove('drag-over'); + const file = e.dataTransfer.files[0]; + if (file) loadFile(file); + }); +} + +function loadFile(file) { + state.file = file; + state.fileName = file.name; + + const preview = document.getElementById('previewSection'); + const nameEl = document.getElementById('fileName'); + const player = document.getElementById('audioPlayer'); + + nameEl.textContent = `${file.name} (${(file.size / 1024).toFixed(1)} KB)`; + player.src = URL.createObjectURL(file); + player.load(); + preview.style.display = 'block'; + document.getElementById('transcribeBtn').disabled = false; +} + +async function 
transcribe() { + const statusEl = document.getElementById('transcribeStatus'); + const btn = document.getElementById('transcribeBtn'); + + if (!state.file) { + statusEl.textContent = 'Please select an audio file first.'; + statusEl.classList.add('error'); + return; + } + + statusEl.textContent = 'Transcribing\u2026'; + statusEl.classList.remove('error'); + btn.disabled = true; + + const format = document.getElementById('formatSelect').value; + const form = new FormData(); + form.append('file', state.file, state.fileName); + form.append('format', format); + + try { + const res = await fetch('/v1/audio/transcriptions', { method: 'POST', body: form }); + if (!res.ok) { + const txt = await res.text(); + throw new Error(txt || `HTTP ${res.status}`); + } + + if (format === 'json') { + const data = await res.json(); + renderResult(JSON.stringify(data, null, 2)); + } else { + const text = await res.text(); + renderResult(text); + } + statusEl.textContent = 'Done — transcription complete.'; + } catch (err) { + statusEl.textContent = `Error: ${err.message}`; + statusEl.classList.add('error'); + } finally { + btn.disabled = false; + } +} + +function renderResult(text) { + const resultEl = document.getElementById('resultText'); + const copyBtn = document.getElementById('copyBtn'); + resultEl.textContent = text; + copyBtn.style.display = 'inline-block'; +} + +function setupCopyButton() { + const copyBtn = document.getElementById('copyBtn'); + const resultEl = document.getElementById('resultText'); + copyBtn.addEventListener('click', async () => { + try { + await navigator.clipboard.writeText(resultEl.textContent); + const orig = copyBtn.textContent; + copyBtn.textContent = 'Copied!'; + copyBtn.classList.add('success'); + setTimeout(() => { copyBtn.textContent = orig; copyBtn.classList.remove('success'); }, 2000); + } catch { alert('Failed to copy'); } + }); +} + +async function checkHealth() { + try { + const res = await fetch('/api/health/status'); + if (res.ok) { + const data 
= await res.json(); + document.getElementById('stat-status').textContent = data.status || 'Unknown'; + document.getElementById('stat-model').textContent = data.model || '—'; + document.getElementById('stat-cached').textContent = data.cached ? 'Yes' : 'No'; + } else { + document.getElementById('stat-status').textContent = 'Degraded'; + } + } catch { + document.getElementById('stat-status').textContent = 'Offline'; + } +} + +document.addEventListener('DOMContentLoaded', () => { + bindUpload(); + setupCopyButton(); + document.getElementById('transcribeBtn').addEventListener('click', transcribe); + checkHealth(); +}); diff --git a/samples/cs/whisper-transcription/wwwroot/index.html b/samples/cs/whisper-transcription/wwwroot/index.html new file mode 100644 index 00000000..df66b2e2 --- /dev/null +++ b/samples/cs/whisper-transcription/wwwroot/index.html @@ -0,0 +1,73 @@ + + + + + + Whisper Transcription — Foundry Local + + + +
+
+

Whisper Transcription

+

On-device audio transcription powered by Foundry Local + OpenAI Whisper.

+
+
+ + Model +
+
+ + Service +
+
+ + Cached +
+
+
+
+
+
+
+ +
+
+

Upload Audio

+
+ + +

Drag & drop or pick a WAV, MP3, or M4A file.

+
+ +
+

Options

+
+ + +
+
+ +
+
+ +
+
+

Transcription Result

+ +
+

+    
+
+ +
Whisper Transcription · Foundry Local — all processing on device.
+ + + diff --git a/samples/cs/whisper-transcription/wwwroot/styles.css b/samples/cs/whisper-transcription/wwwroot/styles.css new file mode 100644 index 00000000..ab6224df --- /dev/null +++ b/samples/cs/whisper-transcription/wwwroot/styles.css @@ -0,0 +1,126 @@ +:root { + --bg: #f5fbff; + --card: #ffffff; + --border: #d6e4f0; + --accent: #2d8cff; + --accent-dark: #1b6dd8; + --teal: #16a6b6; + --text: #1d2a35; + --muted: #6c7a89; + --shadow: 0 20px 45px rgba(37, 99, 235, 0.12); +} + +body { + margin: 0; + font-family: 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif; + background: var(--bg); + color: var(--text); +} + +.hero { + display: flex; + justify-content: space-between; + align-items: center; + padding: 40px 60px; + background: linear-gradient(135deg, #2193b0, #6dd5ed); + color: #fff; + box-shadow: var(--shadow); +} + +.hero-content h1 { margin: 0 0 8px; font-size: 2.5rem; } +.hero-content p { margin: 0; opacity: 0.9; } + +.hero-stats { display: flex; gap: 24px; margin-top: 24px; } +.stat { display: flex; flex-direction: column; } +.stat-number { font-size: 1.4rem; font-weight: 600; } +.stat-label { font-size: 0.8rem; color: rgba(255,255,255,0.7); } + +.hero-illustration { position: relative; width: 120px; height: 60px; } +.waveform { + width: 100%; height: 4px; background: #fff; border-radius: 2px; + position: absolute; top: 50%; + animation: pulse 2s infinite; +} +@keyframes pulse { + 0% { box-shadow: 0 0 0 0 rgba(255,255,255,0.6); } + 70% { box-shadow: 0 0 0 10px rgba(255,255,255,0); } + 100% { box-shadow: 0 0 0 0 rgba(255,255,255,0); } +} + +.main { + display: grid; + grid-template-columns: 1fr 1fr; + gap: 24px; + padding: 32px 48px 60px; +} + +.panel { + background: var(--card); + border-radius: 16px; + padding: 24px; + box-shadow: var(--shadow); + border: 1px solid var(--border); + min-height: 360px; +} + +.upload-area { + border: 2px dashed var(--accent); + border-radius: 12px; + padding: 24px; + text-align: center; + margin-bottom: 20px; + 
background: rgba(45,140,255,0.05); + transition: background 0.2s; +} +.upload-area.drag-over { background: rgba(45,140,255,0.15); } + +.btn { + padding: 10px 18px; border: none; border-radius: 8px; + cursor: pointer; font-weight: 600; +} +.btn.primary { background: var(--accent); color: #fff; } +.btn.accent { background: var(--teal); color: #fff; } +.btn.secondary { background: var(--border); color: var(--text); transition: all 0.2s; } +.btn.secondary:hover { background: var(--accent); color: #fff; } +.btn.secondary.success { background: #10b981; color: #fff; } +.btn:disabled { opacity: 0.5; cursor: not-allowed; } + +.hint { color: var(--muted); font-size: 0.9rem; } +.file-name { font-weight: 600; margin: 0 0 8px; } + +.preview { margin-top: 16px; } +.preview audio { width: 100%; } + +.transcription-options { + margin-top: 16px; padding: 16px; background: #f9fbff; + border-radius: 8px; border: 1px solid var(--border); +} +.transcription-options h3 { margin: 0 0 12px; font-size: 1rem; } +.option-group { margin-bottom: 12px; } +.format-select { + width: 100%; padding: 10px 12px; margin-top: 6px; + border: 2px solid var(--border); border-radius: 8px; + background: var(--card); color: var(--text); font-size: 0.95rem; + cursor: pointer; +} +.format-select:focus { outline: none; border-color: var(--accent); box-shadow: 0 0 0 3px rgba(45,140,255,0.1); } + +.status { margin-top: 12px; font-size: 0.9rem; color: var(--muted); } +.status.error { color: #ef4444; } + +.result-panel { display: flex; flex-direction: column; } +.result-header { display: flex; justify-content: space-between; align-items: center; margin-bottom: 16px; } +.result-header h2 { margin: 0; } +.result-text { + flex: 1; background: #f9fbff; border-radius: 12px; padding: 16px; + overflow: auto; max-height: 400px; border: 1px solid var(--border); + white-space: pre-wrap; word-break: break-word; +} + +.footer { text-align: center; padding: 16px 0 24px; color: var(--muted); font-size: 0.9rem; } + +@media 
(max-width: 960px) { + .main { grid-template-columns: 1fr; } + .hero { flex-direction: column; text-align: center; padding: 24px; } + .hero-stats { justify-content: center; } +} diff --git a/samples/js/audio-transcription-example/app.js b/samples/js/audio-transcription-example/app.js index fe441d1b..26395dbf 100644 --- a/samples/js/audio-transcription-example/app.js +++ b/samples/js/audio-transcription-example/app.js @@ -10,21 +10,29 @@ const manager = FoundryLocalManager.create({ console.log('✓ SDK initialized successfully'); // Get the model object -const modelAlias = 'whisper-tiny'; // Using an available model from the list above +const modelAlias = 'whisper-tiny'; let model = await manager.catalog.getModel(modelAlias); console.log(`Using model: ${model.id}`); -// Download the model -console.log(`\nDownloading model ${modelAlias}...`); -await model.download((progress) => { - process.stdout.write(`\rDownloading... ${progress.toFixed(2)}%`); -}); -console.log('\n✓ Model downloaded'); - -// Load the model -console.log(`\nLoading model ${modelAlias}...`); +// Check cache before downloading — skip download if model is already cached +if (!model.isCached) { + console.log(`\nModel "${modelAlias}" not found in cache. 
Downloading...`); + await model.download((progress) => { + const barWidth = 30; + const filled = Math.round((progress / 100) * barWidth); + const bar = '█'.repeat(filled) + '░'.repeat(barWidth - filled); + process.stdout.write(`\rDownloading: [${bar}] ${progress.toFixed(1)}%`); + if (progress >= 100) process.stdout.write('\n'); + }); + console.log('✓ Model downloaded'); +} else { + console.log(`\n✓ Model "${modelAlias}" already cached — skipping download`); +} + +// Load the model into memory +console.log(`Loading model ${modelAlias}...`); await model.load(); -console.log('✓ Model loaded'); +console.log('✓ Model loaded and ready'); // Create audio client console.log('\nCreating audio client...'); diff --git a/samples/js/chat-and-audio-foundry-local/package.json b/samples/js/chat-and-audio-foundry-local/package.json index a91ecda3..339d2956 100644 --- a/samples/js/chat-and-audio-foundry-local/package.json +++ b/samples/js/chat-and-audio-foundry-local/package.json @@ -6,6 +6,6 @@ "start": "node src/app.js" }, "dependencies": { - "foundry-local-sdk": "latest" + "foundry-local-sdk": "^0.5.1" } } diff --git a/samples/js/copilot-sdk-foundry-local/package.json b/samples/js/copilot-sdk-foundry-local/package.json index d01a25a9..f427f992 100644 --- a/samples/js/copilot-sdk-foundry-local/package.json +++ b/samples/js/copilot-sdk-foundry-local/package.json @@ -9,7 +9,7 @@ }, "dependencies": { "@github/copilot-sdk": "latest", - "foundry-local-sdk": "latest", + "foundry-local-sdk": "^0.5.1", "zod": "^3.0.0" }, "devDependencies": { diff --git a/samples/js/copilot-sdk-foundry-local/src/app.ts b/samples/js/copilot-sdk-foundry-local/src/app.ts index c7c7966a..459f0096 100644 --- a/samples/js/copilot-sdk-foundry-local/src/app.ts +++ b/samples/js/copilot-sdk-foundry-local/src/app.ts @@ -61,7 +61,18 @@ async function main() { }); model = await manager.catalog.getModel(alias); - await model.download(); + if (!model.isCached) { + console.log(`Model "${alias}" not in cache. 
Downloading...`); + await model.download((progress: number) => { + const barWidth = 30; + const filled = Math.round((progress / 100) * barWidth); + const bar = '\u2588'.repeat(filled) + '\u2591'.repeat(barWidth - filled); + process.stdout.write(`\rDownloading: [${bar}] ${progress.toFixed(1)}%`); + if (progress >= 100) process.stdout.write('\n'); + }); + } else { + console.log(`\u2713 Model "${alias}" already cached \u2014 skipping download`); + } await model.load(); console.log(`Model: ${model.id}`); diff --git a/samples/js/copilot-sdk-foundry-local/src/tool-calling.ts b/samples/js/copilot-sdk-foundry-local/src/tool-calling.ts index 3e41748c..039849cf 100644 --- a/samples/js/copilot-sdk-foundry-local/src/tool-calling.ts +++ b/samples/js/copilot-sdk-foundry-local/src/tool-calling.ts @@ -140,7 +140,18 @@ async function main() { }); model = await manager.catalog.getModel(alias); - await model.download(); + if (!model.isCached) { + console.log(`Model "${alias}" not in cache. Downloading...`); + await model.download((progress: number) => { + const barWidth = 30; + const filled = Math.round((progress / 100) * barWidth); + const bar = '\u2588'.repeat(filled) + '\u2591'.repeat(barWidth - filled); + process.stdout.write(`\rDownloading: [${bar}] ${progress.toFixed(1)}%`); + if (progress >= 100) process.stdout.write('\n'); + }); + } else { + console.log(`\u2713 Model "${alias}" already cached \u2014 skipping download`); + } await model.load(); console.log(`Model: ${model.id}`); diff --git a/samples/js/electron-chat-application/package.json b/samples/js/electron-chat-application/package.json index 29ccd2b7..07dc5282 100644 --- a/samples/js/electron-chat-application/package.json +++ b/samples/js/electron-chat-application/package.json @@ -19,6 +19,7 @@ "electron": "^34.5.8" }, "dependencies": { + "foundry-local-sdk": "^0.5.1", "highlight.js": "^11.11.1", "marked": "^15.0.6" } diff --git a/samples/js/langchain-integration-example/app.js 
b/samples/js/langchain-integration-example/app.js index 94e0afdc..a6c7d980 100644 --- a/samples/js/langchain-integration-example/app.js +++ b/samples/js/langchain-integration-example/app.js @@ -15,20 +15,28 @@ const manager = FoundryLocalManager.create({ console.log('✓ SDK initialized successfully'); // Get the model object -const modelAlias = 'qwen2.5-0.5b'; // Using an available model from the list above +const modelAlias = 'qwen2.5-0.5b'; const model = await manager.catalog.getModel(modelAlias); -// Download the model -console.log(`\nDownloading model ${modelAlias}...`); -await model.download((progress) => { - process.stdout.write(`\rDownloading... ${progress.toFixed(2)}%`); -}); -console.log('\n✓ Model downloaded'); +// Check cache before downloading — skip download if model is already cached +if (!model.isCached) { + console.log(`\nModel "${modelAlias}" not found in cache. Downloading...`); + await model.download((progress) => { + const barWidth = 30; + const filled = Math.round((progress / 100) * barWidth); + const bar = '█'.repeat(filled) + '░'.repeat(barWidth - filled); + process.stdout.write(`\rDownloading: [${bar}] ${progress.toFixed(1)}%`); + if (progress >= 100) process.stdout.write('\n'); + }); + console.log('✓ Model downloaded'); +} else { + console.log(`\n✓ Model "${modelAlias}" already cached — skipping download`); +} -// Load the model -console.log(`\nLoading model ${modelAlias}...`); +// Load the model into memory +console.log(`Loading model ${modelAlias}...`); await model.load(); -console.log('✓ Model loaded'); +console.log('✓ Model loaded and ready'); // Start the web service console.log('\nStarting web service...'); diff --git a/samples/js/local-cag/README.md b/samples/js/local-cag/README.md new file mode 100644 index 00000000..ece03869 --- /dev/null +++ b/samples/js/local-cag/README.md @@ -0,0 +1,125 @@ +# Local CAG – Context-Augmented Generation with Foundry Local + +A fully offline **Context-Augmented Generation (CAG)** sample application 
that runs an AI support agent entirely on-device using [Foundry Local](https://foundrylocal.ai). + +## What is CAG? + +CAG (Context-Augmented Generation) pre-loads **all** domain documents at startup and injects them into the AI prompt — no vector database, no embeddings, no retrieval step. This makes it ideal for: + +- **Offline / air-gapped** environments (e.g., field operations) +- **Small-to-medium knowledge bases** (dozens of documents) +- **Low-latency responses** — no retrieval round-trip +- **Simple deployment** — no external dependencies beyond Foundry Local + +## Architecture + +``` +┌─────────────┐ ┌──────────────────┐ ┌────────────────────┐ +│ Browser UI │───▶│ Express Server │───▶│ Foundry Local SDK │ +│ (index.html)│◀───│ (server.js) │◀───│ (in-process) │ +└─────────────┘ └──────────────────┘ └────────────────────┘ + │ │ + ┌──────┴───────┐ ┌──────┴───────┐ + │ ChatEngine │ │ Model (SLM) │ + │ + Context │ │ qwen / phi │ + └──────────────┘ └──────────────┘ +``` + +1. **Startup**: All markdown documents in `docs/` are loaded into memory. +2. **Model selection**: The SDK auto-selects the best model for the device's RAM. +3. **Query**: Each user question is matched to the most relevant docs via keyword scoring, then injected into the prompt alongside the question. +4. **Inference**: Foundry Local runs the model in-process — no HTTP server needed. + +## Prerequisites + +- **Node.js 20+** +- **Foundry Local** installed — see [foundrylocal.ai](https://foundrylocal.ai) + +## Quick Start + +```bash +# Install dependencies +npm install + +# Start the server +npm start +``` + +Open [http://localhost:3000](http://localhost:3000) in your browser. The UI shows real-time progress as the model loads. 
+ +## Configuration + +Set these environment variables (all optional): + +| Variable | Default | Description | +|----------|---------|-------------| +| `FOUNDRY_MODEL` | *(auto-select)* | Force a specific model alias (e.g., `phi-3.5-mini`) | +| `PORT` | `3000` | Server port | +| `HOST` | `localhost` | Server bind address | + +## Adding Domain Documents + +Place markdown files in the `docs/` folder with YAML front-matter: + +```markdown +--- +title: Your Document Title +category: Safety +id: unique-doc-id +--- + +# Your Document Title + +Content goes here... +``` + +The engine loads all `.md` files at startup and makes them available to the AI. + +## Project Structure + +``` +local-cag/ +├── package.json +├── README.md +├── docs/ # Domain knowledge (markdown with front-matter) +│ ├── gas-leak-detection.md +│ ├── emergency-shutdown.md +│ ├── pressure-testing.md +│ ├── ppe-requirements.md +│ └── valve-inspection.md +├── public/ +│ └── index.html # Web UI with loading overlay + chat +└── src/ + ├── server.js # Express server with SSE status + chat endpoints + ├── chatEngine.js # CAG engine: SDK init, model selection, inference + ├── config.js # Configuration (env vars + defaults) + ├── context.js # Document loading, parsing, keyword-based selection + ├── modelSelector.js # Dynamic model selection based on device RAM + └── prompts.js # System prompts (full + compact/edge mode) +``` + +## Key Features + +- **Dynamic model selection** — automatically picks the best model for the device's available RAM +- **Cache-aware** — skips download if the model is already in the Foundry cache +- **Edge mode** — toggle compact prompts for smaller models or constrained devices +- **SSE progress** — real-time loading status streamed to the browser +- **Keyword-based doc selection** — only the most relevant documents are injected per query +- **No internet required** — fully offline after initial model download + +## How It Differs From RAG + +| Feature | CAG (this sample) | RAG | 
+|---------|-------------------|-----| +| Document loading | All at startup | On-demand retrieval | +| Vector database | Not needed | Required | +| Embeddings | Not needed | Required | +| Latency | Lower (no retrieval) | Higher (search + retrieve) | +| Knowledge base size | Small–medium | Any size | +| Complexity | Simpler | More complex | + +## Learn More + +- [Foundry Local Documentation](https://foundrylocal.ai) +- [Foundry Local SDK (npm)](https://www.npmjs.com/package/foundry-local-sdk) +- [RAG sample](../local-rag/) — for larger knowledge bases with vector retrieval diff --git a/samples/js/local-cag/docs/emergency-shutdown.md b/samples/js/local-cag/docs/emergency-shutdown.md new file mode 100644 index 00000000..71200687 --- /dev/null +++ b/samples/js/local-cag/docs/emergency-shutdown.md @@ -0,0 +1,40 @@ +--- +title: Emergency Shutdown Procedures +category: Safety +id: emergency-shutdown +--- + +# Emergency Shutdown Procedures + +## Safety Warning +**An emergency shutdown (ESD) is a last-resort action.** Only initiate an ESD when there is an immediate threat to life, equipment, or the environment. Follow your site-specific ESD plan. + +## When to Initiate ESD +- Uncontrolled gas release exceeding 50% LEL +- Fire or explosion on site +- Equipment failure causing uncontrolled flow +- Structural collapse near pressurised systems +- Toxic gas detection above STEL (Short-Term Exposure Limit) + +## Procedure +1. **Sound the alarm** — activate the nearest emergency alarm point. +2. **Initiate ESD** — press the Emergency Shutdown button at the control panel. +3. **Isolate** — close the master isolation valve(s) upstream. +4. **Evacuate** — direct all personnel to the designated muster point. +5. **Account** — perform a headcount at the muster point. +6. **Notify** — call emergency services and the site supervisor. +7. **Do NOT re-enter** until the all-clear is given by the Incident Commander. + +## Post-ESD Actions +- Complete the ESD incident report within 4 hours. 
+- Inspect all equipment before restarting. +- Conduct a root-cause analysis within 48 hours. +- Brief all affected personnel before resuming operations. + +## ESD System Components +| Component | Location | Function | +|-----------|----------|----------| +| ESD Push Button | Control room, wellheads | Initiates shutdown sequence | +| Master Isolation Valve | Pipeline inlet | Stops gas flow to facility | +| Blowdown Valve | Process vessels | Depressurises equipment safely | +| Fire & Gas Panel | Control room | Monitors detectors, triggers alarms | diff --git a/samples/js/local-cag/docs/gas-leak-detection.md b/samples/js/local-cag/docs/gas-leak-detection.md new file mode 100644 index 00000000..59f15acd --- /dev/null +++ b/samples/js/local-cag/docs/gas-leak-detection.md @@ -0,0 +1,44 @@ +--- +title: Gas Leak Detection Procedures +category: Safety +id: gas-leak-detection +--- + +# Gas Leak Detection Procedures + +## Safety Warning +**Always assume a gas leak is dangerous until proven otherwise.** Evacuate the area if concentration exceeds 20% LEL (Lower Explosive Limit). Do NOT operate electrical equipment in suspected leak zones. + +## Detection Methods + +### Portable Gas Detector +1. Calibrate detector before each shift using known calibration gas. +2. Turn on and allow 60-second warm-up period. +3. Hold sensor 2–5 cm from suspected leak point. +4. Move slowly along pipe runs, joints, valves, and fittings. +5. Record readings at each test point on the inspection form. + +### Soap Bubble Test +1. Apply leak-detection fluid to joints and connections. +2. Observe for 30 seconds — bubbles indicate a leak. +3. Mark leak location with approved marker tape. +4. Do NOT use soap solution near high-temperature surfaces. + +### Ultrasonic Leak Detection +- Suitable for pressurised systems above 50 psi. +- Point the sensor at suspected areas and listen for high-frequency noise. +- Effective range: up to 15 metres in quiet environments. + +## Response Procedure +1. 
**Isolate** the gas supply upstream of the detected leak. +2. **Ventilate** the area — open doors, windows, or activate forced ventilation. +3. **Evacuate** if concentration exceeds 20% LEL. +4. **Report** the leak to the site supervisor and log in the incident system. +5. **Repair** only after the area is confirmed safe by a gas-free certificate. + +## PPE Requirements +- Flame-resistant clothing (FRC) +- Gas detector (personal monitor) +- Safety glasses with side shields +- Steel-toe boots +- Hard hat diff --git a/samples/js/local-cag/docs/ppe-requirements.md b/samples/js/local-cag/docs/ppe-requirements.md new file mode 100644 index 00000000..95a9b68d --- /dev/null +++ b/samples/js/local-cag/docs/ppe-requirements.md @@ -0,0 +1,54 @@ +--- +title: PPE Requirements for Gas Field Operations +category: Safety +id: ppe-requirements +--- + +# PPE Requirements for Gas Field Operations + +## Mandatory PPE (All Areas) +All personnel entering the operational area must wear: + +- **Hard hat** — ANSI Z89.1 Type I or II +- **Safety glasses** — with side shields, ANSI Z87.1 rated +- **Steel-toe boots** — ASTM F2413 rated, minimum 6-inch height +- **Flame-resistant clothing (FRC)** — minimum ATPV 8 cal/cm² +- **High-visibility vest** — Class 2 or higher +- **Hearing protection** — when noise exceeds 85 dB TWA + +## Additional PPE by Task + +### Gas Leak Investigation +- Personal gas monitor (4-gas: LEL, O₂, CO, H₂S) +- Chemical-resistant gloves +- Respiratory protection (if H₂S or low O₂ risk) + +### Welding Operations +- Welding helmet with auto-darkening lens +- Welding gloves (leather gauntlet) +- Leather apron or welding jacket +- Fire watch equipment + +### Confined Space Entry +- Full-body harness with retrieval line +- Supplied-air breathing apparatus (SABA) or SCBA +- Communication device (intrinsically safe) +- Atmospheric monitoring (continuous) + +### Pressure Testing +- Face shield (in addition to safety glasses) +- Impact-resistant gloves +- Blast shield at test 
points (pneumatic tests) + +## Inspection and Maintenance +- Inspect all PPE before each use. +- Replace damaged PPE immediately — do not field-repair. +- FRC: launder per manufacturer instructions; do not use bleach. +- Gas monitors: bump-test daily; full calibrate per manufacturer schedule. +- Hard hats: replace after any significant impact or per manufacturer expiry. + +## Regulatory References +- OSHA 29 CFR 1910.132 (General PPE requirements) +- OSHA 29 CFR 1910.134 (Respiratory protection) +- NFPA 2112 (Flame-resistant garments) +- API RP 74 (Recommended practice for occupational safety) diff --git a/samples/js/local-cag/docs/pressure-testing.md b/samples/js/local-cag/docs/pressure-testing.md new file mode 100644 index 00000000..2027c67c --- /dev/null +++ b/samples/js/local-cag/docs/pressure-testing.md @@ -0,0 +1,51 @@ +--- +title: Pressure Testing Procedures +category: Maintenance +id: pressure-testing +--- + +# Pressure Testing Procedures + +## Overview +Pressure testing verifies the integrity of pipelines, vessels, and fittings after installation, repair, or modification. Tests must be performed before commissioning or returning equipment to service. + +## Safety Warning +**Never exceed the maximum allowable working pressure (MAWP) of any component in the test section.** Ensure all personnel are at a safe distance during pressurisation. + +## Types of Pressure Tests + +### Hydrostatic Test (Water) +- **Preferred method** for most applications. +- Test medium: clean water (potable or treated). +- Test pressure: 1.5 × MAWP (held for minimum 30 minutes). +- Advantages: incompressible medium, lower stored energy. + +### Pneumatic Test (Air/Nitrogen) +- Used when water is impractical (e.g., sub-zero temperatures). +- Test pressure: 1.1 × MAWP (held for minimum 10 minutes). +- **Higher risk** due to stored energy — requires additional safety precautions. +- Mandatory exclusion zone: minimum 15 metres. + +## Procedure +1. 
**Isolate** the test section with rated blinds or isolation valves. +2. **Inspect** all connections, flanges, and fittings visually. +3. **Install** calibrated pressure gauges at both ends of the test section. +4. **Fill** the section with test medium, venting air from high points. +5. **Pressurise** gradually in 25% increments to test pressure. +6. **Hold** at test pressure for the required duration. +7. **Inspect** for leaks at all joints, welds, and connections. +8. **Record** start pressure, end pressure, temperature, and duration. +9. **Depressurise** gradually and remove test equipment. +10. **Document** results on the pressure test certificate. + +## Acceptance Criteria +- No visible leaks at any point. +- Pressure drop ≤ 1% over the hold period (accounting for temperature). +- All gauges agree within calibration tolerance. + +## Required Equipment +- Calibrated pressure gauges (2 minimum) +- Test pump (manual or powered) +- Isolation blinds or valves +- Bleed valves at high and low points +- Pressure test certificate forms diff --git a/samples/js/local-cag/docs/valve-inspection.md b/samples/js/local-cag/docs/valve-inspection.md new file mode 100644 index 00000000..dfcae228 --- /dev/null +++ b/samples/js/local-cag/docs/valve-inspection.md @@ -0,0 +1,44 @@ +--- +title: Valve Inspection and Maintenance +category: Maintenance +id: valve-inspection +--- + +# Valve Inspection and Maintenance + +## Overview +Regular valve inspection ensures safe, reliable gas flow control. All valves in the system must be inspected per the maintenance schedule and after any abnormal operating event. 
+ +## Inspection Schedule +| Valve Type | Routine Interval | Post-Event | +|------------|-----------------|------------| +| Gate valve | 12 months | After ESD or overpressure | +| Ball valve | 12 months | After ESD or overpressure | +| Check valve | 6 months | After reverse-flow incident | +| Relief valve | 12 months | After any lift event | +| Control valve | 6 months | After erratic operation | + +## Visual Inspection Checklist +1. Check for external corrosion or coating damage. +2. Inspect stem packing for leaks (apply soap solution). +3. Verify handwheel or actuator is secure and operable. +4. Check flange bolts for tightness (torque wrench). +5. Inspect body and bonnet for cracks or deformation. +6. Verify position indicator matches actual valve position. +7. Check drain and vent plugs for tightness. + +## Operational Test +1. Cycle the valve through full open/close range. +2. Verify smooth operation — no binding or excessive torque. +3. Check for seat leakage using downstream pressure gauge. +4. For actuated valves: test fail-safe action (remove air supply). +5. Record stroke time for actuated valves. + +## Common Faults +- **Stem packing leak**: Tighten gland nuts ¼ turn. If leak persists, repack. +- **Seat leak-through**: May indicate erosion or debris. Isolate, depressurise, inspect internals. +- **Stiff operation**: Lubricate stem per manufacturer specs. Check for corrosion or scale. +- **Actuator failure**: Check air supply pressure, solenoid valve, and positioner calibration. + +## Safety Warning +**Never attempt to repair a valve under pressure.** Always isolate and depressurise the section before disassembly. Use a lock-out/tag-out (LOTO) procedure. 
diff --git a/samples/js/local-cag/package.json b/samples/js/local-cag/package.json new file mode 100644 index 00000000..c591ce39 --- /dev/null +++ b/samples/js/local-cag/package.json @@ -0,0 +1,27 @@ +{ + "name": "gas-field-local-cag", + "version": "2.0.0", + "description": "Offline CAG-powered support agent for gas field engineers using Foundry Local. Pre-loads domain documents into the context window — no vector database, no embeddings, no retrieval pipeline.", + "type": "module", + "scripts": { + "start": "node src/server.js", + "dev": "node --watch src/server.js", + "test": "node --test test/*.test.js" + }, + "dependencies": { + "express": "^4.21.0", + "foundry-local-sdk": "^0.5.1" + }, + "license": "MIT", + "keywords": [ + "cag", + "offline-ai", + "foundry-local", + "context-augmented-generation", + "gas-field", + "support-agent" + ], + "engines": { + "node": ">=20.0.0" + } +} diff --git a/samples/js/local-cag/public/index.html b/samples/js/local-cag/public/index.html new file mode 100644 index 00000000..01f40369 --- /dev/null +++ b/samples/js/local-cag/public/index.html @@ -0,0 +1,724 @@ + + + + + + Gas Field Support Agent + + + + +
+
+

Gas Field Support Agent

+

Preparing your offline AI assistant

+
+
+
+

Connecting to server...

+

+
+
Load domain documents
+
Initialise Foundry Local
+
Select best model for device
+
Ensure model is available
+
Load model into memory
+
+
+
+ +
+

+ 🔧 + Gas Field Support Agent +

+
+ + Connecting... +
+
+ +
+ + + + + + + + +
+ +
+
+ Gas Field Support Agent – Ready
Running locally on this device using Context-Augmented Generation (CAG). No internet connection required.

+ All domain knowledge is pre-loaded. Ask me about: +
    +
  • Gas leak detection & safety procedures
  • +
  • Fault diagnosis & decision trees
  • +
  • Equipment maintenance & repair steps
  • +
  • Pressure testing & valve inspection
  • +
  • PPE requirements & compliance
  • +
+ Always follow your site-specific safety procedures. +
+
+ +
+ + +
+ + + + diff --git a/samples/js/local-cag/src/chatEngine.js b/samples/js/local-cag/src/chatEngine.js new file mode 100644 index 00000000..675edf10 --- /dev/null +++ b/samples/js/local-cag/src/chatEngine.js @@ -0,0 +1,222 @@ +/** + * Foundry Local chat engine – Context-Aware Generation (CAG). + * Uses the Foundry Local SDK (native bindings) to run inference + * directly in-process, with no HTTP round-trips to a local server. + * + * Architecture: CAG injects the full domain knowledge base into the + * system prompt at startup. No vector search, no embeddings, no + * retrieval step at query time. + */ +import { FoundryLocalManager } from "foundry-local-sdk"; +import { config } from "./config.js"; +import { selectBestModel } from "./modelSelector.js"; +import { SYSTEM_PROMPT, SYSTEM_PROMPT_COMPACT } from "./prompts.js"; +import { + loadDocuments, + buildDomainContext, + buildCompactContext, + findRelevantDocs, + buildSelectedContext, + buildDocumentIndex, + listDocuments, +} from "./context.js"; + +export class ChatEngine { + constructor() { + this.chatClient = null; + this.modelAlias = null; + this.compactMode = false; + this.docs = []; + this.domainContext = ""; + this.compactContext = ""; + this.docIndex = ""; + } + + /** + * Initialise the engine: load domain context, start Foundry Local, load model. + * @param {function} [onProgress] – callback receiving { stage, message, progress?, model? } + */ + async init(onProgress = () => {}) { + // 1. Pre-load all domain documents into memory + onProgress({ stage: "context", message: "Loading domain documents..." 
}); + console.log("[ChatEngine] Loading domain context..."); + this.docs = loadDocuments(); + this.domainContext = buildDomainContext(this.docs); + this.compactContext = buildCompactContext(this.docs); + this.docIndex = buildDocumentIndex(this.docs); + console.log( + `[ChatEngine] Context loaded: ${this.docs.length} documents (${this.domainContext.length} chars).` + ); + onProgress({ stage: "context", message: `Loaded ${this.docs.length} domain documents` }); + + // 2. Initialise Foundry Local SDK (native bindings, no CLI) + onProgress({ stage: "sdk", message: "Initialising Foundry Local SDK..." }); + console.log("[ChatEngine] Initialising Foundry Local SDK..."); + const manager = FoundryLocalManager.create({ appName: "gas-field-cag" }); + + // 3. Select the best model for this device (or use the forced alias) + onProgress({ stage: "selecting", message: "Selecting best model for this device..." }); + const { model, reason } = await selectBestModel(manager.catalog, { + forceModel: config.model || undefined, + ramBudgetPercent: config.ramBudgetPercent, + maxModelSizeMb: config.maxModelSizeMb, + }); + this.selectionReason = reason; + onProgress({ stage: "selected", message: `Selected model: ${model.alias}`, model: model.alias }); + + // 4. Download model if not cached + if (!model.isCached) { + console.log(`[ChatEngine] Downloading model ${model.alias}...`); + onProgress({ stage: "downloading", message: `Downloading ${model.alias}...`, progress: 0, model: model.alias }); + await model.download((progress) => { + process.stdout.write(`\r[ChatEngine] Download: ${progress.toFixed(0)}%`); + onProgress({ stage: "downloading", message: `Downloading ${model.alias}...`, progress, model: model.alias }); + }); + console.log(""); + } else { + onProgress({ stage: "cached", message: `${model.alias} is already cached`, model: model.alias }); + } + + // 5. 
Load model into memory + onProgress({ stage: "loading", message: `Loading ${model.alias} into memory...`, model: model.alias }); + console.log(`[ChatEngine] Loading model ${model.alias} into memory...`); + await model.load(); + this.modelAlias = model.alias; + console.log(`[ChatEngine] Model loaded: ${model.id} (${model.alias})`); + + // 6. Create a ChatClient for direct in-process inference + this.chatClient = model.createChatClient(); + this.chatClient.settings.temperature = 0.1; + console.log("[ChatEngine] ChatClient ready (in-process inference)."); + onProgress({ stage: "ready", message: "Ready", model: model.alias }); + } + + /** + * Get the list of loaded domain documents. + */ + getDocuments() { + return listDocuments(this.docs); + } + + /** + * Set compact mode for extreme latency / edge devices. + */ + setCompactMode(enabled) { + this.compactMode = enabled; + console.log(`[ChatEngine] Compact mode: ${enabled ? "ON" : "OFF"}`); + } + + /** + * Build the messages array with pre-loaded context injection. + * + * Prompt structure: + * System: role + behavioural rules + * System: full domain context (pre-loaded, not retrieved) + * ...conversation history... + * User: question + */ + _buildMessages(userMessage, history = []) { + const systemPrompt = this.compactMode + ? SYSTEM_PROMPT_COMPACT + : SYSTEM_PROMPT; + + const recentHistory = history + .slice(-4) + .filter((entry) => entry && typeof entry.content === "string" && entry.content.trim()) + .map((entry) => ({ role: entry.role, content: entry.content.trim() })); + + // Select only the most relevant documents for this query + const { docs: relevant, matched, terms } = findRelevantDocs( + userMessage, + this.docs, + config.maxContextDocs, + ); + const context = this.compactMode + ? buildCompactContext(relevant) + : buildSelectedContext(relevant, userMessage, { + terms, + maxCharsPerDoc: 1600, + maxSections: 2, + }); + const contextEnvelope = matched + ? 
`Relevant documents for this query:\n\n${context}` + : `Available documents:\n${this.docIndex}\n\nRelevant documents for this query:\n\n${context}`; + + console.log( + `[ChatEngine] Query context: ${relevant.length} docs ` + + `(${context.length} chars) – ${relevant.map((d) => d.id).join(", ")}`, + ); + + return [ + { role: "system", content: systemPrompt }, + { + role: "system", + content: contextEnvelope, + }, + ...recentHistory, + { role: "user", content: userMessage }, + ]; + } + + /** + * Generate a response for a user query (non-streaming). + */ + async query(userMessage, history = []) { + const messages = this._buildMessages(userMessage, history); + + this.chatClient.settings.maxTokens = this.compactMode ? 512 : 1024; + const response = await this.chatClient.completeChat(messages); + + return { + text: response.choices[0].message.content, + }; + } + + /** + * Generate a streaming response for a user query. + * Returns an async iterable of text chunks. + */ + async *queryStream(userMessage, history = []) { + const messages = this._buildMessages(userMessage, history); + + this.chatClient.settings.maxTokens = this.compactMode ? 
512 : 1024; + + // Collect streamed chunks via callback and yield them + const chunks = []; + let resolve; + let done = false; + + const promise = this.chatClient + .completeStreamingChat(messages, (chunk) => { + const content = chunk.choices?.[0]?.delta?.content; + if (content) { + chunks.push(content); + if (resolve) { + const r = resolve; + resolve = null; + r(); + } + } + }) + .then(() => { + done = true; + if (resolve) { + const r = resolve; + resolve = null; + r(); + } + }); + + let index = 0; + while (!done || index < chunks.length) { + if (index < chunks.length) { + yield { type: "text", data: chunks[index++] }; + } else { + await new Promise((r) => { resolve = r; }); + } + } + + // Ensure the streaming promise settles + await promise; + } +} diff --git a/samples/js/local-cag/src/config.js b/samples/js/local-cag/src/config.js new file mode 100644 index 00000000..8c928df5 --- /dev/null +++ b/samples/js/local-cag/src/config.js @@ -0,0 +1,35 @@ +// Application configuration – all paths relative to project root +import { fileURLToPath } from "url"; +import path from "path"; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); +const ROOT = path.resolve(__dirname, ".."); + +export const config = { + // Model – set FOUNDRY_MODEL to force a specific alias (e.g. "phi-3.5-mini"). + // When left empty the app auto-selects the best model for the device. + model: process.env.FOUNDRY_MODEL || "", + + // Maximum fraction of total system RAM the model may occupy (0–1). + ramBudgetPercent: parseFloat(process.env.RAM_BUDGET) || 0.6, + + // Maximum model file size in MB. Models larger than this are skipped + // even if they fit in the RAM budget. Keeps CPU inference practical. + // Set MAX_MODEL_MB to override (e.g. MAX_MODEL_MB=10240 for 10 GB). + maxModelSizeMb: parseInt(process.env.MAX_MODEL_MB, 10) || 8192, + + // Context (CAG) + docsDir: path.join(ROOT, "docs"), + + // Maximum number of documents injected per query. 
All documents are + // pre-loaded at startup but only the most relevant ones are included + // in each prompt to keep context small enough for CPU inference. + maxContextDocs: parseInt(process.env.MAX_CONTEXT_DOCS, 10) || 3, + + // Server + port: parseInt(process.env.PORT, 10) || 3000, + host: "127.0.0.1", + + // UI + publicDir: path.join(ROOT, "public"), +}; diff --git a/samples/js/local-cag/src/context.js b/samples/js/local-cag/src/context.js new file mode 100644 index 00000000..18251b73 --- /dev/null +++ b/samples/js/local-cag/src/context.js @@ -0,0 +1,301 @@ +/** + * Context module for Context-Augmented Generation (CAG). + * Reads all domain documents from the docs/ folder at startup + * and provides them as pre-loaded, structured context blocks. + * + * Unlike RAG (which retrieves chunks at query time), CAG injects + * the full domain knowledge into the prompt upfront — no vector + * search, no embeddings, no retrieval step. + */ +import fs from "fs"; +import path from "path"; +import { config } from "./config.js"; + +const STOP_WORDS = new Set([ + "about", "after", "before", "could", "field", "from", "have", + "into", "local", "mode", "need", "should", "that", "them", + "there", "these", "this", "what", "when", "with", "would", "your", +]); + +function getDocContent(doc) { + return (doc.content ?? doc.body ?? "").trim(); +} + +function normalize(text) { + return String(text || "").toLowerCase(); +} + +function tokenize(text) { + return normalize(text) + .split(/[^a-z0-9]+/) + .filter((term) => term.length > 2 && !STOP_WORDS.has(term)); +} + +function uniqueTerms(text) { + return [...new Set(tokenize(text))]; +} + +function trimToLength(text, maxLength) { + if (!text || text.length <= maxLength) return text; + const slice = text.slice(0, maxLength); + const lastBreak = Math.max(slice.lastIndexOf("\n"), slice.lastIndexOf(". ")); + return `${slice.slice(0, lastBreak > 200 ? 
lastBreak : maxLength).trim()}\n...`; +} + +function splitSections(content) { + const lines = content.split("\n"); + const sections = []; + let heading = "Overview"; + let bodyLines = []; + + const pushSection = () => { + const body = bodyLines.join("\n").trim(); + if (!heading && !body) return; + sections.push({ + heading, + body, + text: [heading, body].filter(Boolean).join("\n"), + normalizedHeading: normalize(heading), + normalizedBody: normalize(body), + }); + }; + + for (const line of lines) { + if (/^#{1,3}\s+/.test(line)) { + pushSection(); + heading = line.trim(); + bodyLines = []; + continue; + } + bodyLines.push(line); + } + pushSection(); + return sections.filter((s) => s.body || s.heading !== "Overview"); +} + +function extractCompactContent(content) { + const lines = content.split("\n"); + const keyLines = []; + let inSafety = false; + let inProcedure = false; + + for (const line of lines) { + if (/^##\s*(safety|warning)/i.test(line)) { + inSafety = true; + inProcedure = false; + keyLines.push(line); + } else if (/^##\s*procedure/i.test(line)) { + inProcedure = true; + inSafety = false; + keyLines.push(line); + } else if (/^##\s/.test(line)) { + inSafety = false; + inProcedure = false; + } else if (inSafety || inProcedure) { + keyLines.push(line); + } + } + + if (keyLines.length > 0) return keyLines.join("\n").trim(); + return lines.filter((l) => l.trim()).slice(0, 5).join("\n"); +} + +function buildSectionText(section, maxLength) { + const heading = section.heading === "Overview" ? 
"" : section.heading; + return trimToLength([heading, section.body].filter(Boolean).join("\n"), maxLength); +} + +function scoreSection(section, terms) { + let score = 0; + for (const term of terms) { + if (section.normalizedHeading.includes(term)) score += 5; + if (section.normalizedBody.includes(term)) score += 2; + } + return score; +} + +function buildFocusedDocContext(doc, terms, { compact = false, maxCharsPerDoc = 1600, maxSections = 2 } = {}) { + const titleLine = `--- ${doc.title} [${doc.id}] ---`; + + if (compact) { + const compactContent = trimToLength(doc.compactContent || extractCompactContent(getDocContent(doc)), maxCharsPerDoc); + return [titleLine, compactContent].join("\n"); + } + + const sections = Array.isArray(doc.sections) && doc.sections.length > 0 + ? doc.sections + : splitSections(getDocContent(doc)); + + if (terms.length === 0) { + return [titleLine, trimToLength(getDocContent(doc), maxCharsPerDoc)].join("\n"); + } + + const ranked = sections + .map((section) => ({ section, score: scoreSection(section, terms) })) + .sort((a, b) => b.score - a.score); + + const positiveMatches = ranked.filter((e) => e.score > 0).slice(0, maxSections); + const chosen = positiveMatches.length > 0 ? 
positiveMatches : ranked.slice(0, 1); + + let remaining = maxCharsPerDoc; + const blocks = []; + for (const entry of chosen) { + if (remaining <= 0) break; + const sectionText = buildSectionText(entry.section, remaining); + if (!sectionText) continue; + blocks.push(sectionText); + remaining -= sectionText.length + 2; + } + + const content = blocks.join("\n\n") || trimToLength(getDocContent(doc), maxCharsPerDoc); + return [titleLine, content].join("\n"); +} + +export function buildSearchTerms(query) { + return uniqueTerms(query); +} + +export function findRelevantDocs(query, docs, maxDocs = 3) { + const terms = buildSearchTerms(query); + + if (terms.length === 0) { + return { docs: docs.slice(0, maxDocs), matched: false, terms }; + } + + const scored = docs.map((doc) => { + const searchTitle = doc.searchTitle || normalize(doc.title); + const searchCategory = doc.searchCategory || normalize(doc.category); + const searchContent = doc.searchContent || normalize(getDocContent(doc)); + let score = 0; + for (const term of terms) { + if (searchTitle.includes(term)) score += 8; + if (searchCategory.includes(term)) score += 3; + if (searchContent.includes(term)) score += 1; + } + return { doc, score }; + }); + + scored.sort((a, b) => b.score - a.score); + const selected = scored.slice(0, maxDocs).filter((e) => e.score > 0); + + return { + docs: selected.length > 0 ? selected.map((e) => e.doc) : docs.slice(0, maxDocs), + matched: selected.length > 0, + terms, + }; +} + +/** + * Parse YAML-like front-matter from a markdown document. 
+ */ +export function parseFrontMatter(text) { + const match = text.match(/^---\r?\n([\s\S]*?)\r?\n---\r?\n([\s\S]*)$/); + if (!match) return { meta: {}, body: text }; + + const meta = {}; + for (const line of match[1].split("\n")) { + const idx = line.indexOf(":"); + if (idx > 0) { + meta[line.slice(0, idx).trim()] = line.slice(idx + 1).trim(); + } + } + return { meta, body: match[2] }; +} + +/** + * Load all markdown documents from the docs/ folder. + */ +export function loadDocuments() { + const docsDir = config.docsDir; + if (!fs.existsSync(docsDir)) { + console.warn(`[Context] Docs directory not found: ${docsDir}`); + return []; + } + + const files = fs.readdirSync(docsDir).filter((f) => f.endsWith(".md")).sort(); + + const docs = []; + for (const file of files) { + const raw = fs.readFileSync(path.join(docsDir, file), "utf-8"); + const { meta, body } = parseFrontMatter(raw); + const content = body.trim(); + + docs.push({ + id: meta.id || path.basename(file, ".md"), + title: meta.title || file, + category: meta.category || "General", + content, + compactContent: extractCompactContent(content), + sections: splitSections(content), + searchTitle: normalize(meta.title || file), + searchCategory: normalize(meta.category || "General"), + searchContent: normalize(content), + }); + } + + return docs; +} + +/** + * Build the full domain context block from all loaded documents. 
+ */ +export function buildDomainContext(docs) { + if (docs.length === 0) return ""; + + const categories = new Map(); + for (const doc of docs) { + if (!categories.has(doc.category)) categories.set(doc.category, []); + categories.get(doc.category).push(doc); + } + + const sections = []; + for (const [category, categoryDocs] of categories) { + sections.push(`=== ${category} ===`); + for (const doc of categoryDocs) { + sections.push(`--- ${doc.title} [${doc.id}] ---`); + sections.push(getDocContent(doc)); + sections.push(""); + } + } + + return sections.join("\n"); +} + +/** + * Build a compact context summary for edge/constrained devices. + */ +export function buildCompactContext(docs) { + if (docs.length === 0) return ""; + + const sections = []; + for (const doc of docs) { + sections.push(`--- ${doc.title} [${doc.id}] ---`); + sections.push(doc.compactContent || extractCompactContent(getDocContent(doc))); + sections.push(""); + } + + return sections.join("\n"); +} + +/** + * Build context from a subset of selected documents. + */ +export function buildSelectedContext(docs, query = "", options = {}) { + const terms = options.terms || buildSearchTerms(query); + const sections = docs.map((doc) => buildFocusedDocContext(doc, terms, options)); + return sections.join("\n\n"); +} + +/** + * Build a short document index listing all available topics. + */ +export function buildDocumentIndex(docs) { + return docs.map((d) => `- ${d.title} [${d.id}]`).join("\n"); +} + +/** + * Get a list of loaded documents (for the /api/context endpoint). 
+ */ +export function listDocuments(docs) { + return docs.map((d) => ({ id: d.id, title: d.title, category: d.category })); +} diff --git a/samples/js/local-cag/src/modelSelector.js b/samples/js/local-cag/src/modelSelector.js new file mode 100644 index 00000000..36d98910 --- /dev/null +++ b/samples/js/local-cag/src/modelSelector.js @@ -0,0 +1,115 @@ +/** + * Dynamic model selector – picks the best Foundry Local model for + * the current device based on available system RAM and the SDK catalogue. + * + * Selection strategy: + * 1. Enumerate all chat-completion models from the catalogue. + * 2. Exclude models that are too large for available RAM. + * 3. Rank remaining models by a quality preference order. + * 4. Boost cached models to avoid lengthy downloads. + * 5. Return the best match. + */ +import os from "os"; + +// Chat models ranked by quality for domain Q&A tasks (best first). +const QUALITY_RANK = [ + "qwen2.5-7b", + "qwen2.5-14b", + "phi-4", + "gpt-oss-20b", + "mistral-7b-v0.2", + "phi-4-mini-reasoning", + "phi-3.5-mini", + "phi-3-mini-128k", + "phi-3-mini-4k", + "qwen2.5-1.5b", + "qwen2.5-0.5b", +]; + +// Aliases to skip (not suited for domain Q&A chat) +const SKIP_ALIASES = new Set([ + "qwen2.5-coder-0.5b", + "qwen2.5-coder-1.5b", + "qwen2.5-coder-7b", + "qwen2.5-coder-14b", +]); + +/** + * Pick the best chat model from the Foundry Local catalogue that + * fits within the device's RAM budget. 
+ * + * @param {object} catalog – FoundryLocalManager.catalog instance + * @param {object} [opts] + * @param {number} [opts.ramBudgetPercent=0.6] – fraction of total RAM + * @param {number} [opts.maxModelSizeMb=4096] – hard cap on model file size in MB + * @param {string} [opts.forceModel] – bypass selection and use this alias + * @returns {Promise<{model, reason: string}>} + */ +export async function selectBestModel(catalog, opts = {}) { + const forceAlias = opts.forceModel || process.env.FOUNDRY_MODEL; + if (forceAlias) { + const model = await catalog.getModel(forceAlias); + return { model, reason: `forced via ${opts.forceModel ? "config" : "FOUNDRY_MODEL env"}` }; + } + + const totalRamMb = os.totalmem() / (1024 * 1024); + const budgetPercent = opts.ramBudgetPercent ?? 0.6; + const budgetMb = totalRamMb * budgetPercent; + const maxSizeMb = opts.maxModelSizeMb ?? 4096; + + console.log( + `[ModelSelector] System RAM: ${(totalRamMb / 1024).toFixed(1)} GB ` + + `| Budget (${(budgetPercent * 100).toFixed(0)}%): ${(budgetMb / 1024).toFixed(1)} GB` + + ` | Max model size: ${(maxSizeMb / 1024).toFixed(1)} GB` + ); + + const allModels = await catalog.getModels(); + + // Filter to chat-completion models that fit within the RAM budget + const candidates = []; + for (const m of allModels) { + const info = m.selectedVariant?._modelInfo; + if (!info) continue; + if (info.task !== "chat-completion") continue; + if (SKIP_ALIASES.has(info.alias)) continue; + if (info.fileSizeMb > budgetMb) { + console.log(`[ModelSelector] skip ${info.alias} (${(info.fileSizeMb / 1024).toFixed(1)} GB > RAM budget)`); + continue; + } + if (info.fileSizeMb > maxSizeMb) { + console.log(`[ModelSelector] skip ${info.alias} (${(info.fileSizeMb / 1024).toFixed(1)} GB > max model size)`); + continue; + } + candidates.push({ model: m, info }); + } + + if (candidates.length === 0) { + throw new Error( + "No chat model fits within the available RAM budget " + + `(${(budgetMb / 1024).toFixed(1)} GB). 
` + + "Try increasing ramBudgetPercent or freeing memory." + ); + } + + // Score each candidate: quality rank + cache bonus + const scored = candidates.map(({ model, info }) => { + const rankIndex = QUALITY_RANK.indexOf(info.alias); + const qualityScore = rankIndex >= 0 + ? (QUALITY_RANK.length - rankIndex) * 10 + : 1; + const cacheBonus = info.cached ? 5 : 0; + const score = qualityScore + cacheBonus; + return { model, info, score }; + }); + + scored.sort((a, b) => b.score - a.score); + + const best = scored[0]; + const reason = + `auto-selected (${(best.info.fileSizeMb / 1024).toFixed(1)} GB, ` + + `${best.info.cached ? "cached" : "will download"}, ` + + `rank ${scored.indexOf(best) + 1}/${scored.length})`; + + console.log(`[ModelSelector] Selected: ${best.info.alias} – ${reason}`); + return { model: best.model, reason }; +} diff --git a/samples/js/local-cag/src/prompts.js b/samples/js/local-cag/src/prompts.js new file mode 100644 index 00000000..8f1244b5 --- /dev/null +++ b/samples/js/local-cag/src/prompts.js @@ -0,0 +1,44 @@ +// Gas Field Agent – System Prompt (Context-Augmented Generation) +export const SYSTEM_PROMPT = `You are a local, offline customer services and technical support agent for gas field inspection and maintenance engineers. + +Context: +- You run entirely on-device with no internet connectivity. +- You are embedded in a field application used during live gas infrastructure inspections and repairs. +- Your responses must be accurate, concise, safety-first, and aligned with gas engineering standards and field maintenance procedures. +- You have been provided with the complete domain knowledge base as pre-loaded context. This includes approved gas engineering manuals, inspection procedures, fault codes, safety guidance, and maintenance playbooks. + +Primary Objectives: +1. Assist engineers in diagnosing issues encountered during gas field inspections. +2. Provide step-by-step repair and maintenance guidance. +3. 
Surface relevant safety warnings before any action. +4. Reference applicable standards, procedures, and documentation from the provided context. +5. Operate reliably in offline, constrained environments. + +Behaviour Rules: +- Always prioritise safety. If a procedure involves risk, explicitly call it out. +- Do not hallucinate procedures, measurements, tolerances, or legal requirements. +- If the answer is not present in the provided domain context, say: + "This information is not available in the local knowledge base." +- Use clear, structured responses suitable for field engineers wearing PPE. +- Prefer bullet points and numbered steps. +- Assume noisy, time-critical environments. +- Keep answers SHORT – engineers are in the field. + +Response Format: +- **Summary** (1–2 lines) +- **Safety Warnings** (if applicable) +- **Step-by-step Guidance** +- **Reference** (document name + section) + +You must only use information from the domain context provided in this conversation.`; + +// Compact prompt variant for extreme latency / edge devices +export const SYSTEM_PROMPT_COMPACT = `You are an offline gas field support agent. Safety-first. Concise answers only. + +Rules: +- Prioritise safety warnings before any action. +- Use bullet points and numbered steps. +- If info is missing from the provided context, say: "Not in local knowledge base." +- Never invent procedures, tolerances, or legal requirements. + +Format: Summary → Safety → Steps → Reference.`; diff --git a/samples/js/local-cag/src/server.js b/samples/js/local-cag/src/server.js new file mode 100644 index 00000000..0a4727cc --- /dev/null +++ b/samples/js/local-cag/src/server.js @@ -0,0 +1,186 @@ +/** + * Express server – Gas Field CAG Application. + * Serves the web UI and provides the /api/chat endpoint. + * Fully offline, connects to Foundry Local on dynamic port. 
+ * + * Uses Context-Aware Generation (CAG): all domain knowledge is + * pre-loaded at startup and injected into the prompt — no retrieval, + * no vector search, no embeddings. + */ +import express from "express"; +import path from "path"; +import { config } from "./config.js"; +import { ChatEngine } from "./chatEngine.js"; + +const app = express(); + +// ── Security headers ── +app.use((_req, res, next) => { + res.setHeader("X-Content-Type-Options", "nosniff"); + res.setHeader("X-Frame-Options", "DENY"); + res.setHeader("Referrer-Policy", "no-referrer"); + res.setHeader("Permissions-Policy", "camera=(), microphone=(), geolocation=()"); + res.setHeader( + "Content-Security-Policy", + "default-src 'self'; script-src 'self' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; img-src 'self' data:;" + ); + next(); +}); + +app.use(express.json({ limit: "1mb" })); +app.use(express.static(config.publicDir)); + +// ── Chat engine instance ── +const engine = new ChatEngine(); + +// ── Initialisation state (broadcast to connected SSE clients) ── +let initState = { stage: "starting", message: "Starting up..." }; +const statusClients = new Set(); + +function broadcastStatus(state) { + initState = state; + const payload = `data: ${JSON.stringify(state)}\n\n`; + for (const client of statusClients) { + client.write(payload); + } +} + +// ── API: Server-Sent Events for initialisation status ── +app.get("/api/status", (_req, res) => { + res.setHeader("Content-Type", "text/event-stream"); + res.setHeader("Cache-Control", "no-cache"); + res.setHeader("Connection", "keep-alive"); + // Send current state immediately + res.write(`data: ${JSON.stringify(initState)}\n\n`); + statusClients.add(res); + _req.on("close", () => statusClients.delete(res)); +}); + +// ── Guard: reject chat requests while model is loading ── +function requireReady(_req, res, next) { + if (initState.stage !== "ready") { + return res.status(503).json({ + error: "Model is still loading. 
// ── API: Chat (non-streaming) ──
// Body: { message: string, history?: message[], compact?: boolean }.
// Returns the engine's full result object, or a JSON 400/500 error.
app.post("/api/chat", requireReady, async (req, res) => {
  try {
    const { message, history, compact } = req.body;
    if (!message || typeof message !== "string") {
      return res.status(400).json({ error: "message is required" });
    }

    if (compact !== undefined) engine.setCompactMode(!!compact);

    const result = await engine.query(
      message,
      Array.isArray(history) ? history : []
    );
    res.json(result);
  } catch (err) {
    console.error("[API] Error:", err.message);
    res.status(500).json({ error: "Internal server error" });
  }
});

// ── API: Chat (streaming via SSE) ──
// Same body as /api/chat, but streams JSON chunks as Server-Sent Events
// and terminates the stream with a literal "[DONE]" frame.
app.post("/api/chat/stream", requireReady, async (req, res) => {
  try {
    const { message, history, compact } = req.body;
    if (!message || typeof message !== "string") {
      return res.status(400).json({ error: "message is required" });
    }

    if (compact !== undefined) engine.setCompactMode(!!compact);

    res.setHeader("Content-Type", "text/event-stream");
    res.setHeader("Cache-Control", "no-cache");
    res.setHeader("Connection", "keep-alive");

    const stream = engine.queryStream(
      message,
      Array.isArray(history) ? history : []
    );

    for await (const chunk of stream) {
      res.write(`data: ${JSON.stringify(chunk)}\n\n`);
    }

    res.write("data: [DONE]\n\n");
    res.end();
  } catch (err) {
    console.error("[API] Stream error:", err.message);
    // Fix: if the failure happened before any SSE bytes were written, send a
    // proper HTTP error instead of an SSE frame on a non-SSE response — the
    // client would otherwise try to parse an event stream with default headers.
    if (!res.headersSent) {
      return res.status(500).json({ error: "Internal server error" });
    }
    res.write(`data: ${JSON.stringify({ type: "error", data: "Internal server error" })}\n\n`);
    res.end();
  }
});

// ── API: List pre-loaded context documents ──
app.get("/api/context", (_req, res) => {
  try {
    const docs = engine.getDocuments();
    res.json({ docs, count: docs.length });
  } catch (err) {
    console.error("[API] Context list error:", err.message);
    res.status(500).json({ error: "Failed to list context documents" });
  }
});

// ── API: Health check ──
app.get("/api/health", (_req, res) => {
  res.json({
    status: "ok",
    model: engine.modelAlias,
    modelSelection: engine.selectionReason,
    architecture: "CAG",
  });
});

// ── Fallback: serve index.html for SPA ──
app.get("*", (req, res) => {
  // Fix: unknown API routes should 404 as JSON, not return the SPA shell,
  // so API clients never receive HTML where they expect JSON.
  if (req.path.startsWith("/api/")) {
    return res.status(404).json({ error: "Not found" });
  }
  res.sendFile(path.join(config.publicDir, "index.html"));
});

// ── Start server FIRST so the frontend can connect for status updates ──
// Binds the HTTP listener before engine initialisation so the UI can watch
// progress while the model downloads/loads; closes the listener again if
// the engine fails to initialise.
async function start() {
  console.log("=== Gas Field CAG – Local Support Agent ===\n");

  const server = await new Promise((resolve, reject) => {
    const candidate = app.listen(config.port, config.host, () => {
      console.log(`[Server] Running at http://${config.host}:${config.port}`);
      console.log("[Server] Fully offline – no outbound connections.");
      console.log("[Server] Architecture: Context-Aware Generation (CAG)");
      console.log("[Server] Initialising engine – open the browser to see progress...\n");
      resolve(candidate);
    });

    candidate.once("error", (err) => {
      if (err.code === "EADDRINUSE") {
        console.error(`[Server] Port ${config.port} is already in use.`);
        console.error("[Server] Stop the other process or set a different PORT.");
      } else {
        console.error("[Server] Failed to start:", err.message);
      }
      reject(err);
    });
  });

  try {
    // Initialise engine AFTER the server is confirmed listening, broadcasting progress
    await engine.init(broadcastStatus);
    console.log("\n[Server] Engine ready – accepting requests.\n");
  } catch (err) {
    // Engine failed: take the listener down so the process exits cleanly.
    server.close();
    throw err;
  }
}

start().catch((err) => {
  console.error("Failed to start:", err);
  process.exit(1);
});
**Query**: Each user question is vectorised using TF-IDF, then cosine similarity finds the top-K most relevant chunks. +3. **Prompt**: Retrieved chunks are injected into the system prompt with source citations. +4. **Inference**: Foundry Local runs the model in-process — no external HTTP server needed. + +## Prerequisites + +- **Node.js 20+** +- **Foundry Local** installed — see [foundrylocal.ai](https://foundrylocal.ai) + +## Quick Start + +```bash +# Install dependencies +npm install + +# Ingest sample documents into the vector store +npm run ingest + +# Start the server +npm start +``` + +Open [http://localhost:3000](http://localhost:3000) in your browser. The UI shows real-time progress as the model loads. + +## Configuration + +Set these environment variables (all optional): + +| Variable | Default | Description | +|----------|---------|-------------| +| `FOUNDRY_MODEL` | `phi-3.5-mini` | Model alias to use | +| `PORT` | `3000` | Server port | +| `HOST` | `127.0.0.1` | Server bind address | + +## Adding Documents + +### Option 1: File System + +Place markdown files in the `docs/` folder with YAML front-matter, then re-run `npm run ingest`: + +```markdown +--- +title: Your Document Title +category: Safety +id: unique-doc-id +--- + +# Your Document Title + +Content goes here... +``` + +### Option 2: Web UI Upload + +Click the **📄** button in the chat interface to upload `.md` or `.txt` files at runtime. Documents are chunked and indexed immediately — no restart required. 
+ +## Project Structure + +``` +local-rag/ +├── package.json +├── README.md +├── docs/ # Domain knowledge (markdown with front-matter) +│ ├── gas-leak-detection.md +│ ├── emergency-shutdown.md +│ ├── pressure-testing.md +│ ├── ppe-requirements.md +│ └── valve-inspection.md +├── public/ +│ └── index.html # Web UI with upload, chat, source citations +├── data/ # Created at ingest time +│ └── rag.db # SQLite vector store +└── src/ + ├── server.js # Express server with SSE status + chat + upload + ├── chatEngine.js # RAG engine: SDK init, retrieval, inference + ├── config.js # Configuration (model, chunking, paths) + ├── chunker.js # Document parsing, chunking, TF-IDF math + ├── vectorStore.js # SQLite-backed vector store with inverted index + ├── ingest.js # Batch document ingestion script + └── prompts.js # System prompts (full + compact/edge mode) +``` + +## Key Features + +- **Cache-aware** — skips model download if already in the Foundry cache +- **TF-IDF vector search** — no embedding model needed; lightweight and fast +- **SQLite storage** — single-file database, no external services +- **Runtime document upload** — add documents via the web UI without restarting +- **Source citations** — each response shows which chunks were used and their relevance scores +- **SSE progress** — real-time loading status streamed to the browser +- **Edge mode** — toggle compact prompts for smaller models or constrained devices +- **No internet required** — fully offline after initial model download + +## How It Differs From CAG + +| Feature | RAG (this sample) | CAG | +|---------|-------------------|-----| +| Document loading | Chunked + indexed | All loaded at startup | +| Vector search | TF-IDF + cosine similarity | Keyword scoring | +| Storage | SQLite database | In-memory | +| Knowledge base size | Any size | Small–medium | +| Runtime upload | Yes | No | +| Source citations | Chunk-level with scores | Document-level | +| Complexity | More complex | Simpler | + +## Learn More 
+ +- [Foundry Local Documentation](https://foundrylocal.ai) +- [Foundry Local SDK (npm)](https://www.npmjs.com/package/foundry-local-sdk) +- [CAG sample](../local-cag/) — for simpler use-cases where all docs fit in one prompt diff --git a/samples/js/local-rag/docs/emergency-shutdown.md b/samples/js/local-rag/docs/emergency-shutdown.md new file mode 100644 index 00000000..f2794148 --- /dev/null +++ b/samples/js/local-rag/docs/emergency-shutdown.md @@ -0,0 +1,53 @@ +--- +title: Emergency Shutdown Procedures +category: Safety +id: emergency-shutdown +--- + +# Emergency Shutdown (ESD) Procedures + +## When to Activate ESD +Activate the Emergency Shutdown system when any of the following occur: +- Confirmed uncontrolled gas release +- Fire detected on site +- High-high pressure alarm on any vessel +- Loss of containment from process equipment +- Structural failure of a pressure vessel or pipeline +- Direction from the Site Emergency Coordinator + +## ESD Levels + +### Level 1 – Unit Shutdown +- Shuts down a single production unit. +- Isolates fuel gas and process feeds to the affected unit. +- Activates local fire suppression (deluge) if configured. + +### Level 2 – Plant Shutdown +- Shuts down all production units on the facility. +- Closes all incoming and outgoing pipeline isolation valves. +- Activates emergency ventilation and general alarm. + +### Level 3 – Total Facility Evacuation +- Triggers Level 2 actions plus facility-wide muster. +- Activates public address and perimeter alarms. +- Notifies external emergency services automatically. + +## Manual Activation +1. Break the glass cover on the nearest ESD push-button (located at muster points and key access routes). +2. Push and hold the button for 3 seconds until the indicator light turns red. +3. The system cannot be reset from the field — reset is performed from the control room only. + +## Post-ESD Actions +1. Confirm all personnel are accounted for at the muster point. +2. 
Perform gas surveys before re-entering the affected area. +3. Complete the ESD Event Report within 4 hours. +4. Do NOT restart equipment until the Site Emergency Coordinator authorises. + +## ESD Push-Button Locations +| Location | ID | Distance from Well Pad | +|----------|----|----------------------| +| Main Gate | ESD-001 | N/A | +| Separator Area | ESD-002 | 30 m | +| Compressor Building | ESD-003 | 45 m | +| Flare Stack | ESD-004 | 100 m | +| Control Room | ESD-005 | 60 m | diff --git a/samples/js/local-rag/docs/gas-leak-detection.md b/samples/js/local-rag/docs/gas-leak-detection.md new file mode 100644 index 00000000..9dc3cc44 --- /dev/null +++ b/samples/js/local-rag/docs/gas-leak-detection.md @@ -0,0 +1,46 @@ +--- +title: Gas Leak Detection Procedures +category: Safety +id: gas-leak-detection +--- + +# Gas Leak Detection Procedures + +## Safety Warning +**Always assume a gas leak is dangerous until proven otherwise.** Evacuate the area if concentration exceeds 20% LEL (Lower Explosive Limit). Do NOT operate electrical equipment in suspected leak zones. + +## Detection Methods + +### Portable Gas Detector +1. Calibrate detector before each shift using known calibration gas. +2. Turn on and allow 60-second warm-up period. +3. Hold sensor 2–5 cm from suspected leak point. +4. Move slowly along pipe runs, joints, valves, and fittings. +5. Record readings at each test point on the inspection form. + +### Soap Bubble Test +1. Apply leak-detection fluid to joints and connections. +2. Observe for 30 seconds — bubbles indicate a leak. +3. Mark leak location with approved marker tape. +4. Do NOT use soap solution near high-temperature surfaces. + +### Ultrasonic Leak Detection +- Suitable for pressurised systems above 50 psi. +- Point the sensor at suspected areas and listen for high-frequency noise. +- Effective range: up to 15 metres in quiet environments. + +## Response Procedure +1. **Evacuate** — Move upwind, minimum 50 metres from the leak source. +2. 
**Notify** — Contact the control room immediately: radio channel 5 or emergency phone. +3. **Isolate** — Close the nearest upstream and downstream isolation valves if safe to do so. +4. **Ventilate** — Open doors/vents in enclosed spaces. Use intrinsically safe fans only. +5. **Monitor** — Take continuous gas readings at the perimeter. Do not re-enter until readings are below 10% LEL for 15 consecutive minutes. + +## Common Leak Sources +| Component | Failure Mode | Check Interval | +|-----------|-------------|----------------| +| Flange gaskets | Deterioration, bolt relaxation | Monthly | +| Valve stems | Packing wear | Quarterly | +| Threaded connections | Vibration loosening | Monthly | +| Instrument tubing | Fatigue cracking | Bi-annually | +| Relief valve outlets | Seat erosion | Annually | diff --git a/samples/js/local-rag/docs/ppe-requirements.md b/samples/js/local-rag/docs/ppe-requirements.md new file mode 100644 index 00000000..e7d56994 --- /dev/null +++ b/samples/js/local-rag/docs/ppe-requirements.md @@ -0,0 +1,50 @@ +--- +title: Personal Protective Equipment Requirements +category: Safety +id: ppe-requirements +--- + +# Personal Protective Equipment (PPE) Requirements + +## Minimum PPE – All Site Personnel +Every person entering the operational area must wear: +- Hard hat (EN 397 or ANSI Z89.1) +- Safety glasses with side shields (EN 166 or ANSI Z87.1) +- Steel-toe safety boots (EN ISO 20345 S3 minimum) +- High-visibility vest or coveralls (EN ISO 20471 Class 2+) +- Flame-resistant (FR) coveralls in hydrocarbon processing areas + +## Task-Specific PPE + +### Gas Testing / Confined Space Entry +| Item | Standard | Notes | +|------|----------|-------| +| Personal gas monitor (4-gas) | EN 60079-29-1 | Bump-test daily | +| Full-face respirator | EN 136 | With gas-specific cartridge | +| Harness + retrieval line | EN 361 / EN 1496 | Mandatory for confined spaces | +| Intrinsically safe torch | IECEx / ATEX Zone 1 | No standard torches permitted | + +### 
Welding / Hot Work +| Item | Standard | Notes | +|------|----------|-------| +| Welding helmet (auto-darkening) | EN 379 shade 9–13 | Depending on process | +| Leather welding gloves | EN 12477 Type A | Replace when damaged | +| Leather apron or sleeve guards | EN ISO 11611 Class 2 | For overhead work | +| Fire blanket | EN 1869 | Within 3 metres of work area | + +### Chemical Handling +| Item | Standard | Notes | +|------|----------|-------| +| Chemical-resistant gloves | EN 374 | Check permeation data for specific chemical | +| Face shield | EN 166 grade B | In addition to safety glasses | +| Chemical splash suit | EN 14605 Type 4 | For pressurised spray risk | +| Emergency eye-wash station | EN 15154-2 | Within 10 seconds walking distance | + +## PPE Inspection +- Inspect all PPE **before each use**. +- Replace any cracked, torn, or degraded equipment immediately. +- Maintain a PPE inspection log — audited quarterly. +- FR clothing must be industrially laundered (not domestic washing machines). + +## Disciplinary Policy +Failure to wear mandatory PPE is a **zero-tolerance violation**. First offence: verbal warning and immediate correction. Second offence: written warning. Third offence: removal from site. diff --git a/samples/js/local-rag/docs/pressure-testing.md b/samples/js/local-rag/docs/pressure-testing.md new file mode 100644 index 00000000..712546e3 --- /dev/null +++ b/samples/js/local-rag/docs/pressure-testing.md @@ -0,0 +1,52 @@ +--- +title: Pressure Testing Procedures +category: Maintenance +id: pressure-testing +--- + +# Pressure Testing Procedures + +## Overview +All newly installed or repaired piping and vessels must undergo pressure testing before being placed into service. This procedure covers both hydrostatic and pneumatic methods. + +## Test Methods + +### Hydrostatic Test (Preferred) +- Test medium: clean, fresh water (inhibited if ambient temperature < 5°C). +- Test pressure: **1.5 × design pressure** for piping; **1.3 × MAWP** for vessels. 
+- Hold time: minimum **30 minutes** after temperature stabilisation. +- Acceptance: no visible leaks, no pressure drop greater than 2% over hold period. + +### Pneumatic Test (When Hydrostatic Not Feasible) +- Test medium: dry nitrogen or clean, dry air. +- Test pressure: **1.1 × design pressure**. +- Hold time: minimum **10 minutes**. +- **Hazard warning**: stored energy in compressed gas is significantly higher than liquid — maintain a 15-metre exclusion zone around the test area. +- Incremental pressurisation: raise in steps of 25%, holding 5 minutes at each step. + +## Pre-Test Checklist +- [ ] All welds inspected and NDE records reviewed. +- [ ] Test boundaries clearly identified and isolation valves closed. +- [ ] Pressure relief device set to 110% of test pressure installed on system. +- [ ] All instrument connections plugged or valved off. +- [ ] Exclusion zone barricaded with warning signs. +- [ ] Test gauge calibrated within the last 6 months. +- [ ] Permit to Work issued and signed. + +## Recording Results +| Parameter | Value | +|-----------|-------| +| Test date | ______ | +| Test pressure (psi/bar) | ______ | +| Hold time (minutes) | ______ | +| Ambient temp (°C) | ______ | +| Pressure at start | ______ | +| Pressure at end | ______ | +| Pass / Fail | ______ | +| Inspector name | ______ | + +## Failure Procedure +1. Depressurise the system slowly. +2. Identify the leak using approved detection methods. +3. Repair and re-inspect per applicable code (ASME B31.3 or PD 5500). +4. Repeat the pressure test from the beginning. 
diff --git a/samples/js/local-rag/docs/valve-inspection.md b/samples/js/local-rag/docs/valve-inspection.md new file mode 100644 index 00000000..7acec568 --- /dev/null +++ b/samples/js/local-rag/docs/valve-inspection.md @@ -0,0 +1,59 @@ +--- +title: Valve Inspection and Maintenance Guide +category: Maintenance +id: valve-inspection +--- + +# Valve Inspection and Maintenance Guide + +## Scope +This guide covers routine inspection and maintenance of isolation valves, control valves, and safety relief valves on gas field production equipment. + +## Inspection Schedule + +| Valve Type | Frequency | Method | +|-----------|-----------|--------| +| Ball valves (isolation) | Quarterly | Visual + operational stroke | +| Gate valves | Bi-annually | Visual + ultrasonic wall thickness | +| Globe valves (control) | Quarterly | Visual + calibration check | +| Safety Relief Valves (SRV) | Annually | Bench test (pop test) | +| Check valves | Bi-annually | Visual + internal inspection | + +## Ball Valve Inspection + +### Visual Checks +1. Check for external corrosion, especially at the stem and body joint. +2. Verify handle/actuator is intact and moves freely. +3. Inspect flange bolts for correct torque and corrosion. +4. Look for signs of leakage at the stem packing and body seals. + +### Operational Stroke Test +1. Record the current valve position. +2. Slowly cycle the valve from fully open → fully closed → fully open. +3. Observe for stiffness, irregular torque, or unusual noise. +4. If the valve fails to stroke smoothly, **tag it out of service** and schedule repair. + +## Safety Relief Valve Maintenance +- **Do NOT adjust** a safety relief valve in the field. +- All SRVs must be tested on a certified test bench. +- Record the set pressure, reseat pressure, and blowdown percentage. +- Replace any SRV that fails to lift within ±3% of the set pressure. +- After testing, apply a tamper-proof seal and attach a new test tag showing: date, set pressure, and next test due date. 
+ +## Valve Packing Replacement +1. Isolate the valve (double block and bleed). +2. Confirm zero energy (pressure gauge reads zero, bleed valve open). +3. Remove the packing gland nuts and old packing rings. +4. Clean the stem and stuffing box with a lint-free cloth. +5. Install new packing rings — stagger the ring joints by 90°. +6. Tighten the gland evenly. Do NOT over-tighten. +7. Stroke the valve and check for leaks before returning to service. + +## Common Failure Modes +| Symptom | Likely Cause | Action | +|---------|-------------|--------| +| Valve won't close fully | Debris in seat, seat damage | Flush or replace seat | +| Stem leak | Worn packing | Replace packing | +| Excessive torque to operate | Corrosion, lack of lubrication | Lubricate or overhaul | +| Relief valve chattering | Set pressure too close to operating pressure | Review set-point vs operating pressure | +| Check valve backflow | Disc wear, spring failure | Replace internals | diff --git a/samples/js/local-rag/package.json b/samples/js/local-rag/package.json new file mode 100644 index 00000000..8c76bae9 --- /dev/null +++ b/samples/js/local-rag/package.json @@ -0,0 +1,31 @@ +{ + "name": "gas-field-local-rag", + "version": "2.0.0", + "description": "Offline RAG-powered support agent for gas field engineers using Foundry Local", + "type": "module", + "scripts": { + "ingest": "node src/ingest.js", + "start": "node src/server.js", + "dev": "node --watch src/server.js", + "test": "node --test test/*.test.js" + }, + "dependencies": { + "better-sqlite3": "^11.7.0", + "express": "^4.21.0", + "foundry-local-sdk": "^0.5.1" + }, + "license": "MIT", + "keywords": [ + "rag", + "offline-ai", + "foundry-local", + "phi-3", + "gas-field", + "support-agent", + "sqlite", + "tfidf" + ], + "engines": { + "node": ">=20.0.0" + } +} diff --git a/samples/js/local-rag/public/index.html b/samples/js/local-rag/public/index.html new file mode 100644 index 00000000..74da5004 --- /dev/null +++ 
b/samples/js/local-rag/public/index.html @@ -0,0 +1,698 @@ + + + + + + Gas Field Support Agent (RAG) + + + + +
+

+ 🔧 + Gas Field Support Agent (RAG) +

+
+ + Connecting... +
+
+ +
+ + + + + + + + +
+ +
+
+ Initializing...
+ Starting the AI model. This may take a moment on first run.

+ The chat will be available once the model is loaded. +
+
+ +
+ + + +
/**
 * Foundry Local chat engine.
 *
 * Discovers, downloads (cache-aware) and loads a local model via the
 * Foundry Local SDK, then answers queries with RAG: retrieves relevant
 * chunks from the SQLite-backed vector store, injects them into the
 * prompt, and runs inference fully on-device. Download/load progress is
 * reported through an optional status callback for the web UI.
 */
export class ChatEngine {
  constructor() {
    this.chatClient = null;   // native chat client, created in init()
    this.model = null;        // SDK model handle, set in init()
    this.store = null;        // VectorStore, opened in init()
    this.compactMode = false; // compact prompt + smaller budgets for edge devices
    this.modelAlias = null;   // alias of the selected model, set in init()
    /** @type {(status: {phase: string, message: string, progress?: number}) => void} */
    this._statusCallback = null;
  }

  /** Register a callback that receives init status updates for the UI. */
  onStatus(callback) {
    this._statusCallback = callback;
  }

  /**
   * Log a status update and forward it to the registered UI callback.
   * `progress` (0..1) is only included when provided.
   */
  _emitStatus(phase, message, progress) {
    const status = { phase, message, ...(progress !== undefined && { progress }) };
    console.log(`[ChatEngine] ${message}`);
    if (this._statusCallback) this._statusCallback(status);
  }

  /**
   * Initialize the engine: create Foundry Local manager, discover and load
   * the best model variant for this hardware, and open the vector store.
   * NOTE(review): assumes the `FoundryLocalManager.create`/`catalog` API
   * shape of the pinned foundry-local-sdk version — confirm on upgrade.
   */
  async init() {
    this._emitStatus("init", "Initializing Foundry Local SDK...");

    // Create the manager (requires appName)
    const manager = FoundryLocalManager.create({ appName: "gas-field-local-rag" });
    const catalog = manager.catalog;

    this._emitStatus("catalog", "Discovering available models...");
    this.model = await catalog.getModel(config.model);
    this.modelAlias = this.model.alias;

    // The SDK auto-selects the best variant for this hardware (GPU > NPU > CPU)
    this._emitStatus("variant", `Selected model: ${this.modelAlias}`);

    // Download the model if not already cached, with progress reporting
    if (!this.model.isCached) {
      this._emitStatus("download", `Downloading ${this.modelAlias}... This may take a few minutes on first run.`, 0);
      await this.model.download((progress) => {
        const pct = Math.round(progress * 100);
        this._emitStatus("download", `Downloading ${this.modelAlias}... ${pct}%`, progress);
      });
      this._emitStatus("download", `Download complete.`, 1);
    } else {
      this._emitStatus("cached", `Model ${this.modelAlias} is already cached.`);
    }

    // Load the model into memory
    this._emitStatus("loading", `Loading ${this.modelAlias} into memory...`);
    await this.model.load();

    // Create the native chat client with performance settings pre-configured
    this.chatClient = this.model.createChatClient();
    this.chatClient.settings.temperature = 0.1; // Low for deterministic, safety-critical responses
    this._emitStatus("ready", `Model ready: ${this.modelAlias}`);

    // Open the local vector store
    this.store = new VectorStore(config.dbPath);
    const count = this.store.count();
    this._emitStatus("ready", `Vector store ready: ${count} chunks indexed.`);

    if (count === 0) {
      console.warn("[ChatEngine] WARNING: No documents ingested. Run 'npm run ingest' first.");
    }
  }

  /** Expose the vector store for direct operations (e.g. upload ingestion). */
  getStore() {
    return this.store;
  }

  /**
   * Set compact mode for extreme latency / edge devices: smaller system
   * prompt, fewer retrieved chunks, lower max-token budget.
   */
  setCompactMode(enabled) {
    this.compactMode = enabled;
    console.log(`[ChatEngine] Compact mode: ${enabled ? "ON" : "OFF"}`);
  }

  /**
   * Retrieve relevant context from the local knowledge base.
   * Compact mode caps retrieval at 3 chunks to protect the context window.
   */
  retrieve(query) {
    const topK = this.compactMode ? Math.min(config.topK, 3) : config.topK;
    return this.store.search(query, topK);
  }

  /**
   * Format retrieved chunks into a context block for the prompt.
   */
  _buildContext(chunks) {
    if (chunks.length === 0) {
      return "No relevant documents found in local knowledge base.";
    }

    return chunks
      .map(
        (c, i) =>
          `--- Document ${i + 1}: ${c.title} [${c.category}] ---\n${c.content}`
      )
      .join("\n\n");
  }

  /** Build the messages array shared by query() and queryStream(). */
  _buildMessages(userMessage, history, context) {
    const systemPrompt = this.compactMode ? SYSTEM_PROMPT_COMPACT : SYSTEM_PROMPT;
    return [
      { role: "system", content: systemPrompt },
      {
        role: "system",
        content: `Retrieved context from local knowledge base:\n\n${context}`,
      },
      ...history,
      { role: "user", content: userMessage },
    ];
  }

  /** Map retrieved chunks to the source-citation metadata returned to clients. */
  _toSources(chunks) {
    return chunks.map((c) => ({
      title: c.title,
      category: c.category,
      docId: c.doc_id,
      score: Math.round(c.score * 100) / 100,
    }));
  }

  /**
   * Generate a response for a user query (non-streaming).
   * Returns { text, sources } where sources cite the chunks used.
   */
  async query(userMessage, history = []) {
    // 1. Retrieve relevant chunks and build the prompt
    const chunks = this.retrieve(userMessage);
    const messages = this._buildMessages(userMessage, history, this._buildContext(chunks));

    // 2. Call the local model via the native chat client
    this.chatClient.settings.maxTokens = this.compactMode ? 512 : 1024;
    const response = await this.chatClient.completeChat(messages);

    return {
      text: response.choices[0].message.content,
      sources: this._toSources(chunks),
    };
  }

  /**
   * Generate a streaming response for a user query.
   * Yields a { type: "sources", data } event first, then { type: "text", data }
   * events as tokens arrive from the SDK's callback-based streaming API.
   */
  async *queryStream(userMessage, history = []) {
    // 1. Retrieve relevant chunks and build the prompt
    const chunks = this.retrieve(userMessage);
    const messages = this._buildMessages(userMessage, history, this._buildContext(chunks));

    this.chatClient.settings.maxTokens = this.compactMode ? 512 : 1024;

    // Buffer chunks from the callback and yield them as an async iterable
    const textChunks = [];
    let wake = null;
    let done = false;
    let streamError = null;
    const notify = () => {
      if (wake) { wake(); wake = null; }
    };

    // Fix: the original chained only .then(...), so a rejected stream promise
    // never set `done` — the generator hung forever on the wake promise and
    // the rejection was unhandled. Capture the error, mark done, and rethrow
    // after draining so the route handler sees the failure.
    const streamPromise = this.chatClient
      .completeStreamingChat(messages, (chunk) => {
        textChunks.push(chunk);
        notify();
      })
      .then(() => {
        done = true;
        notify();
      })
      .catch((err) => {
        streamError = err;
        done = true;
        notify();
      });

    // Yield sources metadata first
    yield { type: "sources", data: this._toSources(chunks) };

    // Yield text chunks from the SDK streaming callback buffer
    while (!done || textChunks.length > 0) {
      if (textChunks.length === 0 && !done) {
        await new Promise((r) => { wake = r; });
      }
      while (textChunks.length > 0) {
        const chunk = textChunks.shift();
        const content = chunk.choices?.[0]?.delta?.content;
        if (content) {
          yield { type: "text", data: content };
        }
      }
    }

    // Ensure the stream promise settles, then surface any captured failure
    await streamPromise;
    if (streamError) throw streamError;
  }

  /** Best-effort shutdown: unload the model (ignoring errors) and close the store. */
  close() {
    if (this.model) {
      this.model.unload().catch(() => {});
    }
    if (this.store) this.store.close();
  }
}
/**
 * Document chunking utility.
 * Splits markdown documents into overlapping chunks suitable for RAG retrieval.
 */

/**
 * Parse front-matter (YAML-like `key: value` lines between `---` fences)
 * from a markdown document.
 * @returns {{meta: Object, body: string}} metadata map and remaining body;
 *   when no front-matter is present, `meta` is empty and `body` is the input.
 */
export function parseFrontMatter(text) {
  const match = text.match(/^---\r?\n([\s\S]*?)\r?\n---\r?\n([\s\S]*)$/);
  if (!match) return { meta: {}, body: text };

  const meta = {};
  for (const line of match[1].split("\n")) {
    const idx = line.indexOf(":");
    if (idx > 0) {
      meta[line.slice(0, idx).trim()] = line.slice(idx + 1).trim();
    }
  }
  return { meta, body: match[2] };
}

/**
 * Split text into chunks of approximately `maxTokens` tokens
 * with `overlapTokens` overlap between consecutive chunks.
 * Uses whitespace-based token approximation (good enough for local RAG).
 * Degenerate parameters are clamped so the loop always terminates.
 */
export function chunkText(text, maxTokens = 400, overlapTokens = 50) {
  const words = text.split(/\s+/).filter(Boolean);

  // Fix: clamp the window parameters so `start` always advances. Previously,
  // overlapTokens >= maxTokens made `start = end - overlapTokens`
  // non-increasing, producing an infinite loop.
  const size = Math.max(1, Math.floor(maxTokens));
  const overlap = Math.min(Math.max(0, Math.floor(overlapTokens)), size - 1);

  if (words.length <= size) return [text];

  const chunks = [];
  let start = 0;
  while (start < words.length) {
    const end = Math.min(start + size, words.length);
    chunks.push(words.slice(start, end).join(" "));
    if (end >= words.length) break;
    start = end - overlap;
  }
  return chunks;
}

/**
 * Build a simple term-frequency vector for a chunk of text.
 * Lowercases, strips everything except [a-z0-9-'], and drops 1-char tokens.
 * @returns {Map<string, number>} token -> occurrence count
 */
export function termFrequency(text) {
  const tf = new Map();
  const tokens = text
    .toLowerCase()
    .replace(/[^a-z0-9\-']/g, " ")
    .split(/\s+/)
    .filter((t) => t.length > 1);
  for (const t of tokens) {
    tf.set(t, (tf.get(t) || 0) + 1);
  }
  return tf;
}

/**
 * Compute cosine similarity between two term-frequency maps.
 * Returns 0 when either vector is empty (avoids division by zero).
 */
export function cosineSimilarity(a, b) {
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (const [term, freq] of a) {
    normA += freq * freq;
    if (b.has(term)) dot += freq * b.get(term);
  }
  for (const [, freq] of b) normB += freq * freq;
  if (normA === 0 || normB === 0) return 0;
  return dot / (Math.sqrt(normA) * Math.sqrt(normB));
}
/**
 * Ingestion entry point.
 * Reads every markdown document under docs/, splits each one into chunks,
 * and writes the chunks into the local SQLite vector store.
 *
 * Usage: node src/ingest.js
 */
async function ingest() {
  console.log("=== Gas Field RAG – Document Ingestion ===\n");

  const { docsDir } = config;
  if (!fs.existsSync(docsDir)) {
    console.error(`Docs directory not found: ${docsDir}`);
    process.exit(1);
  }

  const markdownFiles = fs
    .readdirSync(docsDir)
    .filter((name) => name.endsWith(".md"))
    .sort();

  if (markdownFiles.length === 0) {
    console.error("No markdown files found in docs/");
    process.exit(1);
  }

  console.log(`Found ${markdownFiles.length} documents.\n`);

  // Fresh ingestion each time: wipe the store before re-indexing.
  const store = new VectorStore(config.dbPath);
  store.clear();

  let chunkTotal = 0;

  for (const name of markdownFiles) {
    const source = fs.readFileSync(path.join(docsDir, name), "utf-8");
    const { meta, body } = parseFrontMatter(source);

    // Fall back to filename / defaults when front-matter fields are missing.
    const docId = meta.id || path.basename(name, ".md");
    const title = meta.title || name;
    const category = meta.category || "Uncategorised";

    const pieces = chunkText(body, config.chunkSize, config.chunkOverlap);
    pieces.forEach((piece, index) => {
      store.insert(docId, title, category, index, piece);
    });

    console.log(`  ✓ ${name} → ${pieces.length} chunk(s) [${category}]`);
    chunkTotal += pieces.length;
  }

  console.log(`\nIngestion complete: ${chunkTotal} chunks from ${markdownFiles.length} documents.`);
  console.log(`Database: ${config.dbPath}`);
  store.close();
}

ingest().catch((err) => {
  console.error("Ingestion failed:", err);
  process.exit(1);
});
a local, offline customer services and technical support agent for gas field inspection and maintenance engineers. + +Context: +- You run entirely on-device with no internet connectivity. +- You are embedded in a field application used during live gas infrastructure inspections and repairs. +- Your responses must be accurate, concise, safety-first, and aligned with gas engineering standards and field maintenance procedures. +- You use Retrieval-Augmented Generation (RAG) from a local document database containing approved gas engineering manuals, inspection procedures, fault codes, safety guidance, and maintenance playbooks. + +Primary Objectives: +1. Assist engineers in diagnosing issues encountered during gas field inspections. +2. Provide step-by-step repair and maintenance guidance. +3. Surface relevant safety warnings before any action. +4. Reference applicable standards, procedures, and documentation from the local knowledge base. +5. Operate reliably in offline, constrained environments. + +Behaviour Rules: +- Always prioritise safety. If a procedure involves risk, explicitly call it out. +- Do not hallucinate procedures, measurements, tolerances, or legal requirements. +- If the answer is not present in the local RAG data, say: + "This information is not available in the local knowledge base." +- Use clear, structured responses suitable for field engineers wearing PPE. +- Prefer bullet points and numbered steps. +- Assume noisy, time-critical environments. +- Keep answers SHORT – engineers are in the field. + +Response Format: +- **Summary** (1–2 lines) +- **Safety Warnings** (if applicable) +- **Step-by-step Guidance** +- **Reference** (document name + section) + +You must only use information retrieved from the local RAG database.`; + +// Compact prompt variant for extreme latency / edge devices +export const SYSTEM_PROMPT_COMPACT = `You are an offline gas field support agent. Safety-first. Concise answers only. 
+ +Rules: +- Prioritise safety warnings before any action. +- Use bullet points and numbered steps. +- If info is missing from RAG data, say: "Not in local knowledge base." +- Never invent procedures, tolerances, or legal requirements. + +Format: Summary → Safety → Steps → Reference.`; diff --git a/samples/js/local-rag/src/server.js b/samples/js/local-rag/src/server.js new file mode 100644 index 00000000..8f322be6 --- /dev/null +++ b/samples/js/local-rag/src/server.js @@ -0,0 +1,230 @@ +/** + * Express server – Gas Field RAG Application. + * Serves the web UI and provides the /api/chat endpoint. + * Fully offline, connects to Foundry Local on dynamic port. + */ +import express from "express"; +import path from "path"; +import fs from "fs"; +import { config } from "./config.js"; +import { ChatEngine } from "./chatEngine.js"; +import { parseFrontMatter, chunkText } from "./chunker.js"; + +const app = express(); + +// ── Security headers ── +app.use((_req, res, next) => { + res.setHeader("X-Content-Type-Options", "nosniff"); + res.setHeader("X-Frame-Options", "DENY"); + res.setHeader("Referrer-Policy", "no-referrer"); + res.setHeader("Permissions-Policy", "camera=(), microphone=(), geolocation=()"); + res.setHeader( + "Content-Security-Policy", + "default-src 'self'; script-src 'self' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; img-src 'self' data:;" + ); + next(); +}); + +app.use(express.json({ limit: "1mb" })); +app.use(express.text({ type: "text/markdown", limit: "2mb" })); +app.use(express.static(config.publicDir)); + +// ── Chat engine instance ── +const engine = new ChatEngine(); + +// ── API: Chat (non-streaming) ── +app.post("/api/chat", async (req, res) => { + try { + const { message, history, compact } = req.body; + if (!message || typeof message !== "string") { + return res.status(400).json({ error: "message is required" }); + } + + if (compact !== undefined) engine.setCompactMode(!!compact); + + const result = await engine.query( + message, + 
Array.isArray(history) ? history : [] + ); + res.json(result); + } catch (err) { + console.error("[API] Error:", err.message); + res.status(500).json({ error: "Internal server error" }); + } +}); + +// ── API: Chat (streaming via SSE) ── +app.post("/api/chat/stream", async (req, res) => { + try { + const { message, history, compact } = req.body; + if (!message || typeof message !== "string") { + return res.status(400).json({ error: "message is required" }); + } + + if (compact !== undefined) engine.setCompactMode(!!compact); + + res.setHeader("Content-Type", "text/event-stream"); + res.setHeader("Cache-Control", "no-cache"); + res.setHeader("Connection", "keep-alive"); + + const stream = engine.queryStream( + message, + Array.isArray(history) ? history : [] + ); + + for await (const chunk of stream) { + res.write(`data: ${JSON.stringify(chunk)}\n\n`); + } + + res.write("data: [DONE]\n\n"); + res.end(); + } catch (err) { + console.error("[API] Stream error:", err.message); + res.write(`data: ${JSON.stringify({ type: "error", data: "Internal server error" })}\n\n`); + res.end(); + } +}); + +// ── API: Upload document ── +app.post("/api/upload", express.raw({ type: "*/*", limit: "2mb" }), async (req, res) => { + try { + const filename = req.headers["x-filename"]; + if (!filename || typeof filename !== "string") { + return res.status(400).json({ error: "x-filename header is required" }); + } + + // Sanitise filename: allow only alphanumeric, hyphens, underscores, dots + const safeName = path.basename(filename).replace(/[^a-zA-Z0-9._-]/g, "_"); + if (!safeName.endsWith(".md") && !safeName.endsWith(".txt")) { + return res.status(400).json({ error: "Only .md and .txt files are accepted" }); + } + + const content = req.body.toString("utf-8"); + if (!content || content.length < 10) { + return res.status(400).json({ error: "Document content is too short" }); + } + + // Save file to docs/ (verify path stays inside docsDir) + const filePath = path.resolve(config.docsDir, 
safeName); + if (!filePath.startsWith(path.resolve(config.docsDir))) { + return res.status(400).json({ error: "Invalid filename" }); + } + if (!fs.existsSync(config.docsDir)) { + fs.mkdirSync(config.docsDir, { recursive: true }); + } + fs.writeFileSync(filePath, content, "utf-8"); + + // Chunk and ingest into vector store + const { meta, body } = parseFrontMatter(content); + const docId = meta.id || path.basename(safeName, path.extname(safeName)); + const title = meta.title || safeName; + const category = meta.category || "Uploaded"; + + // Remove previous version if re-uploading + const store = engine.getStore(); + store.removeByDocId(docId); + + const chunks = chunkText(body, config.chunkSize, config.chunkOverlap); + for (let i = 0; i < chunks.length; i++) { + store.insert(docId, title, category, i, chunks[i]); + } + + console.log(`[Upload] ${safeName} → ${chunks.length} chunk(s) ingested`); + + res.json({ + success: true, + filename: safeName, + docId, + title, + category, + chunks: chunks.length, + totalChunks: store.count(), + }); + } catch (err) { + console.error("[Upload] Error:", err.message); + res.status(500).json({ error: "Upload failed" }); + } +}); + +// ── API: List documents ── +app.get("/api/docs", (_req, res) => { + try { + const docs = engine.getStore().listDocs(); + res.json({ docs }); + } catch (err) { + console.error("[API] Docs list error:", err.message); + res.status(500).json({ error: "Failed to list documents" }); + } +}); + +// ── API: Health check ── +let engineReady = false; +let lastStatus = { phase: "init", message: "Starting..." }; + +app.get("/api/health", (_req, res) => { + res.json({ status: engineReady ? 
"ok" : "loading", model: config.model, ...lastStatus }); +}); + +// ── API: Init status stream (SSE) — shows download/load progress to the UI ── +const statusClients = new Set(); + +app.get("/api/status", (_req, res) => { + res.setHeader("Content-Type", "text/event-stream"); + res.setHeader("Cache-Control", "no-cache"); + res.setHeader("Connection", "keep-alive"); + + // Send current status immediately + res.write(`data: ${JSON.stringify(lastStatus)}\n\n`); + + if (engineReady) { + res.write(`data: ${JSON.stringify({ phase: "ready", message: "Ready" })}\n\n`); + res.end(); + return; + } + + statusClients.add(res); + _req.on("close", () => statusClients.delete(res)); +}); + +function broadcastStatus(status) { + lastStatus = status; + for (const client of statusClients) { + client.write(`data: ${JSON.stringify(status)}\n\n`); + if (status.phase === "ready") { + client.end(); + } + } + if (status.phase === "ready") statusClients.clear(); +} + +// ── Fallback: serve index.html for SPA ── +app.get("*", (_req, res) => { + res.sendFile(path.join(config.publicDir, "index.html")); +}); + +// ── Start server ── +async function start() { + console.log("=== Gas Field RAG – Local Support Agent ===\n"); + + // Register status callback to relay progress to connected UI clients + engine.onStatus((status) => broadcastStatus(status)); + + // Start the HTTP server first so the UI is immediately accessible + app.listen(config.port, config.host, () => { + console.log(`[Server] UI available at http://${config.host}:${config.port}`); + console.log("[Server] Initializing model in background...\n"); + }); + + // Initialize the engine (downloads model if needed, loads it) + await engine.init(); + engineReady = true; + broadcastStatus({ phase: "ready", message: "Ready" }); + + console.log("[Server] Fully offline – no outbound connections.\n"); +} + +start().catch((err) => { + console.error("Failed to start:", err); + broadcastStatus({ phase: "error", message: err.message || "Failed to start" 
}); + process.exit(1); +}); diff --git a/samples/js/local-rag/src/vectorStore.js b/samples/js/local-rag/src/vectorStore.js new file mode 100644 index 00000000..4afc27e7 --- /dev/null +++ b/samples/js/local-rag/src/vectorStore.js @@ -0,0 +1,145 @@ +/** + * Local vector store backed by SQLite. + * Stores document chunks and their term-frequency vectors for offline RAG retrieval. + * + * Performance optimisations: + * - Inverted index: maps terms -> chunk IDs for fast candidate filtering + * - Row cache: parsed TF maps kept in memory to avoid JSON.parse on every query + * - Prepared statements: reused across calls + */ +import Database from "better-sqlite3"; +import path from "path"; +import fs from "fs"; +import { termFrequency, cosineSimilarity } from "./chunker.js"; + +export class VectorStore { + constructor(dbPath) { + // Ensure data directory exists + const dir = path.dirname(dbPath); + if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true }); + + this.db = new Database(dbPath); + this.db.pragma("journal_mode = WAL"); + this._init(); + + // In-memory caches for fast retrieval + this._rowCache = null; // Array of { id, doc_id, title, category, content, tf } + this._invertedIndex = null; // Map> + } + + _init() { + this.db.exec(` + CREATE TABLE IF NOT EXISTS chunks ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + doc_id TEXT NOT NULL, + title TEXT, + category TEXT, + chunk_index INTEGER NOT NULL, + content TEXT NOT NULL, + tf_json TEXT NOT NULL + ); + CREATE INDEX IF NOT EXISTS idx_doc_id ON chunks(doc_id); + `); + + // Prepare reusable statements + this._stmtInsert = this.db.prepare( + "INSERT INTO chunks (doc_id, title, category, chunk_index, content, tf_json) VALUES (?, ?, ?, ?, ?, ?)" + ); + this._stmtAll = this.db.prepare("SELECT * FROM chunks"); + this._stmtCount = this.db.prepare("SELECT COUNT(*) as cnt FROM chunks"); + this._stmtListDocs = this.db.prepare( + "SELECT doc_id, title, category, COUNT(*) as chunks FROM chunks GROUP BY doc_id ORDER BY title" + 
); + this._stmtDeleteDoc = this.db.prepare("DELETE FROM chunks WHERE doc_id = ?"); + } + + /** Invalidate in-memory caches (called after any mutation). */ + _invalidateCache() { + this._rowCache = null; + this._invertedIndex = null; + } + + /** Build or return the in-memory row cache and inverted index. */ + _ensureCache() { + if (this._rowCache) return; + + const rows = this._stmtAll.all(); + this._rowCache = rows.map((row) => { + const tf = new Map(JSON.parse(row.tf_json)); + return { id: row.id, doc_id: row.doc_id, title: row.title, category: row.category, content: row.content, tf }; + }); + + // Build inverted index: term -> set of row indices + this._invertedIndex = new Map(); + for (let i = 0; i < this._rowCache.length; i++) { + for (const term of this._rowCache[i].tf.keys()) { + if (!this._invertedIndex.has(term)) { + this._invertedIndex.set(term, new Set()); + } + this._invertedIndex.get(term).add(i); + } + } + } + + /** Remove all existing chunks (for fresh re-ingestion). */ + clear() { + this.db.exec("DELETE FROM chunks"); + this._invalidateCache(); + } + + /** Insert a single chunk. */ + insert(docId, title, category, chunkIndex, content) { + const tf = termFrequency(content); + const tfJson = JSON.stringify([...tf]); + this._stmtInsert.run(docId, title, category, chunkIndex, content, tfJson); + this._invalidateCache(); + } + + /** Retrieve top-K most relevant chunks for a query. 
*/ + search(query, topK = 5) { + const queryTf = termFrequency(query); + this._ensureCache(); + + // Use inverted index to find candidate chunks that share at least one term + const candidateIndices = new Set(); + for (const term of queryTf.keys()) { + const indices = this._invertedIndex.get(term); + if (indices) { + for (const idx of indices) candidateIndices.add(idx); + } + } + + // Score only candidates instead of all rows + const scored = []; + for (const idx of candidateIndices) { + const row = this._rowCache[idx]; + const score = cosineSimilarity(queryTf, row.tf); + if (score > 0) { + scored.push({ ...row, score, tf_json: undefined }); + } + } + + scored.sort((a, b) => b.score - a.score); + return scored.slice(0, topK); + } + + /** Remove all chunks for a specific document. */ + removeByDocId(docId) { + this._stmtDeleteDoc.run(docId); + this._invalidateCache(); + } + + /** Get total chunk count. */ + count() { + return this._stmtCount.get().cnt; + } + + /** List distinct documents in the store. */ + listDocs() { + return this._stmtListDocs.all(); + } + + close() { + this.db.close(); + } +} diff --git a/samples/js/native-chat-completions/app.js b/samples/js/native-chat-completions/app.js index af566ef7..7efac872 100644 --- a/samples/js/native-chat-completions/app.js +++ b/samples/js/native-chat-completions/app.js @@ -1,5 +1,15 @@ import { FoundryLocalManager } from 'foundry-local-sdk'; +/** Render a CLI progress bar for model download. 
*/ +function renderProgressBar(label, progress) { + const barWidth = 30; + const filled = Math.round((progress / 100) * barWidth); + const empty = barWidth - filled; + const bar = '█'.repeat(filled) + '░'.repeat(empty); + process.stdout.write(`\r${label}: [${bar}] ${progress.toFixed(1)}%`); + if (progress >= 100) process.stdout.write('\n'); +} + // Initialize the Foundry Local SDK console.log('Initializing Foundry Local SDK...'); @@ -10,20 +20,24 @@ const manager = FoundryLocalManager.create({ console.log('✓ SDK initialized successfully'); // Get the model object -const modelAlias = 'qwen2.5-0.5b'; // Using an available model from the list above +const modelAlias = 'qwen2.5-0.5b'; const model = await manager.catalog.getModel(modelAlias); -// Download the model -console.log(`\nDownloading model ${modelAlias}...`); -await model.download((progress) => { - process.stdout.write(`\rDownloading... ${progress.toFixed(2)}%`); -}); -console.log('\n✓ Model downloaded'); +// Check cache before downloading — skip download if model is already cached +if (!model.isCached) { + console.log(`\nModel "${modelAlias}" not found in cache. 
Downloading...`); + await model.download((progress) => { + renderProgressBar('Downloading', progress); + }); + console.log('✓ Model downloaded'); +} else { + console.log(`\n✓ Model "${modelAlias}" already cached — skipping download`); +} -// Load the model -console.log(`\nLoading model ${modelAlias}...`); +// Load the model into memory +console.log(`Loading model ${modelAlias}...`); await model.load(); -console.log('✓ Model loaded'); +console.log('✓ Model loaded and ready'); // Create chat client console.log('\nCreating chat client...'); diff --git a/samples/js/tool-calling-foundry-local/src/app.js b/samples/js/tool-calling-foundry-local/src/app.js index f11eacdd..57fc5d01 100644 --- a/samples/js/tool-calling-foundry-local/src/app.js +++ b/samples/js/tool-calling-foundry-local/src/app.js @@ -33,7 +33,18 @@ async function runToolCallingExample() { } console.log(`Loading model ${model.id}...`); - await model.download(); + if (!model.isCached) { + console.log('Model not in cache. Downloading...'); + await model.download((progress) => { + const barWidth = 30; + const filled = Math.round((progress / 100) * barWidth); + const bar = '█'.repeat(filled) + '░'.repeat(barWidth - filled); + process.stdout.write(`\rDownloading: [${bar}] ${progress.toFixed(1)}%`); + if (progress >= 100) process.stdout.write('\n'); + }); + } else { + console.log('✓ Model already cached — skipping download'); + } await model.load(); console.log('✓ Model loaded'); diff --git a/samples/js/web-server-example/app.js b/samples/js/web-server-example/app.js index 5e97edfc..0443fdc1 100644 --- a/samples/js/web-server-example/app.js +++ b/samples/js/web-server-example/app.js @@ -14,20 +14,28 @@ const manager = FoundryLocalManager.create({ console.log('✓ SDK initialized successfully'); // Get the model object -const modelAlias = 'qwen2.5-0.5b'; // Using an available model from the list above +const modelAlias = 'qwen2.5-0.5b'; const model = await manager.catalog.getModel(modelAlias); -// Download the model 
-console.log(`\nDownloading model ${modelAlias}...`); -await model.download((progress) => { - process.stdout.write(`\rDownloading... ${progress.toFixed(2)}%`); -}); -console.log('\n✓ Model downloaded'); +// Check cache before downloading — skip download if model is already cached +if (!model.isCached) { + console.log(`\nModel "${modelAlias}" not found in cache. Downloading...`); + await model.download((progress) => { + const barWidth = 30; + const filled = Math.round((progress / 100) * barWidth); + const bar = '█'.repeat(filled) + '░'.repeat(barWidth - filled); + process.stdout.write(`\rDownloading: [${bar}] ${progress.toFixed(1)}%`); + if (progress >= 100) process.stdout.write('\n'); + }); + console.log('✓ Model downloaded'); +} else { + console.log(`\n✓ Model "${modelAlias}" already cached — skipping download`); +} -// Load the model -console.log(`\nLoading model ${modelAlias}...`); +// Load the model into memory +console.log(`Loading model ${modelAlias}...`); await model.load(); -console.log('✓ Model loaded'); +console.log('✓ Model loaded and ready'); // Start the web service console.log('\nStarting web service...'); diff --git a/samples/python/agent-framework/.env.example b/samples/python/agent-framework/.env.example new file mode 100644 index 00000000..2f0bc87c --- /dev/null +++ b/samples/python/agent-framework/.env.example @@ -0,0 +1,8 @@ +# ── Model ────────────────────────────────────────────────── +MODEL_ALIAS=qwen2.5-7b + +# ── Documents folder ────────────────────────────────────── +DOCS_PATH=./data + +# ── Logging ─────────────────────────────────────────────── +LOG_LEVEL=INFO diff --git a/samples/python/agent-framework/README.md b/samples/python/agent-framework/README.md new file mode 100644 index 00000000..23368e94 --- /dev/null +++ b/samples/python/agent-framework/README.md @@ -0,0 +1,132 @@ +# Agent Framework + Foundry Local + +A multi-agent orchestration sample powered by [Microsoft Agent Framework](https://pypi.org/project/agent-framework-core/) 
and [Foundry Local](https://foundrylocal.ai). All inference runs **on-device** through Foundry Local's OpenAI-compatible endpoint — no cloud API keys required. + +## What It Does + +Five specialised agents collaborate in configurable pipelines to research a user question: + +| Agent | Role | +|-------------|-------------------------------------------------| +| **Planner** | Breaks the question into 2-4 sub-tasks | +| **Retriever** | Searches local documents for relevant excerpts | +| **Critic** | Reviews output for gaps and contradictions | +| **Writer** | Synthesises a final report with citations | +| **ToolAgent** | Runs deterministic tools (word count, keywords)| + +### Orchestration Patterns + +- **Sequential** — Planner → Retriever → Critic ⇄ Retriever → Writer +- **Concurrent** — Retriever ‖ ToolAgent (fan-out with `asyncio.gather`) +- **Full (hybrid)** — Sequential planning, concurrent retrieval, feedback loop, then synthesis + +### Interactive Demos + +The web UI also ships five standalone demos: Weather Tools, Math Agent, Sentiment Analyser, Code Reviewer, and Multi-Agent Debate. + +## Prerequisites + +- **Python 3.10+** +- **Foundry Local** installed and available on PATH — see [foundrylocal.ai](https://foundrylocal.ai) + +## Quick Start + +```bash +# Clone the repo and navigate to this sample +cd samples/python/agent-framework + +# Create a virtual environment (recommended) +python -m venv .venv +source .venv/bin/activate # Linux/macOS +.venv\Scripts\activate # Windows + +# Install dependencies +pip install -e ".[dev]" + +# (Optional) copy and edit the environment config +cp .env.example .env + +# Run the web UI (starts Flask on http://localhost:5000) +python -m src.app --web + +# Or run a question directly from the CLI +python -m src.app "What orchestration patterns exist for multi-agent systems?" 
+ +# Choose orchestration mode (sequential or full) +python -m src.app --mode sequential "Explain Foundry Local architecture" +``` + +The web UI starts at **http://localhost:5000**. On first run, Foundry Local will download the model if it is not already cached. + +## Project Structure + +``` +agent-framework/ +├── data/ # Sample documents loaded by the Retriever agent +│ ├── agent_framework_guide.md +│ ├── foundry_local_overview.md +│ └── orchestration_patterns.md +├── src/app/ +│ ├── __init__.py +│ ├── __main__.py # CLI entrypoint (web / cli / tools) +│ ├── foundry_boot.py # Bootstrap Foundry Local, get connection info +│ ├── agents.py # Agent factories and tool functions +│ ├── documents.py # Document loader with chunking +│ ├── orchestrator.py # Sequential, concurrent, and hybrid pipelines +│ ├── tool_demo.py # Standalone tool-calling demo +│ ├── web.py # Flask server with SSE streaming +│ ├── templates/ +│ │ └── index.html # Web UI with real-time pipeline visualisation +│ └── demos/ # Interactive demo modules +│ ├── __init__.py +│ ├── registry.py # Demo registry +│ ├── weather_tools.py +│ ├── math_agent.py +│ ├── sentiment_analyzer.py +│ ├── code_reviewer.py +│ └── multi_agent_debate.py +├── tests/ +│ └── test_smoke.py # Smoke tests (imports, doc loader, etc.) +├── pyproject.toml # Project metadata & dependencies +├── requirements.txt # Pip-installable dependencies +├── .env.example # Environment variable template +└── README.md # This file +``` + +## Configuration + +| Variable | Default | Description | +|--------------------|----------------|---------------------------------------------| +| `MODEL_ALIAS` | `qwen2.5-0.5b` | Foundry Local model alias | +| `DOCS_PATH` | `./data` | Path to documents folder | +| `LOG_LEVEL` | `INFO` | Python logging level | +| `FOUNDRY_ENDPOINT` | *(auto)* | Override to skip local bootstrap | +| `FOUNDRY_API_KEY` | `none` | API key when using an external endpoint | + +## How It Works + +1. 
**Bootstrap** — `FoundryLocalBootstrapper` starts the Foundry Local service, resolves the model alias, and downloads the model if not cached. +2. **Document loading** — Markdown and text files from `data/` are chunked and passed as context to the Retriever agent. +3. **Orchestration** — agents are wired together per the selected pattern. Each step emits structured JSON events. +4. **Streaming** — the Flask server streams events via SSE so the web UI can render pipeline progress in real time. + +## Tests + +```bash +pytest tests/ -v +``` + +The smoke tests verify imports, document loading, the bootstrapper's environment override path, and the demo registry. + +## Troubleshooting + +| Symptom | Fix | +|---------|-----| +| `ModuleNotFoundError: agent_framework` | `pip install agent-framework-core==1.0.0b260130` | +| Model download hangs | Check network and ensure Foundry Local is on PATH | +| `Connection refused` on port 5273 | Foundry Local service failed to start — run `foundry-local` manually to see errors | +| Flask port 5000 in use | Set `FLASK_PORT` env var or kill the conflicting process | + +## License + +This sample is provided under the [MIT License](../../../LICENSE). diff --git a/samples/python/agent-framework/data/agent_framework_guide.md b/samples/python/agent-framework/data/agent_framework_guide.md new file mode 100644 index 00000000..5564b9a1 --- /dev/null +++ b/samples/python/agent-framework/data/agent_framework_guide.md @@ -0,0 +1,54 @@ +# Microsoft Agent Framework Guide + +The Microsoft Agent Framework provides building blocks for creating multi-agent +applications in Python. Agents are autonomous units that receive instructions, +process messages, and optionally invoke tools. + +## Core Concepts + +### ChatAgent +A `ChatAgent` wraps a language model client with a persona (system instructions), +an optional set of tools, and a conversation history. 
You create agents via:
+
+```python
+from agent_framework import ChatAgent
+from agent_framework.openai import OpenAIChatClient
+
+client = OpenAIChatClient(api_key="...", base_url="...", model_id="phi-4-mini")
+agent = ChatAgent(chat_client=client, name="Planner", instructions="...")
+response = await agent.run("What is the capital of France?")
+```
+
+### Tools
+Tools are plain Python functions annotated with `Annotated[<type>, Field(...)]`
+parameters. The framework automatically generates JSON Schema for tool calling:
+
+```python
+from typing import Annotated
+from pydantic import Field
+
+def calculate_sum(
+    a: Annotated[int, Field(description="First number")],
+    b: Annotated[int, Field(description="Second number")],
+) -> str:
+    return f"The sum is {a + b}"
+```
+
+Register tools when creating an agent: `ChatAgent(..., tools=[calculate_sum])`.
+
+### Orchestration Patterns
+
+| Pattern | Description |
+|-------------|------------------------------------------------------------------|
+| Sequential | Agents run one after another; each receives the previous output. |
+| Concurrent | Multiple agents run in parallel (fan-out) on the same input. |
+| Feedback | A Critic agent reviews output and can request re-processing. |
+| Hybrid | Combines sequential, concurrent, and feedback patterns. |
+
+## Best Practices
+
+1. **Keep instructions focused** — each agent should have a single responsibility.
+2. **Limit context length** — chunk large documents before passing to agents.
+3. **Use tool calling** — delegate structured tasks to deterministic code.
+4. **Set loop limits** — always cap iterative feedback loops to prevent runaway costs.
+5. **Stream results** — use Server-Sent Events (SSE) for real-time UI updates.
diff --git a/samples/python/agent-framework/data/foundry_local_overview.md b/samples/python/agent-framework/data/foundry_local_overview.md new file mode 100644 index 00000000..f6b55488 --- /dev/null +++ b/samples/python/agent-framework/data/foundry_local_overview.md @@ -0,0 +1,42 @@ +# Foundry Local Overview + +Foundry Local is a lightweight runtime that lets developers run AI models directly on their +local machine — no cloud connection required. It is part of the Microsoft AI Foundry family +and designed for offline-first development, edge scenarios, and privacy-sensitive workloads. + +## Key Features + +- **Local-first inference** — models run on your device using CPU or GPU acceleration. +- **Model catalog** — browse and download curated models (Phi, Mistral, Qwen, etc.) via the SDK. +- **Cache management** — models are cached locally after the first download. The SDK exposes + helpers to check cache status before downloading, giving apps instant startup on repeat runs. +- **OpenAI-compatible endpoint** — Foundry Local exposes a REST API compatible with the + OpenAI Chat Completions spec, so existing OpenAI SDK code works with minimal changes. +- **Multi-language SDKs** — official SDKs for Python, JavaScript/TypeScript, C#, and Rust. + +## Architecture + +``` +┌──────────────┐ ┌──────────────────────┐ +│ Your App │──SDK──▶│ Foundry Local │ +│ (Python, │ │ Service (REST API) │ +│ JS, C#, │ │ │ +│ Rust) │ │ ┌────────────────┐ │ +│ │ HTTP │ │ Loaded Model │ │ +│ │◀───────│ │ (ONNX / GGUF) │ │ +└──────────────┘ │ └────────────────┘ │ + └──────────────────────┘ +``` + +The SDK handles service bootstrapping, model resolution, downloading, loading, and exposes +convenience methods for chat completions and audio transcription. + +## Typical Lifecycle + +1. **Bootstrap** — `FoundryLocalManager` starts (or connects to) the local service. +2. **Model resolution** — the SDK resolves an alias (e.g. `phi-4-mini`) to a specific variant. +3. 
**Cache check** — if the model is already cached, loading is near-instant. +4. **Download** — if not cached, the model is downloaded with progress feedback. +5. **Load** — the model is loaded into the inference engine. +6. **Inference** — your app sends chat messages and receives completions. +7. **Cleanup** — unload models and stop the service when done. diff --git a/samples/python/agent-framework/data/orchestration_patterns.md b/samples/python/agent-framework/data/orchestration_patterns.md new file mode 100644 index 00000000..d269f2c9 --- /dev/null +++ b/samples/python/agent-framework/data/orchestration_patterns.md @@ -0,0 +1,67 @@ +# Orchestration Patterns for Multi-Agent Systems + +This document describes common orchestration patterns used to coordinate +multiple AI agents in a workflow. + +## 1. Sequential Pipeline + +Agents run in a fixed order. Each agent receives the output of the previous one. + +``` +User Question → Planner → Retriever → Critic → Writer → Final Report +``` + +**When to use:** Research tasks where each step depends on the previous result. + +**Trade-offs:** Simple to reason about, but total latency equals the sum of all +agent execution times. + +## 2. Concurrent Fan-Out + +Multiple agents process the same input simultaneously using `asyncio.gather()`. + +``` + ┌─ Retriever ──┐ +Plan Text ──────┤ ├─► Merge + └─ ToolAgent ──┘ +``` + +**When to use:** Independent sub-tasks that can run in parallel, such as +document retrieval and text analysis. + +**Trade-offs:** Faster than sequential for independent work, but merging results +requires careful design. + +## 3. Critic Feedback Loop + +A Critic agent iteratively reviews outputs and may trigger re-processing. + +``` +Retriever output ──► Critic ──► Gap found? ──► Yes ──► Re-retrieve ──┐ + ▲ │ + └─────────────────────────────────────────────┘ + │ + No gaps ──► Continue +``` + +**When to use:** Tasks requiring quality assurance or iterative refinement. 
+
+**Trade-offs:** Improves output quality but adds latency. Always set a maximum
+loop count (e.g. `MAX_CRITIC_LOOPS = 2`) to prevent infinite loops.
+
+## 4. Hybrid Orchestration
+
+Combines sequential, concurrent, and feedback patterns into a single workflow.
+
+```
+Question ──► Planner (seq) ──► Retriever ‖ ToolAgent (concurrent) ──► Critic Loop ──► Writer (seq)
+```
+
+This is the pattern used by the agent-framework sample's `run_full_workflow()`.
+
+## Key Implementation Notes
+
+- Use `async`/`await` throughout for non-blocking execution.
+- Wrap agent calls with timing (`time.perf_counter()`) for observability.
+- Emit structured step events (JSON) for UI streaming via SSE.
+- Handle agent errors gracefully — a single agent failure shouldn't crash the workflow.
diff --git a/samples/python/agent-framework/pyproject.toml b/samples/python/agent-framework/pyproject.toml
new file mode 100644
index 00000000..68feeb61
--- /dev/null
+++ b/samples/python/agent-framework/pyproject.toml
@@ -0,0 +1,24 @@
+[build-system]
+requires = ["setuptools>=68.0"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "agent-framework-foundry-local"
+version = "0.1.0"
+description = "Multi-agent orchestration with Microsoft Agent Framework + Foundry Local"
+requires-python = ">=3.10"
+dependencies = [
+    "foundry-local-sdk>=0.5.1",
+    "agent-framework-core==1.0.0b260130",
+    "openai>=1.40.0",
+    "python-dotenv>=1.0.0",
+    "rich>=13.0.0",
+    "flask>=3.0.0",
+]
+
+[project.optional-dependencies]
+dev = ["pytest>=7.0", "pytest-asyncio>=0.21"]
+
+[tool.setuptools.packages.find]
+where = ["."]
+include = ["src*"]
diff --git a/samples/python/agent-framework/requirements.txt b/samples/python/agent-framework/requirements.txt
new file mode 100644
index 00000000..066679a1
--- /dev/null
+++ b/samples/python/agent-framework/requirements.txt
@@ -0,0 +1,6 @@
+foundry-local-sdk>=0.5.1
+agent-framework-core==1.0.0b260130
+openai>=1.40.0
+python-dotenv>=1.0.0
+rich>=13.0.0
+flask>=3.0.0 diff --git a/samples/python/agent-framework/src/app/__init__.py b/samples/python/agent-framework/src/app/__init__.py new file mode 100644 index 00000000..58134b02 --- /dev/null +++ b/samples/python/agent-framework/src/app/__init__.py @@ -0,0 +1 @@ +# src/app — Agent Framework + Foundry Local demo package diff --git a/samples/python/agent-framework/src/app/__main__.py b/samples/python/agent-framework/src/app/__main__.py new file mode 100644 index 00000000..c9943511 --- /dev/null +++ b/samples/python/agent-framework/src/app/__main__.py @@ -0,0 +1,85 @@ +""" +CLI Entry Point +──────────────── +Run as: python -m src.app +""" + +from __future__ import annotations + +import argparse +import asyncio +import logging +import os + +from dotenv import load_dotenv +from rich.console import Console +from rich.logging import RichHandler + +load_dotenv() +console = Console() + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Agent Framework + Foundry Local — Multi-Agent Research Demo", + ) + parser.add_argument("question", nargs="?", help="Research question (CLI mode)") + parser.add_argument("--docs", default=os.getenv("DOCS_PATH", "./data"), help="Documents folder") + parser.add_argument("--model", default=os.getenv("MODEL_ALIAS", "qwen2.5-0.5b"), help="Model alias") + parser.add_argument("--mode", choices=["sequential", "full"], default="full", help="Orchestration mode") + parser.add_argument("--web", action="store_true", help="Start Flask web server") + parser.add_argument("--port", type=int, default=5000, help="Web server port") + parser.add_argument("--log-level", default=os.getenv("LOG_LEVEL", "INFO"), help="Logging level") + args = parser.parse_args() + + logging.basicConfig( + level=getattr(logging, args.log_level.upper(), logging.INFO), + format="%(message)s", + handlers=[RichHandler(rich_tracebacks=True)], + ) + + os.environ["DOCS_PATH"] = args.docs + os.environ["MODEL_ALIAS"] = args.model + + from .foundry_boot import 
FoundryLocalBootstrapper + + boot = FoundryLocalBootstrapper(alias=args.model) + conn = boot.bootstrap() + + if args.web or args.question is None: + # Web mode + from .web import create_app + app = create_app(conn) + console.print(f"\n[bold green]Server running at http://localhost:{args.port}[/]\n") + app.run(host="0.0.0.0", port=args.port, debug=False) + else: + # CLI mode + from .documents import load_documents + from .orchestrator import run_full_workflow, run_sequential + + docs = load_documents(args.docs) + console.print(f"[cyan]Loaded {docs.file_count} files → {len(docs.chunks)} chunks[/]\n") + + async def run_cli(): + if args.mode == "sequential": + gen = run_sequential(conn, docs, args.question) + else: + gen = run_full_workflow(conn, docs, args.question) + + async for evt in gen: + if evt["type"] == "step_start": + console.print(f"\n[yellow]▶ {evt['agent']}:[/] {evt.get('description', '')}") + elif evt["type"] == "step_done": + console.print(f"[green]✓ {evt['agent']}[/] ({evt.get('elapsed', '?')}s)") + console.print(evt.get("output", "")) + elif evt["type"] == "complete": + console.print("\n[bold green]═══ Final Report ═══[/]") + console.print(evt.get("report", "")) + elif evt["type"] == "error": + console.print(f"[red]✗ Error:[/] {evt.get('message', '')}") + + asyncio.run(run_cli()) + + +if __name__ == "__main__": + main() diff --git a/samples/python/agent-framework/src/app/agents.py b/samples/python/agent-framework/src/app/agents.py new file mode 100644 index 00000000..7a4fe4e1 --- /dev/null +++ b/samples/python/agent-framework/src/app/agents.py @@ -0,0 +1,116 @@ +""" +Agent Factories +──────────────── +Create specialised ChatAgents for the multi-agent research workflow. 
def extract_keywords(
    text: Annotated[str, Field(description="Text to extract keywords from")],
) -> str:
    """Extract the most frequently repeated keywords from text.

    Tokenizes on word characters (4+ letters only), keeps tokens seen at
    least twice, and returns up to the 10 most frequent as a comma list.
    Ties keep first-seen order because the sort is stable.
    """
    tokens = re.findall(r"\b\w{4,}\b", text.lower())
    counts: dict[str, int] = {}
    for token in tokens:
        counts[token] = counts.get(token, 0) + 1
    # Only words that repeat count as "keywords".
    repeated = [(word, n) for word, n in counts.items() if n >= 2]
    if not repeated:
        return "Keywords: (none detected)"
    # Stable sort: most frequent first, insertion order preserved on ties.
    repeated.sort(key=lambda pair: pair[1], reverse=True)
    return "Keywords: " + ", ".join(word for word, _ in repeated[:10])
+ ), + ) + + +def create_retriever(conn: FoundryConnection, documents_text: str) -> ChatAgent: + """Create the Retriever agent that finds relevant snippets from documents.""" + return ChatAgent( + chat_client=_make_client(conn), + name="Retriever", + instructions=( + "You are a document retriever. Given sub-tasks, search the documents " + "below and return relevant excerpts with [source: filename] citations.\n\n" + "─── DOCUMENTS ───\n" + documents_text + ), + ) + + +def create_critic(conn: FoundryConnection) -> ChatAgent: + """Create the Critic agent that reviews output for gaps.""" + return ChatAgent( + chat_client=_make_client(conn), + name="Critic", + instructions=( + "You are a research critic. Review the plan AND retrieved snippets.\n" + "List any gaps, contradictions, or missing sub-topics.\n" + "If nothing is missing, respond with exactly: NO_GAPS_FOUND" + ), + ) + + +def create_writer(conn: FoundryConnection) -> ChatAgent: + """Create the Writer agent that produces the final report.""" + return ChatAgent( + chat_client=_make_client(conn), + name="Writer", + instructions=( + "You are a technical writer. Synthesize the plan, retrieved snippets, " + "keywords, and critic feedback into a clear, well-structured report.\n" + "Include [source: filename] citations where applicable." + ), + ) + + +def create_tool_agent(conn: FoundryConnection) -> ChatAgent: + """Create the ToolAgent that uses word_count and extract_keywords tools.""" + return ChatAgent( + chat_client=_make_client(conn), + name="ToolAgent", + instructions=( + "You are a text analysis agent. Use the provided tools to count words " + "and extract keywords from the text you receive." 
+ ), + tools=[word_count, extract_keywords], + ) diff --git a/samples/python/agent-framework/src/app/demos/__init__.py b/samples/python/agent-framework/src/app/demos/__init__.py new file mode 100644 index 00000000..f5a7b94a --- /dev/null +++ b/samples/python/agent-framework/src/app/demos/__init__.py @@ -0,0 +1,20 @@ +# src/app/demos — Demo Modules +# Each demo showcases different MAF + Foundry Local capabilities + +from .weather_tools import WeatherDemo +from .math_agent import MathAgentDemo +from .sentiment_analyzer import SentimentDemo +from .code_reviewer import CodeReviewerDemo +from .multi_agent_debate import DebateDemo +from .registry import DEMO_REGISTRY, get_demo, list_demos + +__all__ = [ + "WeatherDemo", + "MathAgentDemo", + "SentimentDemo", + "CodeReviewerDemo", + "DebateDemo", + "DEMO_REGISTRY", + "get_demo", + "list_demos", +] diff --git a/samples/python/agent-framework/src/app/demos/code_reviewer.py b/samples/python/agent-framework/src/app/demos/code_reviewer.py new file mode 100644 index 00000000..23860448 --- /dev/null +++ b/samples/python/agent-framework/src/app/demos/code_reviewer.py @@ -0,0 +1,228 @@ +""" +Demo: Code Reviewer +─────────────────── +Demonstrates code analysis tools for reviewing code snippets. 
+""" + +from __future__ import annotations + +import re +from typing import Annotated + +from agent_framework import ChatAgent +from agent_framework.openai import OpenAIChatClient +from pydantic import Field + +from ..foundry_boot import FoundryConnection +from .registry import DemoInfo, register_demo + + +# ─── Tool Functions ────────────────────────────────────────────── + +def check_code_style( + code: Annotated[str, Field(description="Code snippet to check for style issues")], + language: Annotated[str, Field(description="Programming language (python, javascript, etc.)")] = "python", +) -> str: + """Check code for common style issues.""" + issues = [] + lines = code.split("\n") + for i, line in enumerate(lines, 1): + if len(line) > 100: + issues.append(f"Line {i}: Line too long ({len(line)} chars, max 100)") + if line != line.rstrip(): + issues.append(f"Line {i}: Trailing whitespace") + if "\t" in line and " " in line: + issues.append(f"Line {i}: Mixed tabs and spaces") + + if language.lower() == "python": + if "import *" in code: + issues.append("Avoid 'import *' \u2014 use explicit imports") + if re.search(r"except\s*:", code): + issues.append("Avoid bare 'except:' \u2014 catch specific exceptions") + if re.search(r"def\s+\w+\([^)]*=\s*\[\]", code) or re.search(r"def\s+\w+\([^)]*=\s*\{\}", code): + issues.append("Mutable default argument detected \u2014 use None instead") + + if language.lower() == "javascript": + if "var " in code: + issues.append("Consider using 'let' or 'const' instead of 'var'") + if "==" in code and "===" not in code: + issues.append("Consider using '===' for strict equality") + + if not issues: + return "\u2705 No style issues found!" 
+ return "Style issues found:\n \u2022 " + "\n \u2022 ".join(issues) + + +def analyze_complexity( + code: Annotated[str, Field(description="Code snippet to analyze")], +) -> str: + """Analyze code complexity metrics.""" + lines = code.split("\n") + total_lines = len(lines) + code_lines = sum(1 for line in lines if line.strip() and not line.strip().startswith("#")) + comment_lines = sum(1 for line in lines if line.strip().startswith("#")) + blank_lines = sum(1 for line in lines if not line.strip()) + + func_pattern = r"(?:def|function|async function|const\s+\w+\s*=\s*(?:async\s*)?\()" + functions = len(re.findall(func_pattern, code)) + classes = len(re.findall(r"class\s+\w+", code)) + + control_keywords = ["if", "elif", "else", "for", "while", "try", "except", "with", "case", "switch"] + branches = sum(len(re.findall(rf"\b{kw}\b", code)) for kw in control_keywords) + + if branches <= 5: + complexity = "Low" + elif branches <= 15: + complexity = "Medium" + else: + complexity = "High" + + return ( + f"Code Complexity Analysis:\n" + f" Lines: {total_lines} total ({code_lines} code, {comment_lines} comments, {blank_lines} blank)\n" + f" Functions/Methods: {functions}\n" + f" Classes: {classes}\n" + f" Branches: {branches}\n" + f" Estimated complexity: {complexity}" + ) + + +def find_potential_bugs( + code: Annotated[str, Field(description="Code snippet to scan for potential bugs")], +) -> str: + """Scan code for potential bugs and issues.""" + warnings = [] + if re.search(r"==\s*None", code): + warnings.append("Use 'is None' instead of '== None'") + if re.search(r"!=\s*None", code): + warnings.append("Use 'is not None' instead of '!= None'") + if re.search(r"print\s*\(", code): + warnings.append("Debug print statement found \u2014 remove before production") + if re.search(r"TODO|FIXME|HACK|XXX", code, re.IGNORECASE): + warnings.append("TODO/FIXME comment found \u2014 address before release") + if re.search(r"password\s*=\s*[\"'][^\"']+[\"']", code, re.IGNORECASE): + 
warnings.append("\u26a0\ufe0f CRITICAL: Hardcoded password detected!") + if re.search(r"api[_-]?key\s*=\s*[\"'][^\"']+[\"']", code, re.IGNORECASE): + warnings.append("\u26a0\ufe0f CRITICAL: Hardcoded API key detected!") + if "eval(" in code: + warnings.append("\u26a0\ufe0f eval() is dangerous \u2014 avoid if possible") + if "exec(" in code: + warnings.append("\u26a0\ufe0f exec() is dangerous \u2014 avoid if possible") + if re.search(r"except[^:]*:\s*\n\s*pass", code): + warnings.append("Empty except block found \u2014 handle or log the exception") + if not warnings: + return "\u2705 No obvious bugs or issues detected!" + return "Potential issues found:\n \u2022 " + "\n \u2022 ".join(warnings) + + +def suggest_improvements( + code: Annotated[str, Field(description="Code snippet to review for improvements")], +) -> str: + """Suggest code improvements and best practices.""" + suggestions = [] + func_pattern = r"def\s+\w+\s*\([^)]*\):\s*\n((?:\s+.*\n)*)" + for match in re.finditer(func_pattern, code): + body = match.group(1) + if body.count("\n") > 30: + suggestions.append("Consider breaking long functions into smaller ones (>30 lines)") + break + if re.search(r"def\s+\w+", code) and not re.search(r'""".*?"""', code, re.DOTALL): + suggestions.append("Add docstrings to functions for better documentation") + if re.search(r"def\s+\w+\s*\([^)]+\)", code): + if not re.search(r"def\s+\w+\s*\([^)]*:\s*\w+", code): + suggestions.append("Consider adding type hints for better code clarity") + if re.search(r"[=<>+\-*/]\s*\d{2,}(?!\d)", code): + suggestions.append("Extract magic numbers into named constants") + long_lines = sum(1 for line in code.split("\n") if len(line) > 80) + if long_lines > 3: + suggestions.append(f"Break up {long_lines} long lines for better readability") + if re.search(r"\b[a-z]\s*=", code): + suggestions.append("Use descriptive variable names instead of single letters") + if not suggestions: + return "\u2705 Code looks good! 
No major improvements suggested." + return "Suggested improvements:\n \u2022 " + "\n \u2022 ".join(suggestions) + + +def count_elements( + code: Annotated[str, Field(description="Code snippet to analyze")], +) -> str: + """Count code elements like variables, functions, loops, etc.""" + elements = { + "variables": len(re.findall(r"\b\w+\s*=\s*(?!=)", code)), + "functions": len(re.findall(r"\bdef\s+\w+", code)), + "classes": len(re.findall(r"\bclass\s+\w+", code)), + "if_statements": len(re.findall(r"\bif\s+", code)), + "for_loops": len(re.findall(r"\bfor\s+", code)), + "while_loops": len(re.findall(r"\bwhile\s+", code)), + "try_blocks": len(re.findall(r"\btry\s*:", code)), + "imports": len(re.findall(r"\bimport\s+", code)), + "returns": len(re.findall(r"\breturn\s+", code)), + "comments": len(re.findall(r"#.*$", code, re.MULTILINE)), + } + lines = ["Code element count:"] + for element, count in elements.items(): + if count > 0: + lines.append(f" {element.replace('_', ' ').title()}: {count}") + return "\n".join(lines) + + +# ─── Demo Class ────────────────────────────────────────────────── + +class CodeReviewerDemo: + def __init__(self, conn: FoundryConnection): + self.conn = conn + self.agent = self._create_agent() + + def _create_agent(self) -> ChatAgent: + client = OpenAIChatClient( + api_key=self.conn.api_key, + base_url=self.conn.endpoint, + model_id=self.conn.model_id, + ) + return ChatAgent( + chat_client=client, + name="CodeReviewer", + instructions=( + "You are a code review assistant. Use the provided tools to analyze code:\n\n" + " \u2022 check_code_style: Check for style issues\n" + " \u2022 analyze_complexity: Get complexity metrics\n" + " \u2022 find_potential_bugs: Scan for bugs and issues\n" + " \u2022 suggest_improvements: Get improvement suggestions\n" + " \u2022 count_elements: Count code elements\n\n" + "When given code to review, use multiple tools to provide a comprehensive " + "review. Summarize your findings clearly." 
+ ), + tools=[check_code_style, analyze_complexity, find_potential_bugs, suggest_improvements, count_elements], + ) + + async def run(self, prompt: str) -> dict: + import time + t0 = time.perf_counter() + result = await self.agent.run(prompt) + elapsed = time.perf_counter() - t0 + text = re.sub(r".*?\s*", "", str(result), flags=re.DOTALL).strip() + return { + "prompt": prompt, + "response": text, + "elapsed": round(elapsed, 2), + "tools_available": ["check_code_style", "analyze_complexity", "find_potential_bugs", "suggest_improvements", "count_elements"], + } + + +# ─── Register ──────────────────────────────────────────────────── + +async def run_code_review_demo(conn: FoundryConnection, prompt: str) -> dict: + demo = CodeReviewerDemo(conn) + return await demo.run(prompt) + + +register_demo(DemoInfo( + id="code_reviewer", + name="Code Reviewer", + description="Code analysis agent that checks style, complexity, potential bugs, and suggests improvements.", + icon="\ud83d\udc68\u200d\ud83d\udcbb", + category="Tool Calling", + runner=run_code_review_demo, + tags=["tools", "function-calling", "code-analysis", "single-agent"], + suggested_prompt="Review this Python code:\n\ndef calc(x,y,z):\n result = x + y\n if result == None:\n return 0\n return result / z", +)) diff --git a/samples/python/agent-framework/src/app/demos/math_agent.py b/samples/python/agent-framework/src/app/demos/math_agent.py new file mode 100644 index 00000000..f2ff00c9 --- /dev/null +++ b/samples/python/agent-framework/src/app/demos/math_agent.py @@ -0,0 +1,201 @@ +""" +Demo: Math Agent +──────────────── +Demonstrates calculation tools and step-by-step reasoning. 
def calculate(
    expression: Annotated[str, Field(description="Math expression to evaluate, e.g. '(5 + 3) * 2'")],
) -> str:
    """Safely evaluate a mathematical expression.

    Only digits, the four arithmetic operators, dot, parentheses, and
    spaces are accepted; anything else is rejected before evaluation, so
    the sandboxed eval cannot reference names or builtins.
    """
    # Reject any character outside the arithmetic whitelist up front.
    for ch in expression:
        if ch not in "0123456789+-*/.() ":
            return f"Error: Expression contains invalid characters. Use only numbers and operators: + - * / ( )"
    try:
        value = eval(expression, {"__builtins__": {}}, {})  # noqa: S307
        # Present whole-number floats as ints (e.g. 4.0 -> 4).
        if isinstance(value, float) and value.is_integer():
            value = int(value)
        return f"Result: {expression} = {value}"
    except Exception as e:
        return f"Error evaluating '{expression}': {e}"
Annotated[str, Field(description="Source unit (e.g., 'km', 'miles', 'kg', 'lbs', 'celsius', 'fahrenheit')")], + to_unit: Annotated[str, Field(description="Target unit")], +) -> str: + """Convert between common units.""" + conversions = { + ("km", "miles"): lambda x: x * 0.621371, + ("miles", "km"): lambda x: x * 1.60934, + ("kg", "lbs"): lambda x: x * 2.20462, + ("lbs", "kg"): lambda x: x * 0.453592, + ("celsius", "fahrenheit"): lambda x: (x * 9 / 5) + 32, + ("fahrenheit", "celsius"): lambda x: (x - 32) * 5 / 9, + ("meters", "feet"): lambda x: x * 3.28084, + ("feet", "meters"): lambda x: x * 0.3048, + ("liters", "gallons"): lambda x: x * 0.264172, + ("gallons", "liters"): lambda x: x * 3.78541, + } + key = (from_unit.lower(), to_unit.lower()) + if key not in conversions: + available = ", ".join(f"{f}\u2192{t}" for f, t in conversions) + return f"Conversion not supported. Available: {available}" + result = conversions[key](value) + return f"{value} {from_unit} = {result:.4f} {to_unit}" + + +def compound_interest( + principal: Annotated[float, Field(description="Initial investment amount")], + rate: Annotated[float, Field(description="Annual interest rate as percentage (e.g., 5 for 5%)")], + years: Annotated[int, Field(description="Number of years")], + compounds_per_year: Annotated[int, Field(description="Times interest compounds per year (1=annual, 12=monthly, 365=daily)")] = 12, +) -> str: + """Calculate compound interest.""" + r = rate / 100 + n = compounds_per_year + t = years + amount = principal * (1 + r / n) ** (n * t) + interest = amount - principal + return ( + f"Compound Interest Calculation:\n" + f" Principal: ${principal:,.2f}\n" + f" Rate: {rate}% per year\n" + f" Time: {years} years\n" + f" Compounds: {n}x per year\n" + f" \u2192 Final amount: ${amount:,.2f}\n" + f" \u2192 Interest earned: ${interest:,.2f}" + ) + + +def statistics( + numbers: Annotated[str, Field(description="Comma-separated list of numbers, e.g., '1, 2, 3, 4, 5'")], +) -> str: + 
"""Calculate basic statistics for a list of numbers.""" + try: + nums = [float(n.strip()) for n in numbers.split(",")] + except ValueError: + return "Error: Invalid number format. Use comma-separated numbers like '1, 2, 3'" + if not nums: + return "Error: No numbers provided" + n = len(nums) + mean = sum(nums) / n + sorted_nums = sorted(nums) + if n % 2 == 0: + median = (sorted_nums[n // 2 - 1] + sorted_nums[n // 2]) / 2 + else: + median = sorted_nums[n // 2] + variance = sum((x - mean) ** 2 for x in nums) / n + std_dev = math.sqrt(variance) + return ( + f"Statistics for {n} numbers:\n" + f" Sum: {sum(nums)}\n" + f" Mean: {mean:.2f}\n" + f" Median: {median:.2f}\n" + f" Min: {min(nums)}\n" + f" Max: {max(nums)}\n" + f" Range: {max(nums) - min(nums)}\n" + f" Std Dev: {std_dev:.2f}" + ) + + +# ─── Demo Class ────────────────────────────────────────────────── + +class MathAgentDemo: + def __init__(self, conn: FoundryConnection): + self.conn = conn + self.agent = self._create_agent() + + def _create_agent(self) -> ChatAgent: + client = OpenAIChatClient( + api_key=self.conn.api_key, + base_url=self.conn.endpoint, + model_id=self.conn.model_id, + ) + return ChatAgent( + chat_client=client, + name="MathAssistant", + instructions=( + "You are a precise math assistant. ALWAYS use the provided tools " + "for calculations \u2014 never compute in your head.\n\n" + "Available tools:\n" + " \u2022 calculate: Evaluate math expressions\n" + " \u2022 percentage: Calculate percentages\n" + " \u2022 percentage_change: Calculate % change between values\n" + " \u2022 convert_units: Convert between units\n" + " \u2022 compound_interest: Calculate investment growth\n" + " \u2022 statistics: Compute stats for a list of numbers\n\n" + "Show your work by using tools step-by-step for complex problems." 
+ ), + tools=[calculate, percentage, percentage_change, convert_units, compound_interest, statistics], + ) + + async def run(self, prompt: str) -> dict: + import time + t0 = time.perf_counter() + result = await self.agent.run(prompt) + elapsed = time.perf_counter() - t0 + text = re.sub(r".*?\s*", "", str(result), flags=re.DOTALL).strip() + return { + "prompt": prompt, + "response": text, + "elapsed": round(elapsed, 2), + "tools_available": ["calculate", "percentage", "percentage_change", "convert_units", "compound_interest", "statistics"], + } + + +# ─── Register ──────────────────────────────────────────────────── + +async def run_math_demo(conn: FoundryConnection, prompt: str) -> dict: + demo = MathAgentDemo(conn) + return await demo.run(prompt) + + +register_demo(DemoInfo( + id="math_agent", + name="Math Calculator", + description="Precise calculation agent with tools for arithmetic, percentages, unit conversions, compound interest, and statistics.", + icon="\ud83d\udd22", + category="Tool Calling", + runner=run_math_demo, + tags=["tools", "function-calling", "calculations", "single-agent"], + suggested_prompt="If I invest $10,000 at 7% annual interest compounded monthly for 15 years, how much will I have? Also convert that to euros assuming 1 USD = 0.92 EUR.", +)) diff --git a/samples/python/agent-framework/src/app/demos/multi_agent_debate.py b/samples/python/agent-framework/src/app/demos/multi_agent_debate.py new file mode 100644 index 00000000..c0ec7ddb --- /dev/null +++ b/samples/python/agent-framework/src/app/demos/multi_agent_debate.py @@ -0,0 +1,189 @@ +""" +Demo: Multi-Agent Debate +──────────────────────── +Demonstrates multi-agent orchestration with opposing viewpoints. +Three agents debate a topic: Proponent, Opponent, and Moderator. 
+""" + +from __future__ import annotations + +import re +import time +from dataclasses import dataclass + +from agent_framework import ChatAgent +from agent_framework.openai import OpenAIChatClient + +from ..foundry_boot import FoundryConnection +from .registry import DemoInfo, register_demo + + +# ─── Debate Participants ───────────────────────────────────────── + +def _make_client(conn: FoundryConnection) -> OpenAIChatClient: + return OpenAIChatClient( + api_key=conn.api_key, + base_url=conn.endpoint, + model_id=conn.model_id, + ) + + +def create_proponent(conn: FoundryConnection) -> ChatAgent: + """Create agent that argues FOR the topic.""" + return ChatAgent( + chat_client=_make_client(conn), + name="Proponent", + instructions=( + "You are a skilled debater arguing IN FAVOR of the given topic.\n\n" + "Rules:\n" + " \u2022 Present 2-3 strong arguments supporting the position\n" + " \u2022 Use logic, examples, and evidence\n" + " \u2022 Be persuasive but respectful\n" + " \u2022 Keep your response to 3-4 paragraphs max\n\n" + "Start with: 'I argue IN FAVOR because...'" + ), + ) + + +def create_opponent(conn: FoundryConnection) -> ChatAgent: + """Create agent that argues AGAINST the topic.""" + return ChatAgent( + chat_client=_make_client(conn), + name="Opponent", + instructions=( + "You are a skilled debater arguing AGAINST the given topic.\n\n" + "Rules:\n" + " \u2022 Present 2-3 strong counter-arguments\n" + " \u2022 Respond to the previous speaker's points where relevant\n" + " \u2022 Use logic, examples, and evidence\n" + " \u2022 Keep your response to 3-4 paragraphs max\n\n" + "Start with: 'I argue AGAINST because...'" + ), + ) + + +def create_moderator(conn: FoundryConnection) -> ChatAgent: + """Create moderator agent that summarizes the debate.""" + return ChatAgent( + chat_client=_make_client(conn), + name="Moderator", + instructions=( + "You are an impartial debate moderator.\n\n" + "Your job:\n" + " 1. Summarize the key points from BOTH sides\n" + " 2. 
Identify the strongest argument from each side\n" + " 3. Declare which side presented a more compelling case\n" + " 4. Explain your reasoning briefly\n\n" + "Be fair and objective. Format:\n" + " \u2022 FOR side summary: ...\n" + " \u2022 AGAINST side summary: ...\n" + " \u2022 Verdict: [FOR/AGAINST] wins because..." + ), + ) + + +# ─── Debate Results ────────────────────────────────────────────── + +@dataclass +class DebateRound: + speaker: str + position: str + argument: str + elapsed: float + + +# ─── Demo Class ────────────────────────────────────────────────── + +class DebateDemo: + def __init__(self, conn: FoundryConnection): + self.conn = conn + self.proponent = create_proponent(conn) + self.opponent = create_opponent(conn) + self.moderator = create_moderator(conn) + + async def _run_agent(self, agent: ChatAgent, prompt: str) -> tuple[str, float]: + t0 = time.perf_counter() + result = await agent.run(prompt) + elapsed = time.perf_counter() - t0 + text = re.sub(r".*?\s*", "", str(result), flags=re.DOTALL).strip() + return text, elapsed + + async def run(self, topic: str) -> dict: + t0_total = time.perf_counter() + rounds = [] + + # Round 1: Proponent opens + pro_prompt = f'Topic for debate: "{topic}"\n\nPresent your opening arguments IN FAVOR of this topic.' + pro_argument, pro_time = await self._run_agent(self.proponent, pro_prompt) + rounds.append(DebateRound("Proponent", "FOR", pro_argument, pro_time)) + + # Round 2: Opponent responds + opp_prompt = ( + f'Topic for debate: "{topic}"\n\n' + f"The speaker FOR this topic argued:\n{pro_argument}\n\n" + "Present your counter-arguments AGAINST this topic." 
+ ) + opp_argument, opp_time = await self._run_agent(self.opponent, opp_prompt) + rounds.append(DebateRound("Opponent", "AGAINST", opp_argument, opp_time)) + + # Final: Moderator verdict + mod_prompt = ( + f'Topic for debate: "{topic}"\n\n' + f"=== Arguments FOR ===\n{pro_argument}\n\n" + f"=== Arguments AGAINST ===\n{opp_argument}\n\n" + "Please summarize the debate and declare a winner." + ) + verdict_text, mod_time = await self._run_agent(self.moderator, mod_prompt) + rounds.append(DebateRound("Moderator", "VERDICT", verdict_text, mod_time)) + + total_time = time.perf_counter() - t0_total + + # Extract verdict + verdict = "TIE" + if "for wins" in verdict_text.lower() or "proponent wins" in verdict_text.lower(): + verdict = "FOR" + elif "against wins" in verdict_text.lower() or "opponent wins" in verdict_text.lower(): + verdict = "AGAINST" + + response_parts = [] + for r in rounds: + response_parts.append(f"=== {r.speaker} ({r.position}) ===\n{r.argument}") + response_text = "\n\n".join(response_parts) + if verdict != "TIE": + response_text += f"\n\nVerdict: {verdict}" + + return { + "response": response_text, + "topic": topic, + "rounds": [ + { + "speaker": r.speaker, + "position": r.position, + "argument": r.argument, + "elapsed": round(r.elapsed, 2), + } + for r in rounds + ], + "verdict": verdict, + "total_time": round(total_time, 2), + "agents_used": ["Proponent (FOR)", "Opponent (AGAINST)", "Moderator"], + } + + +# ─── Register ──────────────────────────────────────────────────── + +async def run_debate_demo(conn: FoundryConnection, prompt: str) -> dict: + demo = DebateDemo(conn) + return await demo.run(prompt) + + +register_demo(DemoInfo( + id="multi_agent_debate", + name="Multi-Agent Debate", + description="Three agents debate a topic: one argues FOR, one argues AGAINST, and a moderator declares a winner.", + icon="\ud83c\udfad", + category="Multi-Agent", + runner=run_debate_demo, + tags=["multi-agent", "orchestration", "sequential", "debate"], + 
suggested_prompt="Remote work should become the default for all knowledge workers", +)) diff --git a/samples/python/agent-framework/src/app/demos/registry.py b/samples/python/agent-framework/src/app/demos/registry.py new file mode 100644 index 00000000..1ceed6bd --- /dev/null +++ b/samples/python/agent-framework/src/app/demos/registry.py @@ -0,0 +1,42 @@ +""" +Demo Registry +───────────── +Central registry of all available demos with metadata. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any, Callable + + +@dataclass +class DemoInfo: + """Metadata for a demo.""" + id: str + name: str + description: str + icon: str + category: str + runner: Callable[..., Any] + tags: list[str] + suggested_prompt: str = "" + + +# Registry populated by each demo module +DEMO_REGISTRY: dict[str, DemoInfo] = {} + + +def register_demo(info: DemoInfo) -> None: + """Register a demo in the global registry.""" + DEMO_REGISTRY[info.id] = info + + +def get_demo(demo_id: str) -> DemoInfo | None: + """Get demo info by ID.""" + return DEMO_REGISTRY.get(demo_id) + + +def list_demos() -> list[DemoInfo]: + """List all demos with their metadata.""" + return list(DEMO_REGISTRY.values()) diff --git a/samples/python/agent-framework/src/app/demos/sentiment_analyzer.py b/samples/python/agent-framework/src/app/demos/sentiment_analyzer.py new file mode 100644 index 00000000..abee8526 --- /dev/null +++ b/samples/python/agent-framework/src/app/demos/sentiment_analyzer.py @@ -0,0 +1,246 @@ +""" +Demo: Sentiment Analyzer +──────────────────────── +Demonstrates text analysis tools for sentiment and emotion detection. 
+""" + +from __future__ import annotations + +import re +from collections import Counter +from typing import Annotated + +from agent_framework import ChatAgent +from agent_framework.openai import OpenAIChatClient +from pydantic import Field + +from ..foundry_boot import FoundryConnection +from .registry import DemoInfo, register_demo + +# ─── Lexicon ───────────────────────────────────────────────────── + +POSITIVE_WORDS = { + "good", "great", "excellent", "amazing", "wonderful", "fantastic", + "happy", "love", "best", "awesome", "brilliant", "perfect", + "beautiful", "outstanding", "superb", "delightful", "pleased", + "satisfied", "excited", "thankful", "grateful", "impressed", +} + +NEGATIVE_WORDS = { + "bad", "terrible", "awful", "horrible", "poor", "worst", + "sad", "hate", "disappointed", "angry", "frustrated", "annoyed", + "ugly", "boring", "waste", "useless", "broken", "failed", + "difficult", "confusing", "slow", "expensive", "problem", +} + +EMOTION_PATTERNS = { + "joy": ["happy", "excited", "delighted", "thrilled", "pleased", "love", "wonderful"], + "sadness": ["sad", "disappointed", "unhappy", "depressed", "lonely", "miss", "sorry"], + "anger": ["angry", "frustrated", "annoyed", "furious", "mad", "hate", "outraged"], + "fear": ["afraid", "scared", "worried", "anxious", "nervous", "terrified", "panic"], + "surprise": ["surprised", "amazed", "astonished", "shocked", "unexpected", "wow"], + "trust": ["trust", "believe", "reliable", "confident", "safe", "secure", "honest"], +} + + +# ─── Tool Functions ────────────────────────────────────────────── + +def analyze_sentiment( + text: Annotated[str, Field(description="The text to analyze for sentiment")], +) -> str: + """Analyze the overall sentiment of text.""" + words = re.findall(r"\b\w+\b", text.lower()) + pos_count = sum(1 for w in words if w in POSITIVE_WORDS) + neg_count = sum(1 for w in words if w in NEGATIVE_WORDS) + total = pos_count + neg_count + if total == 0: + sentiment, confidence, score = 
"neutral", 0.5, 0.0 + else: + score = (pos_count - neg_count) / total + if score > 0.2: + sentiment = "positive" + elif score < -0.2: + sentiment = "negative" + else: + sentiment = "neutral" + confidence = min(0.95, 0.5 + abs(score) * 0.5) + return ( + f"Sentiment Analysis:\n" + f" Overall: {sentiment.upper()}\n" + f" Score: {score:+.2f} (range: -1.0 to +1.0)\n" + f" Confidence: {confidence:.0%}\n" + f" Positive words found: {pos_count}\n" + f" Negative words found: {neg_count}" + ) + + +def detect_emotions( + text: Annotated[str, Field(description="The text to analyze for emotions")], +) -> str: + """Detect specific emotions present in the text.""" + words = set(re.findall(r"\b\w+\b", text.lower())) + detected = [] + for emotion, keywords in EMOTION_PATTERNS.items(): + matches = words.intersection(keywords) + if matches: + detected.append((emotion, len(matches), list(matches))) + if not detected: + return "No strong emotions detected in the text." + detected.sort(key=lambda x: x[1], reverse=True) + lines = ["Emotions detected:"] + for emotion, count, matches in detected: + intensity = "strong" if count >= 2 else "mild" + lines.append(f" \u2022 {emotion.title()} ({intensity}): triggered by '{', '.join(matches)}'") + return "\n".join(lines) + + +def extract_key_phrases( + text: Annotated[str, Field(description="The text to extract key phrases from")], +) -> str: + """Extract and rate important phrases from text.""" + sentences = re.split(r"[.!?]+", text) + results = [] + for sent in sentences: + sent = sent.strip() + if len(sent) < 10: + continue + words = re.findall(r"\b\w+\b", sent.lower()) + pos = sum(1 for w in words if w in POSITIVE_WORDS) + neg = sum(1 for w in words if w in NEGATIVE_WORDS) + if pos > neg: + rating = "positive" + elif neg > pos: + rating = "negative" + else: + rating = "neutral" + display = sent[:80] + "\u2026" if len(sent) > 80 else sent + results.append(f' [{rating:^8}] "{display}"') + if not results: + return "No significant phrases found." 
+ return "Key phrases:\n" + "\n".join(results[:5]) + + +def compare_sentiment( + text1: Annotated[str, Field(description="First text to compare")], + text2: Annotated[str, Field(description="Second text to compare")], +) -> str: + """Compare sentiment between two texts.""" + def score_text(text): + words = re.findall(r"\b\w+\b", text.lower()) + pos = sum(1 for w in words if w in POSITIVE_WORDS) + neg = sum(1 for w in words if w in NEGATIVE_WORDS) + total = pos + neg + return (pos - neg) / total if total > 0 else 0 + + s1 = score_text(text1) + s2 = score_text(text2) + + def label(s): + if s > 0.2: + return "positive" + if s < -0.2: + return "negative" + return "neutral" + + diff = abs(s1 - s2) + if diff < 0.1: + comparison = "Both texts have similar sentiment" + elif s1 > s2: + comparison = f"Text 1 is more positive (by {diff:.2f})" + else: + comparison = f"Text 2 is more positive (by {diff:.2f})" + return ( + f"Sentiment Comparison:\n" + f" Text 1: {label(s1)} ({s1:+.2f})\n" + f" Text 2: {label(s2)} ({s2:+.2f})\n" + f" \u2192 {comparison}" + ) + + +def word_frequency( + text: Annotated[str, Field(description="The text to analyze")], + top_n: Annotated[int, Field(description="Number of top words to return")] = 10, +) -> str: + """Get the most frequent meaningful words in text.""" + stopwords = { + "the", "a", "an", "is", "are", "was", "were", "be", "been", "being", + "have", "has", "had", "do", "does", "did", "will", "would", "could", + "should", "may", "might", "must", "shall", "can", "need", "dare", + "to", "of", "in", "for", "on", "with", "at", "by", "from", "up", + "about", "into", "over", "after", "it", "its", "this", "that", + "and", "but", "or", "nor", "so", "yet", "both", "either", "neither", + "i", "me", "my", "myself", "we", "our", "you", "your", "he", "she", + } + words = re.findall(r"\b\w+\b", text.lower()) + words = [w for w in words if w not in stopwords and len(w) > 2] + counter = Counter(words) + top = counter.most_common(top_n) + if not top: + 
return "No significant words found." + lines = [f"Top {min(top_n, len(top))} words:"] + for word, count in top: + bar = "\u2588" * min(count, 20) + lines.append(f" {word:15} {count:3} {bar}") + return "\n".join(lines) + + +# ─── Demo Class ────────────────────────────────────────────────── + +class SentimentDemo: + def __init__(self, conn: FoundryConnection): + self.conn = conn + self.agent = self._create_agent() + + def _create_agent(self) -> ChatAgent: + client = OpenAIChatClient( + api_key=self.conn.api_key, + base_url=self.conn.endpoint, + model_id=self.conn.model_id, + ) + return ChatAgent( + chat_client=client, + name="SentimentAnalyst", + instructions=( + "You are a text analysis expert. Use the provided tools to analyze text:\n\n" + " \u2022 analyze_sentiment: Get overall sentiment score\n" + " \u2022 detect_emotions: Find specific emotions\n" + " \u2022 extract_key_phrases: Find important phrases\n" + " \u2022 compare_sentiment: Compare two texts\n" + " \u2022 word_frequency: Find common words\n\n" + "When asked to analyze text, use the appropriate tool(s). " + "Summarize findings in plain language after using tools." 
+ ), + tools=[analyze_sentiment, detect_emotions, extract_key_phrases, compare_sentiment, word_frequency], + ) + + async def run(self, prompt: str) -> dict: + import time + t0 = time.perf_counter() + result = await self.agent.run(prompt) + elapsed = time.perf_counter() - t0 + text = re.sub(r".*?\s*", "", str(result), flags=re.DOTALL).strip() + return { + "prompt": prompt, + "response": text, + "elapsed": round(elapsed, 2), + "tools_available": ["analyze_sentiment", "detect_emotions", "extract_key_phrases", "compare_sentiment", "word_frequency"], + } + + +# ─── Register ──────────────────────────────────────────────────── + +async def run_sentiment_demo(conn: FoundryConnection, prompt: str) -> dict: + demo = SentimentDemo(conn) + return await demo.run(prompt) + + +register_demo(DemoInfo( + id="sentiment_analyzer", + name="Sentiment Analyzer", + description="Text analysis agent that detects sentiment, emotions, key phrases, and word frequency.", + icon="\ud83d\udcac", + category="Tool Calling", + runner=run_sentiment_demo, + tags=["tools", "function-calling", "text-analysis", "single-agent"], + suggested_prompt="Analyze this review: 'The product arrived quickly and the quality exceeded my expectations. However, the packaging was disappointing and customer support was slow to respond. Overall I'm satisfied but not thrilled.'", +)) diff --git a/samples/python/agent-framework/src/app/demos/weather_tools.py b/samples/python/agent-framework/src/app/demos/weather_tools.py new file mode 100644 index 00000000..89ab6579 --- /dev/null +++ b/samples/python/agent-framework/src/app/demos/weather_tools.py @@ -0,0 +1,177 @@ +""" +Demo: Weather Tools +─────────────────── +Demonstrates function/tool calling with multiple weather tools. 
+""" + +from __future__ import annotations + +import random +import re +from typing import Annotated + +from agent_framework import ChatAgent +from agent_framework.openai import OpenAIChatClient +from pydantic import Field + +from ..foundry_boot import FoundryConnection +from .registry import DemoInfo, register_demo + +# ─── Mock weather data ─────────────────────────────────────────── + +WEATHER_CONDITIONS = ["sunny", "cloudy", "rainy", "partly cloudy", "stormy", "foggy", "snowy"] +CITIES_DATA = { + "london": {"lat": 51.5, "country": "UK"}, + "new york": {"lat": 40.7, "country": "USA"}, + "tokyo": {"lat": 35.7, "country": "Japan"}, + "sydney": {"lat": -33.9, "country": "Australia"}, + "paris": {"lat": 48.9, "country": "France"}, + "seattle": {"lat": 47.6, "country": "USA"}, + "berlin": {"lat": 52.5, "country": "Germany"}, +} + + +def _mock_temp(city: str) -> int: + info = CITIES_DATA.get(city.lower(), {"lat": 45}) + base = 25 - abs(info["lat"] - 25) * 0.3 + return int(base + random.randint(-5, 5)) + + +# ─── Tool Functions ────────────────────────────────────────────── + +def get_current_weather( + city: Annotated[str, Field(description="Name of the city to get weather for")], +) -> str: + """Get the current weather conditions for a city.""" + city_lower = city.lower() + if city_lower not in CITIES_DATA: + return f"Weather data not available for '{city}'. Try: London, New York, Tokyo, Sydney, Paris, Seattle, Berlin." 
+ temp = _mock_temp(city) + condition = random.choice(WEATHER_CONDITIONS) + humidity = random.randint(30, 90) + wind = random.randint(5, 30) + return ( + f"Current weather in {city.title()}:\n" + f" \u2022 Temperature: {temp}\u00b0C\n" + f" \u2022 Condition: {condition}\n" + f" \u2022 Humidity: {humidity}%\n" + f" \u2022 Wind: {wind} km/h" + ) + + +def get_forecast( + city: Annotated[str, Field(description="Name of the city")], + days: Annotated[int, Field(description="Number of days (1-5)", ge=1, le=5)] = 3, +) -> str: + """Get a weather forecast for the next N days.""" + city_lower = city.lower() + if city_lower not in CITIES_DATA: + return f"Forecast not available for '{city}'." + lines = [f"Weather forecast for {city.title()} ({days} days):"] + for i in range(days): + temp = _mock_temp(city) + random.randint(-3, 3) + condition = random.choice(WEATHER_CONDITIONS) + lines.append(f" Day {i + 1}: {temp}\u00b0C, {condition}") + return "\n".join(lines) + + +def compare_weather( + city1: Annotated[str, Field(description="First city to compare")], + city2: Annotated[str, Field(description="Second city to compare")], +) -> str: + """Compare current weather between two cities.""" + temp1 = _mock_temp(city1) + temp2 = _mock_temp(city2) + cond1 = random.choice(WEATHER_CONDITIONS) + cond2 = random.choice(WEATHER_CONDITIONS) + diff = abs(temp1 - temp2) + warmer = city1 if temp1 > temp2 else city2 + return ( + f"Weather comparison:\n" + f" {city1.title()}: {temp1}\u00b0C, {cond1}\n" + f" {city2.title()}: {temp2}\u00b0C, {cond2}\n" + f" \u2192 {warmer.title()} is {diff}\u00b0C warmer" + ) + + +def recommend_activity( + city: Annotated[str, Field(description="City to get activity recommendations for")], +) -> str: + """Recommend outdoor activities based on current weather.""" + temp = _mock_temp(city) + condition = random.choice(WEATHER_CONDITIONS) + activities = [] + if "sunny" in condition or "partly" in condition: + activities.extend(["hiking", "picnic", "cycling", 
"sightseeing"]) + if "cloudy" in condition: + activities.extend(["museum visit", "walking tour", "photography"]) + if "rainy" in condition or "stormy" in condition: + activities.extend(["visit indoor attractions", "try local caf\u00e9s", "shopping"]) + if temp > 25: + activities.extend(["swimming", "beach"]) + if temp < 10: + activities.extend(["hot chocolate tour", "indoor sports"]) + return ( + f"Activity recommendations for {city.title()} ({temp}\u00b0C, {condition}):\n" + f" \u2022 {', '.join(activities[:4])}" + ) + + +# ─── Demo Class ────────────────────────────────────────────────── + +class WeatherDemo: + def __init__(self, conn: FoundryConnection): + self.conn = conn + self.agent = self._create_agent() + + def _create_agent(self) -> ChatAgent: + client = OpenAIChatClient( + api_key=self.conn.api_key, + base_url=self.conn.endpoint, + model_id=self.conn.model_id, + ) + return ChatAgent( + chat_client=client, + name="WeatherAssistant", + instructions=( + "You are a helpful weather assistant. Use the available tools to:\n" + " \u2022 Get current weather for cities\n" + " \u2022 Provide forecasts\n" + " \u2022 Compare weather between locations\n" + " \u2022 Recommend activities\n\n" + "Always use the tools when asked about weather. Be concise." 
+ ), + tools=[get_current_weather, get_forecast, compare_weather, recommend_activity], + ) + + async def run(self, prompt: str) -> dict: + import time + t0 = time.perf_counter() + result = await self.agent.run(prompt) + elapsed = time.perf_counter() - t0 + text = re.sub(r".*?\s*", "", str(result), flags=re.DOTALL).strip() + return { + "prompt": prompt, + "response": text, + "elapsed": round(elapsed, 2), + "tools_available": ["get_current_weather", "get_forecast", "compare_weather", "recommend_activity"], + } + + +# ─── Register ──────────────────────────────────────────────────── + +async def run_weather_demo(conn: FoundryConnection, prompt: str) -> dict: + demo = WeatherDemo(conn) + return await demo.run(prompt) + + +register_demo(DemoInfo( + id="weather_tools", + name="Weather Tools", + description="Multi-tool agent that provides weather information, forecasts, city comparisons, and activity recommendations.", + icon="\ud83c\udf24\ufe0f", + category="Tool Calling", + runner=run_weather_demo, + tags=["tools", "function-calling", "single-agent"], + suggested_prompt="What's the weather in Seattle and San Francisco? Compare them and recommend activities for the warmer city.", +)) diff --git a/samples/python/agent-framework/src/app/documents.py b/samples/python/agent-framework/src/app/documents.py new file mode 100644 index 00000000..166925a5 --- /dev/null +++ b/samples/python/agent-framework/src/app/documents.py @@ -0,0 +1,89 @@ +""" +Document Loader +──────────────── +Load and chunk local text/markdown files for the retriever agent. 
+""" + +from __future__ import annotations + +import logging +from dataclasses import dataclass, field +from pathlib import Path + +log = logging.getLogger(__name__) + +SUPPORTED_EXTENSIONS = {".txt", ".md", ".markdown"} + + +@dataclass +class DocumentChunk: + """A chunk of text from a source file.""" + source: str + text: str + index: int + + +@dataclass +class LoadedDocuments: + """All loaded document chunks and metadata.""" + chunks: list[DocumentChunk] = field(default_factory=list) + file_count: int = 0 + combined_text: str = "" + + +def load_documents( + docs_path: str, + max_chars_per_chunk: int = 2000, +) -> LoadedDocuments: + """Load all supported files from *docs_path* and split into chunks.""" + folder = Path(docs_path) + if not folder.is_dir(): + log.warning("Documents folder not found: %s", docs_path) + return LoadedDocuments() + + chunks: list[DocumentChunk] = [] + file_count = 0 + + for fp in sorted(folder.iterdir()): + if fp.suffix.lower() not in SUPPORTED_EXTENSIONS: + continue + try: + content = fp.read_text(encoding="utf-8") + except Exception as exc: + log.warning("Skipping %s: %s", fp.name, exc) + continue + + file_count += 1 + + # Split into chunks of roughly max_chars_per_chunk on line boundaries + lines = content.splitlines(keepends=True) + buf: list[str] = [] + buf_len = 0 + idx = 0 + + for line in lines: + if buf_len + len(line) > max_chars_per_chunk and buf: + chunks.append(DocumentChunk( + source=fp.name, + text="".join(buf), + index=idx, + )) + idx += 1 + buf = [] + buf_len = 0 + buf.append(line) + buf_len += len(line) + + if buf: + chunks.append(DocumentChunk( + source=fp.name, + text="".join(buf), + index=idx, + )) + + combined = "\n\n".join( + f"[{c.source} chunk {c.index}]\n{c.text}" for c in chunks + ) + + log.info("Loaded %d files → %d chunks", file_count, len(chunks)) + return LoadedDocuments(chunks=chunks, file_count=file_count, combined_text=combined) diff --git a/samples/python/agent-framework/src/app/foundry_boot.py 
b/samples/python/agent-framework/src/app/foundry_boot.py new file mode 100644 index 00000000..3bde3388 --- /dev/null +++ b/samples/python/agent-framework/src/app/foundry_boot.py @@ -0,0 +1,76 @@ +""" +Foundry Local Bootstrapper +─────────────────────────── +Manages Foundry Local service lifecycle: starts the service, +checks/downloads/loads the model, and returns connection info. +""" + +from __future__ import annotations + +import logging +import os +from dataclasses import dataclass + +from dotenv import load_dotenv +from foundry_local import FoundryLocalManager +from rich.console import Console + +load_dotenv() +log = logging.getLogger(__name__) +console = Console() + + +@dataclass +class FoundryConnection: + """Connection details returned after bootstrap.""" + endpoint: str + api_key: str + model_id: str + model_alias: str + + +class FoundryLocalBootstrapper: + """Bootstrap Foundry Local: start service → resolve model → download → load.""" + + def __init__(self, alias: str | None = None): + self.alias = alias or os.getenv("MODEL_ALIAS", "qwen2.5-0.5b") + + def bootstrap(self) -> FoundryConnection: + """Start Foundry Local and return a ready-to-use connection.""" + endpoint_override = os.getenv("FOUNDRY_ENDPOINT") + + if endpoint_override: + # External endpoint provided — skip local bootstrap + console.print(f"[cyan]Using external endpoint:[/] {endpoint_override}") + return FoundryConnection( + endpoint=endpoint_override, + api_key=os.getenv("FOUNDRY_API_KEY", "none"), + model_id=self.alias, + model_alias=self.alias, + ) + + console.print(f"[cyan]Bootstrapping Foundry Local with alias:[/] {self.alias}") + + # FoundryLocalManager(alias) auto-starts service + resolves model + manager = FoundryLocalManager(self.alias) + + endpoint = manager.endpoint + api_key = manager.api_key + + # List cached models to find the resolved variant + cached = manager.list_cached_models() + model_id = self.alias + for m in cached: + if self.alias in str(m): + model_id = str(m) + break 
+ + console.print(f"[green]✓ Foundry Local ready[/] endpoint={endpoint}") + log.info("Foundry Local ready: endpoint=%s model=%s", endpoint, model_id) + + return FoundryConnection( + endpoint=endpoint, + api_key=api_key, + model_id=model_id, + model_alias=self.alias, + ) diff --git a/samples/python/agent-framework/src/app/orchestrator.py b/samples/python/agent-framework/src/app/orchestrator.py new file mode 100644 index 00000000..dbea57d7 --- /dev/null +++ b/samples/python/agent-framework/src/app/orchestrator.py @@ -0,0 +1,182 @@ +""" +Orchestrator +───────────── +Three orchestration patterns for the multi-agent workflow: + 1. Sequential — Planner → Retriever → Critic ⇄ Retriever → Writer + 2. Concurrent — Retriever ‖ ToolAgent (fan-out) + 3. Full — Combines sequential + concurrent +""" + +from __future__ import annotations + +import asyncio +import logging +import re +import time +from typing import AsyncGenerator + +from .agents import ( + create_critic, + create_planner, + create_retriever, + create_tool_agent, + create_writer, +) +from .documents import LoadedDocuments +from .foundry_boot import FoundryConnection + +log = logging.getLogger(__name__) + +MAX_CRITIC_LOOPS = 2 + + +def _critic_found_gaps(critique: str) -> bool: + """Return True if the critic found gaps (i.e. 
didn't say NO_GAPS_FOUND).""" + return "NO_GAPS_FOUND" not in critique.upper().replace(" ", "") + + +# ─── Streaming helpers ─────────────────────────────────────────── + +StepEvent = dict # {"type": str, ...} + + +async def run_sequential( + conn: FoundryConnection, + docs: LoadedDocuments, + question: str, +) -> AsyncGenerator[StepEvent, None]: + """Sequential pipeline: Planner → Retriever → Critic → Writer.""" + + planner = create_planner(conn) + retriever = create_retriever(conn, docs.combined_text) + critic = create_critic(conn) + writer = create_writer(conn) + + # ── Planner ── + yield {"type": "step_start", "agent": "Planner", "description": "Breaking question into sub-tasks"} + t0 = time.perf_counter() + plan = await planner.run(question) + plan_text = str(plan) + elapsed = round(time.perf_counter() - t0, 2) + yield {"type": "step_done", "agent": "Planner", "output": plan_text, "elapsed": elapsed} + + # ── Retriever ── + yield {"type": "step_start", "agent": "Retriever", "description": "Searching documents"} + t0 = time.perf_counter() + snippets = await retriever.run(plan_text) + snippets_text = str(snippets) + elapsed = round(time.perf_counter() - t0, 2) + yield {"type": "step_done", "agent": "Retriever", "output": snippets_text, "elapsed": elapsed} + + # ── Critic loop ── + combined = f"Plan:\n{plan_text}\n\nRetrieved:\n{snippets_text}" + for loop in range(MAX_CRITIC_LOOPS): + yield {"type": "step_start", "agent": "Critic", "description": f"Reviewing for gaps (round {loop + 1})"} + t0 = time.perf_counter() + critique = await critic.run(combined) + critique_text = str(critique) + elapsed = round(time.perf_counter() - t0, 2) + yield {"type": "step_done", "agent": "Critic", "output": critique_text, "elapsed": elapsed} + + if not _critic_found_gaps(critique_text): + break + + # Re-retrieve with critic feedback + yield {"type": "step_start", "agent": "Retriever", "description": "Re-searching based on critic feedback"} + t0 = time.perf_counter() + snippets = 
await retriever.run(f"{plan_text}\n\nCritic feedback:\n{critique_text}") + snippets_text = str(snippets) + elapsed = round(time.perf_counter() - t0, 2) + yield {"type": "step_done", "agent": "Retriever", "output": snippets_text, "elapsed": elapsed} + + combined = f"Plan:\n{plan_text}\n\nRetrieved:\n{snippets_text}\n\nCritique:\n{critique_text}" + + # ── Writer ── + yield {"type": "step_start", "agent": "Writer", "description": "Synthesising final report"} + t0 = time.perf_counter() + report = await writer.run(combined) + report_text = str(report) + elapsed = round(time.perf_counter() - t0, 2) + yield {"type": "step_done", "agent": "Writer", "output": report_text, "elapsed": elapsed} + + yield {"type": "complete", "report": report_text} + + +async def run_concurrent_retrieval( + conn: FoundryConnection, + docs: LoadedDocuments, + plan_text: str, +) -> AsyncGenerator[StepEvent, None]: + """Concurrent fan-out: Retriever ‖ ToolAgent on the same plan.""" + + retriever = create_retriever(conn, docs.combined_text) + tool_agent = create_tool_agent(conn) + + yield {"type": "step_start", "agent": "Concurrent", "description": "Retriever + ToolAgent in parallel"} + t0 = time.perf_counter() + snippets_task = retriever.run(plan_text) + keywords_task = tool_agent.run(f"Analyze this text:\n{docs.combined_text[:3000]}") + snippets, keywords = await asyncio.gather(snippets_task, keywords_task) + elapsed = round(time.perf_counter() - t0, 2) + + yield { + "type": "step_done", + "agent": "Concurrent", + "output": f"**Retriever:**\n{snippets}\n\n**ToolAgent:**\n{keywords}", + "elapsed": elapsed, + } + + +async def run_full_workflow( + conn: FoundryConnection, + docs: LoadedDocuments, + question: str, +) -> AsyncGenerator[StepEvent, None]: + """Full hybrid: Sequential plan → Concurrent retrieve → Sequential critique + write.""" + + planner = create_planner(conn) + critic = create_critic(conn) + writer = create_writer(conn) + + # ── Planner (sequential) ── + yield {"type": "step_start", 
"agent": "Planner", "description": "Breaking question into sub-tasks"} + t0 = time.perf_counter() + plan = await planner.run(question) + plan_text = str(plan) + elapsed = round(time.perf_counter() - t0, 2) + yield {"type": "step_done", "agent": "Planner", "output": plan_text, "elapsed": elapsed} + + # ── Concurrent fan-out ── + snippets_text = "" + keywords_text = "" + async for evt in run_concurrent_retrieval(conn, docs, plan_text): + yield evt + if evt["type"] == "step_done" and evt["agent"] == "Concurrent": + # Parse out retriever/tool output + output = evt.get("output", "") + snippets_text = output + keywords_text = "" + + # ── Critic (sequential) ── + combined = f"Plan:\n{plan_text}\n\nRetrieved + Keywords:\n{snippets_text}" + for loop in range(MAX_CRITIC_LOOPS): + yield {"type": "step_start", "agent": "Critic", "description": f"Reviewing for gaps (round {loop + 1})"} + t0 = time.perf_counter() + critique = await critic.run(combined) + critique_text = str(critique) + elapsed = round(time.perf_counter() - t0, 2) + yield {"type": "step_done", "agent": "Critic", "output": critique_text, "elapsed": elapsed} + + if not _critic_found_gaps(critique_text): + break + combined += f"\n\nCritique:\n{critique_text}" + + # ── Writer (sequential) ── + yield {"type": "step_start", "agent": "Writer", "description": "Synthesising final report"} + t0 = time.perf_counter() + report = await writer.run(combined) + report_text = str(report) + elapsed = round(time.perf_counter() - t0, 2) + yield {"type": "step_done", "agent": "Writer", "output": report_text, "elapsed": elapsed} + + yield {"type": "complete", "report": report_text} diff --git a/samples/python/agent-framework/src/app/templates/index.html b/samples/python/agent-framework/src/app/templates/index.html new file mode 100644 index 00000000..ee81f780 --- /dev/null +++ b/samples/python/agent-framework/src/app/templates/index.html @@ -0,0 +1,628 @@ + + + + + + + + + Local Research & Synthesis Desk + + + +
+
+

🧠 Local Research & Synthesis Desk

+

Multi-Agent Orchestration • Microsoft Agent Framework + Foundry Local

+
+ Connecting… +
+
+ +
+ + + + +
+ + +
+
+
+ + +
+
+
+ + +
+ +
+
+
+
🗂 Planner
+ +
+ concurrent +
+
🔍 Retriever
+
🔧 ToolAgent
+
+
+ +
🧐 Critic
+ +
✍️ Writer
+
+
+
🗂 Planner
+ +
🔍 Retriever
+ +
🧐 Critic
+ +
✍️ Writer
+
+
+
+
📝
+

Ready to Research

+

Type a question above and click Run.

+
+
+
+

📋 Final Report

+
+
+
+ + +
+
+

🎮 MAF Tool Calling Demos

+

Explore different ways the Microsoft Agent Framework uses tool calling and multi-agent orchestration.

+
+

Loading demos...

+
+
+ 🔧 +
+
Select a Demo
+
category
+
+
+
Select a demo from above.
+
+
+ 💡 + Try this prompt: + +
+
+
+
+ + +
+
+
+
+ + +
+
+

Validate function/tool calling with Foundry Local + MAF.

+ +
+
+
+ + +
+
+

About This Demo

+

This demo shows multi-agent orchestration using two Microsoft technologies:

+
    +
  • Microsoft Agent Framework (MAF) — unified SDK for building AI agents with tool calling, orchestration patterns, and session management.
  • +
  • Foundry Local — on-device AI inference runtime. Models run on your GPU/NPU/CPU with no cloud connection needed.
  • +
+

Agents

+ + + + + + +
🗂 PlannerBreaks your question into 2-4 sub-tasks
🔍 RetrieverReads local files, extracts relevant snippets
🔧 ToolAgentComputes word counts, keyword extraction via function calling
🧐 CriticReviews for gaps and contradictions
✍️ WriterProduces the final report with citations
+

Orchestration Patterns

+

Sequential: Planner → Retriever → Critic → Writer.

+

Full: Sequential plan → Concurrent (Retriever + ToolAgent) → Critic → Writer.

+

References

+ +
+
+
+ + + + + diff --git a/samples/python/agent-framework/src/app/tool_demo.py b/samples/python/agent-framework/src/app/tool_demo.py new file mode 100644 index 00000000..738572d1 --- /dev/null +++ b/samples/python/agent-framework/src/app/tool_demo.py @@ -0,0 +1,97 @@ +""" +Tool Demo +────────── +Standalone validation: run tool-calling with Foundry Local to verify +that both direct invocation and LLM-driven function calling work. +""" + +from __future__ import annotations + +import asyncio +import time + +from agent_framework import ChatAgent +from agent_framework.openai import OpenAIChatClient +from rich.console import Console + +from .agents import extract_keywords, word_count +from .foundry_boot import FoundryConnection + +console = Console() + + +async def run_tool_demo(conn: FoundryConnection) -> list[dict]: + """Run direct + LLM-driven tool tests and return results.""" + results: list[dict] = [] + + # ── Direct function calls ──────────────────────────── + t0 = time.perf_counter() + wc = word_count("Foundry Local runs models on device") + results.append({ + "test": "Direct: word_count", + "result": wc, + "status": "pass" if "6" in wc else "fail", + "elapsed": round(time.perf_counter() - t0, 4), + }) + + t0 = time.perf_counter() + kw = extract_keywords("foundry foundry local local model model agent") + results.append({ + "test": "Direct: extract_keywords", + "result": kw, + "status": "pass" if "foundry" in kw.lower() else "fail", + "elapsed": round(time.perf_counter() - t0, 4), + }) + + # ── LLM-driven tool call ───────────────────────────── + client = OpenAIChatClient( + api_key=conn.api_key, + base_url=conn.endpoint, + model_id=conn.model_id, + ) + agent = ChatAgent( + chat_client=client, + name="ToolTester", + instructions="Use the provided tools to answer.", + tools=[word_count, extract_keywords], + ) + + t0 = time.perf_counter() + try: + result = await agent.run("Count the words in: 'hello world from foundry local'") + result_text = str(result) + 
results.append({ + "test": "LLM: word_count via agent", + "result": result_text, + "status": "pass" if any(c.isdigit() for c in result_text) else "fail", + "elapsed": round(time.perf_counter() - t0, 2), + }) + except Exception as exc: + results.append({ + "test": "LLM: word_count via agent", + "result": str(exc), + "status": "fail", + "elapsed": round(time.perf_counter() - t0, 2), + }) + + t0 = time.perf_counter() + try: + result = await agent.run( + "Extract keywords from: 'foundry foundry local local model model inference inference'" + ) + result_text = str(result) + results.append({ + "test": "LLM: extract_keywords via agent", + "result": result_text, + "status": "pass" if "foundry" in result_text.lower() or "keyword" in result_text.lower() else "fail", + "elapsed": round(time.perf_counter() - t0, 2), + }) + except Exception as exc: + results.append({ + "test": "LLM: extract_keywords via agent", + "result": str(exc), + "status": "fail", + "elapsed": round(time.perf_counter() - t0, 2), + }) + + return results diff --git a/samples/python/agent-framework/src/app/web.py b/samples/python/agent-framework/src/app/web.py new file mode 100644 index 00000000..5074e3b0 --- /dev/null +++ b/samples/python/agent-framework/src/app/web.py @@ -0,0 +1,179 @@ +""" +Flask Web Server +───────────────── +Serves the web UI and exposes API endpoints with SSE streaming +for real-time agent pipeline visualisation. 
+""" + +from __future__ import annotations + +import asyncio +import json +import logging +import os +import traceback + +from flask import Flask, Response, jsonify, render_template, request + +from .documents import load_documents +from .foundry_boot import FoundryConnection, FoundryLocalBootstrapper +from .orchestrator import run_full_workflow, run_sequential +from .tool_demo import run_tool_demo + +log = logging.getLogger(__name__) + +# ── Global state (set in create_app) ──────────────────────────── +_conn: FoundryConnection | None = None +_docs_path: str = "./data" +_docs = None + + +def create_app(conn: FoundryConnection | None = None) -> Flask: + """Create and configure the Flask application.""" + global _conn, _docs, _docs_path + + app = Flask(__name__, template_folder="templates") + + _docs_path = os.getenv("DOCS_PATH", "./data") + + if conn is not None: + _conn = conn + else: + boot = FoundryLocalBootstrapper() + _conn = boot.bootstrap() + + _docs = load_documents(_docs_path) + + # ── Routes ─────────────────────────────────────────── + + @app.route("/") + def index(): + return render_template("index.html") + + @app.route("/api/status") + def api_status(): + if _conn is None: + return jsonify({"status": "error", "message": "Not bootstrapped"}) + return jsonify({ + "status": "ok", + "model_alias": _conn.model_alias, + "model_id": _conn.model_id, + "endpoint": _conn.endpoint, + "documents": _docs.file_count if _docs else 0, + }) + + @app.route("/api/run", methods=["POST"]) + def api_run(): + """Run the research workflow and stream events via SSE.""" + if _conn is None: + return jsonify({"status": "error", "message": "Not bootstrapped"}), 503 + + data = request.get_json(silent=True) or {} + question = data.get("question", "").strip() + mode = data.get("mode", "full") + + if not question: + return jsonify({"status": "error", "message": "No question provided"}), 400 + + def generate(): + loop = asyncio.new_event_loop() + try: + if mode == "sequential": + gen = 
run_sequential(_conn, _docs, question) + else: + gen = run_full_workflow(_conn, _docs, question) + + async def drain(): + events = [] + async for evt in gen: + events.append(evt) + return events + + events = loop.run_until_complete(drain()) + for evt in events: + yield f"data: {json.dumps(evt)}\n\n" + except Exception as exc: + log.exception("Workflow error") + yield f"data: {json.dumps({'type': 'error', 'message': str(exc), 'traceback': traceback.format_exc()})}\n\n" + finally: + loop.close() + + return Response(generate(), mimetype="text/event-stream") + + @app.route("/api/tools", methods=["POST"]) + def api_tools(): + """Run the tool demo and return results.""" + if _conn is None: + return jsonify({"status": "error", "message": "Not bootstrapped"}), 503 + + loop = asyncio.new_event_loop() + try: + results = loop.run_until_complete(run_tool_demo(_conn)) + return jsonify({"status": "ok", "results": results}) + except Exception as exc: + log.exception("Tool demo error") + return jsonify({"status": "error", "message": str(exc)}), 500 + finally: + loop.close() + + @app.route("/api/documents") + def api_documents(): + return jsonify({ + "status": "ok", + "file_count": _docs.file_count if _docs else 0, + "chunk_count": len(_docs.chunks) if _docs else 0, + "files": list({c.source for c in _docs.chunks}) if _docs else [], + }) + + @app.route("/api/demos") + def api_demos(): + from .demos import list_demos + return jsonify({ + "status": "ok", + "demos": [ + { + "id": d.id, + "name": d.name, + "description": d.description, + "icon": d.icon, + "category": d.category, + "tags": d.tags, + "suggested_prompt": d.suggested_prompt, + } + for d in list_demos() + ], + }) + + @app.route("/api/demo//run", methods=["POST"]) + def api_demo_run(demo_id: str): + """Run a specific demo and stream results via SSE.""" + from .demos import get_demo + + if _conn is None: + return jsonify({"status": "error", "message": "Not bootstrapped"}), 503 + + demo = get_demo(demo_id) + if demo is None: + 
return jsonify({"status": "error", "message": f"Demo '{demo_id}' not found"}), 404 + + data = request.get_json(silent=True) or {} + prompt = data.get("prompt", "").strip() + if not prompt: + return jsonify({"status": "error", "message": "No prompt provided"}), 400 + + def generate(): + loop = asyncio.new_event_loop() + try: + yield f"data: {json.dumps({'type': 'step_start', 'agent': demo.name})}\n\n" + result = loop.run_until_complete(demo.runner(_conn, prompt)) + yield f"data: {json.dumps({'type': 'step_done', 'agent': demo.name, 'output': result.get('response', ''), 'elapsed': result.get('elapsed')})}\n\n" + yield f"data: {json.dumps({'type': 'complete', 'report': result.get('response', '')})}\n\n" + except Exception as exc: + log.exception("Demo error: %s", demo_id) + yield f"data: {json.dumps({'type': 'error', 'message': str(exc), 'traceback': traceback.format_exc()})}\n\n" + finally: + loop.close() + + return Response(generate(), mimetype="text/event-stream") + + return app diff --git a/samples/python/agent-framework/tests/test_smoke.py b/samples/python/agent-framework/tests/test_smoke.py new file mode 100644 index 00000000..9b86ec6e --- /dev/null +++ b/samples/python/agent-framework/tests/test_smoke.py @@ -0,0 +1,88 @@ +""" +Smoke Tests +──────────── +Quick tests to verify imports, document loading, and bootstrapper configuration. 
+""" + +from __future__ import annotations + +import os +from pathlib import Path + +import pytest + + +def test_imports(): + """All core modules can be imported.""" + from src.app.agents import create_planner, create_retriever, create_critic, create_writer, create_tool_agent + from src.app.documents import load_documents, LoadedDocuments + from src.app.foundry_boot import FoundryLocalBootstrapper, FoundryConnection + from src.app.orchestrator import run_sequential, run_full_workflow + from src.app.web import create_app + + +def test_document_loader(): + """Document loader reads data/ folder and produces chunks.""" + from src.app.documents import load_documents + + data_dir = Path(__file__).resolve().parent.parent / "data" + if not data_dir.is_dir(): + pytest.skip("data/ directory not found") + + docs = load_documents(str(data_dir)) + assert docs.file_count > 0, "Expected at least one document file" + assert len(docs.chunks) > 0, "Expected at least one chunk" + assert len(docs.combined_text) > 0, "Expected non-empty combined text" + + +def test_document_loader_missing_dir(): + """Document loader returns empty result for missing directory.""" + from src.app.documents import load_documents + + docs = load_documents("/nonexistent/path/nothing/here") + assert docs.file_count == 0 + assert len(docs.chunks) == 0 + + +def test_foundry_connection_dataclass(): + """FoundryConnection stores fields correctly.""" + from src.app.foundry_boot import FoundryConnection + + conn = FoundryConnection( + endpoint="http://localhost:5273", + api_key="test-key", + model_id="phi-4-mini-onnx-cpu", + model_alias="phi-4-mini", + ) + assert conn.endpoint == "http://localhost:5273" + assert conn.model_alias == "phi-4-mini" + + +def test_bootstrapper_uses_env_override(monkeypatch): + """Bootstrapper returns external endpoint when FOUNDRY_ENDPOINT is set.""" + from src.app.foundry_boot import FoundryLocalBootstrapper + + monkeypatch.setenv("FOUNDRY_ENDPOINT", "http://remote:8080/v1") + 
monkeypatch.setenv("FOUNDRY_API_KEY", "my-key") + + boot = FoundryLocalBootstrapper(alias="test-model") + conn = boot.bootstrap() + + assert conn.endpoint == "http://remote:8080/v1" + assert conn.api_key == "my-key" + assert conn.model_alias == "test-model" + + +def test_demo_registry(): + """Demo registry imports and has demos registered.""" + from src.app.demos.registry import list_demos, DEMO_REGISTRY + + # Import demo modules to trigger registration + import src.app.demos.weather_tools + import src.app.demos.math_agent + import src.app.demos.sentiment_analyzer + import src.app.demos.code_reviewer + import src.app.demos.multi_agent_debate + + demos = list_demos() + assert len(demos) >= 1, "Expected at least one registered demo" diff --git a/samples/python/functioncalling/README.md b/samples/python/functioncalling/README.md index 71048eae..44068fe1 100644 --- a/samples/python/functioncalling/README.md +++ b/samples/python/functioncalling/README.md @@ -2,10 +2,20 @@ This guide walks you through enabling function calling support in Foundry Local with Phi-4-mini. +## Features + +- **Cache-aware**: The notebook checks the local model cache before downloading — if the model is already cached, the download is skipped automatically. +- **Visual feedback**: Shows step-by-step status (service start → cache check → download/skip → load → ready) so you always know what's happening. +- **Parallel & single function calling**: Demonstrates both multi-tool and single-tool invocation patterns. + ## Prerequisites - Foundry Local version 0.5.100 or higher -- Access to modify model configuration files +- Python packages: `foundry-local-sdk`, `openai` + + ```bash + pip install foundry-local-sdk openai + ``` ## Setup Instructions @@ -40,11 +50,11 @@ foundry service restart ### Step 4: Test the Configuration -Run the provided [Notebook](./fl_tools..ipynb) to test and validate the function calling functionality. 
+Run the provided [Notebook](./fl_tools.ipynb) to test and validate the function calling functionality. ## Related Resources -- **Test Notebook**: [fl_tools.ipynb](./fl_tools..ipynb) +- **Test Notebook**: [fl_tools.ipynb](./fl_tools.ipynb) ## Notes diff --git a/samples/python/functioncalling/fl_tools.ipynb b/samples/python/functioncalling/fl_tools.ipynb index 0f9c76ed..2b226e2c 100644 --- a/samples/python/functioncalling/fl_tools.ipynb +++ b/samples/python/functioncalling/fl_tools.ipynb @@ -113,35 +113,44 @@ "id": "9335da67", "metadata": {}, "source": [ - "Define the model alias that will be used throughout this example:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "503f23fa", - "metadata": {}, - "outputs": [], - "source": [ - "alias = \"phi-4-mini\"" - ] - }, - { - "cell_type": "markdown", - "id": "5a9b1ecf", - "metadata": {}, - "source": [ - "Create a FoundryLocalManager instance using the specified model alias:" + "Initialize the Foundry Local manager and ensure the model is downloaded and loaded.\n", + "\n", + "The SDK checks the local cache first — if the model is already cached, it skips the download:" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "804611d5", "metadata": {}, "outputs": [], "source": [ - "manager = FoundryLocalManager(alias)" + "alias = \"phi-4-mini\"\n", + "\n", + "# Initialize manager without auto-bootstrapping so we can show each step\n", + "manager = FoundryLocalManager(bootstrap=False)\n", + "\n", + "# Start the Foundry Local service\n", + "print(\"Starting Foundry Local service...\")\n", + "manager.start_service()\n", + "print(\" ✓ Service is running\")\n", + "\n", + "# Check if the model is already cached\n", + "cached_models = manager.list_cached_models()\n", + "cached_ids = {m.id for m in cached_models}\n", + "model_info = manager.get_model_info(alias)\n", + "\n", + "if model_info.id in cached_ids:\n", + " print(f\" ✓ Model '{alias}' is already cached — skipping 
download\")\n", + "else:\n", + " print(f\" Downloading model '{alias}'...\")\n", + " manager.download_model(alias)\n", + " print(f\" ✓ Download complete\")\n", + "\n", + "# Load the model into memory\n", + "print(f\" Loading model '{alias}'...\")\n", + "manager.load_model(alias)\n", + "print(f\" ✓ Model loaded and ready\")" ] }, { diff --git a/samples/python/hello-foundry-local/README.md b/samples/python/hello-foundry-local/README.md index c7753a88..68c7cb12 100644 --- a/samples/python/hello-foundry-local/README.md +++ b/samples/python/hello-foundry-local/README.md @@ -2,6 +2,13 @@ This is a simple example of how to use the Foundry Local SDK to run a model locally and make requests to it. The example demonstrates how to set up the SDK, initialize a model, and make a request to the model. +## Features + +- **Cache-aware**: Checks the local model cache before downloading — if the model is already cached, the download is skipped automatically. +- **Visual feedback**: Shows step-by-step status (service start → cache check → download/skip → load → ready) so you always know what's happening. + +## Setup + Install the Foundry Local SDK and OpenAI packages using pip: ```bash @@ -11,7 +18,7 @@ pip install foundry-local-sdk openai > [!TIP] > We recommend using a virtual environment to manage your Python packages using `venv` or `conda` to avoid conflicts with other packages. 
-Run the application using Python: +## Run ```bash python src/app.py diff --git a/samples/python/hello-foundry-local/requirements.txt b/samples/python/hello-foundry-local/requirements.txt new file mode 100644 index 00000000..2a12d4eb --- /dev/null +++ b/samples/python/hello-foundry-local/requirements.txt @@ -0,0 +1,2 @@ +foundry-local-sdk>=0.5.1 +openai>=1.0.0 diff --git a/samples/python/hello-foundry-local/src/app.py b/samples/python/hello-foundry-local/src/app.py index 8bd21c62..4d47f5eb 100644 --- a/samples/python/hello-foundry-local/src/app.py +++ b/samples/python/hello-foundry-local/src/app.py @@ -1,6 +1,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. +import sys import openai from foundry_local import FoundryLocalManager @@ -8,26 +9,49 @@ # to your end-user's device. alias = "qwen2.5-coder-0.5b" -# Create a FoundryLocalManager instance. This will start the Foundry -# Local service if it is not already running and load the specified model. -manager = FoundryLocalManager(alias) +# Create a FoundryLocalManager instance without bootstrapping +# so we can show each step visually. +print("Initializing Foundry Local...") +manager = FoundryLocalManager(bootstrap=False) +manager.start_service() +print("✓ Service started") -# The remaining code uses the OpenAI Python SDK to interact with the local model. +# Check if the model is already cached (downloaded) +cached_models = manager.list_cached_models() +cached_ids = {m.id for m in cached_models} +model_info = manager.get_model_info(alias) +if model_info is None: + print(f"✗ Model \"{alias}\" not found in catalog") + sys.exit(1) -# Configure the client to use the local Foundry service +if model_info.id in cached_ids: + print(f"✓ Model \"{alias}\" ({model_info.id}) already cached — skipping download") +else: + print(f"Model \"{alias}\" not found in cache. 
Downloading {model_info.id}...") + manager.download_model(alias) + print(f"✓ Model downloaded") + +# Load the model into memory +print(f"Loading model {model_info.id}...") +manager.load_model(alias) +print("✓ Model loaded and ready") + +# Configure the OpenAI client to use the local Foundry service client = openai.OpenAI( base_url=manager.endpoint, api_key=manager.api_key, # API key is not required for local usage ) -# Set the model to use and generate a streaming response +# Generate a streaming response stream = client.chat.completions.create( - model=manager.get_model_info(alias).id, + model=model_info.id, messages=[{"role": "user", "content": "What is the golden ratio?"}], stream=True, ) # Print the streaming response +print("\nAssistant: ", end="") for chunk in stream: if chunk.choices[0].delta.content is not None: print(chunk.choices[0].delta.content, end="", flush=True) +print() diff --git a/samples/python/summarize/README.md b/samples/python/summarize/README.md index 9fa753d1..a944ecff 100644 --- a/samples/python/summarize/README.md +++ b/samples/python/summarize/README.md @@ -2,6 +2,12 @@ A simple command-line utility that uses Foundry Local to generate summaries of text files or direct text input. +## Features + +- **Cache-aware**: Checks the local model cache before downloading — if the model is already cached, the download is skipped automatically. +- **Visual feedback**: Shows step-by-step status (service start → cache check → download/skip → load → ready) so you always know what's happening. +- **Flexible model selection**: Use `--model` to pick a specific model alias, or let the script default to `phi-4-mini` (falls back to the first cached model if unavailable). + ## Setup 1. 
Install the required dependencies: diff --git a/samples/python/summarize/requirements.txt b/samples/python/summarize/requirements.txt index 7b37f256..e29653d7 100644 --- a/samples/python/summarize/requirements.txt +++ b/samples/python/summarize/requirements.txt @@ -1,3 +1,3 @@ openai>=1.0.0 python-dotenv>=0.19.0 -foundry-local-sdk>=0.3.1 +foundry-local-sdk>=0.5.1 diff --git a/samples/python/summarize/summarize.py b/samples/python/summarize/summarize.py index c2b00ba7..9b2a330a 100644 --- a/samples/python/summarize/summarize.py +++ b/samples/python/summarize/summarize.py @@ -42,28 +42,44 @@ def main(): parser.add_argument("--model", help="Model alias to use for summarization") args = parser.parse_args() - fl_manager = FoundryLocalManager() - + # Initialize Foundry Local without bootstrapping for visibility + print("Initializing Foundry Local...") + fl_manager = FoundryLocalManager(bootstrap=False) fl_manager.start_service() + print("✓ Service started") - model_list = fl_manager.list_cached_models() - - if not model_list: - print("No downloaded models available") - sys.exit(1) + # Check what's available in cache + cached_models = fl_manager.list_cached_models() + cached_ids = {m.id for m in cached_models} - # Select model based on alias or use first one if args.model: - selected_model = next((model for model in model_list if model.alias == args.model), None) - if selected_model: - model_name = selected_model.id + # User specified a model — check cache, download if needed + model_info = fl_manager.get_model_info(args.model) + if model_info is None: + print(f"✗ Model alias '{args.model}' not found in catalog") + sys.exit(1) + + if model_info.id in cached_ids: + print(f"✓ Model \"{args.model}\" ({model_info.id}) already cached — skipping download") else: - model_name = model_list[0].id - print(f"Model alias '{args.model}' not found, using default model: {model_name}") + print(f"Model \"{args.model}\" not in cache. 
Downloading {model_info.id}...") + fl_manager.download_model(args.model) + print("✓ Model downloaded") + + print(f"Loading model {model_info.id}...") + fl_manager.load_model(args.model) + model_name = model_info.id else: - model_name = model_list[0].id + # No model specified — use the first cached model, or fail + if not cached_models: + print("No downloaded models available. Run with --model to download one.") + sys.exit(1) + + model_name = cached_models[0].id + print(f"✓ Using cached model: {model_name}") + fl_manager.load_model(model_name) - print(f"Using model: {model_name}") + print(f"✓ Model loaded and ready\n") # Initialize OpenAI client client = OpenAI(base_url=fl_manager.endpoint, api_key=fl_manager.api_key) @@ -76,7 +92,7 @@ def main(): # Get and print summary summary = get_summary(text, client, model_name) - print("\nSummary:") + print("Summary:") print("-" * 50) print(summary) print("-" * 50) diff --git a/samples/rag/README.md b/samples/rag/README.md index 2225fd01..11cf0116 100644 --- a/samples/rag/README.md +++ b/samples/rag/README.md @@ -2,14 +2,14 @@ ## Overview -This guide demonstrates how to build a complete offline RAG (Retrieval-Augmented Generation) solution using Foundry Local, combining local embedding models with vector search capabilities for enhanced AI inference on edge devices. +This guide demonstrates how to build a complete offline RAG (Retrieval-Augmented Generation) solution using Foundry Local with the **Foundry Local C# SDK**, combining local embedding models with vector search capabilities for enhanced AI inference on edge devices. The SDK manages the full model lifecycle — cache checking, downloading, loading, and providing an OpenAI-compatible endpoint. 
## Prerequisites -- **Qdrant**: Local vector database installation +- **Qdrant**: Local vector database — `docker run -p 6333:6333 -p 6334:6334 qdrant/qdrant` - **.NET 8+**: Runtime environment - **.NET Interactive Notebook**: For development and testing -- **Foundry Local 0.5.100+**: Local AI model execution platform +- **Foundry Local**: Latest — see [foundrylocal.ai](https://foundrylocal.ai) ### Hardware Considerations @@ -45,9 +45,40 @@ Download and place these files in a `./jina/` directory: #r "nuget: Microsoft.SemanticKernel.Connectors.Onnx, 1.60.0-alpha" #r "nuget: Microsoft.SemanticKernel.Connectors.Qdrant, 1.60.0-preview" #r "nuget: Qdrant.Client, 1.14.1" +#r "nuget: Microsoft.AI.Foundry.Local" ``` -### 2. Kernel Configuration +### 2. SDK Initialization and Model Lifecycle + +```csharp +using Microsoft.AI.Foundry.Local; +using Microsoft.Extensions.Logging.Abstractions; + +// Initialize the SDK with web service support +await FoundryLocalManager.CreateAsync( + new Configuration + { + AppName = "rag-notebook", + Web = new Configuration.WebService { Urls = "http://127.0.0.1:0" } + }, + NullLogger.Instance); + +var manager = FoundryLocalManager.Instance; + +// Look up model by alias — SDK auto-selects the best variant +var catalog = await manager.GetCatalogAsync(); +var model = await catalog.GetModelAsync("qwen2.5-0.5b"); + +// Cache-aware download: only downloads on first run +if (!await model.IsCachedAsync()) + await model.DownloadAsync(progress => Console.Write($"\rDownload: {progress:F1}%")); + +await model.LoadAsync(); +await manager.StartWebServiceAsync(); +var endpoint = manager.Urls![0]; +``` + +### 3. 
Kernel Configuration ```csharp var builder = Kernel.CreateBuilder(); @@ -55,11 +86,11 @@ var builder = Kernel.CreateBuilder(); // Local embedding model builder.AddBertOnnxEmbeddingGenerator("./jina/model.onnx", "./jina/vocab.txt"); -// Foundry Local chat completion +// Foundry Local chat completion — endpoint and variant from SDK builder.AddOpenAIChatCompletion( - "qwen2.5-0.5b-instruct-generic-gpu", - new Uri("http://localhost:5273/v1"), - apiKey: "", + model.SelectedVariant.Id, + new Uri($"{endpoint}/v1"), + apiKey: "", serviceId: "qwen2.5-0.5b"); var kernel = builder.Build(); diff --git a/samples/rag/rag_foundrylocal_demo.ipynb b/samples/rag/rag_foundrylocal_demo.ipynb index d12cd5d1..9ae6be67 100644 --- a/samples/rag/rag_foundrylocal_demo.ipynb +++ b/samples/rag/rag_foundrylocal_demo.ipynb @@ -7,11 +7,13 @@ "source": [ "# Foundry Local RAG Implementation Guide\n", "\n", - "This notebook demonstrates how to build a Retrieval-Augmented Generation (RAG) system using Foundry Local with Semantic Kernel, ONNX embeddings, and Qdrant vector database.\n", + "This notebook demonstrates how to build a Retrieval-Augmented Generation (RAG) system using **Foundry Local** with the **Foundry Local C# SDK**, Semantic Kernel, ONNX embeddings, and Qdrant vector database.\n", + "\n", + "The Foundry Local SDK manages the model lifecycle (cache check, download, load) and provides an OpenAI-compatible endpoint for Semantic Kernel to use — no hardcoded URLs or variant IDs needed.\n", "\n", "## Package Installation\n", "\n", - "First, we install the required NuGet packages for Semantic Kernel and related components." + "First, we install the required NuGet packages." ] }, { @@ -88,43 +90,6 @@ "Installing the ONNX connector package which enables using ONNX models for embeddings generation in Semantic Kernel." 
] }, - { - "cell_type": "code", - "execution_count": 3, - "id": "bc62e7be", - "metadata": { - "language_info": { - "name": "polyglot-notebook" - }, - "polyglot_notebook": { - "kernelName": "csharp" - } - }, - "outputs": [ - { - "data": { - "text/html": [ - "
Installed Packages
  • Microsoft.SemanticKernel.Connectors.Onnx, 1.60.0-alpha
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "#r \"nuget: Microsoft.SemanticKernel.Connectors.Onnx, 1.60.0-alpha\"" - ] - }, - { - "cell_type": "markdown", - "id": "70bff756", - "metadata": {}, - "source": [ - "### Duplicate ONNX Connector Installation\n", - "\n", - "Note: This is a duplicate installation of the ONNX connector package (same as the previous cell)." - ] - }, { "cell_type": "code", "execution_count": 4, @@ -199,7 +164,27 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, + "id": "d051a66f", + "metadata": {}, + "outputs": [], + "source": [ + "#r \"nuget: Microsoft.AI.Foundry.Local\"" + ] + }, + { + "cell_type": "markdown", + "id": "e649c627", + "metadata": {}, + "source": [ + "### Install Foundry Local SDK\n", + "\n", + "Installing the Foundry Local C# SDK which manages model lifecycle — cache checking, downloading, loading, and providing an OpenAI-compatible endpoint." + ] + }, + { + "cell_type": "code", + "execution_count": null, "id": "6ab040e4", "metadata": { "language_info": { @@ -211,7 +196,10 @@ }, "outputs": [], "source": [ - "using Microsoft.SemanticKernel;" + "using Microsoft.SemanticKernel;\n", + "using Microsoft.AI.Foundry.Local;\n", + "using Microsoft.Extensions.Logging;\n", + "using Microsoft.Extensions.Logging.Abstractions;" ] }, { @@ -221,9 +209,9 @@ "source": [ "## Setup and Configuration\n", "\n", - "### Import Semantic Kernel\n", + "### Import Namespaces\n", "\n", - "Importing the core Semantic Kernel namespace to access the main functionality." + "Importing core Semantic Kernel, Foundry Local SDK, and logging namespaces." ] }, { @@ -253,6 +241,83 @@ "Creating a kernel builder instance which will be used to configure and build the Semantic Kernel with various services." 
] }, + { + "cell_type": "markdown", + "id": "9d88b6ee", + "metadata": {}, + "source": [ + "### Initialize Foundry Local SDK\n", + "\n", + "Initialize the Foundry Local SDK singleton with a web service configuration. The SDK will manage the model lifecycle and provide an OpenAI-compatible endpoint for Semantic Kernel." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4f039c8a", + "metadata": {}, + "outputs": [], + "source": [ + "// Initialize the Foundry Local SDK with web service support\n", + "await FoundryLocalManager.CreateAsync(\n", + " new Configuration\n", + " {\n", + " AppName = \"rag-notebook\",\n", + " Web = new Configuration.WebService { Urls = \"http://127.0.0.1:0\" } // port 0 = auto-assign\n", + " },\n", + " NullLogger.Instance);\n", + "\n", + "var manager = FoundryLocalManager.Instance;\n", + "Console.WriteLine(\"Foundry Local SDK initialized.\");" + ] + }, + { + "cell_type": "markdown", + "id": "9ff58f97", + "metadata": {}, + "source": [ + "### Model Lifecycle — Cache Check, Download, Load\n", + "\n", + "Look up the model by alias, check whether it is already cached locally, download if needed (with progress), then load into memory and start the web service." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "06ecda82", + "metadata": {}, + "outputs": [], + "source": [ + "// Look up the model by alias (SDK auto-selects the best variant for the hardware)\n", + "var modelAlias = \"qwen2.5-0.5b\";\n", + "var catalog = await manager.GetCatalogAsync();\n", + "var model = await catalog.GetModelAsync(modelAlias)\n", + " ?? 
throw new Exception($\"Model '{modelAlias}' not found in catalog.\");\n", + "\n", + "Console.WriteLine($\"Model: {model.Alias} — Variant: {model.SelectedVariant.Id}\");\n", + "\n", + "// Check if cached, download if needed\n", + "var isCached = await model.IsCachedAsync();\n", + "Console.WriteLine($\"Cached: {isCached}\");\n", + "\n", + "if (!isCached)\n", + "{\n", + " Console.WriteLine(\"Downloading model (first time only)...\");\n", + " await model.DownloadAsync(progress =>\n", + " Console.Write($\"\\rDownload: {progress:F1}% \"));\n", + " Console.WriteLine(\"\\nDownload complete.\");\n", + "}\n", + "\n", + "// Load the model into memory\n", + "await model.LoadAsync();\n", + "Console.WriteLine(\"Model loaded.\");\n", + "\n", + "// Start the web service — Semantic Kernel will connect to this endpoint\n", + "await manager.StartWebServiceAsync();\n", + "var endpoint = manager.Urls![0];\n", + "Console.WriteLine($\"Foundry Local endpoint: {endpoint}/v1\");" + ] + }, { "cell_type": "code", "execution_count": null, @@ -267,8 +332,10 @@ }, "outputs": [], "source": [ - "var embeddModelPath = \"Your Jinaai jina-embeddings-v2-base-en onnx model path\";\n", - "var embedVocab = \"Your Jinaai ina-embeddings-v2-base-en vocab file path\";" + "// Download from https://huggingface.co/jinaai/jina-embeddings-v2-base-en\n", + "// Place model.onnx and vocab.txt in a ./jina/ directory relative to this notebook\n", + "var embeddModelPath = \"./jina/model.onnx\";\n", + "var embedVocab = \"./jina/vocab.txt\";" ] }, { @@ -278,12 +345,14 @@ "source": [ "### Define Embedding Model Paths\n", "\n", - "Setting up file paths for the JINA embedding model files - the ONNX model file and vocabulary file needed for text embeddings." + "Setting up file paths for the JINA embedding model. 
Download both files from [HuggingFace](https://huggingface.co/jinaai/jina-embeddings-v2-base-en) and place them in a `./jina/` directory:\n", + "- `model.onnx` — [download](https://huggingface.co/jinaai/jina-embeddings-v2-base-en/resolve/main/model.onnx)\n", + "- `vocab.txt` — [download](https://huggingface.co/jinaai/jina-embeddings-v2-base-en/resolve/main/vocab.txt)" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "f48625de", "metadata": { "language_info": { @@ -296,7 +365,14 @@ "outputs": [], "source": [ "builder.AddBertOnnxEmbeddingGenerator(embeddModelPath, embedVocab);\n", - "builder.AddOpenAIChatCompletion(\"qwen2.5-0.5b-instruct-generic-gpu\", new Uri(\"http://localhost:5273/v1\"), apiKey: \"\", serviceId: \"qwen2.5-0.5b\");" + "\n", + "// Use the SDK-managed endpoint instead of a hardcoded URL.\n", + "// The model variant ID is obtained from the SDK (no hardcoded variant names).\n", + "builder.AddOpenAIChatCompletion(\n", + " model.SelectedVariant.Id,\n", + " new Uri($\"{endpoint}/v1\"),\n", + " apiKey: \"\",\n", + " serviceId: modelAlias);" ] }, { @@ -306,7 +382,7 @@ "source": [ "### Configure AI Services\n", "\n", - "Adding the BERT ONNX embedding generator and OpenAI-compatible chat completion service to the kernel builder. The chat service connects to a local Foundry Local instance running the Qwen2.5 model." + "Adding the BERT ONNX embedding generator for local embeddings and the OpenAI-compatible chat completion service. The chat endpoint and model variant ID are obtained from the Foundry Local SDK — no hardcoded URLs or variant names." 
] }, { @@ -764,7 +840,7 @@ }, "outputs": [], "source": [ - "var chatService = kernel.GetRequiredService(serviceKey: \"qwen2.5-0.5b\");\n", + "var chatService = kernel.GetRequiredService(serviceKey: modelAlias);\n", "var embeddingService = kernel.GetRequiredService>>();" ] }, @@ -782,7 +858,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "metadata": { "language_info": { "name": "polyglot-notebook" @@ -793,8 +869,10 @@ }, "outputs": [], "source": [ + "// Ensure Qdrant is running locally: docker run -p 6333:6333 -p 6334:6334 qdrant/qdrant\n", + "var qdrantEndpoint = \"http://localhost:6334\";\n", "var vectorStoreService = new VectorStoreService(\n", - " \"http://localhost:6334\",\n", + " qdrantEndpoint,\n", " \"\",\n", " \"demodocs\");\n", "\n", @@ -1014,6 +1092,28 @@ "\n", "Displaying the final answer generated by the RAG system, which should contain information about Foundry Local based on the ingested document." ] + }, + { + "cell_type": "markdown", + "id": "da1bc7e9", + "metadata": {}, + "source": [ + "## Cleanup\n", + "\n", + "Stop the web service and dispose of the Foundry Local SDK when done." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f8a3f84", + "metadata": {}, + "outputs": [], + "source": [ + "await manager.StopWebServiceAsync();\n", + "manager.Dispose();\n", + "Console.WriteLine(\"Foundry Local SDK cleaned up.\");" + ] } ], "metadata": { From 78206d0a8ab01c8d2a5b14417760c49dc91f3916 Mon Sep 17 00:00:00 2001 From: Lee Stott Date: Tue, 24 Mar 2026 16:43:41 -0700 Subject: [PATCH 02/13] Update --- .../Services/FoundryModelService.cs | 5 ++++- .../Services/TranscriptionService.cs | 9 +++++---- samples/js/local-cag/src/modelSelector.js | 11 ++++++----- samples/js/local-rag/src/chatEngine.js | 4 ++-- .../agent-framework/src/app/foundry_boot.py | 10 +++------- samples/python/agent-framework/src/app/web.py | 17 ++++++++--------- samples/python/summarize/summarize.py | 5 +++-- 7 files changed, 31 insertions(+), 30 deletions(-) diff --git a/samples/cs/whisper-transcription/Services/FoundryModelService.cs b/samples/cs/whisper-transcription/Services/FoundryModelService.cs index 97f34bbe..2b22c3c8 100644 --- a/samples/cs/whisper-transcription/Services/FoundryModelService.cs +++ b/samples/cs/whisper-transcription/Services/FoundryModelService.cs @@ -68,10 +68,13 @@ public async Task EnsureModelReadyAsync(Model model) if (!await model.IsCachedAsync()) { _logger.LogInformation("Model \"{ModelId}\" not cached — downloading...", model.Id); + var lastLoggedBucket = -1; await model.DownloadAsync(progress => { - if (progress % 10 == 0) + var bucket = (int)Math.Floor(progress / 10); + if (bucket > lastLoggedBucket) { + lastLoggedBucket = bucket; _logger.LogInformation("Download progress: {Progress:F0}%", progress); } }); diff --git a/samples/cs/whisper-transcription/Services/TranscriptionService.cs b/samples/cs/whisper-transcription/Services/TranscriptionService.cs index bef7001e..e533be1f 100644 --- a/samples/cs/whisper-transcription/Services/TranscriptionService.cs +++ b/samples/cs/whisper-transcription/Services/TranscriptionService.cs @@ 
-15,20 +15,21 @@ public TranscriptionService( _logger = logger; } - public async Task TranscribeAsync(string filePath, string? modelAlias = null) + public async Task TranscribeAsync(string filePath, string? modelAlias = null, + CancellationToken ct = default) { var model = await _modelService.GetModelAsync(modelAlias); await _modelService.EnsureModelReadyAsync(model); - var audioClient = await model.GetAudioClientAsync() + var audioClient = await model.GetAudioClientAsync(ct) ?? throw new InvalidOperationException("Failed to get audio client"); _logger.LogInformation("Transcribing \"{FilePath}\" with model {ModelId}", filePath, model.Id); // Use streaming transcription for real-time output var textParts = new List(); - var response = audioClient.TranscribeAudioStreamingAsync(filePath, CancellationToken.None); - await foreach (var chunk in response) + var response = audioClient.TranscribeAudioStreamingAsync(filePath, ct); + await foreach (var chunk in response.WithCancellation(ct)) { if (!string.IsNullOrEmpty(chunk.Text)) { diff --git a/samples/js/local-cag/src/modelSelector.js b/samples/js/local-cag/src/modelSelector.js index 36d98910..ba2c26a9 100644 --- a/samples/js/local-cag/src/modelSelector.js +++ b/samples/js/local-cag/src/modelSelector.js @@ -68,9 +68,10 @@ export async function selectBestModel(catalog, opts = {}) { // Filter to chat-completion models that fit within the RAM budget const candidates = []; for (const m of allModels) { - const info = m.selectedVariant?._modelInfo; - if (!info) continue; - if (info.task !== "chat-completion") continue; + // Use the public API: iterate model.variants and use variant.modelInfo + const variant = m.variants.find(v => v.modelInfo?.task === "chat-completion"); + if (!variant) continue; + const info = variant.modelInfo; if (SKIP_ALIASES.has(info.alias)) continue; if (info.fileSizeMb > budgetMb) { console.log(`[ModelSelector] skip ${info.alias} (${(info.fileSizeMb / 1024).toFixed(1)} GB > RAM budget)`); @@ -97,7 +98,7 
@@ export async function selectBestModel(catalog, opts = {}) { const qualityScore = rankIndex >= 0 ? (QUALITY_RANK.length - rankIndex) * 10 : 1; - const cacheBonus = info.cached ? 5 : 0; + const cacheBonus = model.isCached ? 5 : 0; const score = qualityScore + cacheBonus; return { model, info, score }; }); @@ -107,7 +108,7 @@ export async function selectBestModel(catalog, opts = {}) { const best = scored[0]; const reason = `auto-selected (${(best.info.fileSizeMb / 1024).toFixed(1)} GB, ` + - `${best.info.cached ? "cached" : "will download"}, ` + + `${best.model.isCached ? "cached" : "will download"}, ` + `rank ${scored.indexOf(best) + 1}/${scored.length})`; console.log(`[ModelSelector] Selected: ${best.info.alias} – ${reason}`); diff --git a/samples/js/local-rag/src/chatEngine.js b/samples/js/local-rag/src/chatEngine.js index 2d878839..0684a5a1 100644 --- a/samples/js/local-rag/src/chatEngine.js +++ b/samples/js/local-rag/src/chatEngine.js @@ -54,8 +54,8 @@ export class ChatEngine { if (!this.model.isCached) { this._emitStatus("download", `Downloading ${this.modelAlias}... This may take a few minutes on first run.`, 0); await this.model.download((progress) => { - const pct = Math.round(progress * 100); - this._emitStatus("download", `Downloading ${this.modelAlias}... ${pct}%`, progress); + const pct = Math.round(progress); + this._emitStatus("download", `Downloading ${this.modelAlias}... 
${pct}%`, progress / 100); }); this._emitStatus("download", `Download complete.`, 1); } else { diff --git a/samples/python/agent-framework/src/app/foundry_boot.py b/samples/python/agent-framework/src/app/foundry_boot.py index 3bde3388..78cf1c51 100644 --- a/samples/python/agent-framework/src/app/foundry_boot.py +++ b/samples/python/agent-framework/src/app/foundry_boot.py @@ -57,13 +57,9 @@ def bootstrap(self) -> FoundryConnection: endpoint = manager.endpoint api_key = manager.api_key - # List cached models to find the resolved variant - cached = manager.list_cached_models() - model_id = self.alias - for m in cached: - if self.alias in str(m): - model_id = str(m) - break + # Resolve alias to the actual model ID via the SDK's catalog API + model_info = manager.get_model_info(self.alias) + model_id = model_info.id if model_info else self.alias console.print(f"[green]✓ Foundry Local ready[/] endpoint={endpoint}") log.info("Foundry Local ready: endpoint=%s model=%s", endpoint, model_id) diff --git a/samples/python/agent-framework/src/app/web.py b/samples/python/agent-framework/src/app/web.py index 5074e3b0..05e5d737 100644 --- a/samples/python/agent-framework/src/app/web.py +++ b/samples/python/agent-framework/src/app/web.py @@ -83,15 +83,14 @@ def generate(): else: gen = run_full_workflow(_conn, _docs, question) - async def drain(): - events = [] - async for evt in gen: - events.append(evt) - return events - - events = loop.run_until_complete(drain()) - for evt in events: - yield f"data: {json.dumps(evt)}\n\n" + # Stream each event as it arrives instead of buffering + agen = gen.__aiter__() + while True: + try: + evt = loop.run_until_complete(agen.__anext__()) + yield f"data: {json.dumps(evt)}\n\n" + except StopAsyncIteration: + break except Exception as exc: log.exception("Workflow error") yield f"data: {json.dumps({'type': 'error', 'message': str(exc), 'traceback': traceback.format_exc()})}\n\n" diff --git a/samples/python/summarize/summarize.py 
b/samples/python/summarize/summarize.py index 9b2a330a..6c5943e8 100644 --- a/samples/python/summarize/summarize.py +++ b/samples/python/summarize/summarize.py @@ -75,9 +75,10 @@ def main(): print("No downloaded models available. Run with --model to download one.") sys.exit(1) + cached_alias = cached_models[0].alias model_name = cached_models[0].id - print(f"✓ Using cached model: {model_name}") - fl_manager.load_model(model_name) + print(f"✓ Using cached model: {cached_alias} ({model_name})") + fl_manager.load_model(cached_alias) print(f"✓ Model loaded and ready\n") From 0f5e7a9e1ffd1eada41d6b13c91e1e4ea0561f9a Mon Sep 17 00:00:00 2001 From: Lee Stott Date: Tue, 24 Mar 2026 16:50:53 -0700 Subject: [PATCH 03/13] fix: address review feedback - thread safety, README accuracy, TF-IDF claims - FoundryModelService.cs: add SemaphoreSlim for thread-safe InitializeAsync to prevent concurrent callers from double-initializing in ASP.NET - summarize/README.md: align docs with code (uses first cached model, not phi-4-mini default) - local-rag/README.md: replace 'TF-IDF' with 'term-frequency' throughout since the implementation uses raw term-frequency maps without IDF weighting --- .../Services/FoundryModelService.cs | 35 ++++++++++++------- samples/js/local-rag/README.md | 14 ++++---- samples/python/summarize/README.md | 2 +- 3 files changed, 31 insertions(+), 20 deletions(-) diff --git a/samples/cs/whisper-transcription/Services/FoundryModelService.cs b/samples/cs/whisper-transcription/Services/FoundryModelService.cs index 2b22c3c8..3c8cd5ba 100644 --- a/samples/cs/whisper-transcription/Services/FoundryModelService.cs +++ b/samples/cs/whisper-transcription/Services/FoundryModelService.cs @@ -8,6 +8,7 @@ public class FoundryModelService private readonly ILogger _logger; private readonly ILoggerFactory _loggerFactory; private readonly FoundryOptions _options; + private readonly SemaphoreSlim _initLock = new(1, 1); private bool _initialized; public FoundryModelService( @@ -24,20 
+25,30 @@ public async Task InitializeAsync() { if (_initialized) return; - _logger.LogInformation("Initializing Foundry Local Manager"); - var config = new Configuration + await _initLock.WaitAsync(); + try { - AppName = "WhisperTranscription", - LogLevel = Enum.TryParse( - _options.LogLevel, true, out var lvl) - ? lvl - : Microsoft.AI.Foundry.Local.LogLevel.Information, - }; + if (_initialized) return; - await FoundryLocalManager.CreateAsync(config, _loggerFactory.CreateLogger("FoundryLocal")); - var mgr = FoundryLocalManager.Instance; - await mgr.EnsureEpsDownloadedAsync(); - _initialized = true; + _logger.LogInformation("Initializing Foundry Local Manager"); + var config = new Configuration + { + AppName = "WhisperTranscription", + LogLevel = Enum.TryParse( + _options.LogLevel, true, out var lvl) + ? lvl + : Microsoft.AI.Foundry.Local.LogLevel.Information, + }; + + await FoundryLocalManager.CreateAsync(config, _loggerFactory.CreateLogger("FoundryLocal")); + var mgr = FoundryLocalManager.Instance; + await mgr.EnsureEpsDownloadedAsync(); + _initialized = true; + } + finally + { + _initLock.Release(); + } } public async Task GetModelAsync(string? aliasOrId = null) diff --git a/samples/js/local-rag/README.md b/samples/js/local-rag/README.md index 0b71e307..dfcdb9a1 100644 --- a/samples/js/local-rag/README.md +++ b/samples/js/local-rag/README.md @@ -4,7 +4,7 @@ A fully offline **Retrieval-Augmented Generation (RAG)** sample application that ## What is RAG? -RAG (Retrieval-Augmented Generation) **chunks documents, indexes them with TF-IDF vectors, and retrieves only the most relevant chunks** at query time — no cloud APIs, no embedding models, no external vector databases. This makes it ideal for: +RAG (Retrieval-Augmented Generation) **chunks documents, indexes them with term-frequency vectors, and retrieves only the most relevant chunks** via cosine similarity at query time — no cloud APIs, no embedding models, no external vector databases. 
This makes it ideal for: - **Large knowledge bases** — scales beyond what fits in a single prompt - **Offline / air-gapped** environments (e.g., field operations) @@ -26,12 +26,12 @@ RAG (Retrieval-Augmented Generation) **chunks documents, indexes them with TF-ID │ ┌──────┴───────┐ │ SQLite DB │ - │ (TF-IDF idx) │ + │ (TF vectors) │ └──────────────┘ ``` -1. **Ingest**: Documents in `docs/` are chunked (200 tokens, 25-token overlap) and stored in SQLite with TF-IDF vectors and an inverted index. -2. **Query**: Each user question is vectorised using TF-IDF, then cosine similarity finds the top-K most relevant chunks. +1. **Ingest**: Documents in `docs/` are chunked (200 tokens, 25-token overlap) and stored in SQLite with term-frequency vectors and an inverted index. +2. **Query**: Each user question is vectorised using term-frequency, then cosine similarity finds the top-K most relevant chunks. 3. **Prompt**: Retrieved chunks are injected into the system prompt with source citations. 4. **Inference**: Foundry Local runs the model in-process — no external HTTP server needed. 
@@ -107,7 +107,7 @@ local-rag/ ├── server.js # Express server with SSE status + chat + upload ├── chatEngine.js # RAG engine: SDK init, retrieval, inference ├── config.js # Configuration (model, chunking, paths) - ├── chunker.js # Document parsing, chunking, TF-IDF math + ├── chunker.js # Document parsing, chunking, term-frequency math ├── vectorStore.js # SQLite-backed vector store with inverted index ├── ingest.js # Batch document ingestion script └── prompts.js # System prompts (full + compact/edge mode) @@ -116,7 +116,7 @@ local-rag/ ## Key Features - **Cache-aware** — skips model download if already in the Foundry cache -- **TF-IDF vector search** — no embedding model needed; lightweight and fast +- **Term-frequency vector search** — no embedding model needed; lightweight and fast - **SQLite storage** — single-file database, no external services - **Runtime document upload** — add documents via the web UI without restarting - **Source citations** — each response shows which chunks were used and their relevance scores @@ -129,7 +129,7 @@ local-rag/ | Feature | RAG (this sample) | CAG | |---------|-------------------|-----| | Document loading | Chunked + indexed | All loaded at startup | -| Vector search | TF-IDF + cosine similarity | Keyword scoring | +| Vector search | Term-frequency + cosine similarity | Keyword scoring | | Storage | SQLite database | In-memory | | Knowledge base size | Any size | Small–medium | | Runtime upload | Yes | No | diff --git a/samples/python/summarize/README.md b/samples/python/summarize/README.md index a944ecff..5e1405f6 100644 --- a/samples/python/summarize/README.md +++ b/samples/python/summarize/README.md @@ -6,7 +6,7 @@ A simple command-line utility that uses Foundry Local to generate summaries of t - **Cache-aware**: Checks the local model cache before downloading — if the model is already cached, the download is skipped automatically. 
- **Visual feedback**: Shows step-by-step status (service start → cache check → download/skip → load → ready) so you always know what's happening. -- **Flexible model selection**: Use `--model` to pick a specific model alias, or let the script default to `phi-4-mini` (falls back to the first cached model if unavailable). +- **Flexible model selection**: Use `--model` to pick a specific model alias, or omit it to automatically use the first cached model. ## Setup From acf06fc15f28fe1596a42ee61de5d9f5bb84c2aa Mon Sep 17 00:00:00 2001 From: Lee Stott Date: Tue, 24 Mar 2026 17:06:09 -0700 Subject: [PATCH 04/13] =?UTF-8?q?fix:=20address=20round-3=20review=20issue?= =?UTF-8?q?s=20=E2=80=94=20env=20vars,=20event=20loop,=20CancellationToken?= =?UTF-8?q?,=20README=20accuracy?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../Services/FoundryModelService.cs | 8 ++++---- .../Services/TranscriptionService.cs | 2 +- samples/js/local-cag/src/config.js | 2 +- samples/js/local-rag/package.json | 2 +- samples/js/local-rag/src/config.js | 10 +++++----- samples/python/agent-framework/README.md | 2 +- samples/python/agent-framework/src/app/web.py | 2 ++ 7 files changed, 15 insertions(+), 13 deletions(-) diff --git a/samples/cs/whisper-transcription/Services/FoundryModelService.cs b/samples/cs/whisper-transcription/Services/FoundryModelService.cs index 3c8cd5ba..42ea828c 100644 --- a/samples/cs/whisper-transcription/Services/FoundryModelService.cs +++ b/samples/cs/whisper-transcription/Services/FoundryModelService.cs @@ -65,7 +65,7 @@ public async Task GetModelAsync(string? 
aliasOrId = null) return model; } - public async Task EnsureModelReadyAsync(Model model) + public async Task EnsureModelReadyAsync(Model model, CancellationToken ct = default) { // Prefer CPU variant var cpuVariant = model.Variants.FirstOrDefault( @@ -76,7 +76,7 @@ public async Task EnsureModelReadyAsync(Model model) } // Check cache and download if needed - if (!await model.IsCachedAsync()) + if (!await model.IsCachedAsync(ct)) { _logger.LogInformation("Model \"{ModelId}\" not cached — downloading...", model.Id); var lastLoggedBucket = -1; @@ -88,7 +88,7 @@ await model.DownloadAsync(progress => lastLoggedBucket = bucket; _logger.LogInformation("Download progress: {Progress:F0}%", progress); } - }); + }, ct); _logger.LogInformation("Model downloaded"); } else @@ -97,7 +97,7 @@ await model.DownloadAsync(progress => } _logger.LogInformation("Loading model \"{ModelId}\"...", model.Id); - await model.LoadAsync(); + await model.LoadAsync(ct); _logger.LogInformation("Model loaded and ready"); } } diff --git a/samples/cs/whisper-transcription/Services/TranscriptionService.cs b/samples/cs/whisper-transcription/Services/TranscriptionService.cs index e533be1f..f436aa3a 100644 --- a/samples/cs/whisper-transcription/Services/TranscriptionService.cs +++ b/samples/cs/whisper-transcription/Services/TranscriptionService.cs @@ -19,7 +19,7 @@ public async Task TranscribeAsync(string filePath, string? CancellationToken ct = default) { var model = await _modelService.GetModelAsync(modelAlias); - await _modelService.EnsureModelReadyAsync(model); + await _modelService.EnsureModelReadyAsync(model, ct); var audioClient = await model.GetAudioClientAsync(ct) ?? 
throw new InvalidOperationException("Failed to get audio client"); diff --git a/samples/js/local-cag/src/config.js b/samples/js/local-cag/src/config.js index 8c928df5..9133fad6 100644 --- a/samples/js/local-cag/src/config.js +++ b/samples/js/local-cag/src/config.js @@ -28,7 +28,7 @@ export const config = { // Server port: parseInt(process.env.PORT, 10) || 3000, - host: "127.0.0.1", + host: process.env.HOST || "127.0.0.1", // UI publicDir: path.join(ROOT, "public"), diff --git a/samples/js/local-rag/package.json b/samples/js/local-rag/package.json index 8c76bae9..247c4bb4 100644 --- a/samples/js/local-rag/package.json +++ b/samples/js/local-rag/package.json @@ -23,7 +23,7 @@ "gas-field", "support-agent", "sqlite", - "tfidf" + "term-frequency" ], "engines": { "node": ">=20.0.0" diff --git a/samples/js/local-rag/src/config.js b/samples/js/local-rag/src/config.js index 12b2937e..b53f08c1 100644 --- a/samples/js/local-rag/src/config.js +++ b/samples/js/local-rag/src/config.js @@ -7,18 +7,18 @@ const ROOT = path.resolve(__dirname, ".."); export const config = { // Model - model: "phi-3.5-mini", + model: process.env.FOUNDRY_MODEL || "phi-3.5-mini", // RAG docsDir: path.join(ROOT, "docs"), dbPath: path.join(ROOT, "data", "rag.db"), - chunkSize: 200, // tokens (approx) – kept small for NPU compatibility + chunkSize: 200, // tokens (approx) \u2013 kept small for NPU compatibility chunkOverlap: 25, // tokens overlap between chunks - topK: 3, // number of chunks to retrieve – limited for NPU context window + topK: 3, // number of chunks to retrieve \u2013 limited for NPU context window // Server - port: 3000, - host: "127.0.0.1", + port: parseInt(process.env.PORT, 10) || 3000, + host: process.env.HOST || "127.0.0.1", // UI publicDir: path.join(ROOT, "public"), diff --git a/samples/python/agent-framework/README.md b/samples/python/agent-framework/README.md index 23368e94..6802ff65 100644 --- a/samples/python/agent-framework/README.md +++ 
b/samples/python/agent-framework/README.md @@ -125,7 +125,7 @@ The smoke tests verify imports, document loading, the bootstrapper's environment | `ModuleNotFoundError: agent_framework` | `pip install agent-framework-core==1.0.0b260130` | | Model download hangs | Check network and ensure Foundry Local is on PATH | | `Connection refused` on port 5273 | Foundry Local service failed to start — run `foundry-local` manually to see errors | -| Flask port 5000 in use | Set `FLASK_PORT` env var or kill the conflicting process | +| Flask port 5000 in use | Use `--port ` flag or kill the conflicting process | ## License diff --git a/samples/python/agent-framework/src/app/web.py b/samples/python/agent-framework/src/app/web.py index 05e5d737..fc424bb2 100644 --- a/samples/python/agent-framework/src/app/web.py +++ b/samples/python/agent-framework/src/app/web.py @@ -77,6 +77,7 @@ def api_run(): def generate(): loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) try: if mode == "sequential": gen = run_sequential(_conn, _docs, question) @@ -95,6 +96,7 @@ def generate(): log.exception("Workflow error") yield f"data: {json.dumps({'type': 'error', 'message': str(exc), 'traceback': traceback.format_exc()})}\n\n" finally: + asyncio.set_event_loop(None) loop.close() return Response(generate(), mimetype="text/event-stream") From 050fbede49664050f2c19f9adf38d11862212a69 Mon Sep 17 00:00:00 2001 From: Lee Stott Date: Tue, 24 Mar 2026 19:54:15 -0700 Subject: [PATCH 05/13] update --- samples/cs/whisper-transcription/Program.cs | 7 ++++--- .../js/copilot-sdk-foundry-local/src/tool-calling.ts | 4 +++- samples/js/electron-chat-application/main.js | 8 ++++---- samples/js/local-cag/src/modelSelector.js | 4 ++++ samples/js/local-rag/src/chatEngine.js | 9 +++++---- samples/js/tool-calling-foundry-local/src/app.js | 2 +- samples/python/agent-framework/src/app/orchestrator.py | 8 ++------ samples/python/agent-framework/src/app/web.py | 10 +++++++--- 
samples/python/functioncalling/fl_tools.ipynb | 3 +++ 9 files changed, 33 insertions(+), 22 deletions(-) diff --git a/samples/cs/whisper-transcription/Program.cs b/samples/cs/whisper-transcription/Program.cs index f985f26c..5ae320a6 100644 --- a/samples/cs/whisper-transcription/Program.cs +++ b/samples/cs/whisper-transcription/Program.cs @@ -59,7 +59,8 @@ [FromServices] TranscriptionService svc, [FromForm] IFormFile file, [FromForm] string? model, - [FromForm] string? format) => + [FromForm] string? format, + CancellationToken ct) => { if (file is null || file.Length == 0) { @@ -70,12 +71,12 @@ var tmp = Path.Combine(Path.GetTempPath(), Guid.NewGuid() + Path.GetExtension(file.FileName)); await using (var fs = File.Create(tmp)) { - await file.CopyToAsync(fs); + await file.CopyToAsync(fs, ct); } try { - var result = await svc.TranscribeAsync(tmp, model); + var result = await svc.TranscribeAsync(tmp, model, ct); var outputFormat = format?.ToLowerInvariant() ?? "text"; return outputFormat switch { diff --git a/samples/js/copilot-sdk-foundry-local/src/tool-calling.ts b/samples/js/copilot-sdk-foundry-local/src/tool-calling.ts index 039849cf..96dad4af 100644 --- a/samples/js/copilot-sdk-foundry-local/src/tool-calling.ts +++ b/samples/js/copilot-sdk-foundry-local/src/tool-calling.ts @@ -60,7 +60,9 @@ function defineCalculateTool() { handler: async (args) => { try { // Only allow safe math characters and Math.* calls - const sanitized = args.expression.replace(/[^0-9+\-*/().,%\s]|Math\.\w+/g, (m) => + // Math\.\w+ must come first so "Math.sqrt" is matched as a token + // before the single-char class strips individual letters. + const sanitized = args.expression.replace(/Math\.\w+|[^0-9+\-*/().,%\s]/g, (m) => m.startsWith("Math.") ? 
m : "", ); const result = new Function(`"use strict"; return (${sanitized})`)(); diff --git a/samples/js/electron-chat-application/main.js b/samples/js/electron-chat-application/main.js index 22a1fc1d..935c1c45 100644 --- a/samples/js/electron-chat-application/main.js +++ b/samples/js/electron-chat-application/main.js @@ -91,11 +91,11 @@ ipcMain.handle('get-models', async () => { variants: m.variants.map(v => ({ id: v.id, alias: v.alias, - displayName: v.modelInfo.displayName || v.alias, + displayName: v.modelInfo?.displayName || v.alias, isCached: cachedIds.has(v.id), - fileSizeMb: v.modelInfo.fileSizeMb, - modelType: v.modelInfo.modelType, - publisher: v.modelInfo.publisher + fileSizeMb: v.modelInfo?.fileSizeMb, + modelType: v.modelInfo?.modelType, + publisher: v.modelInfo?.publisher })) })); diff --git a/samples/js/local-cag/src/modelSelector.js b/samples/js/local-cag/src/modelSelector.js index ba2c26a9..7f3f1e98 100644 --- a/samples/js/local-cag/src/modelSelector.js +++ b/samples/js/local-cag/src/modelSelector.js @@ -73,6 +73,10 @@ export async function selectBestModel(catalog, opts = {}) { if (!variant) continue; const info = variant.modelInfo; if (SKIP_ALIASES.has(info.alias)) continue; + if (info.fileSizeMb == null || info.fileSizeMb <= 0) { + console.log(`[ModelSelector] skip ${info.alias} (unknown file size)`); + continue; + } if (info.fileSizeMb > budgetMb) { console.log(`[ModelSelector] skip ${info.alias} (${(info.fileSizeMb / 1024).toFixed(1)} GB > RAM budget)`); continue; diff --git a/samples/js/local-rag/src/chatEngine.js b/samples/js/local-rag/src/chatEngine.js index 0684a5a1..21927abf 100644 --- a/samples/js/local-rag/src/chatEngine.js +++ b/samples/js/local-rag/src/chatEngine.js @@ -202,12 +202,13 @@ export class ChatEngine { }; // Yield text chunks from the SDK streaming callback buffer - while (!done || textChunks.length > 0) { - if (textChunks.length === 0 && !done) { + let head = 0; + while (!done || head < textChunks.length) { + if (head >= 
textChunks.length && !done) { await new Promise((r) => { resolve = r; }); } - while (textChunks.length > 0) { - const chunk = textChunks.shift(); + while (head < textChunks.length) { + const chunk = textChunks[head++]; const content = chunk.choices?.[0]?.delta?.content; if (content) { yield { type: "text", data: content }; diff --git a/samples/js/tool-calling-foundry-local/src/app.js b/samples/js/tool-calling-foundry-local/src/app.js index 57fc5d01..c9fe3f7f 100644 --- a/samples/js/tool-calling-foundry-local/src/app.js +++ b/samples/js/tool-calling-foundry-local/src/app.js @@ -22,7 +22,7 @@ async function runToolCallingExample() { console.log("Initializing Foundry Local SDK..."); manager = FoundryLocalManager.create({ appName: "FoundryLocalSample", - serviceEndpoint: "http://localhost:5000", + webServiceUrls: "http://localhost:5000", logLevel: "info" }); diff --git a/samples/python/agent-framework/src/app/orchestrator.py b/samples/python/agent-framework/src/app/orchestrator.py index dbea57d7..95886edf 100644 --- a/samples/python/agent-framework/src/app/orchestrator.py +++ b/samples/python/agent-framework/src/app/orchestrator.py @@ -148,17 +148,13 @@ async def run_full_workflow( # ── Concurrent fan-out ── snippets_text = "" - keywords_text = "" async for evt in run_concurrent_retrieval(conn, docs, plan_text): yield evt if evt["type"] == "step_done" and evt["agent"] == "Concurrent": - # Parse out retriever/tool output - output = evt.get("output", "") - snippets_text = output - keywords_text = "" + snippets_text = evt.get("output", "") # ── Critic (sequential) ── - combined = f"Plan:\n{plan_text}\n\nRetrieved + Keywords:\n{snippets_text}" + combined = f"Plan:\n{plan_text}\n\nRetrieved:\n{snippets_text}" for loop in range(MAX_CRITIC_LOOPS): yield {"type": "step_start", "agent": "Critic", "description": f"Reviewing for gaps (round {loop + 1})"} t0 = time.perf_counter() diff --git a/samples/python/agent-framework/src/app/web.py 
b/samples/python/agent-framework/src/app/web.py index fc424bb2..ee95fc0a 100644 --- a/samples/python/agent-framework/src/app/web.py +++ b/samples/python/agent-framework/src/app/web.py @@ -94,7 +94,7 @@ def generate(): break except Exception as exc: log.exception("Workflow error") - yield f"data: {json.dumps({'type': 'error', 'message': str(exc), 'traceback': traceback.format_exc()})}\n\n" + yield f"data: {json.dumps({'type': 'error', 'message': 'An internal error occurred. Check server logs for details.'})}\n\n" finally: asyncio.set_event_loop(None) loop.close() @@ -108,13 +108,15 @@ def api_tools(): return jsonify({"status": "error", "message": "Not bootstrapped"}), 503 loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) try: results = loop.run_until_complete(run_tool_demo(_conn)) return jsonify({"status": "ok", "results": results}) except Exception as exc: log.exception("Tool demo error") - return jsonify({"status": "error", "message": str(exc)}), 500 + return jsonify({"status": "error", "message": "An internal error occurred. Check server logs for details."}), 500 finally: + asyncio.set_event_loop(None) loop.close() @app.route("/api/documents") @@ -164,6 +166,7 @@ def api_demo_run(demo_id: str): def generate(): loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) try: yield f"data: {json.dumps({'type': 'step_start', 'agent': demo.name})}\n\n" result = loop.run_until_complete(demo.runner(_conn, prompt)) @@ -171,8 +174,9 @@ def generate(): yield f"data: {json.dumps({'type': 'complete', 'report': result.get('response', '')})}\n\n" except Exception as exc: log.exception("Demo error: %s", demo_id) - yield f"data: {json.dumps({'type': 'error', 'message': str(exc), 'traceback': traceback.format_exc()})}\n\n" + yield f"data: {json.dumps({'type': 'error', 'message': 'An internal error occurred. 
Check server logs for details.'})}\n\n" finally: + asyncio.set_event_loop(None) loop.close() return Response(generate(), mimetype="text/event-stream") diff --git a/samples/python/functioncalling/fl_tools.ipynb b/samples/python/functioncalling/fl_tools.ipynb index 2b226e2c..887f34be 100644 --- a/samples/python/functioncalling/fl_tools.ipynb +++ b/samples/python/functioncalling/fl_tools.ipynb @@ -140,6 +140,9 @@ "cached_ids = {m.id for m in cached_models}\n", "model_info = manager.get_model_info(alias)\n", "\n", + "if model_info is None:\n", + " raise ValueError(f\"Model '{alias}' not found in the catalog. Check the alias and try again.\")\n", + "\n", "if model_info.id in cached_ids:\n", " print(f\" ✓ Model '{alias}' is already cached — skipping download\")\n", "else:\n", From e373a2bd6b1f1eb13676d6aefa20fe16a42bfd73 Mon Sep 17 00:00:00 2001 From: Lee Stott Date: Tue, 24 Mar 2026 20:50:11 -0700 Subject: [PATCH 06/13] update --- samples/js/local-cag/README.md | 2 +- .../js/tool-calling-foundry-local/src/app.js | 3 ++- samples/python/agent-framework/src/app/web.py | 20 +++++++++++++++++-- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/samples/js/local-cag/README.md b/samples/js/local-cag/README.md index ece03869..d9772a0d 100644 --- a/samples/js/local-cag/README.md +++ b/samples/js/local-cag/README.md @@ -55,7 +55,7 @@ Set these environment variables (all optional): |----------|---------|-------------| | `FOUNDRY_MODEL` | *(auto-select)* | Force a specific model alias (e.g., `phi-3.5-mini`) | | `PORT` | `3000` | Server port | -| `HOST` | `localhost` | Server bind address | +| `HOST` | `127.0.0.1` | Server bind address | ## Adding Domain Documents diff --git a/samples/js/tool-calling-foundry-local/src/app.js b/samples/js/tool-calling-foundry-local/src/app.js index c9fe3f7f..20f56d9d 100644 --- a/samples/js/tool-calling-foundry-local/src/app.js +++ b/samples/js/tool-calling-foundry-local/src/app.js @@ -19,10 +19,11 @@ async function 
runToolCallingExample() { let model = null; try { + const webServiceUrl = process.env.FOUNDRY_SERVICE_URL || "http://127.0.0.1:0"; console.log("Initializing Foundry Local SDK..."); manager = FoundryLocalManager.create({ appName: "FoundryLocalSample", - webServiceUrls: "http://localhost:5000", + webServiceUrls: webServiceUrl, logLevel: "info" }); diff --git a/samples/python/agent-framework/src/app/web.py b/samples/python/agent-framework/src/app/web.py index ee95fc0a..31155cfe 100644 --- a/samples/python/agent-framework/src/app/web.py +++ b/samples/python/agent-framework/src/app/web.py @@ -99,7 +99,15 @@ def generate(): asyncio.set_event_loop(None) loop.close() - return Response(generate(), mimetype="text/event-stream") + return Response( + generate(), + mimetype="text/event-stream", + headers={ + "Cache-Control": "no-cache", + "Connection": "keep-alive", + "X-Accel-Buffering": "no", + }, + ) @app.route("/api/tools", methods=["POST"]) def api_tools(): @@ -179,6 +187,14 @@ def generate(): asyncio.set_event_loop(None) loop.close() - return Response(generate(), mimetype="text/event-stream") + return Response( + generate(), + mimetype="text/event-stream", + headers={ + "Cache-Control": "no-cache", + "Connection": "keep-alive", + "X-Accel-Buffering": "no", + }, + ) return app From 10d78b3948085af6267e0304669ef1a9d2ff3826 Mon Sep 17 00:00:00 2001 From: Lee Stott Date: Wed, 25 Mar 2026 07:26:47 -0700 Subject: [PATCH 07/13] Update --- samples/python/summarize/summarize.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/samples/python/summarize/summarize.py b/samples/python/summarize/summarize.py index 6c5943e8..f0cfe408 100644 --- a/samples/python/summarize/summarize.py +++ b/samples/python/summarize/summarize.py @@ -50,7 +50,6 @@ def main(): # Check what's available in cache cached_models = fl_manager.list_cached_models() - cached_ids = {m.id for m in cached_models} if args.model: # User specified a model — check cache, download if 
needed @@ -59,26 +58,29 @@ def main(): print(f"✗ Model alias '{args.model}' not found in catalog") sys.exit(1) - if model_info.id in cached_ids: - print(f"✓ Model \"{args.model}\" ({model_info.id}) already cached — skipping download") + # Check if *any* variant of this alias is already cached + cached_variant = next((m for m in cached_models if m.alias == args.model), None) + if cached_variant is not None: + print(f"✓ Model \"{args.model}\" ({cached_variant.id}) already cached — skipping download") + model_name = cached_variant.id else: print(f"Model \"{args.model}\" not in cache. Downloading {model_info.id}...") fl_manager.download_model(args.model) print("✓ Model downloaded") + model_name = model_info.id - print(f"Loading model {model_info.id}...") - fl_manager.load_model(args.model) - model_name = model_info.id + print(f"Loading model {model_name}...") + fl_manager.load_model(model_name) else: # No model specified — use the first cached model, or fail if not cached_models: print("No downloaded models available. 
Run with --model to download one.") sys.exit(1) - cached_alias = cached_models[0].alias model_name = cached_models[0].id - print(f"✓ Using cached model: {cached_alias} ({model_name})") - fl_manager.load_model(cached_alias) + print(f"✓ Using cached model: {cached_models[0].alias} ({model_name})") + # Load by model ID to guarantee we load the exact cached variant + fl_manager.load_model(model_name) print(f"✓ Model loaded and ready\n") From 26908ec52079fc893861e325b3b0a570a97a9654 Mon Sep 17 00:00:00 2001 From: Lee Stott Date: Wed, 25 Mar 2026 07:30:26 -0700 Subject: [PATCH 08/13] update --- .../Middleware/ErrorHandlingMiddleware.cs | 2 +- samples/python/hello-foundry-local/src/app.py | 18 +++++++++++------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/samples/cs/whisper-transcription/Middleware/ErrorHandlingMiddleware.cs b/samples/cs/whisper-transcription/Middleware/ErrorHandlingMiddleware.cs index e44fd2e9..01fe75e8 100644 --- a/samples/cs/whisper-transcription/Middleware/ErrorHandlingMiddleware.cs +++ b/samples/cs/whisper-transcription/Middleware/ErrorHandlingMiddleware.cs @@ -25,7 +25,7 @@ public async Task InvokeAsync(HttpContext context) _logger.LogError(ex, "Unhandled exception"); context.Response.StatusCode = (int)HttpStatusCode.InternalServerError; context.Response.ContentType = "application/json"; - var payload = JsonSerializer.Serialize(new { error = ex.Message }); + var payload = JsonSerializer.Serialize(new { error = "An unexpected error occurred." 
}); await context.Response.WriteAsync(payload); } } diff --git a/samples/python/hello-foundry-local/src/app.py b/samples/python/hello-foundry-local/src/app.py index 4d47f5eb..788bd5b1 100644 --- a/samples/python/hello-foundry-local/src/app.py +++ b/samples/python/hello-foundry-local/src/app.py @@ -18,22 +18,26 @@ # Check if the model is already cached (downloaded) cached_models = manager.list_cached_models() -cached_ids = {m.id for m in cached_models} model_info = manager.get_model_info(alias) if model_info is None: print(f"✗ Model \"{alias}\" not found in catalog") sys.exit(1) -if model_info.id in cached_ids: - print(f"✓ Model \"{alias}\" ({model_info.id}) already cached — skipping download") +# Check if *any* variant of this alias is already cached +cached_variant = next((m for m in cached_models if m.alias == alias), None) +if cached_variant is not None: + print(f"✓ Model \"{alias}\" ({cached_variant.id}) already cached — skipping download") + model_id = cached_variant.id else: print(f"Model \"{alias}\" not found in cache. Downloading {model_info.id}...") manager.download_model(alias) print(f"✓ Model downloaded") + model_id = model_info.id -# Load the model into memory -print(f"Loading model {model_info.id}...") -manager.load_model(alias) +# Load the model into memory — use the exact model ID to guarantee +# we load the variant that is actually cached. 
+print(f"Loading model {model_id}...") +manager.load_model(model_id) print("✓ Model loaded and ready") # Configure the OpenAI client to use the local Foundry service @@ -44,7 +48,7 @@ # Generate a streaming response stream = client.chat.completions.create( - model=model_info.id, + model=model_id, messages=[{"role": "user", "content": "What is the golden ratio?"}], stream=True, ) From 16cfcac1e39877ca15e430bab16ac4432d596969 Mon Sep 17 00:00:00 2001 From: Lee Stott Date: Wed, 25 Mar 2026 07:42:38 -0700 Subject: [PATCH 09/13] Update --- samples/js/local-cag/src/chatEngine.js | 5 +++- samples/js/local-rag/src/chatEngine.js | 18 +++++++------ samples/python/functioncalling/fl_tools.ipynb | 25 ++++++++++--------- 3 files changed, 27 insertions(+), 21 deletions(-) diff --git a/samples/js/local-cag/src/chatEngine.js b/samples/js/local-cag/src/chatEngine.js index 675edf10..5a8a02f2 100644 --- a/samples/js/local-cag/src/chatEngine.js +++ b/samples/js/local-cag/src/chatEngine.js @@ -210,7 +210,10 @@ export class ChatEngine { let index = 0; while (!done || index < chunks.length) { if (index < chunks.length) { - yield { type: "text", data: chunks[index++] }; + const text = chunks[index]; + chunks[index] = null; // release for GC + index++; + yield { type: "text", data: text }; } else { await new Promise((r) => { resolve = r; }); } diff --git a/samples/js/local-rag/src/chatEngine.js b/samples/js/local-rag/src/chatEngine.js index 21927abf..d640ffea 100644 --- a/samples/js/local-rag/src/chatEngine.js +++ b/samples/js/local-rag/src/chatEngine.js @@ -177,14 +177,17 @@ export class ChatEngine { // 3. Stream from the local model via the SDK's callback-based streaming this.chatClient.settings.maxTokens = this.compactMode ? 
512 : 1024; - // Buffer chunks from the callback and yield them as an async iterable + // Buffer extracted content strings from the callback and yield as an async iterable const textChunks = []; let resolve; let done = false; const streamPromise = this.chatClient.completeStreamingChat(messages, (chunk) => { - textChunks.push(chunk); - if (resolve) { resolve(); resolve = null; } + const content = chunk.choices?.[0]?.delta?.content; + if (content) { + textChunks.push(content); + if (resolve) { resolve(); resolve = null; } + } }).then(() => { done = true; if (resolve) { resolve(); resolve = null; } @@ -208,11 +211,10 @@ export class ChatEngine { await new Promise((r) => { resolve = r; }); } while (head < textChunks.length) { - const chunk = textChunks[head++]; - const content = chunk.choices?.[0]?.delta?.content; - if (content) { - yield { type: "text", data: content }; - } + const text = textChunks[head]; + textChunks[head] = null; // release for GC + head++; + yield { type: "text", data: text }; } } diff --git a/samples/python/functioncalling/fl_tools.ipynb b/samples/python/functioncalling/fl_tools.ipynb index 887f34be..27e35683 100644 --- a/samples/python/functioncalling/fl_tools.ipynb +++ b/samples/python/functioncalling/fl_tools.ipynb @@ -135,24 +135,25 @@ "manager.start_service()\n", "print(\" ✓ Service is running\")\n", "\n", - "# Check if the model is already cached\n", + "# Check if any variant of this alias is already cached\n", "cached_models = manager.list_cached_models()\n", - "cached_ids = {m.id for m in cached_models}\n", - "model_info = manager.get_model_info(alias)\n", + "cached_variant = next((m for m in cached_models if m.alias == alias), None)\n", "\n", - "if model_info is None:\n", - " raise ValueError(f\"Model '{alias}' not found in the catalog. 
Check the alias and try again.\")\n", - "\n", - "if model_info.id in cached_ids:\n", + "if cached_variant is not None:\n", + " model_id = cached_variant.id\n", " print(f\" ✓ Model '{alias}' is already cached — skipping download\")\n", "else:\n", + " model_info = manager.get_model_info(alias)\n", + " if model_info is None:\n", + " raise ValueError(f\"Model '{alias}' not found in the catalog. Check the alias and try again.\")\n", " print(f\" Downloading model '{alias}'...\")\n", " manager.download_model(alias)\n", + " model_id = model_info.id\n", " print(f\" ✓ Download complete\")\n", "\n", "# Load the model into memory\n", "print(f\" Loading model '{alias}'...\")\n", - "manager.load_model(alias)\n", + "manager.load_model(model_id)\n", "print(f\" ✓ Model loaded and ready\")" ] }, @@ -233,13 +234,13 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "8e616290", "metadata": {}, "outputs": [], "source": [ "stream = client.chat.completions.create(\n", - " model=manager.get_model_info(alias).id,\n", + " model=model_id,\n", " messages=[{\"role\": \"user\", \"content\": \"book flight ticket from Beijing to Paris(using airport code) in 2025-12-04 to 2025-12-10 , then book hotel from 2025-12-04 to 2025-12-10 in Paris\"}],\n", " tools=[{\"name\": \"booking_flight_tickets\", \"description\": \"booking flights\", \"parameters\": {\"origin_airport_code\": {\"description\": \"The name of Departure airport code\", \"type\": \"string\"}, \"destination_airport_code\": {\"description\": \"The name of Destination airport code\", \"type\": \"string\"}, \"departure_date\": {\"description\": \"The date of outbound flight\", \"type\": \"string\"}, \"return_date\": {\"description\": \"The date of return flight\", \"type\": \"string\"}}}, {\"name\": \"booking_hotels\", \"description\": \"booking hotel\", \"parameters\": {\"destination\": {\"description\": \"The name of the city\", \"type\": \"string\"}, \"check_in_date\": {\"description\": \"The date of 
check in\", \"type\": \"string\"}, \"checkout_date\": {\"description\": \"The date of check out\", \"type\": \"string\"}}}],\n", " temperature=0.00001,\n", @@ -295,13 +296,13 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "2d5ed823", "metadata": {}, "outputs": [], "source": [ "stream = client.chat.completions.create(\n", - " model=manager.get_model_info(alias).id,\n", + " model=model_id,\n", " messages=[{\"role\": \"user\", \"content\": \"What is the weather today in Paris?\"}],\n", " tools=[\n", " {\n", From bf1b5ca5e1ed420adb525d84d54e133a10349b01 Mon Sep 17 00:00:00 2001 From: Lee Stott Date: Wed, 25 Mar 2026 09:27:55 -0700 Subject: [PATCH 10/13] update --- .../agent-framework/src/app/demos/code_reviewer.py | 2 +- .../python/agent-framework/src/app/demos/math_agent.py | 2 +- .../src/app/demos/multi_agent_debate.py | 2 +- .../src/app/demos/sentiment_analyzer.py | 2 +- .../agent-framework/src/app/demos/weather_tools.py | 2 +- samples/python/agent-framework/src/app/web.py | 10 ++++++++-- 6 files changed, 13 insertions(+), 7 deletions(-) diff --git a/samples/python/agent-framework/src/app/demos/code_reviewer.py b/samples/python/agent-framework/src/app/demos/code_reviewer.py index 23860448..5d36b679 100644 --- a/samples/python/agent-framework/src/app/demos/code_reviewer.py +++ b/samples/python/agent-framework/src/app/demos/code_reviewer.py @@ -220,7 +220,7 @@ async def run_code_review_demo(conn: FoundryConnection, prompt: str) -> dict: id="code_reviewer", name="Code Reviewer", description="Code analysis agent that checks style, complexity, potential bugs, and suggests improvements.", - icon="\ud83d\udc68\u200d\ud83d\udcbb", + icon="👨‍💻", category="Tool Calling", runner=run_code_review_demo, tags=["tools", "function-calling", "code-analysis", "single-agent"], diff --git a/samples/python/agent-framework/src/app/demos/math_agent.py b/samples/python/agent-framework/src/app/demos/math_agent.py index f2ff00c9..0c97aa11 100644 --- 
a/samples/python/agent-framework/src/app/demos/math_agent.py +++ b/samples/python/agent-framework/src/app/demos/math_agent.py @@ -193,7 +193,7 @@ async def run_math_demo(conn: FoundryConnection, prompt: str) -> dict: id="math_agent", name="Math Calculator", description="Precise calculation agent with tools for arithmetic, percentages, unit conversions, compound interest, and statistics.", - icon="\ud83d\udd22", + icon="🔢", category="Tool Calling", runner=run_math_demo, tags=["tools", "function-calling", "calculations", "single-agent"], diff --git a/samples/python/agent-framework/src/app/demos/multi_agent_debate.py b/samples/python/agent-framework/src/app/demos/multi_agent_debate.py index c0ec7ddb..568bd6e8 100644 --- a/samples/python/agent-framework/src/app/demos/multi_agent_debate.py +++ b/samples/python/agent-framework/src/app/demos/multi_agent_debate.py @@ -181,7 +181,7 @@ async def run_debate_demo(conn: FoundryConnection, prompt: str) -> dict: id="multi_agent_debate", name="Multi-Agent Debate", description="Three agents debate a topic: one argues FOR, one argues AGAINST, and a moderator declares a winner.", - icon="\ud83c\udfad", + icon="🎭", category="Multi-Agent", runner=run_debate_demo, tags=["multi-agent", "orchestration", "sequential", "debate"], diff --git a/samples/python/agent-framework/src/app/demos/sentiment_analyzer.py b/samples/python/agent-framework/src/app/demos/sentiment_analyzer.py index abee8526..5671713f 100644 --- a/samples/python/agent-framework/src/app/demos/sentiment_analyzer.py +++ b/samples/python/agent-framework/src/app/demos/sentiment_analyzer.py @@ -238,7 +238,7 @@ async def run_sentiment_demo(conn: FoundryConnection, prompt: str) -> dict: id="sentiment_analyzer", name="Sentiment Analyzer", description="Text analysis agent that detects sentiment, emotions, key phrases, and word frequency.", - icon="\ud83d\udcac", + icon="💬", category="Tool Calling", runner=run_sentiment_demo, tags=["tools", "function-calling", "text-analysis", 
"single-agent"], diff --git a/samples/python/agent-framework/src/app/demos/weather_tools.py b/samples/python/agent-framework/src/app/demos/weather_tools.py index 89ab6579..24910085 100644 --- a/samples/python/agent-framework/src/app/demos/weather_tools.py +++ b/samples/python/agent-framework/src/app/demos/weather_tools.py @@ -169,7 +169,7 @@ async def run_weather_demo(conn: FoundryConnection, prompt: str) -> dict: id="weather_tools", name="Weather Tools", description="Multi-tool agent that provides weather information, forecasts, city comparisons, and activity recommendations.", - icon="\ud83c\udf24\ufe0f", + icon="🌤️", category="Tool Calling", runner=run_weather_demo, tags=["tools", "function-calling", "single-agent"], diff --git a/samples/python/agent-framework/src/app/web.py b/samples/python/agent-framework/src/app/web.py index 31155cfe..58949d30 100644 --- a/samples/python/agent-framework/src/app/web.py +++ b/samples/python/agent-framework/src/app/web.py @@ -69,7 +69,10 @@ def api_run(): return jsonify({"status": "error", "message": "Not bootstrapped"}), 503 data = request.get_json(silent=True) or {} - question = data.get("question", "").strip() + question = data.get("question", "") + if not isinstance(question, str): + return jsonify({"status": "error", "message": "question must be a string"}), 400 + question = question.strip() mode = data.get("mode", "full") if not question: @@ -168,7 +171,10 @@ def api_demo_run(demo_id: str): return jsonify({"status": "error", "message": f"Demo '{demo_id}' not found"}), 404 data = request.get_json(silent=True) or {} - prompt = data.get("prompt", "").strip() + prompt = data.get("prompt", "") + if not isinstance(prompt, str): + return jsonify({"status": "error", "message": "prompt must be a string"}), 400 + prompt = prompt.strip() if not prompt: return jsonify({"status": "error", "message": "No prompt provided"}), 400 From a60d8e01e7b813de4dff7e9c8af98eff4983bde4 Mon Sep 17 00:00:00 2001 From: Lee Stott Date: Wed, 25 Mar 2026 
09:49:48 -0700 Subject: [PATCH 11/13] update --- samples/cs/whisper-transcription/README.md | 1 + .../WhisperTranscription.csproj | 3 +- samples/cs/whisper-transcription/nuget.config | 2 +- samples/js/local-cag/src/chatEngine.js | 40 +++++++++++-------- samples/js/local-rag/src/chatEngine.js | 28 +++++++++---- 5 files changed, 48 insertions(+), 26 deletions(-) diff --git a/samples/cs/whisper-transcription/README.md b/samples/cs/whisper-transcription/README.md index 3d13b434..bb22721c 100644 --- a/samples/cs/whisper-transcription/README.md +++ b/samples/cs/whisper-transcription/README.md @@ -17,6 +17,7 @@ Based on the [FLWhisper](https://github.com/leestott/FLWhisper) project. - **Windows 10/11** (ARM64 or x64) - **.NET 9 SDK** — [Download here](https://dotnet.microsoft.com/download/dotnet/9.0) +- **Windows 11 SDK 10.0.26100.0 or newer** — required by the `Microsoft.AI.Foundry.Local.WinML` package used by this sample - **Foundry Local** — installed and on PATH ## Quick Start diff --git a/samples/cs/whisper-transcription/WhisperTranscription.csproj b/samples/cs/whisper-transcription/WhisperTranscription.csproj index f6d1d553..e4acba26 100644 --- a/samples/cs/whisper-transcription/WhisperTranscription.csproj +++ b/samples/cs/whisper-transcription/WhisperTranscription.csproj @@ -1,7 +1,8 @@ - net9.0-windows10.0.26100 + net9.0-windows10.0.26100.0 + 10.0.17763.0 enable enable ARM64;x64 diff --git a/samples/cs/whisper-transcription/nuget.config b/samples/cs/whisper-transcription/nuget.config index 5ae1c6b2..82e36490 100644 --- a/samples/cs/whisper-transcription/nuget.config +++ b/samples/cs/whisper-transcription/nuget.config @@ -10,7 +10,7 @@ - + diff --git a/samples/js/local-cag/src/chatEngine.js b/samples/js/local-cag/src/chatEngine.js index 5a8a02f2..bd45f0cf 100644 --- a/samples/js/local-cag/src/chatEngine.js +++ b/samples/js/local-cag/src/chatEngine.js @@ -181,38 +181,44 @@ export class ChatEngine { this.chatClient.settings.maxTokens = this.compactMode ? 
512 : 1024; - // Collect streamed chunks via callback and yield them + // Buffer streamed chunks in a compacting queue so memory tracks only unread data. const chunks = []; + let head = 0; let resolve; let done = false; + const notify = () => { + if (resolve) { + const wake = resolve; + resolve = null; + wake(); + } + }; + + const compactChunks = () => { + if (head > 0 && head * 2 >= chunks.length) { + chunks.splice(0, head); + head = 0; + } + }; + const promise = this.chatClient .completeStreamingChat(messages, (chunk) => { const content = chunk.choices?.[0]?.delta?.content; if (content) { chunks.push(content); - if (resolve) { - const r = resolve; - resolve = null; - r(); - } + notify(); } }) .then(() => { done = true; - if (resolve) { - const r = resolve; - resolve = null; - r(); - } + notify(); }); - let index = 0; - while (!done || index < chunks.length) { - if (index < chunks.length) { - const text = chunks[index]; - chunks[index] = null; // release for GC - index++; + while (!done || head < chunks.length) { + if (head < chunks.length) { + const text = chunks[head++]; + compactChunks(); yield { type: "text", data: text }; } else { await new Promise((r) => { resolve = r; }); diff --git a/samples/js/local-rag/src/chatEngine.js b/samples/js/local-rag/src/chatEngine.js index d640ffea..d60aebd2 100644 --- a/samples/js/local-rag/src/chatEngine.js +++ b/samples/js/local-rag/src/chatEngine.js @@ -177,20 +177,36 @@ export class ChatEngine { // 3. Stream from the local model via the SDK's callback-based streaming this.chatClient.settings.maxTokens = this.compactMode ? 512 : 1024; - // Buffer extracted content strings from the callback and yield as an async iterable + // Buffer extracted content strings in a compacting queue so memory tracks unread data. 
const textChunks = []; + let head = 0; let resolve; let done = false; + const notify = () => { + if (resolve) { + const wake = resolve; + resolve = null; + wake(); + } + }; + + const compactTextChunks = () => { + if (head > 0 && head * 2 >= textChunks.length) { + textChunks.splice(0, head); + head = 0; + } + }; + const streamPromise = this.chatClient.completeStreamingChat(messages, (chunk) => { const content = chunk.choices?.[0]?.delta?.content; if (content) { textChunks.push(content); - if (resolve) { resolve(); resolve = null; } + notify(); } }).then(() => { done = true; - if (resolve) { resolve(); resolve = null; } + notify(); }); // Yield sources metadata first @@ -205,15 +221,13 @@ export class ChatEngine { }; // Yield text chunks from the SDK streaming callback buffer - let head = 0; while (!done || head < textChunks.length) { if (head >= textChunks.length && !done) { await new Promise((r) => { resolve = r; }); } while (head < textChunks.length) { - const text = textChunks[head]; - textChunks[head] = null; // release for GC - head++; + const text = textChunks[head++]; + compactTextChunks(); yield { type: "text", data: text }; } } From 0a82b17230c26ce16a51d5f9da2595897cb36200 Mon Sep 17 00:00:00 2001 From: Lee Stott Date: Wed, 25 Mar 2026 09:52:39 -0700 Subject: [PATCH 12/13] update --- samples/python/functioncalling/fl_tools.ipynb | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/samples/python/functioncalling/fl_tools.ipynb b/samples/python/functioncalling/fl_tools.ipynb index 27e35683..bae0c182 100644 --- a/samples/python/functioncalling/fl_tools.ipynb +++ b/samples/python/functioncalling/fl_tools.ipynb @@ -135,24 +135,25 @@ "manager.start_service()\n", "print(\" ✓ Service is running\")\n", "\n", - "# Check if any variant of this alias is already cached\n", + "# Check if any cached variant already satisfies this alias.\n", "cached_models = manager.list_cached_models()\n", - "cached_variant = next((m for m in cached_models 
if m.alias == alias), None)\n", + "cached_variant = next((model for model in cached_models if model.alias == alias), None)\n", "\n", "if cached_variant is not None:\n", " model_id = cached_variant.id\n", - " print(f\" ✓ Model '{alias}' is already cached — skipping download\")\n", + " print(f\" ✓ Model '{alias}' is already cached as '{model_id}' — skipping download\")\n", "else:\n", " model_info = manager.get_model_info(alias)\n", " if model_info is None:\n", " raise ValueError(f\"Model '{alias}' not found in the catalog. Check the alias and try again.\")\n", + "\n", " print(f\" Downloading model '{alias}'...\")\n", - " manager.download_model(alias)\n", - " model_id = model_info.id\n", - " print(f\" ✓ Download complete\")\n", + " downloaded_model = manager.download_model(alias)\n", + " model_id = downloaded_model.id\n", + " print(f\" ✓ Download complete: cached '{model_id}'\")\n", "\n", - "# Load the model into memory\n", - "print(f\" Loading model '{alias}'...\")\n", + "# Load the exact cached variant into memory for deterministic behavior.\n", + "print(f\" Loading model '{model_id}'...\")\n", "manager.load_model(model_id)\n", "print(f\" ✓ Model loaded and ready\")" ] From c6c1cab867be6729cd17fd1ca3409b109f43c10d Mon Sep 17 00:00:00 2001 From: Lee Stott Date: Wed, 25 Mar 2026 10:17:14 -0700 Subject: [PATCH 13/13] update --- .../Middleware/ErrorHandlingMiddleware.cs | 19 ++++++++++++------- samples/cs/whisper-transcription/Program.cs | 17 ++++++++++++----- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/samples/cs/whisper-transcription/Middleware/ErrorHandlingMiddleware.cs b/samples/cs/whisper-transcription/Middleware/ErrorHandlingMiddleware.cs index 01fe75e8..fc14fbab 100644 --- a/samples/cs/whisper-transcription/Middleware/ErrorHandlingMiddleware.cs +++ b/samples/cs/whisper-transcription/Middleware/ErrorHandlingMiddleware.cs @@ -1,6 +1,3 @@ -using System.Net; -using System.Text.Json; - namespace WhisperTranscription; public class 
ErrorHandlingMiddleware @@ -23,10 +20,18 @@ public async Task InvokeAsync(HttpContext context) catch (Exception ex) { _logger.LogError(ex, "Unhandled exception"); - context.Response.StatusCode = (int)HttpStatusCode.InternalServerError; - context.Response.ContentType = "application/json"; - var payload = JsonSerializer.Serialize(new { error = "An unexpected error occurred." }); - await context.Response.WriteAsync(payload); + + if (context.Response.HasStarted) + { + throw; + } + + await Results.Problem( + statusCode: StatusCodes.Status500InternalServerError, + title: "Internal Server Error", + detail: "An unexpected error occurred.", + instance: context.Request.Path) + .ExecuteAsync(context); } } } diff --git a/samples/cs/whisper-transcription/Program.cs b/samples/cs/whisper-transcription/Program.cs index 5ae320a6..e47ef394 100644 --- a/samples/cs/whisper-transcription/Program.cs +++ b/samples/cs/whisper-transcription/Program.cs @@ -10,6 +10,7 @@ builder.Services.AddHealthChecks() .AddCheck("foundry"); builder.Services.AddEndpointsApiExplorer(); +builder.Services.AddProblemDetails(); builder.Services.AddSwaggerGen(); builder.Services.ConfigureHttpJsonOptions(options => @@ -64,7 +65,10 @@ { if (file is null || file.Length == 0) { - return Results.BadRequest(new { error = "No audio file provided" }); + return Results.Problem( + statusCode: 400, + title: "Invalid transcription request", + detail: "No audio file provided."); } // Save upload to temp file @@ -80,7 +84,7 @@ var outputFormat = format?.ToLowerInvariant() ?? 
"text"; return outputFormat switch { - "json" => Results.Ok(new { text = result.Text, model = result.ModelId }), + "json" => Results.Ok(new TranscriptionResponse(result.Text, result.ModelId)), _ => Results.Text(result.Text, "text/plain"), }; } @@ -90,10 +94,13 @@ } }).WithName("TranscribeAudio") .DisableAntiforgery() - .Produces(200) - .ProducesProblem(400) - .ProducesProblem(500); + .Produces(200, "application/json") + .Produces(200, "text/plain") + .ProducesProblem(400) + .ProducesProblem(500); app.MapFallbackToFile("index.html"); app.Run(); + +sealed record TranscriptionResponse(string Text, string Model);