From b6c9e49bb83666a70458ff6e6a98df730f13753e Mon Sep 17 00:00:00 2001 From: Lee Stott Date: Mon, 23 Mar 2026 16:29:48 -0700 Subject: [PATCH 01/13] Improve samples with cache-awareness, add 4 new samples, fix SDK versions, and prepare repo for public sharing --- SUPPORT.md | 25 +- .../src/AudioTranscriptionExample/Program.cs | 29 +- .../src/FoundryLocalWebServer/Program.cs | 30 +- .../src/HelloFoundryLocalSdk/Program.cs | 29 +- .../src/ModelManagementExample/Program.cs | 29 +- .../src/ToolCallingFoundryLocalSdk/Program.cs | 30 +- .../Program.cs | 30 +- .../Health/FoundryHealthCheck.cs | 28 + .../Middleware/ErrorHandlingMiddleware.cs | 32 + samples/cs/whisper-transcription/Program.cs | 98 +++ samples/cs/whisper-transcription/README.md | 112 +++ .../Services/FoundryModelService.cs | 89 +++ .../Services/FoundryOptions.cs | 8 + .../Services/TranscriptionService.cs | 54 ++ .../WhisperTranscription.csproj | 22 + .../cs/whisper-transcription/appsettings.json | 14 + samples/cs/whisper-transcription/nuget.config | 16 + .../cs/whisper-transcription/wwwroot/app.js | 124 +++ .../whisper-transcription/wwwroot/index.html | 73 ++ .../whisper-transcription/wwwroot/styles.css | 126 +++ samples/js/audio-transcription-example/app.js | 30 +- .../chat-and-audio-foundry-local/package.json | 2 +- .../js/copilot-sdk-foundry-local/package.json | 2 +- .../js/copilot-sdk-foundry-local/src/app.ts | 13 +- .../src/tool-calling.ts | 13 +- .../js/electron-chat-application/package.json | 1 + .../js/langchain-integration-example/app.js | 28 +- samples/js/local-cag/README.md | 125 +++ .../js/local-cag/docs/emergency-shutdown.md | 40 + .../js/local-cag/docs/gas-leak-detection.md | 44 ++ samples/js/local-cag/docs/ppe-requirements.md | 54 ++ samples/js/local-cag/docs/pressure-testing.md | 51 ++ samples/js/local-cag/docs/valve-inspection.md | 44 ++ samples/js/local-cag/package.json | 27 + samples/js/local-cag/public/index.html | 724 ++++++++++++++++++ samples/js/local-cag/src/chatEngine.js | 222 ++++++ 
samples/js/local-cag/src/config.js | 35 + samples/js/local-cag/src/context.js | 301 ++++++++ samples/js/local-cag/src/modelSelector.js | 115 +++ samples/js/local-cag/src/prompts.js | 44 ++ samples/js/local-cag/src/server.js | 186 +++++ samples/js/local-rag/README.md | 143 ++++ .../js/local-rag/docs/emergency-shutdown.md | 53 ++ .../js/local-rag/docs/gas-leak-detection.md | 46 ++ samples/js/local-rag/docs/ppe-requirements.md | 50 ++ samples/js/local-rag/docs/pressure-testing.md | 52 ++ samples/js/local-rag/docs/valve-inspection.md | 59 ++ samples/js/local-rag/package.json | 31 + samples/js/local-rag/public/index.html | 698 +++++++++++++++++ samples/js/local-rag/src/chatEngine.js | 228 ++++++ samples/js/local-rag/src/chunker.js | 74 ++ samples/js/local-rag/src/config.js | 25 + samples/js/local-rag/src/ingest.js | 65 ++ samples/js/local-rag/src/prompts.js | 44 ++ samples/js/local-rag/src/server.js | 230 ++++++ samples/js/local-rag/src/vectorStore.js | 145 ++++ samples/js/native-chat-completions/app.js | 34 +- .../js/tool-calling-foundry-local/src/app.js | 13 +- samples/js/web-server-example/app.js | 28 +- samples/python/agent-framework/.env.example | 8 + samples/python/agent-framework/README.md | 132 ++++ .../data/agent_framework_guide.md | 54 ++ .../data/foundry_local_overview.md | 42 + .../data/orchestration_patterns.md | 67 ++ samples/python/agent-framework/pyproject.toml | 24 + .../python/agent-framework/requirements.txt | 6 + .../agent-framework/src/app/__init__.py | 1 + .../agent-framework/src/app/__main__.py | 85 ++ .../python/agent-framework/src/app/agents.py | 116 +++ .../agent-framework/src/app/demos/__init__.py | 20 + .../src/app/demos/code_reviewer.py | 228 ++++++ .../src/app/demos/math_agent.py | 201 +++++ .../src/app/demos/multi_agent_debate.py | 189 +++++ .../agent-framework/src/app/demos/registry.py | 42 + .../src/app/demos/sentiment_analyzer.py | 246 ++++++ .../src/app/demos/weather_tools.py | 177 +++++ .../agent-framework/src/app/documents.py | 89 
+++ .../agent-framework/src/app/foundry_boot.py | 76 ++ .../agent-framework/src/app/orchestrator.py | 182 +++++ .../src/app/templates/index.html | 628 +++++++++++++++ .../agent-framework/src/app/tool_demo.py | 97 +++ samples/python/agent-framework/src/app/web.py | 179 +++++ .../agent-framework/tests/test_smoke.py | 88 +++ samples/python/functioncalling/README.md | 16 +- samples/python/functioncalling/fl_tools.ipynb | 51 +- samples/python/hello-foundry-local/README.md | 9 +- .../hello-foundry-local/requirements.txt | 2 + samples/python/hello-foundry-local/src/app.py | 38 +- samples/python/summarize/README.md | 6 + samples/python/summarize/requirements.txt | 2 +- samples/python/summarize/summarize.py | 48 +- samples/rag/README.md | 47 +- samples/rag/rag_foundrylocal_demo.ipynb | 204 +++-- 93 files changed, 8290 insertions(+), 227 deletions(-) create mode 100644 samples/cs/whisper-transcription/Health/FoundryHealthCheck.cs create mode 100644 samples/cs/whisper-transcription/Middleware/ErrorHandlingMiddleware.cs create mode 100644 samples/cs/whisper-transcription/Program.cs create mode 100644 samples/cs/whisper-transcription/README.md create mode 100644 samples/cs/whisper-transcription/Services/FoundryModelService.cs create mode 100644 samples/cs/whisper-transcription/Services/FoundryOptions.cs create mode 100644 samples/cs/whisper-transcription/Services/TranscriptionService.cs create mode 100644 samples/cs/whisper-transcription/WhisperTranscription.csproj create mode 100644 samples/cs/whisper-transcription/appsettings.json create mode 100644 samples/cs/whisper-transcription/nuget.config create mode 100644 samples/cs/whisper-transcription/wwwroot/app.js create mode 100644 samples/cs/whisper-transcription/wwwroot/index.html create mode 100644 samples/cs/whisper-transcription/wwwroot/styles.css create mode 100644 samples/js/local-cag/README.md create mode 100644 samples/js/local-cag/docs/emergency-shutdown.md create mode 100644 
samples/js/local-cag/docs/gas-leak-detection.md create mode 100644 samples/js/local-cag/docs/ppe-requirements.md create mode 100644 samples/js/local-cag/docs/pressure-testing.md create mode 100644 samples/js/local-cag/docs/valve-inspection.md create mode 100644 samples/js/local-cag/package.json create mode 100644 samples/js/local-cag/public/index.html create mode 100644 samples/js/local-cag/src/chatEngine.js create mode 100644 samples/js/local-cag/src/config.js create mode 100644 samples/js/local-cag/src/context.js create mode 100644 samples/js/local-cag/src/modelSelector.js create mode 100644 samples/js/local-cag/src/prompts.js create mode 100644 samples/js/local-cag/src/server.js create mode 100644 samples/js/local-rag/README.md create mode 100644 samples/js/local-rag/docs/emergency-shutdown.md create mode 100644 samples/js/local-rag/docs/gas-leak-detection.md create mode 100644 samples/js/local-rag/docs/ppe-requirements.md create mode 100644 samples/js/local-rag/docs/pressure-testing.md create mode 100644 samples/js/local-rag/docs/valve-inspection.md create mode 100644 samples/js/local-rag/package.json create mode 100644 samples/js/local-rag/public/index.html create mode 100644 samples/js/local-rag/src/chatEngine.js create mode 100644 samples/js/local-rag/src/chunker.js create mode 100644 samples/js/local-rag/src/config.js create mode 100644 samples/js/local-rag/src/ingest.js create mode 100644 samples/js/local-rag/src/prompts.js create mode 100644 samples/js/local-rag/src/server.js create mode 100644 samples/js/local-rag/src/vectorStore.js create mode 100644 samples/python/agent-framework/.env.example create mode 100644 samples/python/agent-framework/README.md create mode 100644 samples/python/agent-framework/data/agent_framework_guide.md create mode 100644 samples/python/agent-framework/data/foundry_local_overview.md create mode 100644 samples/python/agent-framework/data/orchestration_patterns.md create mode 100644 samples/python/agent-framework/pyproject.toml 
create mode 100644 samples/python/agent-framework/requirements.txt create mode 100644 samples/python/agent-framework/src/app/__init__.py create mode 100644 samples/python/agent-framework/src/app/__main__.py create mode 100644 samples/python/agent-framework/src/app/agents.py create mode 100644 samples/python/agent-framework/src/app/demos/__init__.py create mode 100644 samples/python/agent-framework/src/app/demos/code_reviewer.py create mode 100644 samples/python/agent-framework/src/app/demos/math_agent.py create mode 100644 samples/python/agent-framework/src/app/demos/multi_agent_debate.py create mode 100644 samples/python/agent-framework/src/app/demos/registry.py create mode 100644 samples/python/agent-framework/src/app/demos/sentiment_analyzer.py create mode 100644 samples/python/agent-framework/src/app/demos/weather_tools.py create mode 100644 samples/python/agent-framework/src/app/documents.py create mode 100644 samples/python/agent-framework/src/app/foundry_boot.py create mode 100644 samples/python/agent-framework/src/app/orchestrator.py create mode 100644 samples/python/agent-framework/src/app/templates/index.html create mode 100644 samples/python/agent-framework/src/app/tool_demo.py create mode 100644 samples/python/agent-framework/src/app/web.py create mode 100644 samples/python/agent-framework/tests/test_smoke.py create mode 100644 samples/python/hello-foundry-local/requirements.txt diff --git a/SUPPORT.md b/SUPPORT.md index eaf439ae..0b1323a9 100644 --- a/SUPPORT.md +++ b/SUPPORT.md @@ -1,25 +1,14 @@ -# TODO: The maintainer of this repo has not yet edited this file - -**REPO OWNER**: Do you want Customer Service & Support (CSS) support for this product/project? - -- **No CSS support:** Fill out this template with information about how to file issues and get help. -- **Yes CSS support:** Fill out an intake form at [aka.ms/onboardsupport](https://aka.ms/onboardsupport). CSS will work with/help you to determine next steps. 
-- **Not sure?** Fill out an intake as though the answer were "Yes". CSS will help you decide. - -*Then remove this first heading from this SUPPORT.MD file before publishing your repo.* - # Support -## How to file issues and get help +## How to file issues and get help -This project uses GitHub Issues to track bugs and feature requests. Please search the existing -issues before filing new issues to avoid duplicates. For new issues, file your bug or +This project uses GitHub Issues to track bugs and feature requests. Please search the existing +issues before filing new issues to avoid duplicates. For new issues, file your bug or feature request as a new Issue. -For help and questions about using this project, please **REPO MAINTAINER: INSERT INSTRUCTIONS HERE -FOR HOW TO ENGAGE REPO OWNERS OR COMMUNITY FOR HELP. COULD BE A STACK OVERFLOW TAG OR OTHER -CHANNEL. WHERE WILL YOU HELP PEOPLE?**. +For help and questions about using Foundry Local, please refer to the [documentation](docs/README.md) +and the [samples](samples/) in this repository. -## Microsoft Support Policy +## Microsoft Support Policy -Support for this **PROJECT or PRODUCT** is limited to the resources listed above. +Support for Foundry Local is limited to the resources listed above. diff --git a/samples/cs/GettingStarted/src/AudioTranscriptionExample/Program.cs b/samples/cs/GettingStarted/src/AudioTranscriptionExample/Program.cs index be1db5db..0049f999 100644 --- a/samples/cs/GettingStarted/src/AudioTranscriptionExample/Program.cs +++ b/samples/cs/GettingStarted/src/AudioTranscriptionExample/Program.cs @@ -29,21 +29,32 @@ model.SelectVariant(modelVariant); -// Download the model (the method skips download if already cached) -await model.DownloadAsync(progress => +// Download the model (check cache first) +if (!await model.IsCachedAsync()) { - Console.Write($"\rDownloading model: {progress:F2}%"); - if (progress >= 100f) + Console.WriteLine($"Model \"{model.Id}\" not found in cache. 
Downloading..."); + await model.DownloadAsync(progress => { - Console.WriteLine(); - } -}); + var filled = (int)Math.Round(progress / 100.0 * 30); + var bar = new string('\u2588', filled) + new string('\u2591', 30 - filled); + Console.Write($"\rDownloading: [{bar}] {progress:F1}%"); + if (progress >= 100f) + { + Console.WriteLine(); + } + }); + Console.WriteLine("\u2713 Model downloaded"); +} +else +{ + Console.WriteLine($"\u2713 Model \"{model.Id}\" already cached \u2014 skipping download"); +} -// Load the model +// Load the model into memory Console.Write($"Loading model {model.Id}..."); await model.LoadAsync(); -Console.WriteLine("done."); +Console.WriteLine("done. \u2713 Model ready"); // Get a chat client diff --git a/samples/cs/GettingStarted/src/FoundryLocalWebServer/Program.cs b/samples/cs/GettingStarted/src/FoundryLocalWebServer/Program.cs index f50ac1b0..187feaf3 100644 --- a/samples/cs/GettingStarted/src/FoundryLocalWebServer/Program.cs +++ b/samples/cs/GettingStarted/src/FoundryLocalWebServer/Program.cs @@ -31,21 +31,33 @@ // Get a model using an alias var model = await catalog.GetModelAsync("qwen2.5-0.5b") ?? throw new Exception("Model not found"); -// Download the model (the method skips download if already cached) -await model.DownloadAsync(progress => + +// Check cache before downloading — skip download if model is already cached +if (!await model.IsCachedAsync()) { - Console.Write($"\rDownloading model: {progress:F2}%"); - if (progress >= 100f) + Console.WriteLine($"Model \"{model.Id}\" not found in cache. 
Downloading..."); + await model.DownloadAsync(progress => { - Console.WriteLine(); - } -}); + var filled = (int)Math.Round(progress / 100.0 * 30); + var bar = new string('\u2588', filled) + new string('\u2591', 30 - filled); + Console.Write($"\rDownloading: [{bar}] {progress:F1}%"); + if (progress >= 100f) + { + Console.WriteLine(); + } + }); + Console.WriteLine("\u2713 Model downloaded"); +} +else +{ + Console.WriteLine($"\u2713 Model \"{model.Id}\" already cached \u2014 skipping download"); +} -// Load the model +// Load the model into memory Console.Write($"Loading model {model.Id}..."); await model.LoadAsync(); -Console.WriteLine("done."); +Console.WriteLine("done. \u2713 Model ready"); // Start the web service diff --git a/samples/cs/GettingStarted/src/HelloFoundryLocalSdk/Program.cs b/samples/cs/GettingStarted/src/HelloFoundryLocalSdk/Program.cs index 52efe410..a94a8514 100644 --- a/samples/cs/GettingStarted/src/HelloFoundryLocalSdk/Program.cs +++ b/samples/cs/GettingStarted/src/HelloFoundryLocalSdk/Program.cs @@ -29,20 +29,31 @@ // Get a model using an alias. var model = await catalog.GetModelAsync("qwen2.5-0.5b") ?? throw new Exception("Model not found"); -// Download the model (the method skips download if already cached) -await model.DownloadAsync(progress => +// Check cache before downloading — skip download if model is already cached +if (!await model.IsCachedAsync()) { - Console.Write($"\rDownloading model: {progress:F2}%"); - if (progress >= 100f) + Console.WriteLine($"Model \"{model.Id}\" not found in cache. 
Downloading..."); + await model.DownloadAsync(progress => { - Console.WriteLine(); - } -}); + var filled = (int)Math.Round(progress / 100.0 * 30); + var bar = new string('\u2588', filled) + new string('\u2591', 30 - filled); + Console.Write($"\rDownloading: [{bar}] {progress:F1}%"); + if (progress >= 100f) + { + Console.WriteLine(); + } + }); + Console.WriteLine("\u2713 Model downloaded"); +} +else +{ + Console.WriteLine($"\u2713 Model \"{model.Id}\" already cached \u2014 skipping download"); +} -// Load the model +// Load the model into memory Console.Write($"Loading model {model.Id}..."); await model.LoadAsync(); -Console.WriteLine("done."); +Console.WriteLine("done. \u2713 Model ready"); // Get a chat client var chatClient = await model.GetChatClientAsync(); diff --git a/samples/cs/GettingStarted/src/ModelManagementExample/Program.cs b/samples/cs/GettingStarted/src/ModelManagementExample/Program.cs index 2b6fe2e8..cca66c13 100644 --- a/samples/cs/GettingStarted/src/ModelManagementExample/Program.cs +++ b/samples/cs/GettingStarted/src/ModelManagementExample/Program.cs @@ -89,18 +89,31 @@ model.SelectVariant(modelVariant); -// Download the model (the method skips download if already cached) -await model.DownloadAsync(progress => +// Download the model (check cache first) +if (!await model.IsCachedAsync()) { - Console.Write($"\rDownloading model: {progress:F2}%"); - if (progress >= 100f) + Console.WriteLine($"Model \"{model.Id}\" not found in cache. 
Downloading..."); + await model.DownloadAsync(progress => { - Console.WriteLine(); - } -}); + var filled = (int)Math.Round(progress / 100.0 * 30); + var bar = new string('\u2588', filled) + new string('\u2591', 30 - filled); + Console.Write($"\rDownloading: [{bar}] {progress:F1}%"); + if (progress >= 100f) + { + Console.WriteLine(); + } + }); + Console.WriteLine("\u2713 Model downloaded"); +} +else +{ + Console.WriteLine($"\u2713 Model \"{model.Id}\" already cached \u2014 skipping download"); +} -// Load the model +// Load the model into memory +Console.Write($"Loading model {model.Id}..."); await model.LoadAsync(); +Console.WriteLine("done. \u2713 Model ready"); // List loaded models (i.e. in memory) from the catalog diff --git a/samples/cs/GettingStarted/src/ToolCallingFoundryLocalSdk/Program.cs b/samples/cs/GettingStarted/src/ToolCallingFoundryLocalSdk/Program.cs index 3cdf3d38..eca12824 100644 --- a/samples/cs/GettingStarted/src/ToolCallingFoundryLocalSdk/Program.cs +++ b/samples/cs/GettingStarted/src/ToolCallingFoundryLocalSdk/Program.cs @@ -32,22 +32,32 @@ // Get a model using an alias. var model = await catalog.GetModelAsync("qwen2.5-0.5b") ?? throw new Exception("Model not found"); - -// Download the model (the method skips download if already cached) -await model.DownloadAsync(progress => +// Check cache before downloading — skip download if model is already cached +if (!await model.IsCachedAsync()) { - Console.Write($"\rDownloading model: {progress:F2}%"); - if (progress >= 100f) + Console.WriteLine($"Model \"{model.Id}\" not found in cache. 
Downloading..."); + await model.DownloadAsync(progress => { - Console.WriteLine(); - } -}); + var filled = (int)Math.Round(progress / 100.0 * 30); + var bar = new string('\u2588', filled) + new string('\u2591', 30 - filled); + Console.Write($"\rDownloading: [{bar}] {progress:F1}%"); + if (progress >= 100f) + { + Console.WriteLine(); + } + }); + Console.WriteLine("\u2713 Model downloaded"); +} +else +{ + Console.WriteLine($"\u2713 Model \"{model.Id}\" already cached \u2014 skipping download"); +} -// Load the model +// Load the model into memory Console.Write($"Loading model {model.Id}..."); await model.LoadAsync(); -Console.WriteLine("done."); +Console.WriteLine("done. \u2713 Model ready"); // Get a chat client diff --git a/samples/cs/GettingStarted/src/ToolCallingFoundryLocalWebServer/Program.cs b/samples/cs/GettingStarted/src/ToolCallingFoundryLocalWebServer/Program.cs index 6d6937fd..8882847a 100644 --- a/samples/cs/GettingStarted/src/ToolCallingFoundryLocalWebServer/Program.cs +++ b/samples/cs/GettingStarted/src/ToolCallingFoundryLocalWebServer/Program.cs @@ -33,21 +33,33 @@ // Get a model using an alias var model = await catalog.GetModelAsync("qwen2.5-0.5b") ?? throw new Exception("Model not found"); -// Download the model (the method skips download if already cached) -await model.DownloadAsync(progress => + +// Check cache before downloading — skip download if model is already cached +if (!await model.IsCachedAsync()) { - Console.Write($"\rDownloading model: {progress:F2}%"); - if (progress >= 100f) + Console.WriteLine($"Model \"{model.Id}\" not found in cache. 
Downloading..."); + await model.DownloadAsync(progress => { - Console.WriteLine(); - } -}); + var filled = (int)Math.Round(progress / 100.0 * 30); + var bar = new string('\u2588', filled) + new string('\u2591', 30 - filled); + Console.Write($"\rDownloading: [{bar}] {progress:F1}%"); + if (progress >= 100f) + { + Console.WriteLine(); + } + }); + Console.WriteLine("\u2713 Model downloaded"); +} +else +{ + Console.WriteLine($"\u2713 Model \"{model.Id}\" already cached \u2014 skipping download"); +} -// Load the model +// Load the model into memory Console.Write($"Loading model {model.Id}..."); await model.LoadAsync(); -Console.WriteLine("done."); +Console.WriteLine("done. \u2713 Model ready"); // Start the web service diff --git a/samples/cs/whisper-transcription/Health/FoundryHealthCheck.cs b/samples/cs/whisper-transcription/Health/FoundryHealthCheck.cs new file mode 100644 index 00000000..dc53f5ec --- /dev/null +++ b/samples/cs/whisper-transcription/Health/FoundryHealthCheck.cs @@ -0,0 +1,28 @@ +using Microsoft.Extensions.Diagnostics.HealthChecks; + +namespace WhisperTranscription; + +public class FoundryHealthCheck : IHealthCheck +{ + private readonly FoundryModelService _modelService; + + public FoundryHealthCheck(FoundryModelService modelService) + { + _modelService = modelService; + } + + public async Task CheckHealthAsync( + HealthCheckContext context, + CancellationToken cancellationToken = default) + { + try + { + var model = await _modelService.GetModelAsync(); + return HealthCheckResult.Healthy($"Model available: {model.Id}"); + } + catch (Exception ex) + { + return HealthCheckResult.Unhealthy("Foundry Local unavailable", ex); + } + } +} diff --git a/samples/cs/whisper-transcription/Middleware/ErrorHandlingMiddleware.cs b/samples/cs/whisper-transcription/Middleware/ErrorHandlingMiddleware.cs new file mode 100644 index 00000000..e44fd2e9 --- /dev/null +++ b/samples/cs/whisper-transcription/Middleware/ErrorHandlingMiddleware.cs @@ -0,0 +1,32 @@ +using 
System.Net; +using System.Text.Json; + +namespace WhisperTranscription; + +public class ErrorHandlingMiddleware +{ + private readonly RequestDelegate _next; + private readonly ILogger _logger; + + public ErrorHandlingMiddleware(RequestDelegate next, ILogger logger) + { + _next = next; + _logger = logger; + } + + public async Task InvokeAsync(HttpContext context) + { + try + { + await _next(context); + } + catch (Exception ex) + { + _logger.LogError(ex, "Unhandled exception"); + context.Response.StatusCode = (int)HttpStatusCode.InternalServerError; + context.Response.ContentType = "application/json"; + var payload = JsonSerializer.Serialize(new { error = ex.Message }); + await context.Response.WriteAsync(payload); + } + } +} diff --git a/samples/cs/whisper-transcription/Program.cs b/samples/cs/whisper-transcription/Program.cs new file mode 100644 index 00000000..f985f26c --- /dev/null +++ b/samples/cs/whisper-transcription/Program.cs @@ -0,0 +1,98 @@ +using Microsoft.AspNetCore.Mvc; +using WhisperTranscription; + +var builder = WebApplication.CreateBuilder(args); + +builder.Services.Configure( + builder.Configuration.GetSection(FoundryOptions.SectionName)); +builder.Services.AddSingleton(); +builder.Services.AddSingleton(); +builder.Services.AddHealthChecks() + .AddCheck("foundry"); +builder.Services.AddEndpointsApiExplorer(); +builder.Services.AddSwaggerGen(); + +builder.Services.ConfigureHttpJsonOptions(options => +{ + options.SerializerOptions.WriteIndented = true; + options.SerializerOptions.PropertyNamingPolicy = System.Text.Json.JsonNamingPolicy.CamelCase; +}); + +var app = builder.Build(); + +app.UseMiddleware(); +app.UseDefaultFiles(); +app.UseStaticFiles(); + +if (app.Environment.IsDevelopment()) +{ + app.UseSwagger(); + app.UseSwaggerUI(); +} + +app.MapHealthChecks("/health"); + +app.MapGet("/api/health/status", async ([FromServices] FoundryModelService modelService) => +{ + try + { + var model = await modelService.GetModelAsync(); + var isCached = await 
model.IsCachedAsync(); + return Results.Ok(new + { + status = "Healthy", + model = model.Id, + cached = isCached, + }); + } + catch (Exception ex) + { + return Results.Ok(new + { + status = "Degraded", + error = ex.Message, + }); + } +}).WithName("GetHealthStatus"); + +app.MapPost("/v1/audio/transcriptions", async ( + [FromServices] TranscriptionService svc, + [FromForm] IFormFile file, + [FromForm] string? model, + [FromForm] string? format) => +{ + if (file is null || file.Length == 0) + { + return Results.BadRequest(new { error = "No audio file provided" }); + } + + // Save upload to temp file + var tmp = Path.Combine(Path.GetTempPath(), Guid.NewGuid() + Path.GetExtension(file.FileName)); + await using (var fs = File.Create(tmp)) + { + await file.CopyToAsync(fs); + } + + try + { + var result = await svc.TranscribeAsync(tmp, model); + var outputFormat = format?.ToLowerInvariant() ?? "text"; + return outputFormat switch + { + "json" => Results.Ok(new { text = result.Text, model = result.ModelId }), + _ => Results.Text(result.Text, "text/plain"), + }; + } + finally + { + try { File.Delete(tmp); } catch { /* cleanup best-effort */ } + } +}).WithName("TranscribeAudio") + .DisableAntiforgery() + .Produces(200) + .ProducesProblem(400) + .ProducesProblem(500); + +app.MapFallbackToFile("index.html"); + +app.Run(); diff --git a/samples/cs/whisper-transcription/README.md b/samples/cs/whisper-transcription/README.md new file mode 100644 index 00000000..3d13b434 --- /dev/null +++ b/samples/cs/whisper-transcription/README.md @@ -0,0 +1,112 @@ +# Whisper Transcription — Foundry Local + +An on-device audio transcription web application powered by [Foundry Local](https://foundrylocal.ai) and OpenAI Whisper models. All processing runs locally — no audio data leaves your machine. + +Based on the [FLWhisper](https://github.com/leestott/FLWhisper) project. 
+ +## Features + +- **100% local processing** — audio never leaves your device +- **Streaming transcription** — uses the Foundry Local SDK streaming API for real-time output +- **Web UI** — drag-and-drop or file picker with audio preview +- **REST API** — OpenAI-compatible `/v1/audio/transcriptions` endpoint +- **Health checks** — built-in health endpoint for monitoring +- **Cache-aware** — skips download when the model is already cached + +## Prerequisites + +- **Windows 10/11** (ARM64 or x64) +- **.NET 9 SDK** — [Download here](https://dotnet.microsoft.com/download/dotnet/9.0) +- **Foundry Local** — installed and on PATH + +## Quick Start + +```bash +cd samples/cs/whisper-transcription + +# Restore and run +dotnet restore +dotnet run +``` + +Open **http://localhost:5000** (or the port shown in console output). + +On first launch, Foundry Local will download the Whisper model if it is not already cached. Subsequent launches will be near-instant. + +## Project Structure + +``` +whisper-transcription/ +├── Program.cs # ASP.NET Core Minimal API entry point +├── Health/ +│ └── FoundryHealthCheck.cs # Health check implementation +├── Middleware/ +│ └── ErrorHandlingMiddleware.cs # Global error handler +├── Services/ +│ ├── FoundryOptions.cs # Configuration options +│ ├── FoundryModelService.cs # Model management (cache check, download, load) +│ └── TranscriptionService.cs # Audio transcription via streaming API +├── wwwroot/ +│ ├── index.html # Web UI +│ ├── app.js # Client-side logic +│ └── styles.css # Styling +├── appsettings.json # Configuration +├── nuget.config # NuGet package sources +├── WhisperTranscription.csproj # Project file +└── README.md +``` + +## API Endpoints + +| Method | Path | Description | +|--------|------|-------------| +| GET | `/health` | ASP.NET Core health check | +| GET | `/api/health/status` | Model status with cache info | +| POST | `/v1/audio/transcriptions` | Transcribe audio (OpenAI compatible) | +| GET | `/swagger` | Interactive API 
docs (dev mode) | + +### Transcription Request + +``` +POST /v1/audio/transcriptions +Content-Type: multipart/form-data +``` + +Parameters: +- `file` (required) — audio file (WAV, MP3, M4A, etc.) +- `model` (optional) — model alias (default: from config) +- `format` (optional) — `text` (default) or `json` + +## Configuration + +Edit `appsettings.json`: + +```json +{ + "Foundry": { + "ModelAlias": "whisper-tiny", + "LogLevel": "Information" + } +} +``` + +Override via environment variable: `Foundry__ModelAlias=whisper-medium` + +## How It Works + +1. **Bootstrap** — `FoundryModelService` initializes the Foundry Local runtime and registers execution providers. +2. **Model resolution** — the configured model alias is resolved from the catalog. +3. **Cache check** — `IsCachedAsync()` skips download when the model is already on disk. +4. **Download** — if not cached, the model is downloaded with progress logging. +5. **Load** — the CPU variant is selected and loaded into the inference engine. +6. **Transcription** — audio is transcribed using `TranscribeAudioStreamingAsync()` for streaming output. +7. **Response** — the full transcription text is returned as plain text or JSON. + +## Related Samples + +- [AudioTranscriptionExample](../GettingStarted/src/AudioTranscriptionExample/) — console-based Whisper transcription +- [FLWhisper](https://github.com/leestott/FLWhisper) — full-featured medical transcription app + +## License + +This sample is provided under the [MIT License](../../../LICENSE). 
diff --git a/samples/cs/whisper-transcription/Services/FoundryModelService.cs b/samples/cs/whisper-transcription/Services/FoundryModelService.cs new file mode 100644 index 00000000..97f34bbe --- /dev/null +++ b/samples/cs/whisper-transcription/Services/FoundryModelService.cs @@ -0,0 +1,89 @@ +using Microsoft.AI.Foundry.Local; +using Microsoft.Extensions.Options; + +namespace WhisperTranscription; + +public class FoundryModelService +{ + private readonly ILogger _logger; + private readonly ILoggerFactory _loggerFactory; + private readonly FoundryOptions _options; + private bool _initialized; + + public FoundryModelService( + IOptions options, + ILogger logger, + ILoggerFactory loggerFactory) + { + _logger = logger; + _loggerFactory = loggerFactory; + _options = options.Value; + } + + public async Task InitializeAsync() + { + if (_initialized) return; + + _logger.LogInformation("Initializing Foundry Local Manager"); + var config = new Configuration + { + AppName = "WhisperTranscription", + LogLevel = Enum.TryParse( + _options.LogLevel, true, out var lvl) + ? lvl + : Microsoft.AI.Foundry.Local.LogLevel.Information, + }; + + await FoundryLocalManager.CreateAsync(config, _loggerFactory.CreateLogger("FoundryLocal")); + var mgr = FoundryLocalManager.Instance; + await mgr.EnsureEpsDownloadedAsync(); + _initialized = true; + } + + public async Task GetModelAsync(string? aliasOrId = null) + { + await InitializeAsync(); + var mgr = FoundryLocalManager.Instance; + var catalog = await mgr.GetCatalogAsync() + ?? throw new InvalidOperationException("Failed to get model catalog"); + + var alias = string.IsNullOrWhiteSpace(aliasOrId) ? _options.ModelAlias : aliasOrId; + var model = await catalog.GetModelAsync(alias) + ?? 
throw new InvalidOperationException($"Model '{alias}' not found in catalog"); + + return model; + } + + public async Task EnsureModelReadyAsync(Model model) + { + // Prefer CPU variant + var cpuVariant = model.Variants.FirstOrDefault( + v => v.Info.Runtime?.DeviceType == DeviceType.CPU); + if (cpuVariant != null) + { + model.SelectVariant(cpuVariant); + } + + // Check cache and download if needed + if (!await model.IsCachedAsync()) + { + _logger.LogInformation("Model \"{ModelId}\" not cached — downloading...", model.Id); + await model.DownloadAsync(progress => + { + if (progress % 10 == 0) + { + _logger.LogInformation("Download progress: {Progress:F0}%", progress); + } + }); + _logger.LogInformation("Model downloaded"); + } + else + { + _logger.LogInformation("Model \"{ModelId}\" already cached", model.Id); + } + + _logger.LogInformation("Loading model \"{ModelId}\"...", model.Id); + await model.LoadAsync(); + _logger.LogInformation("Model loaded and ready"); + } +} diff --git a/samples/cs/whisper-transcription/Services/FoundryOptions.cs b/samples/cs/whisper-transcription/Services/FoundryOptions.cs new file mode 100644 index 00000000..9fb875fb --- /dev/null +++ b/samples/cs/whisper-transcription/Services/FoundryOptions.cs @@ -0,0 +1,8 @@ +namespace WhisperTranscription; + +public class FoundryOptions +{ + public const string SectionName = "Foundry"; + public string ModelAlias { get; set; } = "whisper-tiny"; + public string LogLevel { get; set; } = "Information"; +} diff --git a/samples/cs/whisper-transcription/Services/TranscriptionService.cs b/samples/cs/whisper-transcription/Services/TranscriptionService.cs new file mode 100644 index 00000000..bef7001e --- /dev/null +++ b/samples/cs/whisper-transcription/Services/TranscriptionService.cs @@ -0,0 +1,54 @@ +using Microsoft.AI.Foundry.Local; + +namespace WhisperTranscription; + +public class TranscriptionService +{ + private readonly FoundryModelService _modelService; + private readonly ILogger _logger; + + public 
TranscriptionService( + FoundryModelService modelService, + ILogger logger) + { + _modelService = modelService; + _logger = logger; + } + + public async Task TranscribeAsync(string filePath, string? modelAlias = null) + { + var model = await _modelService.GetModelAsync(modelAlias); + await _modelService.EnsureModelReadyAsync(model); + + var audioClient = await model.GetAudioClientAsync() + ?? throw new InvalidOperationException("Failed to get audio client"); + + _logger.LogInformation("Transcribing \"{FilePath}\" with model {ModelId}", filePath, model.Id); + + // Use streaming transcription for real-time output + var textParts = new List(); + var response = audioClient.TranscribeAudioStreamingAsync(filePath, CancellationToken.None); + await foreach (var chunk in response) + { + if (!string.IsNullOrEmpty(chunk.Text)) + { + textParts.Add(chunk.Text); + } + } + + var fullText = string.Join("", textParts); + _logger.LogInformation("Transcription complete: {Length} characters", fullText.Length); + + return new TranscriptionResult + { + Text = fullText, + ModelId = model.Id, + }; + } +} + +public class TranscriptionResult +{ + public string Text { get; set; } = ""; + public string ModelId { get; set; } = ""; +} diff --git a/samples/cs/whisper-transcription/WhisperTranscription.csproj b/samples/cs/whisper-transcription/WhisperTranscription.csproj new file mode 100644 index 00000000..f6d1d553 --- /dev/null +++ b/samples/cs/whisper-transcription/WhisperTranscription.csproj @@ -0,0 +1,22 @@ + + + + net9.0-windows10.0.26100 + enable + enable + ARM64;x64 + false + None + false + + + + $(NETCoreSdkRuntimeIdentifier) + + + + + + + + diff --git a/samples/cs/whisper-transcription/appsettings.json b/samples/cs/whisper-transcription/appsettings.json new file mode 100644 index 00000000..63b756a1 --- /dev/null +++ b/samples/cs/whisper-transcription/appsettings.json @@ -0,0 +1,14 @@ +{ + "Foundry": { + "ModelAlias": "whisper-tiny", + "LogLevel": "Information" + }, + "Logging": { + 
"LogLevel": { + "Default": "Information", + "Microsoft": "Warning", + "Microsoft.Hosting.Lifetime": "Information" + } + }, + "AllowedHosts": "*" +} diff --git a/samples/cs/whisper-transcription/nuget.config b/samples/cs/whisper-transcription/nuget.config new file mode 100644 index 00000000..5ae1c6b2 --- /dev/null +++ b/samples/cs/whisper-transcription/nuget.config @@ -0,0 +1,16 @@ + + + + + + + + + + + + + + + + diff --git a/samples/cs/whisper-transcription/wwwroot/app.js b/samples/cs/whisper-transcription/wwwroot/app.js new file mode 100644 index 00000000..7c2bbf81 --- /dev/null +++ b/samples/cs/whisper-transcription/wwwroot/app.js @@ -0,0 +1,124 @@ +const state = { file: null, fileName: null }; + +function bindUpload() { + const fileInput = document.getElementById('fileInput'); + const chooseBtn = document.getElementById('chooseFileBtn'); + const dropzone = document.getElementById('dropzone'); + + chooseBtn.addEventListener('click', () => fileInput.click()); + + fileInput.addEventListener('change', (e) => { + const file = e.target.files[0]; + if (file) loadFile(file); + }); + + dropzone.addEventListener('dragover', (e) => { e.preventDefault(); dropzone.classList.add('drag-over'); }); + dropzone.addEventListener('dragleave', () => dropzone.classList.remove('drag-over')); + dropzone.addEventListener('drop', (e) => { + e.preventDefault(); + dropzone.classList.remove('drag-over'); + const file = e.dataTransfer.files[0]; + if (file) loadFile(file); + }); +} + +function loadFile(file) { + state.file = file; + state.fileName = file.name; + + const preview = document.getElementById('previewSection'); + const nameEl = document.getElementById('fileName'); + const player = document.getElementById('audioPlayer'); + + nameEl.textContent = `${file.name} (${(file.size / 1024).toFixed(1)} KB)`; + player.src = URL.createObjectURL(file); + player.load(); + preview.style.display = 'block'; + document.getElementById('transcribeBtn').disabled = false; +} + +async function 
transcribe() { + const statusEl = document.getElementById('transcribeStatus'); + const btn = document.getElementById('transcribeBtn'); + + if (!state.file) { + statusEl.textContent = 'Please select an audio file first.'; + statusEl.classList.add('error'); + return; + } + + statusEl.textContent = 'Transcribing\u2026'; + statusEl.classList.remove('error'); + btn.disabled = true; + + const format = document.getElementById('formatSelect').value; + const form = new FormData(); + form.append('file', state.file, state.fileName); + form.append('format', format); + + try { + const res = await fetch('/v1/audio/transcriptions', { method: 'POST', body: form }); + if (!res.ok) { + const txt = await res.text(); + throw new Error(txt || `HTTP ${res.status}`); + } + + if (format === 'json') { + const data = await res.json(); + renderResult(JSON.stringify(data, null, 2)); + } else { + const text = await res.text(); + renderResult(text); + } + statusEl.textContent = 'Done — transcription complete.'; + } catch (err) { + statusEl.textContent = `Error: ${err.message}`; + statusEl.classList.add('error'); + } finally { + btn.disabled = false; + } +} + +function renderResult(text) { + const resultEl = document.getElementById('resultText'); + const copyBtn = document.getElementById('copyBtn'); + resultEl.textContent = text; + copyBtn.style.display = 'inline-block'; +} + +function setupCopyButton() { + const copyBtn = document.getElementById('copyBtn'); + const resultEl = document.getElementById('resultText'); + copyBtn.addEventListener('click', async () => { + try { + await navigator.clipboard.writeText(resultEl.textContent); + const orig = copyBtn.textContent; + copyBtn.textContent = 'Copied!'; + copyBtn.classList.add('success'); + setTimeout(() => { copyBtn.textContent = orig; copyBtn.classList.remove('success'); }, 2000); + } catch { alert('Failed to copy'); } + }); +} + +async function checkHealth() { + try { + const res = await fetch('/api/health/status'); + if (res.ok) { + const data 
= await res.json(); + document.getElementById('stat-status').textContent = data.status || 'Unknown'; + document.getElementById('stat-model').textContent = data.model || '—'; + document.getElementById('stat-cached').textContent = data.cached ? 'Yes' : 'No'; + } else { + document.getElementById('stat-status').textContent = 'Degraded'; + } + } catch { + document.getElementById('stat-status').textContent = 'Offline'; + } +} + +document.addEventListener('DOMContentLoaded', () => { + bindUpload(); + setupCopyButton(); + document.getElementById('transcribeBtn').addEventListener('click', transcribe); + checkHealth(); +}); diff --git a/samples/cs/whisper-transcription/wwwroot/index.html b/samples/cs/whisper-transcription/wwwroot/index.html new file mode 100644 index 00000000..df66b2e2 --- /dev/null +++ b/samples/cs/whisper-transcription/wwwroot/index.html @@ -0,0 +1,73 @@ + + + + + + Whisper Transcription — Foundry Local + + + +
+
+

Whisper Transcription

+

On-device audio transcription powered by Foundry Local + OpenAI Whisper.

+
+
+ + Model +
+
+ + Service +
+
+ + Cached +
+
+
+
+
+
+
+ +
+
+

Upload Audio

+
+ + +

Drag & drop or pick a WAV, MP3, or M4A file.

+
+ +
+

Options

+
+ + +
+
+ +
+
+ +
+
+

Transcription Result

+ +
+

+    
+
+ +
Whisper Transcription · Foundry Local — all processing on device.
+ + + diff --git a/samples/cs/whisper-transcription/wwwroot/styles.css b/samples/cs/whisper-transcription/wwwroot/styles.css new file mode 100644 index 00000000..ab6224df --- /dev/null +++ b/samples/cs/whisper-transcription/wwwroot/styles.css @@ -0,0 +1,126 @@ +:root { + --bg: #f5fbff; + --card: #ffffff; + --border: #d6e4f0; + --accent: #2d8cff; + --accent-dark: #1b6dd8; + --teal: #16a6b6; + --text: #1d2a35; + --muted: #6c7a89; + --shadow: 0 20px 45px rgba(37, 99, 235, 0.12); +} + +body { + margin: 0; + font-family: 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif; + background: var(--bg); + color: var(--text); +} + +.hero { + display: flex; + justify-content: space-between; + align-items: center; + padding: 40px 60px; + background: linear-gradient(135deg, #2193b0, #6dd5ed); + color: #fff; + box-shadow: var(--shadow); +} + +.hero-content h1 { margin: 0 0 8px; font-size: 2.5rem; } +.hero-content p { margin: 0; opacity: 0.9; } + +.hero-stats { display: flex; gap: 24px; margin-top: 24px; } +.stat { display: flex; flex-direction: column; } +.stat-number { font-size: 1.4rem; font-weight: 600; } +.stat-label { font-size: 0.8rem; color: rgba(255,255,255,0.7); } + +.hero-illustration { position: relative; width: 120px; height: 60px; } +.waveform { + width: 100%; height: 4px; background: #fff; border-radius: 2px; + position: absolute; top: 50%; + animation: pulse 2s infinite; +} +@keyframes pulse { + 0% { box-shadow: 0 0 0 0 rgba(255,255,255,0.6); } + 70% { box-shadow: 0 0 0 10px rgba(255,255,255,0); } + 100% { box-shadow: 0 0 0 0 rgba(255,255,255,0); } +} + +.main { + display: grid; + grid-template-columns: 1fr 1fr; + gap: 24px; + padding: 32px 48px 60px; +} + +.panel { + background: var(--card); + border-radius: 16px; + padding: 24px; + box-shadow: var(--shadow); + border: 1px solid var(--border); + min-height: 360px; +} + +.upload-area { + border: 2px dashed var(--accent); + border-radius: 12px; + padding: 24px; + text-align: center; + margin-bottom: 20px; + 
background: rgba(45,140,255,0.05); + transition: background 0.2s; +} +.upload-area.drag-over { background: rgba(45,140,255,0.15); } + +.btn { + padding: 10px 18px; border: none; border-radius: 8px; + cursor: pointer; font-weight: 600; +} +.btn.primary { background: var(--accent); color: #fff; } +.btn.accent { background: var(--teal); color: #fff; } +.btn.secondary { background: var(--border); color: var(--text); transition: all 0.2s; } +.btn.secondary:hover { background: var(--accent); color: #fff; } +.btn.secondary.success { background: #10b981; color: #fff; } +.btn:disabled { opacity: 0.5; cursor: not-allowed; } + +.hint { color: var(--muted); font-size: 0.9rem; } +.file-name { font-weight: 600; margin: 0 0 8px; } + +.preview { margin-top: 16px; } +.preview audio { width: 100%; } + +.transcription-options { + margin-top: 16px; padding: 16px; background: #f9fbff; + border-radius: 8px; border: 1px solid var(--border); +} +.transcription-options h3 { margin: 0 0 12px; font-size: 1rem; } +.option-group { margin-bottom: 12px; } +.format-select { + width: 100%; padding: 10px 12px; margin-top: 6px; + border: 2px solid var(--border); border-radius: 8px; + background: var(--card); color: var(--text); font-size: 0.95rem; + cursor: pointer; +} +.format-select:focus { outline: none; border-color: var(--accent); box-shadow: 0 0 0 3px rgba(45,140,255,0.1); } + +.status { margin-top: 12px; font-size: 0.9rem; color: var(--muted); } +.status.error { color: #ef4444; } + +.result-panel { display: flex; flex-direction: column; } +.result-header { display: flex; justify-content: space-between; align-items: center; margin-bottom: 16px; } +.result-header h2 { margin: 0; } +.result-text { + flex: 1; background: #f9fbff; border-radius: 12px; padding: 16px; + overflow: auto; max-height: 400px; border: 1px solid var(--border); + white-space: pre-wrap; word-break: break-word; +} + +.footer { text-align: center; padding: 16px 0 24px; color: var(--muted); font-size: 0.9rem; } + +@media 
(max-width: 960px) { + .main { grid-template-columns: 1fr; } + .hero { flex-direction: column; text-align: center; padding: 24px; } + .hero-stats { justify-content: center; } +} diff --git a/samples/js/audio-transcription-example/app.js b/samples/js/audio-transcription-example/app.js index fe441d1b..26395dbf 100644 --- a/samples/js/audio-transcription-example/app.js +++ b/samples/js/audio-transcription-example/app.js @@ -10,21 +10,29 @@ const manager = FoundryLocalManager.create({ console.log('✓ SDK initialized successfully'); // Get the model object -const modelAlias = 'whisper-tiny'; // Using an available model from the list above +const modelAlias = 'whisper-tiny'; let model = await manager.catalog.getModel(modelAlias); console.log(`Using model: ${model.id}`); -// Download the model -console.log(`\nDownloading model ${modelAlias}...`); -await model.download((progress) => { - process.stdout.write(`\rDownloading... ${progress.toFixed(2)}%`); -}); -console.log('\n✓ Model downloaded'); - -// Load the model -console.log(`\nLoading model ${modelAlias}...`); +// Check cache before downloading — skip download if model is already cached +if (!model.isCached) { + console.log(`\nModel "${modelAlias}" not found in cache. 
Downloading...`); + await model.download((progress) => { + const barWidth = 30; + const filled = Math.round((progress / 100) * barWidth); + const bar = '█'.repeat(filled) + '░'.repeat(barWidth - filled); + process.stdout.write(`\rDownloading: [${bar}] ${progress.toFixed(1)}%`); + if (progress >= 100) process.stdout.write('\n'); + }); + console.log('✓ Model downloaded'); +} else { + console.log(`\n✓ Model "${modelAlias}" already cached — skipping download`); +} + +// Load the model into memory +console.log(`Loading model ${modelAlias}...`); await model.load(); -console.log('✓ Model loaded'); +console.log('✓ Model loaded and ready'); // Create audio client console.log('\nCreating audio client...'); diff --git a/samples/js/chat-and-audio-foundry-local/package.json b/samples/js/chat-and-audio-foundry-local/package.json index a91ecda3..339d2956 100644 --- a/samples/js/chat-and-audio-foundry-local/package.json +++ b/samples/js/chat-and-audio-foundry-local/package.json @@ -6,6 +6,6 @@ "start": "node src/app.js" }, "dependencies": { - "foundry-local-sdk": "latest" + "foundry-local-sdk": "^0.5.1" } } diff --git a/samples/js/copilot-sdk-foundry-local/package.json b/samples/js/copilot-sdk-foundry-local/package.json index d01a25a9..f427f992 100644 --- a/samples/js/copilot-sdk-foundry-local/package.json +++ b/samples/js/copilot-sdk-foundry-local/package.json @@ -9,7 +9,7 @@ }, "dependencies": { "@github/copilot-sdk": "latest", - "foundry-local-sdk": "latest", + "foundry-local-sdk": "^0.5.1", "zod": "^3.0.0" }, "devDependencies": { diff --git a/samples/js/copilot-sdk-foundry-local/src/app.ts b/samples/js/copilot-sdk-foundry-local/src/app.ts index c7c7966a..459f0096 100644 --- a/samples/js/copilot-sdk-foundry-local/src/app.ts +++ b/samples/js/copilot-sdk-foundry-local/src/app.ts @@ -61,7 +61,18 @@ async function main() { }); model = await manager.catalog.getModel(alias); - await model.download(); + if (!model.isCached) { + console.log(`Model "${alias}" not in cache. 
Downloading...`); + await model.download((progress: number) => { + const barWidth = 30; + const filled = Math.round((progress / 100) * barWidth); + const bar = '\u2588'.repeat(filled) + '\u2591'.repeat(barWidth - filled); + process.stdout.write(`\rDownloading: [${bar}] ${progress.toFixed(1)}%`); + if (progress >= 100) process.stdout.write('\n'); + }); + } else { + console.log(`\u2713 Model "${alias}" already cached \u2014 skipping download`); + } await model.load(); console.log(`Model: ${model.id}`); diff --git a/samples/js/copilot-sdk-foundry-local/src/tool-calling.ts b/samples/js/copilot-sdk-foundry-local/src/tool-calling.ts index 3e41748c..039849cf 100644 --- a/samples/js/copilot-sdk-foundry-local/src/tool-calling.ts +++ b/samples/js/copilot-sdk-foundry-local/src/tool-calling.ts @@ -140,7 +140,18 @@ async function main() { }); model = await manager.catalog.getModel(alias); - await model.download(); + if (!model.isCached) { + console.log(`Model "${alias}" not in cache. Downloading...`); + await model.download((progress: number) => { + const barWidth = 30; + const filled = Math.round((progress / 100) * barWidth); + const bar = '\u2588'.repeat(filled) + '\u2591'.repeat(barWidth - filled); + process.stdout.write(`\rDownloading: [${bar}] ${progress.toFixed(1)}%`); + if (progress >= 100) process.stdout.write('\n'); + }); + } else { + console.log(`\u2713 Model "${alias}" already cached \u2014 skipping download`); + } await model.load(); console.log(`Model: ${model.id}`); diff --git a/samples/js/electron-chat-application/package.json b/samples/js/electron-chat-application/package.json index 29ccd2b7..07dc5282 100644 --- a/samples/js/electron-chat-application/package.json +++ b/samples/js/electron-chat-application/package.json @@ -19,6 +19,7 @@ "electron": "^34.5.8" }, "dependencies": { + "foundry-local-sdk": "^0.5.1", "highlight.js": "^11.11.1", "marked": "^15.0.6" } diff --git a/samples/js/langchain-integration-example/app.js 
b/samples/js/langchain-integration-example/app.js index 94e0afdc..a6c7d980 100644 --- a/samples/js/langchain-integration-example/app.js +++ b/samples/js/langchain-integration-example/app.js @@ -15,20 +15,28 @@ const manager = FoundryLocalManager.create({ console.log('✓ SDK initialized successfully'); // Get the model object -const modelAlias = 'qwen2.5-0.5b'; // Using an available model from the list above +const modelAlias = 'qwen2.5-0.5b'; const model = await manager.catalog.getModel(modelAlias); -// Download the model -console.log(`\nDownloading model ${modelAlias}...`); -await model.download((progress) => { - process.stdout.write(`\rDownloading... ${progress.toFixed(2)}%`); -}); -console.log('\n✓ Model downloaded'); +// Check cache before downloading — skip download if model is already cached +if (!model.isCached) { + console.log(`\nModel "${modelAlias}" not found in cache. Downloading...`); + await model.download((progress) => { + const barWidth = 30; + const filled = Math.round((progress / 100) * barWidth); + const bar = '█'.repeat(filled) + '░'.repeat(barWidth - filled); + process.stdout.write(`\rDownloading: [${bar}] ${progress.toFixed(1)}%`); + if (progress >= 100) process.stdout.write('\n'); + }); + console.log('✓ Model downloaded'); +} else { + console.log(`\n✓ Model "${modelAlias}" already cached — skipping download`); +} -// Load the model -console.log(`\nLoading model ${modelAlias}...`); +// Load the model into memory +console.log(`Loading model ${modelAlias}...`); await model.load(); -console.log('✓ Model loaded'); +console.log('✓ Model loaded and ready'); // Start the web service console.log('\nStarting web service...'); diff --git a/samples/js/local-cag/README.md b/samples/js/local-cag/README.md new file mode 100644 index 00000000..ece03869 --- /dev/null +++ b/samples/js/local-cag/README.md @@ -0,0 +1,125 @@ +# Local CAG – Context-Augmented Generation with Foundry Local + +A fully offline **Context-Augmented Generation (CAG)** sample application 
that runs an AI support agent entirely on-device using [Foundry Local](https://foundrylocal.ai). + +## What is CAG? + +CAG (Context-Augmented Generation) pre-loads **all** domain documents at startup and injects them into the AI prompt — no vector database, no embeddings, no retrieval step. This makes it ideal for: + +- **Offline / air-gapped** environments (e.g., field operations) +- **Small-to-medium knowledge bases** (dozens of documents) +- **Low-latency responses** — no retrieval round-trip +- **Simple deployment** — no external dependencies beyond Foundry Local + +## Architecture + +``` +┌─────────────┐ ┌──────────────────┐ ┌────────────────────┐ +│ Browser UI │───▶│ Express Server │───▶│ Foundry Local SDK │ +│ (index.html)│◀───│ (server.js) │◀───│ (in-process) │ +└─────────────┘ └──────────────────┘ └────────────────────┘ + │ │ + ┌──────┴───────┐ ┌──────┴───────┐ + │ ChatEngine │ │ Model (SLM) │ + │ + Context │ │ qwen / phi │ + └──────────────┘ └──────────────┘ +``` + +1. **Startup**: All markdown documents in `docs/` are loaded into memory. +2. **Model selection**: The SDK auto-selects the best model for the device's RAM. +3. **Query**: Each user question is matched to the most relevant docs via keyword scoring, then injected into the prompt alongside the question. +4. **Inference**: Foundry Local runs the model in-process — no HTTP server needed. + +## Prerequisites + +- **Node.js 20+** +- **Foundry Local** installed — see [foundrylocal.ai](https://foundrylocal.ai) + +## Quick Start + +```bash +# Install dependencies +npm install + +# Start the server +npm start +``` + +Open [http://localhost:3000](http://localhost:3000) in your browser. The UI shows real-time progress as the model loads. 
+ +## Configuration + +Set these environment variables (all optional): + +| Variable | Default | Description | +|----------|---------|-------------| +| `FOUNDRY_MODEL` | *(auto-select)* | Force a specific model alias (e.g., `phi-3.5-mini`) | +| `PORT` | `3000` | Server port | +| `HOST` | `localhost` | Server bind address | + +## Adding Domain Documents + +Place markdown files in the `docs/` folder with YAML front-matter: + +```markdown +--- +title: Your Document Title +category: Safety +id: unique-doc-id +--- + +# Your Document Title + +Content goes here... +``` + +The engine loads all `.md` files at startup and makes them available to the AI. + +## Project Structure + +``` +local-cag/ +├── package.json +├── README.md +├── docs/ # Domain knowledge (markdown with front-matter) +│ ├── gas-leak-detection.md +│ ├── emergency-shutdown.md +│ ├── pressure-testing.md +│ ├── ppe-requirements.md +│ └── valve-inspection.md +├── public/ +│ └── index.html # Web UI with loading overlay + chat +└── src/ + ├── server.js # Express server with SSE status + chat endpoints + ├── chatEngine.js # CAG engine: SDK init, model selection, inference + ├── config.js # Configuration (env vars + defaults) + ├── context.js # Document loading, parsing, keyword-based selection + ├── modelSelector.js # Dynamic model selection based on device RAM + └── prompts.js # System prompts (full + compact/edge mode) +``` + +## Key Features + +- **Dynamic model selection** — automatically picks the best model for the device's available RAM +- **Cache-aware** — skips download if the model is already in the Foundry cache +- **Edge mode** — toggle compact prompts for smaller models or constrained devices +- **SSE progress** — real-time loading status streamed to the browser +- **Keyword-based doc selection** — only the most relevant documents are injected per query +- **No internet required** — fully offline after initial model download + +## How It Differs From RAG + +| Feature | CAG (this sample) | RAG | 
+|---------|-------------------|-----| +| Document loading | All at startup | On-demand retrieval | +| Vector database | Not needed | Required | +| Embeddings | Not needed | Required | +| Latency | Lower (no retrieval) | Higher (search + retrieve) | +| Knowledge base size | Small–medium | Any size | +| Complexity | Simpler | More complex | + +## Learn More + +- [Foundry Local Documentation](https://foundrylocal.ai) +- [Foundry Local SDK (npm)](https://www.npmjs.com/package/foundry-local-sdk) +- [RAG sample](../local-rag/) — for larger knowledge bases with vector retrieval diff --git a/samples/js/local-cag/docs/emergency-shutdown.md b/samples/js/local-cag/docs/emergency-shutdown.md new file mode 100644 index 00000000..71200687 --- /dev/null +++ b/samples/js/local-cag/docs/emergency-shutdown.md @@ -0,0 +1,40 @@ +--- +title: Emergency Shutdown Procedures +category: Safety +id: emergency-shutdown +--- + +# Emergency Shutdown Procedures + +## Safety Warning +**An emergency shutdown (ESD) is a last-resort action.** Only initiate an ESD when there is an immediate threat to life, equipment, or the environment. Follow your site-specific ESD plan. + +## When to Initiate ESD +- Uncontrolled gas release exceeding 50% LEL +- Fire or explosion on site +- Equipment failure causing uncontrolled flow +- Structural collapse near pressurised systems +- Toxic gas detection above STEL (Short-Term Exposure Limit) + +## Procedure +1. **Sound the alarm** — activate the nearest emergency alarm point. +2. **Initiate ESD** — press the Emergency Shutdown button at the control panel. +3. **Isolate** — close the master isolation valve(s) upstream. +4. **Evacuate** — direct all personnel to the designated muster point. +5. **Account** — perform a headcount at the muster point. +6. **Notify** — call emergency services and the site supervisor. +7. **Do NOT re-enter** until the all-clear is given by the Incident Commander. + +## Post-ESD Actions +- Complete the ESD incident report within 4 hours. 
+- Inspect all equipment before restarting. +- Conduct a root-cause analysis within 48 hours. +- Brief all affected personnel before resuming operations. + +## ESD System Components +| Component | Location | Function | +|-----------|----------|----------| +| ESD Push Button | Control room, wellheads | Initiates shutdown sequence | +| Master Isolation Valve | Pipeline inlet | Stops gas flow to facility | +| Blowdown Valve | Process vessels | Depressurises equipment safely | +| Fire & Gas Panel | Control room | Monitors detectors, triggers alarms | diff --git a/samples/js/local-cag/docs/gas-leak-detection.md b/samples/js/local-cag/docs/gas-leak-detection.md new file mode 100644 index 00000000..59f15acd --- /dev/null +++ b/samples/js/local-cag/docs/gas-leak-detection.md @@ -0,0 +1,44 @@ +--- +title: Gas Leak Detection Procedures +category: Safety +id: gas-leak-detection +--- + +# Gas Leak Detection Procedures + +## Safety Warning +**Always assume a gas leak is dangerous until proven otherwise.** Evacuate the area if concentration exceeds 20% LEL (Lower Explosive Limit). Do NOT operate electrical equipment in suspected leak zones. + +## Detection Methods + +### Portable Gas Detector +1. Calibrate detector before each shift using known calibration gas. +2. Turn on and allow 60-second warm-up period. +3. Hold sensor 2–5 cm from suspected leak point. +4. Move slowly along pipe runs, joints, valves, and fittings. +5. Record readings at each test point on the inspection form. + +### Soap Bubble Test +1. Apply leak-detection fluid to joints and connections. +2. Observe for 30 seconds — bubbles indicate a leak. +3. Mark leak location with approved marker tape. +4. Do NOT use soap solution near high-temperature surfaces. + +### Ultrasonic Leak Detection +- Suitable for pressurised systems above 50 psi. +- Point the sensor at suspected areas and listen for high-frequency noise. +- Effective range: up to 15 metres in quiet environments. + +## Response Procedure +1. 
**Isolate** the gas supply upstream of the detected leak. +2. **Ventilate** the area — open doors, windows, or activate forced ventilation. +3. **Evacuate** if concentration exceeds 20% LEL. +4. **Report** the leak to the site supervisor and log in the incident system. +5. **Repair** only after the area is confirmed safe by a gas-free certificate. + +## PPE Requirements +- Flame-resistant clothing (FRC) +- Gas detector (personal monitor) +- Safety glasses with side shields +- Steel-toe boots +- Hard hat diff --git a/samples/js/local-cag/docs/ppe-requirements.md b/samples/js/local-cag/docs/ppe-requirements.md new file mode 100644 index 00000000..95a9b68d --- /dev/null +++ b/samples/js/local-cag/docs/ppe-requirements.md @@ -0,0 +1,54 @@ +--- +title: PPE Requirements for Gas Field Operations +category: Safety +id: ppe-requirements +--- + +# PPE Requirements for Gas Field Operations + +## Mandatory PPE (All Areas) +All personnel entering the operational area must wear: + +- **Hard hat** — ANSI Z89.1 Type I or II +- **Safety glasses** — with side shields, ANSI Z87.1 rated +- **Steel-toe boots** — ASTM F2413 rated, minimum 6-inch height +- **Flame-resistant clothing (FRC)** — minimum ATPV 8 cal/cm² +- **High-visibility vest** — Class 2 or higher +- **Hearing protection** — when noise exceeds 85 dB TWA + +## Additional PPE by Task + +### Gas Leak Investigation +- Personal gas monitor (4-gas: LEL, O₂, CO, H₂S) +- Chemical-resistant gloves +- Respiratory protection (if H₂S or low O₂ risk) + +### Welding Operations +- Welding helmet with auto-darkening lens +- Welding gloves (leather gauntlet) +- Leather apron or welding jacket +- Fire watch equipment + +### Confined Space Entry +- Full-body harness with retrieval line +- Supplied-air breathing apparatus (SABA) or SCBA +- Communication device (intrinsically safe) +- Atmospheric monitoring (continuous) + +### Pressure Testing +- Face shield (in addition to safety glasses) +- Impact-resistant gloves +- Blast shield at test 
points (pneumatic tests) + +## Inspection and Maintenance +- Inspect all PPE before each use. +- Replace damaged PPE immediately — do not field-repair. +- FRC: launder per manufacturer instructions; do not use bleach. +- Gas monitors: bump-test daily; full calibrate per manufacturer schedule. +- Hard hats: replace after any significant impact or per manufacturer expiry. + +## Regulatory References +- OSHA 29 CFR 1910.132 (General PPE requirements) +- OSHA 29 CFR 1910.134 (Respiratory protection) +- NFPA 2112 (Flame-resistant garments) +- API RP 74 (Recommended practice for occupational safety) diff --git a/samples/js/local-cag/docs/pressure-testing.md b/samples/js/local-cag/docs/pressure-testing.md new file mode 100644 index 00000000..2027c67c --- /dev/null +++ b/samples/js/local-cag/docs/pressure-testing.md @@ -0,0 +1,51 @@ +--- +title: Pressure Testing Procedures +category: Maintenance +id: pressure-testing +--- + +# Pressure Testing Procedures + +## Overview +Pressure testing verifies the integrity of pipelines, vessels, and fittings after installation, repair, or modification. Tests must be performed before commissioning or returning equipment to service. + +## Safety Warning +**Never exceed the maximum allowable working pressure (MAWP) of any component in the test section.** Ensure all personnel are at a safe distance during pressurisation. + +## Types of Pressure Tests + +### Hydrostatic Test (Water) +- **Preferred method** for most applications. +- Test medium: clean water (potable or treated). +- Test pressure: 1.5 × MAWP (held for minimum 30 minutes). +- Advantages: incompressible medium, lower stored energy. + +### Pneumatic Test (Air/Nitrogen) +- Used when water is impractical (e.g., sub-zero temperatures). +- Test pressure: 1.1 × MAWP (held for minimum 10 minutes). +- **Higher risk** due to stored energy — requires additional safety precautions. +- Mandatory exclusion zone: minimum 15 metres. + +## Procedure +1. 
**Isolate** the test section with rated blinds or isolation valves. +2. **Inspect** all connections, flanges, and fittings visually. +3. **Install** calibrated pressure gauges at both ends of the test section. +4. **Fill** the section with test medium, venting air from high points. +5. **Pressurise** gradually in 25% increments to test pressure. +6. **Hold** at test pressure for the required duration. +7. **Inspect** for leaks at all joints, welds, and connections. +8. **Record** start pressure, end pressure, temperature, and duration. +9. **Depressurise** gradually and remove test equipment. +10. **Document** results on the pressure test certificate. + +## Acceptance Criteria +- No visible leaks at any point. +- Pressure drop ≤ 1% over the hold period (accounting for temperature). +- All gauges agree within calibration tolerance. + +## Required Equipment +- Calibrated pressure gauges (2 minimum) +- Test pump (manual or powered) +- Isolation blinds or valves +- Bleed valves at high and low points +- Pressure test certificate forms diff --git a/samples/js/local-cag/docs/valve-inspection.md b/samples/js/local-cag/docs/valve-inspection.md new file mode 100644 index 00000000..dfcae228 --- /dev/null +++ b/samples/js/local-cag/docs/valve-inspection.md @@ -0,0 +1,44 @@ +--- +title: Valve Inspection and Maintenance +category: Maintenance +id: valve-inspection +--- + +# Valve Inspection and Maintenance + +## Overview +Regular valve inspection ensures safe, reliable gas flow control. All valves in the system must be inspected per the maintenance schedule and after any abnormal operating event. 
+ +## Inspection Schedule +| Valve Type | Routine Interval | Post-Event | +|------------|-----------------|------------| +| Gate valve | 12 months | After ESD or overpressure | +| Ball valve | 12 months | After ESD or overpressure | +| Check valve | 6 months | After reverse-flow incident | +| Relief valve | 12 months | After any lift event | +| Control valve | 6 months | After erratic operation | + +## Visual Inspection Checklist +1. Check for external corrosion or coating damage. +2. Inspect stem packing for leaks (apply soap solution). +3. Verify handwheel or actuator is secure and operable. +4. Check flange bolts for tightness (torque wrench). +5. Inspect body and bonnet for cracks or deformation. +6. Verify position indicator matches actual valve position. +7. Check drain and vent plugs for tightness. + +## Operational Test +1. Cycle the valve through full open/close range. +2. Verify smooth operation — no binding or excessive torque. +3. Check for seat leakage using downstream pressure gauge. +4. For actuated valves: test fail-safe action (remove air supply). +5. Record stroke time for actuated valves. + +## Common Faults +- **Stem packing leak**: Tighten gland nuts ¼ turn. If leak persists, repack. +- **Seat leak-through**: May indicate erosion or debris. Isolate, depressurise, inspect internals. +- **Stiff operation**: Lubricate stem per manufacturer specs. Check for corrosion or scale. +- **Actuator failure**: Check air supply pressure, solenoid valve, and positioner calibration. + +## Safety Warning +**Never attempt to repair a valve under pressure.** Always isolate and depressurise the section before disassembly. Use a lock-out/tag-out (LOTO) procedure. 
diff --git a/samples/js/local-cag/package.json b/samples/js/local-cag/package.json new file mode 100644 index 00000000..c591ce39 --- /dev/null +++ b/samples/js/local-cag/package.json @@ -0,0 +1,27 @@ +{ + "name": "gas-field-local-cag", + "version": "2.0.0", + "description": "Offline CAG-powered support agent for gas field engineers using Foundry Local. Pre-loads domain documents into the context window — no vector database, no embeddings, no retrieval pipeline.", + "type": "module", + "scripts": { + "start": "node src/server.js", + "dev": "node --watch src/server.js", + "test": "node --test test/*.test.js" + }, + "dependencies": { + "express": "^4.21.0", + "foundry-local-sdk": "^0.5.1" + }, + "license": "MIT", + "keywords": [ + "cag", + "offline-ai", + "foundry-local", + "context-augmented-generation", + "gas-field", + "support-agent" + ], + "engines": { + "node": ">=20.0.0" + } +} diff --git a/samples/js/local-cag/public/index.html b/samples/js/local-cag/public/index.html new file mode 100644 index 00000000..01f40369 --- /dev/null +++ b/samples/js/local-cag/public/index.html @@ -0,0 +1,724 @@ + + + + + + Gas Field Support Agent + + + + +
+
+

Gas Field Support Agent

+

Preparing your offline AI assistant

+
+
+
+

Connecting to server...

+

+
+
Load domain documents
+
Initialise Foundry Local
+
Select best model for device
+
Ensure model is available
+
Load model into memory
+
+
+
+ +
+

+ 🔧 + Gas Field Support Agent +

+
+ + Connecting... +
+
+ +
+ + + + + + + + +
+ +
+
+ Gas Field Support Agent – Ready
Running locally on this device using Context-Augmented Generation (CAG). No internet connection required.

+ All domain knowledge is pre-loaded. Ask me about: +
    +
  • Gas leak detection & safety procedures
  • +
  • Fault diagnosis & decision trees
  • +
  • Equipment maintenance & repair steps
  • +
  • Pressure testing & valve inspection
  • +
  • PPE requirements & compliance
  • +
+ Always follow your site-specific safety procedures. +
+
+ +
+ + +
+ + + + diff --git a/samples/js/local-cag/src/chatEngine.js b/samples/js/local-cag/src/chatEngine.js new file mode 100644 index 00000000..675edf10 --- /dev/null +++ b/samples/js/local-cag/src/chatEngine.js @@ -0,0 +1,222 @@ +/** + * Foundry Local chat engine – Context-Aware Generation (CAG). + * Uses the Foundry Local SDK (native bindings) to run inference + * directly in-process, with no HTTP round-trips to a local server. + * + * Architecture: CAG injects the full domain knowledge base into the + * system prompt at startup. No vector search, no embeddings, no + * retrieval step at query time. + */ +import { FoundryLocalManager } from "foundry-local-sdk"; +import { config } from "./config.js"; +import { selectBestModel } from "./modelSelector.js"; +import { SYSTEM_PROMPT, SYSTEM_PROMPT_COMPACT } from "./prompts.js"; +import { + loadDocuments, + buildDomainContext, + buildCompactContext, + findRelevantDocs, + buildSelectedContext, + buildDocumentIndex, + listDocuments, +} from "./context.js"; + +export class ChatEngine { + constructor() { + this.chatClient = null; + this.modelAlias = null; + this.compactMode = false; + this.docs = []; + this.domainContext = ""; + this.compactContext = ""; + this.docIndex = ""; + } + + /** + * Initialise the engine: load domain context, start Foundry Local, load model. + * @param {function} [onProgress] – callback receiving { stage, message, progress?, model? } + */ + async init(onProgress = () => {}) { + // 1. Pre-load all domain documents into memory + onProgress({ stage: "context", message: "Loading domain documents..." 
}); + console.log("[ChatEngine] Loading domain context..."); + this.docs = loadDocuments(); + this.domainContext = buildDomainContext(this.docs); + this.compactContext = buildCompactContext(this.docs); + this.docIndex = buildDocumentIndex(this.docs); + console.log( + `[ChatEngine] Context loaded: ${this.docs.length} documents (${this.domainContext.length} chars).` + ); + onProgress({ stage: "context", message: `Loaded ${this.docs.length} domain documents` }); + + // 2. Initialise Foundry Local SDK (native bindings, no CLI) + onProgress({ stage: "sdk", message: "Initialising Foundry Local SDK..." }); + console.log("[ChatEngine] Initialising Foundry Local SDK..."); + const manager = FoundryLocalManager.create({ appName: "gas-field-cag" }); + + // 3. Select the best model for this device (or use the forced alias) + onProgress({ stage: "selecting", message: "Selecting best model for this device..." }); + const { model, reason } = await selectBestModel(manager.catalog, { + forceModel: config.model || undefined, + ramBudgetPercent: config.ramBudgetPercent, + maxModelSizeMb: config.maxModelSizeMb, + }); + this.selectionReason = reason; + onProgress({ stage: "selected", message: `Selected model: ${model.alias}`, model: model.alias }); + + // 4. Download model if not cached + if (!model.isCached) { + console.log(`[ChatEngine] Downloading model ${model.alias}...`); + onProgress({ stage: "downloading", message: `Downloading ${model.alias}...`, progress: 0, model: model.alias }); + await model.download((progress) => { + process.stdout.write(`\r[ChatEngine] Download: ${progress.toFixed(0)}%`); + onProgress({ stage: "downloading", message: `Downloading ${model.alias}...`, progress, model: model.alias }); + }); + console.log(""); + } else { + onProgress({ stage: "cached", message: `${model.alias} is already cached`, model: model.alias }); + } + + // 5. 
Load model into memory + onProgress({ stage: "loading", message: `Loading ${model.alias} into memory...`, model: model.alias }); + console.log(`[ChatEngine] Loading model ${model.alias} into memory...`); + await model.load(); + this.modelAlias = model.alias; + console.log(`[ChatEngine] Model loaded: ${model.id} (${model.alias})`); + + // 6. Create a ChatClient for direct in-process inference + this.chatClient = model.createChatClient(); + this.chatClient.settings.temperature = 0.1; + console.log("[ChatEngine] ChatClient ready (in-process inference)."); + onProgress({ stage: "ready", message: "Ready", model: model.alias }); + } + + /** + * Get the list of loaded domain documents. + */ + getDocuments() { + return listDocuments(this.docs); + } + + /** + * Set compact mode for extreme latency / edge devices. + */ + setCompactMode(enabled) { + this.compactMode = enabled; + console.log(`[ChatEngine] Compact mode: ${enabled ? "ON" : "OFF"}`); + } + + /** + * Build the messages array with pre-loaded context injection. + * + * Prompt structure: + * System: role + behavioural rules + * System: full domain context (pre-loaded, not retrieved) + * ...conversation history... + * User: question + */ + _buildMessages(userMessage, history = []) { + const systemPrompt = this.compactMode + ? SYSTEM_PROMPT_COMPACT + : SYSTEM_PROMPT; + + const recentHistory = history + .slice(-4) + .filter((entry) => entry && typeof entry.content === "string" && entry.content.trim()) + .map((entry) => ({ role: entry.role, content: entry.content.trim() })); + + // Select only the most relevant documents for this query + const { docs: relevant, matched, terms } = findRelevantDocs( + userMessage, + this.docs, + config.maxContextDocs, + ); + const context = this.compactMode + ? buildCompactContext(relevant) + : buildSelectedContext(relevant, userMessage, { + terms, + maxCharsPerDoc: 1600, + maxSections: 2, + }); + const contextEnvelope = matched + ? 
`Relevant documents for this query:\n\n${context}` + : `Available documents:\n${this.docIndex}\n\nRelevant documents for this query:\n\n${context}`; + + console.log( + `[ChatEngine] Query context: ${relevant.length} docs ` + + `(${context.length} chars) – ${relevant.map((d) => d.id).join(", ")}`, + ); + + return [ + { role: "system", content: systemPrompt }, + { + role: "system", + content: contextEnvelope, + }, + ...recentHistory, + { role: "user", content: userMessage }, + ]; + } + + /** + * Generate a response for a user query (non-streaming). + */ + async query(userMessage, history = []) { + const messages = this._buildMessages(userMessage, history); + + this.chatClient.settings.maxTokens = this.compactMode ? 512 : 1024; + const response = await this.chatClient.completeChat(messages); + + return { + text: response.choices[0].message.content, + }; + } + + /** + * Generate a streaming response for a user query. + * Returns an async iterable of text chunks. + */ + async *queryStream(userMessage, history = []) { + const messages = this._buildMessages(userMessage, history); + + this.chatClient.settings.maxTokens = this.compactMode ? 
512 : 1024; + + // Collect streamed chunks via callback and yield them + const chunks = []; + let resolve; + let done = false; + + const promise = this.chatClient + .completeStreamingChat(messages, (chunk) => { + const content = chunk.choices?.[0]?.delta?.content; + if (content) { + chunks.push(content); + if (resolve) { + const r = resolve; + resolve = null; + r(); + } + } + }) + .then(() => { + done = true; + if (resolve) { + const r = resolve; + resolve = null; + r(); + } + }); + + let index = 0; + while (!done || index < chunks.length) { + if (index < chunks.length) { + yield { type: "text", data: chunks[index++] }; + } else { + await new Promise((r) => { resolve = r; }); + } + } + + // Ensure the streaming promise settles + await promise; + } +} diff --git a/samples/js/local-cag/src/config.js b/samples/js/local-cag/src/config.js new file mode 100644 index 00000000..8c928df5 --- /dev/null +++ b/samples/js/local-cag/src/config.js @@ -0,0 +1,35 @@ +// Application configuration – all paths relative to project root +import { fileURLToPath } from "url"; +import path from "path"; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); +const ROOT = path.resolve(__dirname, ".."); + +export const config = { + // Model – set FOUNDRY_MODEL to force a specific alias (e.g. "phi-3.5-mini"). + // When left empty the app auto-selects the best model for the device. + model: process.env.FOUNDRY_MODEL || "", + + // Maximum fraction of total system RAM the model may occupy (0–1). + ramBudgetPercent: parseFloat(process.env.RAM_BUDGET) || 0.6, + + // Maximum model file size in MB. Models larger than this are skipped + // even if they fit in the RAM budget. Keeps CPU inference practical. + // Set MAX_MODEL_MB to override (e.g. MAX_MODEL_MB=10240 for 10 GB). + maxModelSizeMb: parseInt(process.env.MAX_MODEL_MB, 10) || 8192, + + // Context (CAG) + docsDir: path.join(ROOT, "docs"), + + // Maximum number of documents injected per query. 
All documents are + // pre-loaded at startup but only the most relevant ones are included + // in each prompt to keep context small enough for CPU inference. + maxContextDocs: parseInt(process.env.MAX_CONTEXT_DOCS, 10) || 3, + + // Server + port: parseInt(process.env.PORT, 10) || 3000, + host: "127.0.0.1", + + // UI + publicDir: path.join(ROOT, "public"), +}; diff --git a/samples/js/local-cag/src/context.js b/samples/js/local-cag/src/context.js new file mode 100644 index 00000000..18251b73 --- /dev/null +++ b/samples/js/local-cag/src/context.js @@ -0,0 +1,301 @@ +/** + * Context module for Context-Augmented Generation (CAG). + * Reads all domain documents from the docs/ folder at startup + * and provides them as pre-loaded, structured context blocks. + * + * Unlike RAG (which retrieves chunks at query time), CAG injects + * the full domain knowledge into the prompt upfront — no vector + * search, no embeddings, no retrieval step. + */ +import fs from "fs"; +import path from "path"; +import { config } from "./config.js"; + +const STOP_WORDS = new Set([ + "about", "after", "before", "could", "field", "from", "have", + "into", "local", "mode", "need", "should", "that", "them", + "there", "these", "this", "what", "when", "with", "would", "your", +]); + +function getDocContent(doc) { + return (doc.content ?? doc.body ?? "").trim(); +} + +function normalize(text) { + return String(text || "").toLowerCase(); +} + +function tokenize(text) { + return normalize(text) + .split(/[^a-z0-9]+/) + .filter((term) => term.length > 2 && !STOP_WORDS.has(term)); +} + +function uniqueTerms(text) { + return [...new Set(tokenize(text))]; +} + +function trimToLength(text, maxLength) { + if (!text || text.length <= maxLength) return text; + const slice = text.slice(0, maxLength); + const lastBreak = Math.max(slice.lastIndexOf("\n"), slice.lastIndexOf(". ")); + return `${slice.slice(0, lastBreak > 200 ? 
lastBreak : maxLength).trim()}\n...`; +} + +function splitSections(content) { + const lines = content.split("\n"); + const sections = []; + let heading = "Overview"; + let bodyLines = []; + + const pushSection = () => { + const body = bodyLines.join("\n").trim(); + if (!heading && !body) return; + sections.push({ + heading, + body, + text: [heading, body].filter(Boolean).join("\n"), + normalizedHeading: normalize(heading), + normalizedBody: normalize(body), + }); + }; + + for (const line of lines) { + if (/^#{1,3}\s+/.test(line)) { + pushSection(); + heading = line.trim(); + bodyLines = []; + continue; + } + bodyLines.push(line); + } + pushSection(); + return sections.filter((s) => s.body || s.heading !== "Overview"); +} + +function extractCompactContent(content) { + const lines = content.split("\n"); + const keyLines = []; + let inSafety = false; + let inProcedure = false; + + for (const line of lines) { + if (/^##\s*(safety|warning)/i.test(line)) { + inSafety = true; + inProcedure = false; + keyLines.push(line); + } else if (/^##\s*procedure/i.test(line)) { + inProcedure = true; + inSafety = false; + keyLines.push(line); + } else if (/^##\s/.test(line)) { + inSafety = false; + inProcedure = false; + } else if (inSafety || inProcedure) { + keyLines.push(line); + } + } + + if (keyLines.length > 0) return keyLines.join("\n").trim(); + return lines.filter((l) => l.trim()).slice(0, 5).join("\n"); +} + +function buildSectionText(section, maxLength) { + const heading = section.heading === "Overview" ? 
"" : section.heading; + return trimToLength([heading, section.body].filter(Boolean).join("\n"), maxLength); +} + +function scoreSection(section, terms) { + let score = 0; + for (const term of terms) { + if (section.normalizedHeading.includes(term)) score += 5; + if (section.normalizedBody.includes(term)) score += 2; + } + return score; +} + +function buildFocusedDocContext(doc, terms, { compact = false, maxCharsPerDoc = 1600, maxSections = 2 } = {}) { + const titleLine = `--- ${doc.title} [${doc.id}] ---`; + + if (compact) { + const compactContent = trimToLength(doc.compactContent || extractCompactContent(getDocContent(doc)), maxCharsPerDoc); + return [titleLine, compactContent].join("\n"); + } + + const sections = Array.isArray(doc.sections) && doc.sections.length > 0 + ? doc.sections + : splitSections(getDocContent(doc)); + + if (terms.length === 0) { + return [titleLine, trimToLength(getDocContent(doc), maxCharsPerDoc)].join("\n"); + } + + const ranked = sections + .map((section) => ({ section, score: scoreSection(section, terms) })) + .sort((a, b) => b.score - a.score); + + const positiveMatches = ranked.filter((e) => e.score > 0).slice(0, maxSections); + const chosen = positiveMatches.length > 0 ? 
positiveMatches : ranked.slice(0, 1); + + let remaining = maxCharsPerDoc; + const blocks = []; + for (const entry of chosen) { + if (remaining <= 0) break; + const sectionText = buildSectionText(entry.section, remaining); + if (!sectionText) continue; + blocks.push(sectionText); + remaining -= sectionText.length + 2; + } + + const content = blocks.join("\n\n") || trimToLength(getDocContent(doc), maxCharsPerDoc); + return [titleLine, content].join("\n"); +} + +export function buildSearchTerms(query) { + return uniqueTerms(query); +} + +export function findRelevantDocs(query, docs, maxDocs = 3) { + const terms = buildSearchTerms(query); + + if (terms.length === 0) { + return { docs: docs.slice(0, maxDocs), matched: false, terms }; + } + + const scored = docs.map((doc) => { + const searchTitle = doc.searchTitle || normalize(doc.title); + const searchCategory = doc.searchCategory || normalize(doc.category); + const searchContent = doc.searchContent || normalize(getDocContent(doc)); + let score = 0; + for (const term of terms) { + if (searchTitle.includes(term)) score += 8; + if (searchCategory.includes(term)) score += 3; + if (searchContent.includes(term)) score += 1; + } + return { doc, score }; + }); + + scored.sort((a, b) => b.score - a.score); + const selected = scored.slice(0, maxDocs).filter((e) => e.score > 0); + + return { + docs: selected.length > 0 ? selected.map((e) => e.doc) : docs.slice(0, maxDocs), + matched: selected.length > 0, + terms, + }; +} + +/** + * Parse YAML-like front-matter from a markdown document. 
+ */ +export function parseFrontMatter(text) { + const match = text.match(/^---\r?\n([\s\S]*?)\r?\n---\r?\n([\s\S]*)$/); + if (!match) return { meta: {}, body: text }; + + const meta = {}; + for (const line of match[1].split("\n")) { + const idx = line.indexOf(":"); + if (idx > 0) { + meta[line.slice(0, idx).trim()] = line.slice(idx + 1).trim(); + } + } + return { meta, body: match[2] }; +} + +/** + * Load all markdown documents from the docs/ folder. + */ +export function loadDocuments() { + const docsDir = config.docsDir; + if (!fs.existsSync(docsDir)) { + console.warn(`[Context] Docs directory not found: ${docsDir}`); + return []; + } + + const files = fs.readdirSync(docsDir).filter((f) => f.endsWith(".md")).sort(); + + const docs = []; + for (const file of files) { + const raw = fs.readFileSync(path.join(docsDir, file), "utf-8"); + const { meta, body } = parseFrontMatter(raw); + const content = body.trim(); + + docs.push({ + id: meta.id || path.basename(file, ".md"), + title: meta.title || file, + category: meta.category || "General", + content, + compactContent: extractCompactContent(content), + sections: splitSections(content), + searchTitle: normalize(meta.title || file), + searchCategory: normalize(meta.category || "General"), + searchContent: normalize(content), + }); + } + + return docs; +} + +/** + * Build the full domain context block from all loaded documents. 
+ */ +export function buildDomainContext(docs) { + if (docs.length === 0) return ""; + + const categories = new Map(); + for (const doc of docs) { + if (!categories.has(doc.category)) categories.set(doc.category, []); + categories.get(doc.category).push(doc); + } + + const sections = []; + for (const [category, categoryDocs] of categories) { + sections.push(`=== ${category} ===`); + for (const doc of categoryDocs) { + sections.push(`--- ${doc.title} [${doc.id}] ---`); + sections.push(getDocContent(doc)); + sections.push(""); + } + } + + return sections.join("\n"); +} + +/** + * Build a compact context summary for edge/constrained devices. + */ +export function buildCompactContext(docs) { + if (docs.length === 0) return ""; + + const sections = []; + for (const doc of docs) { + sections.push(`--- ${doc.title} [${doc.id}] ---`); + sections.push(doc.compactContent || extractCompactContent(getDocContent(doc))); + sections.push(""); + } + + return sections.join("\n"); +} + +/** + * Build context from a subset of selected documents. + */ +export function buildSelectedContext(docs, query = "", options = {}) { + const terms = options.terms || buildSearchTerms(query); + const sections = docs.map((doc) => buildFocusedDocContext(doc, terms, options)); + return sections.join("\n\n"); +} + +/** + * Build a short document index listing all available topics. + */ +export function buildDocumentIndex(docs) { + return docs.map((d) => `- ${d.title} [${d.id}]`).join("\n"); +} + +/** + * Get a list of loaded documents (for the /api/context endpoint). 
+ */ +export function listDocuments(docs) { + return docs.map((d) => ({ id: d.id, title: d.title, category: d.category })); +} diff --git a/samples/js/local-cag/src/modelSelector.js b/samples/js/local-cag/src/modelSelector.js new file mode 100644 index 00000000..36d98910 --- /dev/null +++ b/samples/js/local-cag/src/modelSelector.js @@ -0,0 +1,115 @@ +/** + * Dynamic model selector – picks the best Foundry Local model for + * the current device based on available system RAM and the SDK catalogue. + * + * Selection strategy: + * 1. Enumerate all chat-completion models from the catalogue. + * 2. Exclude models that are too large for available RAM. + * 3. Rank remaining models by a quality preference order. + * 4. Boost cached models to avoid lengthy downloads. + * 5. Return the best match. + */ +import os from "os"; + +// Chat models ranked by quality for domain Q&A tasks (best first). +const QUALITY_RANK = [ + "qwen2.5-7b", + "qwen2.5-14b", + "phi-4", + "gpt-oss-20b", + "mistral-7b-v0.2", + "phi-4-mini-reasoning", + "phi-3.5-mini", + "phi-3-mini-128k", + "phi-3-mini-4k", + "qwen2.5-1.5b", + "qwen2.5-0.5b", +]; + +// Aliases to skip (not suited for domain Q&A chat) +const SKIP_ALIASES = new Set([ + "qwen2.5-coder-0.5b", + "qwen2.5-coder-1.5b", + "qwen2.5-coder-7b", + "qwen2.5-coder-14b", +]); + +/** + * Pick the best chat model from the Foundry Local catalogue that + * fits within the device's RAM budget. 
+ * + * @param {object} catalog – FoundryLocalManager.catalog instance + * @param {object} [opts] + * @param {number} [opts.ramBudgetPercent=0.6] – fraction of total RAM + * @param {number} [opts.maxModelSizeMb=4096] – hard cap on model file size in MB + * @param {string} [opts.forceModel] – bypass selection and use this alias + * @returns {Promise<{model, reason: string}>} + */ +export async function selectBestModel(catalog, opts = {}) { + const forceAlias = opts.forceModel || process.env.FOUNDRY_MODEL; + if (forceAlias) { + const model = await catalog.getModel(forceAlias); + return { model, reason: `forced via ${opts.forceModel ? "config" : "FOUNDRY_MODEL env"}` }; + } + + const totalRamMb = os.totalmem() / (1024 * 1024); + const budgetPercent = opts.ramBudgetPercent ?? 0.6; + const budgetMb = totalRamMb * budgetPercent; + const maxSizeMb = opts.maxModelSizeMb ?? 4096; + + console.log( + `[ModelSelector] System RAM: ${(totalRamMb / 1024).toFixed(1)} GB ` + + `| Budget (${(budgetPercent * 100).toFixed(0)}%): ${(budgetMb / 1024).toFixed(1)} GB` + + ` | Max model size: ${(maxSizeMb / 1024).toFixed(1)} GB` + ); + + const allModels = await catalog.getModels(); + + // Filter to chat-completion models that fit within the RAM budget + const candidates = []; + for (const m of allModels) { + const info = m.selectedVariant?._modelInfo; + if (!info) continue; + if (info.task !== "chat-completion") continue; + if (SKIP_ALIASES.has(info.alias)) continue; + if (info.fileSizeMb > budgetMb) { + console.log(`[ModelSelector] skip ${info.alias} (${(info.fileSizeMb / 1024).toFixed(1)} GB > RAM budget)`); + continue; + } + if (info.fileSizeMb > maxSizeMb) { + console.log(`[ModelSelector] skip ${info.alias} (${(info.fileSizeMb / 1024).toFixed(1)} GB > max model size)`); + continue; + } + candidates.push({ model: m, info }); + } + + if (candidates.length === 0) { + throw new Error( + "No chat model fits within the available RAM budget " + + `(${(budgetMb / 1024).toFixed(1)} GB). 
` + + "Try increasing ramBudgetPercent or freeing memory." + ); + } + + // Score each candidate: quality rank + cache bonus + const scored = candidates.map(({ model, info }) => { + const rankIndex = QUALITY_RANK.indexOf(info.alias); + const qualityScore = rankIndex >= 0 + ? (QUALITY_RANK.length - rankIndex) * 10 + : 1; + const cacheBonus = info.cached ? 5 : 0; + const score = qualityScore + cacheBonus; + return { model, info, score }; + }); + + scored.sort((a, b) => b.score - a.score); + + const best = scored[0]; + const reason = + `auto-selected (${(best.info.fileSizeMb / 1024).toFixed(1)} GB, ` + + `${best.info.cached ? "cached" : "will download"}, ` + + `rank ${scored.indexOf(best) + 1}/${scored.length})`; + + console.log(`[ModelSelector] Selected: ${best.info.alias} – ${reason}`); + return { model: best.model, reason }; +} diff --git a/samples/js/local-cag/src/prompts.js b/samples/js/local-cag/src/prompts.js new file mode 100644 index 00000000..8f1244b5 --- /dev/null +++ b/samples/js/local-cag/src/prompts.js @@ -0,0 +1,44 @@ +// Gas Field Agent – System Prompt (Context-Augmented Generation) +export const SYSTEM_PROMPT = `You are a local, offline customer services and technical support agent for gas field inspection and maintenance engineers. + +Context: +- You run entirely on-device with no internet connectivity. +- You are embedded in a field application used during live gas infrastructure inspections and repairs. +- Your responses must be accurate, concise, safety-first, and aligned with gas engineering standards and field maintenance procedures. +- You have been provided with the complete domain knowledge base as pre-loaded context. This includes approved gas engineering manuals, inspection procedures, fault codes, safety guidance, and maintenance playbooks. + +Primary Objectives: +1. Assist engineers in diagnosing issues encountered during gas field inspections. +2. Provide step-by-step repair and maintenance guidance. +3. 
Surface relevant safety warnings before any action. +4. Reference applicable standards, procedures, and documentation from the provided context. +5. Operate reliably in offline, constrained environments. + +Behaviour Rules: +- Always prioritise safety. If a procedure involves risk, explicitly call it out. +- Do not hallucinate procedures, measurements, tolerances, or legal requirements. +- If the answer is not present in the provided domain context, say: + "This information is not available in the local knowledge base." +- Use clear, structured responses suitable for field engineers wearing PPE. +- Prefer bullet points and numbered steps. +- Assume noisy, time-critical environments. +- Keep answers SHORT – engineers are in the field. + +Response Format: +- **Summary** (1–2 lines) +- **Safety Warnings** (if applicable) +- **Step-by-step Guidance** +- **Reference** (document name + section) + +You must only use information from the domain context provided in this conversation.`; + +// Compact prompt variant for extreme latency / edge devices +export const SYSTEM_PROMPT_COMPACT = `You are an offline gas field support agent. Safety-first. Concise answers only. + +Rules: +- Prioritise safety warnings before any action. +- Use bullet points and numbered steps. +- If info is missing from the provided context, say: "Not in local knowledge base." +- Never invent procedures, tolerances, or legal requirements. + +Format: Summary → Safety → Steps → Reference.`; diff --git a/samples/js/local-cag/src/server.js b/samples/js/local-cag/src/server.js new file mode 100644 index 00000000..0a4727cc --- /dev/null +++ b/samples/js/local-cag/src/server.js @@ -0,0 +1,186 @@ +/** + * Express server – Gas Field CAG Application. + * Serves the web UI and provides the /api/chat endpoint. + * Fully offline, connects to Foundry Local on dynamic port. 
+ * + * Uses Context-Aware Generation (CAG): all domain knowledge is + * pre-loaded at startup and injected into the prompt — no retrieval, + * no vector search, no embeddings. + */ +import express from "express"; +import path from "path"; +import { config } from "./config.js"; +import { ChatEngine } from "./chatEngine.js"; + +const app = express(); + +// ── Security headers ── +app.use((_req, res, next) => { + res.setHeader("X-Content-Type-Options", "nosniff"); + res.setHeader("X-Frame-Options", "DENY"); + res.setHeader("Referrer-Policy", "no-referrer"); + res.setHeader("Permissions-Policy", "camera=(), microphone=(), geolocation=()"); + res.setHeader( + "Content-Security-Policy", + "default-src 'self'; script-src 'self' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; img-src 'self' data:;" + ); + next(); +}); + +app.use(express.json({ limit: "1mb" })); +app.use(express.static(config.publicDir)); + +// ── Chat engine instance ── +const engine = new ChatEngine(); + +// ── Initialisation state (broadcast to connected SSE clients) ── +let initState = { stage: "starting", message: "Starting up..." }; +const statusClients = new Set(); + +function broadcastStatus(state) { + initState = state; + const payload = `data: ${JSON.stringify(state)}\n\n`; + for (const client of statusClients) { + client.write(payload); + } +} + +// ── API: Server-Sent Events for initialisation status ── +app.get("/api/status", (_req, res) => { + res.setHeader("Content-Type", "text/event-stream"); + res.setHeader("Cache-Control", "no-cache"); + res.setHeader("Connection", "keep-alive"); + // Send current state immediately + res.write(`data: ${JSON.stringify(initState)}\n\n`); + statusClients.add(res); + _req.on("close", () => statusClients.delete(res)); +}); + +// ── Guard: reject chat requests while model is loading ── +function requireReady(_req, res, next) { + if (initState.stage !== "ready") { + return res.status(503).json({ + error: "Model is still loading. 
// ── API: Chat (non-streaming) ──
// Body: { message: string, history?: message[], compact?: boolean }.
// Returns the engine's full result object, or a JSON 400/500 error.
app.post("/api/chat", requireReady, async (req, res) => {
  try {
    const { message, history, compact } = req.body;
    if (!message || typeof message !== "string") {
      return res.status(400).json({ error: "message is required" });
    }

    if (compact !== undefined) engine.setCompactMode(!!compact);

    const result = await engine.query(
      message,
      Array.isArray(history) ? history : []
    );
    res.json(result);
  } catch (err) {
    console.error("[API] Error:", err.message);
    res.status(500).json({ error: "Internal server error" });
  }
});

// ── API: Chat (streaming via SSE) ──
// Same body as /api/chat, but streams JSON chunks as Server-Sent Events
// and terminates the stream with a literal "[DONE]" frame.
app.post("/api/chat/stream", requireReady, async (req, res) => {
  try {
    const { message, history, compact } = req.body;
    if (!message || typeof message !== "string") {
      return res.status(400).json({ error: "message is required" });
    }

    if (compact !== undefined) engine.setCompactMode(!!compact);

    res.setHeader("Content-Type", "text/event-stream");
    res.setHeader("Cache-Control", "no-cache");
    res.setHeader("Connection", "keep-alive");

    const stream = engine.queryStream(
      message,
      Array.isArray(history) ? history : []
    );

    for await (const chunk of stream) {
      res.write(`data: ${JSON.stringify(chunk)}\n\n`);
    }

    res.write("data: [DONE]\n\n");
    res.end();
  } catch (err) {
    console.error("[API] Stream error:", err.message);
    // Fix: if the failure happened before any SSE bytes were written, send a
    // proper HTTP error instead of an SSE frame on a non-SSE response — the
    // client would otherwise try to parse an event stream with default headers.
    if (!res.headersSent) {
      return res.status(500).json({ error: "Internal server error" });
    }
    res.write(`data: ${JSON.stringify({ type: "error", data: "Internal server error" })}\n\n`);
    res.end();
  }
});

// ── API: List pre-loaded context documents ──
app.get("/api/context", (_req, res) => {
  try {
    const docs = engine.getDocuments();
    res.json({ docs, count: docs.length });
  } catch (err) {
    console.error("[API] Context list error:", err.message);
    res.status(500).json({ error: "Failed to list context documents" });
  }
});

// ── API: Health check ──
app.get("/api/health", (_req, res) => {
  res.json({
    status: "ok",
    model: engine.modelAlias,
    modelSelection: engine.selectionReason,
    architecture: "CAG",
  });
});

// ── Fallback: serve index.html for SPA ──
app.get("*", (req, res) => {
  // Fix: unknown API routes should 404 as JSON, not return the SPA shell,
  // so API clients never receive HTML where they expect JSON.
  if (req.path.startsWith("/api/")) {
    return res.status(404).json({ error: "Not found" });
  }
  res.sendFile(path.join(config.publicDir, "index.html"));
});

// ── Start server FIRST so the frontend can connect for status updates ──
// Binds the HTTP listener before engine initialisation so the UI can watch
// progress while the model downloads/loads; closes the listener again if
// the engine fails to initialise.
async function start() {
  console.log("=== Gas Field CAG – Local Support Agent ===\n");

  const server = await new Promise((resolve, reject) => {
    const candidate = app.listen(config.port, config.host, () => {
      console.log(`[Server] Running at http://${config.host}:${config.port}`);
      console.log("[Server] Fully offline – no outbound connections.");
      console.log("[Server] Architecture: Context-Aware Generation (CAG)");
      console.log("[Server] Initialising engine – open the browser to see progress...\n");
      resolve(candidate);
    });

    candidate.once("error", (err) => {
      if (err.code === "EADDRINUSE") {
        console.error(`[Server] Port ${config.port} is already in use.`);
        console.error("[Server] Stop the other process or set a different PORT.");
      } else {
        console.error("[Server] Failed to start:", err.message);
      }
      reject(err);
    });
  });

  try {
    // Initialise engine AFTER the server is confirmed listening, broadcasting progress
    await engine.init(broadcastStatus);
    console.log("\n[Server] Engine ready – accepting requests.\n");
  } catch (err) {
    // Engine failed: take the listener down so the process exits cleanly.
    server.close();
    throw err;
  }
}

start().catch((err) => {
  console.error("Failed to start:", err);
  process.exit(1);
});
**Query**: Each user question is vectorised using TF-IDF, then cosine similarity finds the top-K most relevant chunks. +3. **Prompt**: Retrieved chunks are injected into the system prompt with source citations. +4. **Inference**: Foundry Local runs the model in-process — no external HTTP server needed. + +## Prerequisites + +- **Node.js 20+** +- **Foundry Local** installed — see [foundrylocal.ai](https://foundrylocal.ai) + +## Quick Start + +```bash +# Install dependencies +npm install + +# Ingest sample documents into the vector store +npm run ingest + +# Start the server +npm start +``` + +Open [http://localhost:3000](http://localhost:3000) in your browser. The UI shows real-time progress as the model loads. + +## Configuration + +Set these environment variables (all optional): + +| Variable | Default | Description | +|----------|---------|-------------| +| `FOUNDRY_MODEL` | `phi-3.5-mini` | Model alias to use | +| `PORT` | `3000` | Server port | +| `HOST` | `127.0.0.1` | Server bind address | + +## Adding Documents + +### Option 1: File System + +Place markdown files in the `docs/` folder with YAML front-matter, then re-run `npm run ingest`: + +```markdown +--- +title: Your Document Title +category: Safety +id: unique-doc-id +--- + +# Your Document Title + +Content goes here... +``` + +### Option 2: Web UI Upload + +Click the **📄** button in the chat interface to upload `.md` or `.txt` files at runtime. Documents are chunked and indexed immediately — no restart required. 
+ +## Project Structure + +``` +local-rag/ +├── package.json +├── README.md +├── docs/ # Domain knowledge (markdown with front-matter) +│ ├── gas-leak-detection.md +│ ├── emergency-shutdown.md +│ ├── pressure-testing.md +│ ├── ppe-requirements.md +│ └── valve-inspection.md +├── public/ +│ └── index.html # Web UI with upload, chat, source citations +├── data/ # Created at ingest time +│ └── rag.db # SQLite vector store +└── src/ + ├── server.js # Express server with SSE status + chat + upload + ├── chatEngine.js # RAG engine: SDK init, retrieval, inference + ├── config.js # Configuration (model, chunking, paths) + ├── chunker.js # Document parsing, chunking, TF-IDF math + ├── vectorStore.js # SQLite-backed vector store with inverted index + ├── ingest.js # Batch document ingestion script + └── prompts.js # System prompts (full + compact/edge mode) +``` + +## Key Features + +- **Cache-aware** — skips model download if already in the Foundry cache +- **TF-IDF vector search** — no embedding model needed; lightweight and fast +- **SQLite storage** — single-file database, no external services +- **Runtime document upload** — add documents via the web UI without restarting +- **Source citations** — each response shows which chunks were used and their relevance scores +- **SSE progress** — real-time loading status streamed to the browser +- **Edge mode** — toggle compact prompts for smaller models or constrained devices +- **No internet required** — fully offline after initial model download + +## How It Differs From CAG + +| Feature | RAG (this sample) | CAG | +|---------|-------------------|-----| +| Document loading | Chunked + indexed | All loaded at startup | +| Vector search | TF-IDF + cosine similarity | Keyword scoring | +| Storage | SQLite database | In-memory | +| Knowledge base size | Any size | Small–medium | +| Runtime upload | Yes | No | +| Source citations | Chunk-level with scores | Document-level | +| Complexity | More complex | Simpler | + +## Learn More 
+ +- [Foundry Local Documentation](https://foundrylocal.ai) +- [Foundry Local SDK (npm)](https://www.npmjs.com/package/foundry-local-sdk) +- [CAG sample](../local-cag/) — for simpler use-cases where all docs fit in one prompt diff --git a/samples/js/local-rag/docs/emergency-shutdown.md b/samples/js/local-rag/docs/emergency-shutdown.md new file mode 100644 index 00000000..f2794148 --- /dev/null +++ b/samples/js/local-rag/docs/emergency-shutdown.md @@ -0,0 +1,53 @@ +--- +title: Emergency Shutdown Procedures +category: Safety +id: emergency-shutdown +--- + +# Emergency Shutdown (ESD) Procedures + +## When to Activate ESD +Activate the Emergency Shutdown system when any of the following occur: +- Confirmed uncontrolled gas release +- Fire detected on site +- High-high pressure alarm on any vessel +- Loss of containment from process equipment +- Structural failure of a pressure vessel or pipeline +- Direction from the Site Emergency Coordinator + +## ESD Levels + +### Level 1 – Unit Shutdown +- Shuts down a single production unit. +- Isolates fuel gas and process feeds to the affected unit. +- Activates local fire suppression (deluge) if configured. + +### Level 2 – Plant Shutdown +- Shuts down all production units on the facility. +- Closes all incoming and outgoing pipeline isolation valves. +- Activates emergency ventilation and general alarm. + +### Level 3 – Total Facility Evacuation +- Triggers Level 2 actions plus facility-wide muster. +- Activates public address and perimeter alarms. +- Notifies external emergency services automatically. + +## Manual Activation +1. Break the glass cover on the nearest ESD push-button (located at muster points and key access routes). +2. Push and hold the button for 3 seconds until the indicator light turns red. +3. The system cannot be reset from the field — reset is performed from the control room only. + +## Post-ESD Actions +1. Confirm all personnel are accounted for at the muster point. +2. 
Perform gas surveys before re-entering the affected area. +3. Complete the ESD Event Report within 4 hours. +4. Do NOT restart equipment until the Site Emergency Coordinator authorises. + +## ESD Push-Button Locations +| Location | ID | Distance from Well Pad | +|----------|----|----------------------| +| Main Gate | ESD-001 | N/A | +| Separator Area | ESD-002 | 30 m | +| Compressor Building | ESD-003 | 45 m | +| Flare Stack | ESD-004 | 100 m | +| Control Room | ESD-005 | 60 m | diff --git a/samples/js/local-rag/docs/gas-leak-detection.md b/samples/js/local-rag/docs/gas-leak-detection.md new file mode 100644 index 00000000..9dc3cc44 --- /dev/null +++ b/samples/js/local-rag/docs/gas-leak-detection.md @@ -0,0 +1,46 @@ +--- +title: Gas Leak Detection Procedures +category: Safety +id: gas-leak-detection +--- + +# Gas Leak Detection Procedures + +## Safety Warning +**Always assume a gas leak is dangerous until proven otherwise.** Evacuate the area if concentration exceeds 20% LEL (Lower Explosive Limit). Do NOT operate electrical equipment in suspected leak zones. + +## Detection Methods + +### Portable Gas Detector +1. Calibrate detector before each shift using known calibration gas. +2. Turn on and allow 60-second warm-up period. +3. Hold sensor 2–5 cm from suspected leak point. +4. Move slowly along pipe runs, joints, valves, and fittings. +5. Record readings at each test point on the inspection form. + +### Soap Bubble Test +1. Apply leak-detection fluid to joints and connections. +2. Observe for 30 seconds — bubbles indicate a leak. +3. Mark leak location with approved marker tape. +4. Do NOT use soap solution near high-temperature surfaces. + +### Ultrasonic Leak Detection +- Suitable for pressurised systems above 50 psi. +- Point the sensor at suspected areas and listen for high-frequency noise. +- Effective range: up to 15 metres in quiet environments. + +## Response Procedure +1. **Evacuate** — Move upwind, minimum 50 metres from the leak source. +2. 
**Notify** — Contact the control room immediately: radio channel 5 or emergency phone. +3. **Isolate** — Close the nearest upstream and downstream isolation valves if safe to do so. +4. **Ventilate** — Open doors/vents in enclosed spaces. Use intrinsically safe fans only. +5. **Monitor** — Take continuous gas readings at the perimeter. Do not re-enter until readings are below 10% LEL for 15 consecutive minutes. + +## Common Leak Sources +| Component | Failure Mode | Check Interval | +|-----------|-------------|----------------| +| Flange gaskets | Deterioration, bolt relaxation | Monthly | +| Valve stems | Packing wear | Quarterly | +| Threaded connections | Vibration loosening | Monthly | +| Instrument tubing | Fatigue cracking | Bi-annually | +| Relief valve outlets | Seat erosion | Annually | diff --git a/samples/js/local-rag/docs/ppe-requirements.md b/samples/js/local-rag/docs/ppe-requirements.md new file mode 100644 index 00000000..e7d56994 --- /dev/null +++ b/samples/js/local-rag/docs/ppe-requirements.md @@ -0,0 +1,50 @@ +--- +title: Personal Protective Equipment Requirements +category: Safety +id: ppe-requirements +--- + +# Personal Protective Equipment (PPE) Requirements + +## Minimum PPE – All Site Personnel +Every person entering the operational area must wear: +- Hard hat (EN 397 or ANSI Z89.1) +- Safety glasses with side shields (EN 166 or ANSI Z87.1) +- Steel-toe safety boots (EN ISO 20345 S3 minimum) +- High-visibility vest or coveralls (EN ISO 20471 Class 2+) +- Flame-resistant (FR) coveralls in hydrocarbon processing areas + +## Task-Specific PPE + +### Gas Testing / Confined Space Entry +| Item | Standard | Notes | +|------|----------|-------| +| Personal gas monitor (4-gas) | EN 60079-29-1 | Bump-test daily | +| Full-face respirator | EN 136 | With gas-specific cartridge | +| Harness + retrieval line | EN 361 / EN 1496 | Mandatory for confined spaces | +| Intrinsically safe torch | IECEx / ATEX Zone 1 | No standard torches permitted | + +### 
Welding / Hot Work +| Item | Standard | Notes | +|------|----------|-------| +| Welding helmet (auto-darkening) | EN 379 shade 9–13 | Depending on process | +| Leather welding gloves | EN 12477 Type A | Replace when damaged | +| Leather apron or sleeve guards | EN ISO 11611 Class 2 | For overhead work | +| Fire blanket | EN 1869 | Within 3 metres of work area | + +### Chemical Handling +| Item | Standard | Notes | +|------|----------|-------| +| Chemical-resistant gloves | EN 374 | Check permeation data for specific chemical | +| Face shield | EN 166 grade B | In addition to safety glasses | +| Chemical splash suit | EN 14605 Type 4 | For pressurised spray risk | +| Emergency eye-wash station | EN 15154-2 | Within 10 seconds walking distance | + +## PPE Inspection +- Inspect all PPE **before each use**. +- Replace any cracked, torn, or degraded equipment immediately. +- Maintain a PPE inspection log — audited quarterly. +- FR clothing must be industrially laundered (not domestic washing machines). + +## Disciplinary Policy +Failure to wear mandatory PPE is a **zero-tolerance violation**. First offence: verbal warning and immediate correction. Second offence: written warning. Third offence: removal from site. diff --git a/samples/js/local-rag/docs/pressure-testing.md b/samples/js/local-rag/docs/pressure-testing.md new file mode 100644 index 00000000..712546e3 --- /dev/null +++ b/samples/js/local-rag/docs/pressure-testing.md @@ -0,0 +1,52 @@ +--- +title: Pressure Testing Procedures +category: Maintenance +id: pressure-testing +--- + +# Pressure Testing Procedures + +## Overview +All newly installed or repaired piping and vessels must undergo pressure testing before being placed into service. This procedure covers both hydrostatic and pneumatic methods. + +## Test Methods + +### Hydrostatic Test (Preferred) +- Test medium: clean, fresh water (inhibited if ambient temperature < 5°C). +- Test pressure: **1.5 × design pressure** for piping; **1.3 × MAWP** for vessels. 
+- Hold time: minimum **30 minutes** after temperature stabilisation. +- Acceptance: no visible leaks, no pressure drop greater than 2% over hold period. + +### Pneumatic Test (When Hydrostatic Not Feasible) +- Test medium: dry nitrogen or clean, dry air. +- Test pressure: **1.1 × design pressure**. +- Hold time: minimum **10 minutes**. +- **Hazard warning**: stored energy in compressed gas is significantly higher than liquid — maintain a 15-metre exclusion zone around the test area. +- Incremental pressurisation: raise in steps of 25%, holding 5 minutes at each step. + +## Pre-Test Checklist +- [ ] All welds inspected and NDE records reviewed. +- [ ] Test boundaries clearly identified and isolation valves closed. +- [ ] Pressure relief device set to 110% of test pressure installed on system. +- [ ] All instrument connections plugged or valved off. +- [ ] Exclusion zone barricaded with warning signs. +- [ ] Test gauge calibrated within the last 6 months. +- [ ] Permit to Work issued and signed. + +## Recording Results +| Parameter | Value | +|-----------|-------| +| Test date | ______ | +| Test pressure (psi/bar) | ______ | +| Hold time (minutes) | ______ | +| Ambient temp (°C) | ______ | +| Pressure at start | ______ | +| Pressure at end | ______ | +| Pass / Fail | ______ | +| Inspector name | ______ | + +## Failure Procedure +1. Depressurise the system slowly. +2. Identify the leak using approved detection methods. +3. Repair and re-inspect per applicable code (ASME B31.3 or PD 5500). +4. Repeat the pressure test from the beginning. 
diff --git a/samples/js/local-rag/docs/valve-inspection.md b/samples/js/local-rag/docs/valve-inspection.md new file mode 100644 index 00000000..7acec568 --- /dev/null +++ b/samples/js/local-rag/docs/valve-inspection.md @@ -0,0 +1,59 @@ +--- +title: Valve Inspection and Maintenance Guide +category: Maintenance +id: valve-inspection +--- + +# Valve Inspection and Maintenance Guide + +## Scope +This guide covers routine inspection and maintenance of isolation valves, control valves, and safety relief valves on gas field production equipment. + +## Inspection Schedule + +| Valve Type | Frequency | Method | +|-----------|-----------|--------| +| Ball valves (isolation) | Quarterly | Visual + operational stroke | +| Gate valves | Bi-annually | Visual + ultrasonic wall thickness | +| Globe valves (control) | Quarterly | Visual + calibration check | +| Safety Relief Valves (SRV) | Annually | Bench test (pop test) | +| Check valves | Bi-annually | Visual + internal inspection | + +## Ball Valve Inspection + +### Visual Checks +1. Check for external corrosion, especially at the stem and body joint. +2. Verify handle/actuator is intact and moves freely. +3. Inspect flange bolts for correct torque and corrosion. +4. Look for signs of leakage at the stem packing and body seals. + +### Operational Stroke Test +1. Record the current valve position. +2. Slowly cycle the valve from fully open → fully closed → fully open. +3. Observe for stiffness, irregular torque, or unusual noise. +4. If the valve fails to stroke smoothly, **tag it out of service** and schedule repair. + +## Safety Relief Valve Maintenance +- **Do NOT adjust** a safety relief valve in the field. +- All SRVs must be tested on a certified test bench. +- Record the set pressure, reseat pressure, and blowdown percentage. +- Replace any SRV that fails to lift within ±3% of the set pressure. +- After testing, apply a tamper-proof seal and attach a new test tag showing: date, set pressure, and next test due date. 
+ +## Valve Packing Replacement +1. Isolate the valve (double block and bleed). +2. Confirm zero energy (pressure gauge reads zero, bleed valve open). +3. Remove the packing gland nuts and old packing rings. +4. Clean the stem and stuffing box with a lint-free cloth. +5. Install new packing rings — stagger the ring joints by 90°. +6. Tighten the gland evenly. Do NOT over-tighten. +7. Stroke the valve and check for leaks before returning to service. + +## Common Failure Modes +| Symptom | Likely Cause | Action | +|---------|-------------|--------| +| Valve won't close fully | Debris in seat, seat damage | Flush or replace seat | +| Stem leak | Worn packing | Replace packing | +| Excessive torque to operate | Corrosion, lack of lubrication | Lubricate or overhaul | +| Relief valve chattering | Set pressure too close to operating pressure | Review set-point vs operating pressure | +| Check valve backflow | Disc wear, spring failure | Replace internals | diff --git a/samples/js/local-rag/package.json b/samples/js/local-rag/package.json new file mode 100644 index 00000000..8c76bae9 --- /dev/null +++ b/samples/js/local-rag/package.json @@ -0,0 +1,31 @@ +{ + "name": "gas-field-local-rag", + "version": "2.0.0", + "description": "Offline RAG-powered support agent for gas field engineers using Foundry Local", + "type": "module", + "scripts": { + "ingest": "node src/ingest.js", + "start": "node src/server.js", + "dev": "node --watch src/server.js", + "test": "node --test test/*.test.js" + }, + "dependencies": { + "better-sqlite3": "^11.7.0", + "express": "^4.21.0", + "foundry-local-sdk": "^0.5.1" + }, + "license": "MIT", + "keywords": [ + "rag", + "offline-ai", + "foundry-local", + "phi-3", + "gas-field", + "support-agent", + "sqlite", + "tfidf" + ], + "engines": { + "node": ">=20.0.0" + } +} diff --git a/samples/js/local-rag/public/index.html b/samples/js/local-rag/public/index.html new file mode 100644 index 00000000..74da5004 --- /dev/null +++ 
b/samples/js/local-rag/public/index.html @@ -0,0 +1,698 @@ + + + + + + Gas Field Support Agent (RAG) + + + + +
+

+ 🔧 + Gas Field Support Agent (RAG) +

+
+ + Connecting... +
+
+ +
+ + + + + + + + +
+ +
+
+ Initializing...
+ Starting the AI model. This may take a moment on first run.

+ The chat will be available once the model is loaded. +
+
+ +
+ + + +
/**
 * Foundry Local chat engine.
 *
 * Discovers, downloads (cache-aware) and loads a local model via the
 * Foundry Local SDK, then answers queries with RAG: retrieves relevant
 * chunks from the SQLite-backed vector store, injects them into the
 * prompt, and runs inference fully on-device. Download/load progress is
 * reported through an optional status callback for the web UI.
 */
export class ChatEngine {
  constructor() {
    this.chatClient = null;   // native chat client, created in init()
    this.model = null;        // SDK model handle, set in init()
    this.store = null;        // VectorStore, opened in init()
    this.compactMode = false; // compact prompt + smaller budgets for edge devices
    this.modelAlias = null;   // alias of the selected model, set in init()
    /** @type {(status: {phase: string, message: string, progress?: number}) => void} */
    this._statusCallback = null;
  }

  /** Register a callback that receives init status updates for the UI. */
  onStatus(callback) {
    this._statusCallback = callback;
  }

  /**
   * Log a status update and forward it to the registered UI callback.
   * `progress` (0..1) is only included when provided.
   */
  _emitStatus(phase, message, progress) {
    const status = { phase, message, ...(progress !== undefined && { progress }) };
    console.log(`[ChatEngine] ${message}`);
    if (this._statusCallback) this._statusCallback(status);
  }

  /**
   * Initialize the engine: create Foundry Local manager, discover and load
   * the best model variant for this hardware, and open the vector store.
   * NOTE(review): assumes the `FoundryLocalManager.create`/`catalog` API
   * shape of the pinned foundry-local-sdk version — confirm on upgrade.
   */
  async init() {
    this._emitStatus("init", "Initializing Foundry Local SDK...");

    // Create the manager (requires appName)
    const manager = FoundryLocalManager.create({ appName: "gas-field-local-rag" });
    const catalog = manager.catalog;

    this._emitStatus("catalog", "Discovering available models...");
    this.model = await catalog.getModel(config.model);
    this.modelAlias = this.model.alias;

    // The SDK auto-selects the best variant for this hardware (GPU > NPU > CPU)
    this._emitStatus("variant", `Selected model: ${this.modelAlias}`);

    // Download the model if not already cached, with progress reporting
    if (!this.model.isCached) {
      this._emitStatus("download", `Downloading ${this.modelAlias}... This may take a few minutes on first run.`, 0);
      await this.model.download((progress) => {
        const pct = Math.round(progress * 100);
        this._emitStatus("download", `Downloading ${this.modelAlias}... ${pct}%`, progress);
      });
      this._emitStatus("download", `Download complete.`, 1);
    } else {
      this._emitStatus("cached", `Model ${this.modelAlias} is already cached.`);
    }

    // Load the model into memory
    this._emitStatus("loading", `Loading ${this.modelAlias} into memory...`);
    await this.model.load();

    // Create the native chat client with performance settings pre-configured
    this.chatClient = this.model.createChatClient();
    this.chatClient.settings.temperature = 0.1; // Low for deterministic, safety-critical responses
    this._emitStatus("ready", `Model ready: ${this.modelAlias}`);

    // Open the local vector store
    this.store = new VectorStore(config.dbPath);
    const count = this.store.count();
    this._emitStatus("ready", `Vector store ready: ${count} chunks indexed.`);

    if (count === 0) {
      console.warn("[ChatEngine] WARNING: No documents ingested. Run 'npm run ingest' first.");
    }
  }

  /** Expose the vector store for direct operations (e.g. upload ingestion). */
  getStore() {
    return this.store;
  }

  /**
   * Set compact mode for extreme latency / edge devices: smaller system
   * prompt, fewer retrieved chunks, lower max-token budget.
   */
  setCompactMode(enabled) {
    this.compactMode = enabled;
    console.log(`[ChatEngine] Compact mode: ${enabled ? "ON" : "OFF"}`);
  }

  /**
   * Retrieve relevant context from the local knowledge base.
   * Compact mode caps retrieval at 3 chunks to protect the context window.
   */
  retrieve(query) {
    const topK = this.compactMode ? Math.min(config.topK, 3) : config.topK;
    return this.store.search(query, topK);
  }

  /**
   * Format retrieved chunks into a context block for the prompt.
   */
  _buildContext(chunks) {
    if (chunks.length === 0) {
      return "No relevant documents found in local knowledge base.";
    }

    return chunks
      .map(
        (c, i) =>
          `--- Document ${i + 1}: ${c.title} [${c.category}] ---\n${c.content}`
      )
      .join("\n\n");
  }

  /** Build the messages array shared by query() and queryStream(). */
  _buildMessages(userMessage, history, context) {
    const systemPrompt = this.compactMode ? SYSTEM_PROMPT_COMPACT : SYSTEM_PROMPT;
    return [
      { role: "system", content: systemPrompt },
      {
        role: "system",
        content: `Retrieved context from local knowledge base:\n\n${context}`,
      },
      ...history,
      { role: "user", content: userMessage },
    ];
  }

  /** Map retrieved chunks to the source-citation metadata returned to clients. */
  _toSources(chunks) {
    return chunks.map((c) => ({
      title: c.title,
      category: c.category,
      docId: c.doc_id,
      score: Math.round(c.score * 100) / 100,
    }));
  }

  /**
   * Generate a response for a user query (non-streaming).
   * Returns { text, sources } where sources cite the chunks used.
   */
  async query(userMessage, history = []) {
    // 1. Retrieve relevant chunks and build the prompt
    const chunks = this.retrieve(userMessage);
    const messages = this._buildMessages(userMessage, history, this._buildContext(chunks));

    // 2. Call the local model via the native chat client
    this.chatClient.settings.maxTokens = this.compactMode ? 512 : 1024;
    const response = await this.chatClient.completeChat(messages);

    return {
      text: response.choices[0].message.content,
      sources: this._toSources(chunks),
    };
  }

  /**
   * Generate a streaming response for a user query.
   * Yields a { type: "sources", data } event first, then { type: "text", data }
   * events as tokens arrive from the SDK's callback-based streaming API.
   */
  async *queryStream(userMessage, history = []) {
    // 1. Retrieve relevant chunks and build the prompt
    const chunks = this.retrieve(userMessage);
    const messages = this._buildMessages(userMessage, history, this._buildContext(chunks));

    this.chatClient.settings.maxTokens = this.compactMode ? 512 : 1024;

    // Buffer chunks from the callback and yield them as an async iterable
    const textChunks = [];
    let wake = null;
    let done = false;
    let streamError = null;
    const notify = () => {
      if (wake) { wake(); wake = null; }
    };

    // Fix: the original chained only .then(...), so a rejected stream promise
    // never set `done` — the generator hung forever on the wake promise and
    // the rejection was unhandled. Capture the error, mark done, and rethrow
    // after draining so the route handler sees the failure.
    const streamPromise = this.chatClient
      .completeStreamingChat(messages, (chunk) => {
        textChunks.push(chunk);
        notify();
      })
      .then(() => {
        done = true;
        notify();
      })
      .catch((err) => {
        streamError = err;
        done = true;
        notify();
      });

    // Yield sources metadata first
    yield { type: "sources", data: this._toSources(chunks) };

    // Yield text chunks from the SDK streaming callback buffer
    while (!done || textChunks.length > 0) {
      if (textChunks.length === 0 && !done) {
        await new Promise((r) => { wake = r; });
      }
      while (textChunks.length > 0) {
        const chunk = textChunks.shift();
        const content = chunk.choices?.[0]?.delta?.content;
        if (content) {
          yield { type: "text", data: content };
        }
      }
    }

    // Ensure the stream promise settles, then surface any captured failure
    await streamPromise;
    if (streamError) throw streamError;
  }

  /** Best-effort shutdown: unload the model (ignoring errors) and close the store. */
  close() {
    if (this.model) {
      this.model.unload().catch(() => {});
    }
    if (this.store) this.store.close();
  }
}
/**
 * Document chunking utility.
 * Splits markdown documents into overlapping chunks suitable for RAG retrieval.
 */

/**
 * Parse front-matter (YAML-like `key: value` lines between `---` fences)
 * from a markdown document.
 * @returns {{meta: Object, body: string}} metadata map and remaining body;
 *   when no front-matter is present, `meta` is empty and `body` is the input.
 */
export function parseFrontMatter(text) {
  const match = text.match(/^---\r?\n([\s\S]*?)\r?\n---\r?\n([\s\S]*)$/);
  if (!match) return { meta: {}, body: text };

  const meta = {};
  for (const line of match[1].split("\n")) {
    const idx = line.indexOf(":");
    if (idx > 0) {
      meta[line.slice(0, idx).trim()] = line.slice(idx + 1).trim();
    }
  }
  return { meta, body: match[2] };
}

/**
 * Split text into chunks of approximately `maxTokens` tokens
 * with `overlapTokens` overlap between consecutive chunks.
 * Uses whitespace-based token approximation (good enough for local RAG).
 * Degenerate parameters are clamped so the loop always terminates.
 */
export function chunkText(text, maxTokens = 400, overlapTokens = 50) {
  const words = text.split(/\s+/).filter(Boolean);

  // Fix: clamp the window parameters so `start` always advances. Previously,
  // overlapTokens >= maxTokens made `start = end - overlapTokens`
  // non-increasing, producing an infinite loop.
  const size = Math.max(1, Math.floor(maxTokens));
  const overlap = Math.min(Math.max(0, Math.floor(overlapTokens)), size - 1);

  if (words.length <= size) return [text];

  const chunks = [];
  let start = 0;
  while (start < words.length) {
    const end = Math.min(start + size, words.length);
    chunks.push(words.slice(start, end).join(" "));
    if (end >= words.length) break;
    start = end - overlap;
  }
  return chunks;
}

/**
 * Build a simple term-frequency vector for a chunk of text.
 * Lowercases, strips everything except [a-z0-9-'], and drops 1-char tokens.
 * @returns {Map<string, number>} token -> occurrence count
 */
export function termFrequency(text) {
  const tf = new Map();
  const tokens = text
    .toLowerCase()
    .replace(/[^a-z0-9\-']/g, " ")
    .split(/\s+/)
    .filter((t) => t.length > 1);
  for (const t of tokens) {
    tf.set(t, (tf.get(t) || 0) + 1);
  }
  return tf;
}

/**
 * Compute cosine similarity between two term-frequency maps.
 * Returns 0 when either vector is empty (avoids division by zero).
 */
export function cosineSimilarity(a, b) {
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (const [term, freq] of a) {
    normA += freq * freq;
    if (b.has(term)) dot += freq * b.get(term);
  }
  for (const [, freq] of b) normB += freq * freq;
  if (normA === 0 || normB === 0) return 0;
  return dot / (Math.sqrt(normA) * Math.sqrt(normB));
}
/**
 * Ingestion entry point.
 * Reads every markdown document under docs/, splits each one into chunks,
 * and writes the chunks into the local SQLite vector store.
 *
 * Usage: node src/ingest.js
 */
async function ingest() {
  console.log("=== Gas Field RAG – Document Ingestion ===\n");

  const { docsDir } = config;
  if (!fs.existsSync(docsDir)) {
    console.error(`Docs directory not found: ${docsDir}`);
    process.exit(1);
  }

  const markdownFiles = fs
    .readdirSync(docsDir)
    .filter((name) => name.endsWith(".md"))
    .sort();

  if (markdownFiles.length === 0) {
    console.error("No markdown files found in docs/");
    process.exit(1);
  }

  console.log(`Found ${markdownFiles.length} documents.\n`);

  // Fresh ingestion each time: wipe the store before re-indexing.
  const store = new VectorStore(config.dbPath);
  store.clear();

  let chunkTotal = 0;

  for (const name of markdownFiles) {
    const source = fs.readFileSync(path.join(docsDir, name), "utf-8");
    const { meta, body } = parseFrontMatter(source);

    // Fall back to filename / defaults when front-matter fields are missing.
    const docId = meta.id || path.basename(name, ".md");
    const title = meta.title || name;
    const category = meta.category || "Uncategorised";

    const pieces = chunkText(body, config.chunkSize, config.chunkOverlap);
    pieces.forEach((piece, index) => {
      store.insert(docId, title, category, index, piece);
    });

    console.log(`  ✓ ${name} → ${pieces.length} chunk(s) [${category}]`);
    chunkTotal += pieces.length;
  }

  console.log(`\nIngestion complete: ${chunkTotal} chunks from ${markdownFiles.length} documents.`);
  console.log(`Database: ${config.dbPath}`);
  store.close();
}

ingest().catch((err) => {
  console.error("Ingestion failed:", err);
  process.exit(1);
});
a local, offline customer services and technical support agent for gas field inspection and maintenance engineers. + +Context: +- You run entirely on-device with no internet connectivity. +- You are embedded in a field application used during live gas infrastructure inspections and repairs. +- Your responses must be accurate, concise, safety-first, and aligned with gas engineering standards and field maintenance procedures. +- You use Retrieval-Augmented Generation (RAG) from a local document database containing approved gas engineering manuals, inspection procedures, fault codes, safety guidance, and maintenance playbooks. + +Primary Objectives: +1. Assist engineers in diagnosing issues encountered during gas field inspections. +2. Provide step-by-step repair and maintenance guidance. +3. Surface relevant safety warnings before any action. +4. Reference applicable standards, procedures, and documentation from the local knowledge base. +5. Operate reliably in offline, constrained environments. + +Behaviour Rules: +- Always prioritise safety. If a procedure involves risk, explicitly call it out. +- Do not hallucinate procedures, measurements, tolerances, or legal requirements. +- If the answer is not present in the local RAG data, say: + "This information is not available in the local knowledge base." +- Use clear, structured responses suitable for field engineers wearing PPE. +- Prefer bullet points and numbered steps. +- Assume noisy, time-critical environments. +- Keep answers SHORT – engineers are in the field. + +Response Format: +- **Summary** (1–2 lines) +- **Safety Warnings** (if applicable) +- **Step-by-step Guidance** +- **Reference** (document name + section) + +You must only use information retrieved from the local RAG database.`; + +// Compact prompt variant for extreme latency / edge devices +export const SYSTEM_PROMPT_COMPACT = `You are an offline gas field support agent. Safety-first. Concise answers only. 
+ +Rules: +- Prioritise safety warnings before any action. +- Use bullet points and numbered steps. +- If info is missing from RAG data, say: "Not in local knowledge base." +- Never invent procedures, tolerances, or legal requirements. + +Format: Summary → Safety → Steps → Reference.`; diff --git a/samples/js/local-rag/src/server.js b/samples/js/local-rag/src/server.js new file mode 100644 index 00000000..8f322be6 --- /dev/null +++ b/samples/js/local-rag/src/server.js @@ -0,0 +1,230 @@ +/** + * Express server – Gas Field RAG Application. + * Serves the web UI and provides the /api/chat endpoint. + * Fully offline, connects to Foundry Local on dynamic port. + */ +import express from "express"; +import path from "path"; +import fs from "fs"; +import { config } from "./config.js"; +import { ChatEngine } from "./chatEngine.js"; +import { parseFrontMatter, chunkText } from "./chunker.js"; + +const app = express(); + +// ── Security headers ── +app.use((_req, res, next) => { + res.setHeader("X-Content-Type-Options", "nosniff"); + res.setHeader("X-Frame-Options", "DENY"); + res.setHeader("Referrer-Policy", "no-referrer"); + res.setHeader("Permissions-Policy", "camera=(), microphone=(), geolocation=()"); + res.setHeader( + "Content-Security-Policy", + "default-src 'self'; script-src 'self' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; img-src 'self' data:;" + ); + next(); +}); + +app.use(express.json({ limit: "1mb" })); +app.use(express.text({ type: "text/markdown", limit: "2mb" })); +app.use(express.static(config.publicDir)); + +// ── Chat engine instance ── +const engine = new ChatEngine(); + +// ── API: Chat (non-streaming) ── +app.post("/api/chat", async (req, res) => { + try { + const { message, history, compact } = req.body; + if (!message || typeof message !== "string") { + return res.status(400).json({ error: "message is required" }); + } + + if (compact !== undefined) engine.setCompactMode(!!compact); + + const result = await engine.query( + message, + 
Array.isArray(history) ? history : [] + ); + res.json(result); + } catch (err) { + console.error("[API] Error:", err.message); + res.status(500).json({ error: "Internal server error" }); + } +}); + +// ── API: Chat (streaming via SSE) ── +app.post("/api/chat/stream", async (req, res) => { + try { + const { message, history, compact } = req.body; + if (!message || typeof message !== "string") { + return res.status(400).json({ error: "message is required" }); + } + + if (compact !== undefined) engine.setCompactMode(!!compact); + + res.setHeader("Content-Type", "text/event-stream"); + res.setHeader("Cache-Control", "no-cache"); + res.setHeader("Connection", "keep-alive"); + + const stream = engine.queryStream( + message, + Array.isArray(history) ? history : [] + ); + + for await (const chunk of stream) { + res.write(`data: ${JSON.stringify(chunk)}\n\n`); + } + + res.write("data: [DONE]\n\n"); + res.end(); + } catch (err) { + console.error("[API] Stream error:", err.message); + res.write(`data: ${JSON.stringify({ type: "error", data: "Internal server error" })}\n\n`); + res.end(); + } +}); + +// ── API: Upload document ── +app.post("/api/upload", express.raw({ type: "*/*", limit: "2mb" }), async (req, res) => { + try { + const filename = req.headers["x-filename"]; + if (!filename || typeof filename !== "string") { + return res.status(400).json({ error: "x-filename header is required" }); + } + + // Sanitise filename: allow only alphanumeric, hyphens, underscores, dots + const safeName = path.basename(filename).replace(/[^a-zA-Z0-9._-]/g, "_"); + if (!safeName.endsWith(".md") && !safeName.endsWith(".txt")) { + return res.status(400).json({ error: "Only .md and .txt files are accepted" }); + } + + const content = req.body.toString("utf-8"); + if (!content || content.length < 10) { + return res.status(400).json({ error: "Document content is too short" }); + } + + // Save file to docs/ (verify path stays inside docsDir) + const filePath = path.resolve(config.docsDir, 
safeName); + if (!filePath.startsWith(path.resolve(config.docsDir))) { + return res.status(400).json({ error: "Invalid filename" }); + } + if (!fs.existsSync(config.docsDir)) { + fs.mkdirSync(config.docsDir, { recursive: true }); + } + fs.writeFileSync(filePath, content, "utf-8"); + + // Chunk and ingest into vector store + const { meta, body } = parseFrontMatter(content); + const docId = meta.id || path.basename(safeName, path.extname(safeName)); + const title = meta.title || safeName; + const category = meta.category || "Uploaded"; + + // Remove previous version if re-uploading + const store = engine.getStore(); + store.removeByDocId(docId); + + const chunks = chunkText(body, config.chunkSize, config.chunkOverlap); + for (let i = 0; i < chunks.length; i++) { + store.insert(docId, title, category, i, chunks[i]); + } + + console.log(`[Upload] ${safeName} → ${chunks.length} chunk(s) ingested`); + + res.json({ + success: true, + filename: safeName, + docId, + title, + category, + chunks: chunks.length, + totalChunks: store.count(), + }); + } catch (err) { + console.error("[Upload] Error:", err.message); + res.status(500).json({ error: "Upload failed" }); + } +}); + +// ── API: List documents ── +app.get("/api/docs", (_req, res) => { + try { + const docs = engine.getStore().listDocs(); + res.json({ docs }); + } catch (err) { + console.error("[API] Docs list error:", err.message); + res.status(500).json({ error: "Failed to list documents" }); + } +}); + +// ── API: Health check ── +let engineReady = false; +let lastStatus = { phase: "init", message: "Starting..." }; + +app.get("/api/health", (_req, res) => { + res.json({ status: engineReady ? 
"ok" : "loading", model: config.model, ...lastStatus }); +}); + +// ── API: Init status stream (SSE) — shows download/load progress to the UI ── +const statusClients = new Set(); + +app.get("/api/status", (_req, res) => { + res.setHeader("Content-Type", "text/event-stream"); + res.setHeader("Cache-Control", "no-cache"); + res.setHeader("Connection", "keep-alive"); + + // Send current status immediately + res.write(`data: ${JSON.stringify(lastStatus)}\n\n`); + + if (engineReady) { + res.write(`data: ${JSON.stringify({ phase: "ready", message: "Ready" })}\n\n`); + res.end(); + return; + } + + statusClients.add(res); + _req.on("close", () => statusClients.delete(res)); +}); + +function broadcastStatus(status) { + lastStatus = status; + for (const client of statusClients) { + client.write(`data: ${JSON.stringify(status)}\n\n`); + if (status.phase === "ready") { + client.end(); + } + } + if (status.phase === "ready") statusClients.clear(); +} + +// ── Fallback: serve index.html for SPA ── +app.get("*", (_req, res) => { + res.sendFile(path.join(config.publicDir, "index.html")); +}); + +// ── Start server ── +async function start() { + console.log("=== Gas Field RAG – Local Support Agent ===\n"); + + // Register status callback to relay progress to connected UI clients + engine.onStatus((status) => broadcastStatus(status)); + + // Start the HTTP server first so the UI is immediately accessible + app.listen(config.port, config.host, () => { + console.log(`[Server] UI available at http://${config.host}:${config.port}`); + console.log("[Server] Initializing model in background...\n"); + }); + + // Initialize the engine (downloads model if needed, loads it) + await engine.init(); + engineReady = true; + broadcastStatus({ phase: "ready", message: "Ready" }); + + console.log("[Server] Fully offline – no outbound connections.\n"); +} + +start().catch((err) => { + console.error("Failed to start:", err); + broadcastStatus({ phase: "error", message: err.message || "Failed to start" 
}); + process.exit(1); +}); diff --git a/samples/js/local-rag/src/vectorStore.js b/samples/js/local-rag/src/vectorStore.js new file mode 100644 index 00000000..4afc27e7 --- /dev/null +++ b/samples/js/local-rag/src/vectorStore.js @@ -0,0 +1,145 @@ +/** + * Local vector store backed by SQLite. + * Stores document chunks and their term-frequency vectors for offline RAG retrieval. + * + * Performance optimisations: + * - Inverted index: maps terms -> chunk IDs for fast candidate filtering + * - Row cache: parsed TF maps kept in memory to avoid JSON.parse on every query + * - Prepared statements: reused across calls + */ +import Database from "better-sqlite3"; +import path from "path"; +import fs from "fs"; +import { termFrequency, cosineSimilarity } from "./chunker.js"; + +export class VectorStore { + constructor(dbPath) { + // Ensure data directory exists + const dir = path.dirname(dbPath); + if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true }); + + this.db = new Database(dbPath); + this.db.pragma("journal_mode = WAL"); + this._init(); + + // In-memory caches for fast retrieval + this._rowCache = null; // Array of { id, doc_id, title, category, content, tf } + this._invertedIndex = null; // Map> + } + + _init() { + this.db.exec(` + CREATE TABLE IF NOT EXISTS chunks ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + doc_id TEXT NOT NULL, + title TEXT, + category TEXT, + chunk_index INTEGER NOT NULL, + content TEXT NOT NULL, + tf_json TEXT NOT NULL + ); + CREATE INDEX IF NOT EXISTS idx_doc_id ON chunks(doc_id); + `); + + // Prepare reusable statements + this._stmtInsert = this.db.prepare( + "INSERT INTO chunks (doc_id, title, category, chunk_index, content, tf_json) VALUES (?, ?, ?, ?, ?, ?)" + ); + this._stmtAll = this.db.prepare("SELECT * FROM chunks"); + this._stmtCount = this.db.prepare("SELECT COUNT(*) as cnt FROM chunks"); + this._stmtListDocs = this.db.prepare( + "SELECT doc_id, title, category, COUNT(*) as chunks FROM chunks GROUP BY doc_id ORDER BY title" + 
); + this._stmtDeleteDoc = this.db.prepare("DELETE FROM chunks WHERE doc_id = ?"); + } + + /** Invalidate in-memory caches (called after any mutation). */ + _invalidateCache() { + this._rowCache = null; + this._invertedIndex = null; + } + + /** Build or return the in-memory row cache and inverted index. */ + _ensureCache() { + if (this._rowCache) return; + + const rows = this._stmtAll.all(); + this._rowCache = rows.map((row) => { + const tf = new Map(JSON.parse(row.tf_json)); + return { id: row.id, doc_id: row.doc_id, title: row.title, category: row.category, content: row.content, tf }; + }); + + // Build inverted index: term -> set of row indices + this._invertedIndex = new Map(); + for (let i = 0; i < this._rowCache.length; i++) { + for (const term of this._rowCache[i].tf.keys()) { + if (!this._invertedIndex.has(term)) { + this._invertedIndex.set(term, new Set()); + } + this._invertedIndex.get(term).add(i); + } + } + } + + /** Remove all existing chunks (for fresh re-ingestion). */ + clear() { + this.db.exec("DELETE FROM chunks"); + this._invalidateCache(); + } + + /** Insert a single chunk. */ + insert(docId, title, category, chunkIndex, content) { + const tf = termFrequency(content); + const tfJson = JSON.stringify([...tf]); + this._stmtInsert.run(docId, title, category, chunkIndex, content, tfJson); + this._invalidateCache(); + } + + /** Retrieve top-K most relevant chunks for a query. 
*/ + search(query, topK = 5) { + const queryTf = termFrequency(query); + this._ensureCache(); + + // Use inverted index to find candidate chunks that share at least one term + const candidateIndices = new Set(); + for (const term of queryTf.keys()) { + const indices = this._invertedIndex.get(term); + if (indices) { + for (const idx of indices) candidateIndices.add(idx); + } + } + + // Score only candidates instead of all rows + const scored = []; + for (const idx of candidateIndices) { + const row = this._rowCache[idx]; + const score = cosineSimilarity(queryTf, row.tf); + if (score > 0) { + scored.push({ ...row, score, tf_json: undefined }); + } + } + + scored.sort((a, b) => b.score - a.score); + return scored.slice(0, topK); + } + + /** Remove all chunks for a specific document. */ + removeByDocId(docId) { + this._stmtDeleteDoc.run(docId); + this._invalidateCache(); + } + + /** Get total chunk count. */ + count() { + return this._stmtCount.get().cnt; + } + + /** List distinct documents in the store. */ + listDocs() { + return this._stmtListDocs.all(); + } + + close() { + this.db.close(); + } +} diff --git a/samples/js/native-chat-completions/app.js b/samples/js/native-chat-completions/app.js index af566ef7..7efac872 100644 --- a/samples/js/native-chat-completions/app.js +++ b/samples/js/native-chat-completions/app.js @@ -1,5 +1,15 @@ import { FoundryLocalManager } from 'foundry-local-sdk'; +/** Render a CLI progress bar for model download. 
*/ +function renderProgressBar(label, progress) { + const barWidth = 30; + const filled = Math.round((progress / 100) * barWidth); + const empty = barWidth - filled; + const bar = '█'.repeat(filled) + '░'.repeat(empty); + process.stdout.write(`\r${label}: [${bar}] ${progress.toFixed(1)}%`); + if (progress >= 100) process.stdout.write('\n'); +} + // Initialize the Foundry Local SDK console.log('Initializing Foundry Local SDK...'); @@ -10,20 +20,24 @@ const manager = FoundryLocalManager.create({ console.log('✓ SDK initialized successfully'); // Get the model object -const modelAlias = 'qwen2.5-0.5b'; // Using an available model from the list above +const modelAlias = 'qwen2.5-0.5b'; const model = await manager.catalog.getModel(modelAlias); -// Download the model -console.log(`\nDownloading model ${modelAlias}...`); -await model.download((progress) => { - process.stdout.write(`\rDownloading... ${progress.toFixed(2)}%`); -}); -console.log('\n✓ Model downloaded'); +// Check cache before downloading — skip download if model is already cached +if (!model.isCached) { + console.log(`\nModel "${modelAlias}" not found in cache. 
Downloading...`); + await model.download((progress) => { + renderProgressBar('Downloading', progress); + }); + console.log('✓ Model downloaded'); +} else { + console.log(`\n✓ Model "${modelAlias}" already cached — skipping download`); +} -// Load the model -console.log(`\nLoading model ${modelAlias}...`); +// Load the model into memory +console.log(`Loading model ${modelAlias}...`); await model.load(); -console.log('✓ Model loaded'); +console.log('✓ Model loaded and ready'); // Create chat client console.log('\nCreating chat client...'); diff --git a/samples/js/tool-calling-foundry-local/src/app.js b/samples/js/tool-calling-foundry-local/src/app.js index f11eacdd..57fc5d01 100644 --- a/samples/js/tool-calling-foundry-local/src/app.js +++ b/samples/js/tool-calling-foundry-local/src/app.js @@ -33,7 +33,18 @@ async function runToolCallingExample() { } console.log(`Loading model ${model.id}...`); - await model.download(); + if (!model.isCached) { + console.log('Model not in cache. Downloading...'); + await model.download((progress) => { + const barWidth = 30; + const filled = Math.round((progress / 100) * barWidth); + const bar = '█'.repeat(filled) + '░'.repeat(barWidth - filled); + process.stdout.write(`\rDownloading: [${bar}] ${progress.toFixed(1)}%`); + if (progress >= 100) process.stdout.write('\n'); + }); + } else { + console.log('✓ Model already cached — skipping download'); + } await model.load(); console.log('✓ Model loaded'); diff --git a/samples/js/web-server-example/app.js b/samples/js/web-server-example/app.js index 5e97edfc..0443fdc1 100644 --- a/samples/js/web-server-example/app.js +++ b/samples/js/web-server-example/app.js @@ -14,20 +14,28 @@ const manager = FoundryLocalManager.create({ console.log('✓ SDK initialized successfully'); // Get the model object -const modelAlias = 'qwen2.5-0.5b'; // Using an available model from the list above +const modelAlias = 'qwen2.5-0.5b'; const model = await manager.catalog.getModel(modelAlias); -// Download the model 
-console.log(`\nDownloading model ${modelAlias}...`); -await model.download((progress) => { - process.stdout.write(`\rDownloading... ${progress.toFixed(2)}%`); -}); -console.log('\n✓ Model downloaded'); +// Check cache before downloading — skip download if model is already cached +if (!model.isCached) { + console.log(`\nModel "${modelAlias}" not found in cache. Downloading...`); + await model.download((progress) => { + const barWidth = 30; + const filled = Math.round((progress / 100) * barWidth); + const bar = '█'.repeat(filled) + '░'.repeat(barWidth - filled); + process.stdout.write(`\rDownloading: [${bar}] ${progress.toFixed(1)}%`); + if (progress >= 100) process.stdout.write('\n'); + }); + console.log('✓ Model downloaded'); +} else { + console.log(`\n✓ Model "${modelAlias}" already cached — skipping download`); +} -// Load the model -console.log(`\nLoading model ${modelAlias}...`); +// Load the model into memory +console.log(`Loading model ${modelAlias}...`); await model.load(); -console.log('✓ Model loaded'); +console.log('✓ Model loaded and ready'); // Start the web service console.log('\nStarting web service...'); diff --git a/samples/python/agent-framework/.env.example b/samples/python/agent-framework/.env.example new file mode 100644 index 00000000..2f0bc87c --- /dev/null +++ b/samples/python/agent-framework/.env.example @@ -0,0 +1,8 @@ +# ── Model ────────────────────────────────────────────────── +MODEL_ALIAS=qwen2.5-7b + +# ── Documents folder ────────────────────────────────────── +DOCS_PATH=./data + +# ── Logging ─────────────────────────────────────────────── +LOG_LEVEL=INFO diff --git a/samples/python/agent-framework/README.md b/samples/python/agent-framework/README.md new file mode 100644 index 00000000..23368e94 --- /dev/null +++ b/samples/python/agent-framework/README.md @@ -0,0 +1,132 @@ +# Agent Framework + Foundry Local + +A multi-agent orchestration sample powered by [Microsoft Agent Framework](https://pypi.org/project/agent-framework-core/) 
and [Foundry Local](https://foundrylocal.ai). All inference runs **on-device** through Foundry Local's OpenAI-compatible endpoint — no cloud API keys required. + +## What It Does + +Five specialised agents collaborate in configurable pipelines to research a user question: + +| Agent | Role | +|-------------|-------------------------------------------------| +| **Planner** | Breaks the question into 2-4 sub-tasks | +| **Retriever** | Searches local documents for relevant excerpts | +| **Critic** | Reviews output for gaps and contradictions | +| **Writer** | Synthesises a final report with citations | +| **ToolAgent** | Runs deterministic tools (word count, keywords)| + +### Orchestration Patterns + +- **Sequential** — Planner → Retriever → Critic ⇄ Retriever → Writer +- **Concurrent** — Retriever ‖ ToolAgent (fan-out with `asyncio.gather`) +- **Full (hybrid)** — Sequential planning, concurrent retrieval, feedback loop, then synthesis + +### Interactive Demos + +The web UI also ships five standalone demos: Weather Tools, Math Agent, Sentiment Analyser, Code Reviewer, and Multi-Agent Debate. + +## Prerequisites + +- **Python 3.10+** +- **Foundry Local** installed and available on PATH — see [foundrylocal.ai](https://foundrylocal.ai) + +## Quick Start + +```bash +# Clone the repo and navigate to this sample +cd samples/python/agent-framework + +# Create a virtual environment (recommended) +python -m venv .venv +source .venv/bin/activate # Linux/macOS +.venv\Scripts\activate # Windows + +# Install dependencies +pip install -e ".[dev]" + +# (Optional) copy and edit the environment config +cp .env.example .env + +# Run the web UI (starts Flask on http://localhost:5000) +python -m src.app --web + +# Or run a question directly from the CLI +python -m src.app "What orchestration patterns exist for multi-agent systems?" 
+ +# Choose orchestration mode (sequential or full) +python -m src.app --mode sequential "Explain Foundry Local architecture" +``` + +The web UI starts at **http://localhost:5000**. On first run, Foundry Local will download the model if it is not already cached. + +## Project Structure + +``` +agent-framework/ +├── data/ # Sample documents loaded by the Retriever agent +│ ├── agent_framework_guide.md +│ ├── foundry_local_overview.md +│ └── orchestration_patterns.md +├── src/app/ +│ ├── __init__.py +│ ├── __main__.py # CLI entrypoint (web / cli / tools) +│ ├── foundry_boot.py # Bootstrap Foundry Local, get connection info +│ ├── agents.py # Agent factories and tool functions +│ ├── documents.py # Document loader with chunking +│ ├── orchestrator.py # Sequential, concurrent, and hybrid pipelines +│ ├── tool_demo.py # Standalone tool-calling demo +│ ├── web.py # Flask server with SSE streaming +│ ├── templates/ +│ │ └── index.html # Web UI with real-time pipeline visualisation +│ └── demos/ # Interactive demo modules +│ ├── __init__.py +│ ├── registry.py # Demo registry +│ ├── weather_tools.py +│ ├── math_agent.py +│ ├── sentiment_analyzer.py +│ ├── code_reviewer.py +│ └── multi_agent_debate.py +├── tests/ +│ └── test_smoke.py # Smoke tests (imports, doc loader, etc.) +├── pyproject.toml # Project metadata & dependencies +├── requirements.txt # Pip-installable dependencies +├── .env.example # Environment variable template +└── README.md # This file +``` + +## Configuration + +| Variable | Default | Description | +|--------------------|----------------|---------------------------------------------| +| `MODEL_ALIAS` | `qwen2.5-0.5b` | Foundry Local model alias | +| `DOCS_PATH` | `./data` | Path to documents folder | +| `LOG_LEVEL` | `INFO` | Python logging level | +| `FOUNDRY_ENDPOINT` | *(auto)* | Override to skip local bootstrap | +| `FOUNDRY_API_KEY` | `none` | API key when using an external endpoint | + +## How It Works + +1. 
**Bootstrap** — `FoundryLocalBootstrapper` starts the Foundry Local service, resolves the model alias, and downloads the model if not cached. +2. **Document loading** — Markdown and text files from `data/` are chunked and passed as context to the Retriever agent. +3. **Orchestration** — agents are wired together per the selected pattern. Each step emits structured JSON events. +4. **Streaming** — the Flask server streams events via SSE so the web UI can render pipeline progress in real time. + +## Tests + +```bash +pytest tests/ -v +``` + +The smoke tests verify imports, document loading, the bootstrapper's environment override path, and the demo registry. + +## Troubleshooting + +| Symptom | Fix | +|---------|-----| +| `ModuleNotFoundError: agent_framework` | `pip install agent-framework-core==1.0.0b260130` | +| Model download hangs | Check network and ensure Foundry Local is on PATH | +| `Connection refused` on port 5273 | Foundry Local service failed to start — run `foundry-local` manually to see errors | +| Flask port 5000 in use | Set `FLASK_PORT` env var or kill the conflicting process | + +## License + +This sample is provided under the [MIT License](../../../LICENSE). diff --git a/samples/python/agent-framework/data/agent_framework_guide.md b/samples/python/agent-framework/data/agent_framework_guide.md new file mode 100644 index 00000000..5564b9a1 --- /dev/null +++ b/samples/python/agent-framework/data/agent_framework_guide.md @@ -0,0 +1,54 @@ +# Microsoft Agent Framework Guide + +The Microsoft Agent Framework provides building blocks for creating multi-agent +applications in Python. Agents are autonomous units that receive instructions, +process messages, and optionally invoke tools. + +## Core Concepts + +### ChatAgent +A `ChatAgent` wraps a language model client with a persona (system instructions), +an optional set of tools, and a conversation history. 
You create agents via:
+
+```python
+from agent_framework import ChatAgent
+from agent_framework.openai import OpenAIChatClient
+
+client = OpenAIChatClient(api_key="...", base_url="...", model_id="phi-4-mini")
+agent = ChatAgent(chat_client=client, name="Planner", instructions="...")
+response = await agent.run("What is the capital of France?")
+```
+
+### Tools
+Tools are plain Python functions annotated with `Annotated[<type>, Field(...)]`
+parameters. The framework automatically generates JSON Schema for tool calling:
+
+```python
+from typing import Annotated
+from pydantic import Field
+
+def calculate_sum(
+    a: Annotated[int, Field(description="First number")],
+    b: Annotated[int, Field(description="Second number")],
+) -> str:
+    return f"The sum is {a + b}"
+```
+
+Register tools when creating an agent: `ChatAgent(..., tools=[calculate_sum])`.
+
+### Orchestration Patterns
+
+| Pattern | Description |
+|-------------|------------------------------------------------------------------|
+| Sequential | Agents run one after another; each receives the previous output. |
+| Concurrent | Multiple agents run in parallel (fan-out) on the same input. |
+| Feedback | A Critic agent reviews output and can request re-processing. |
+| Hybrid | Combines sequential, concurrent, and feedback patterns. |
+
+## Best Practices
+
+1. **Keep instructions focused** — each agent should have a single responsibility.
+2. **Limit context length** — chunk large documents before passing to agents.
+3. **Use tool calling** — delegate structured tasks to deterministic code.
+4. **Set loop limits** — always cap iterative feedback loops to prevent runaway costs.
+5. **Stream results** — use Server-Sent Events (SSE) for real-time UI updates.
diff --git a/samples/python/agent-framework/data/foundry_local_overview.md b/samples/python/agent-framework/data/foundry_local_overview.md new file mode 100644 index 00000000..f6b55488 --- /dev/null +++ b/samples/python/agent-framework/data/foundry_local_overview.md @@ -0,0 +1,42 @@ +# Foundry Local Overview + +Foundry Local is a lightweight runtime that lets developers run AI models directly on their +local machine — no cloud connection required. It is part of the Microsoft AI Foundry family +and designed for offline-first development, edge scenarios, and privacy-sensitive workloads. + +## Key Features + +- **Local-first inference** — models run on your device using CPU or GPU acceleration. +- **Model catalog** — browse and download curated models (Phi, Mistral, Qwen, etc.) via the SDK. +- **Cache management** — models are cached locally after the first download. The SDK exposes + helpers to check cache status before downloading, giving apps instant startup on repeat runs. +- **OpenAI-compatible endpoint** — Foundry Local exposes a REST API compatible with the + OpenAI Chat Completions spec, so existing OpenAI SDK code works with minimal changes. +- **Multi-language SDKs** — official SDKs for Python, JavaScript/TypeScript, C#, and Rust. + +## Architecture + +``` +┌──────────────┐ ┌──────────────────────┐ +│ Your App │──SDK──▶│ Foundry Local │ +│ (Python, │ │ Service (REST API) │ +│ JS, C#, │ │ │ +│ Rust) │ │ ┌────────────────┐ │ +│ │ HTTP │ │ Loaded Model │ │ +│ │◀───────│ │ (ONNX / GGUF) │ │ +└──────────────┘ │ └────────────────┘ │ + └──────────────────────┘ +``` + +The SDK handles service bootstrapping, model resolution, downloading, loading, and exposes +convenience methods for chat completions and audio transcription. + +## Typical Lifecycle + +1. **Bootstrap** — `FoundryLocalManager` starts (or connects to) the local service. +2. **Model resolution** — the SDK resolves an alias (e.g. `phi-4-mini`) to a specific variant. +3. 
**Cache check** — if the model is already cached, loading is near-instant. +4. **Download** — if not cached, the model is downloaded with progress feedback. +5. **Load** — the model is loaded into the inference engine. +6. **Inference** — your app sends chat messages and receives completions. +7. **Cleanup** — unload models and stop the service when done. diff --git a/samples/python/agent-framework/data/orchestration_patterns.md b/samples/python/agent-framework/data/orchestration_patterns.md new file mode 100644 index 00000000..d269f2c9 --- /dev/null +++ b/samples/python/agent-framework/data/orchestration_patterns.md @@ -0,0 +1,67 @@ +# Orchestration Patterns for Multi-Agent Systems + +This document describes common orchestration patterns used to coordinate +multiple AI agents in a workflow. + +## 1. Sequential Pipeline + +Agents run in a fixed order. Each agent receives the output of the previous one. + +``` +User Question → Planner → Retriever → Critic → Writer → Final Report +``` + +**When to use:** Research tasks where each step depends on the previous result. + +**Trade-offs:** Simple to reason about, but total latency equals the sum of all +agent execution times. + +## 2. Concurrent Fan-Out + +Multiple agents process the same input simultaneously using `asyncio.gather()`. + +``` + ┌─ Retriever ──┐ +Plan Text ──────┤ ├─► Merge + └─ ToolAgent ──┘ +``` + +**When to use:** Independent sub-tasks that can run in parallel, such as +document retrieval and text analysis. + +**Trade-offs:** Faster than sequential for independent work, but merging results +requires careful design. + +## 3. Critic Feedback Loop + +A Critic agent iteratively reviews outputs and may trigger re-processing. + +``` +Retriever output ──► Critic ──► Gap found? ──► Yes ──► Re-retrieve ──┐ + ▲ │ + └─────────────────────────────────────────────┘ + │ + No gaps ──► Continue +``` + +**When to use:** Tasks requiring quality assurance or iterative refinement. 
+
+**Trade-offs:** Improves output quality but adds latency. Always set a maximum
+loop count (e.g. `MAX_CRITIC_LOOPS = 2`) to prevent infinite loops.
+
+## 4. Hybrid Orchestration
+
+Combines sequential, concurrent, and feedback patterns into a single workflow.
+
+```
+Question ──► Planner (seq) ──► Retriever ‖ ToolAgent (concurrent) ──► Critic Loop ──► Writer (seq)
+```
+
+This is the pattern used by the agent-framework sample's `run_full_workflow()`.
+
+## Key Implementation Notes
+
+- Use `async`/`await` throughout for non-blocking execution.
+- Wrap agent calls with timing (`time.perf_counter()`) for observability.
+- Emit structured step events (JSON) for UI streaming via SSE.
+- Handle agent errors gracefully — a single agent failure shouldn't crash the workflow.
diff --git a/samples/python/agent-framework/pyproject.toml b/samples/python/agent-framework/pyproject.toml
new file mode 100644
index 00000000..68feeb61
--- /dev/null
+++ b/samples/python/agent-framework/pyproject.toml
@@ -0,0 +1,24 @@
+[build-system]
+requires = ["setuptools>=68.0"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "agent-framework-foundry-local"
+version = "0.1.0"
+description = "Multi-agent orchestration with Microsoft Agent Framework + Foundry Local"
+requires-python = ">=3.10"
+dependencies = [
+    "foundry-local-sdk>=0.5.1",
+    "agent-framework-core==1.0.0b260130",
+    "openai>=1.40.0",
+    "python-dotenv>=1.0.0",
+    "rich>=13.0.0",
+    "flask>=3.0.0",
+]
+
+[project.optional-dependencies]
+dev = ["pytest>=7.0", "pytest-asyncio>=0.21"]
+
+[tool.setuptools.packages.find]
+where = ["."]
+include = ["src*"]
diff --git a/samples/python/agent-framework/requirements.txt b/samples/python/agent-framework/requirements.txt
new file mode 100644
index 00000000..066679a1
--- /dev/null
+++ b/samples/python/agent-framework/requirements.txt
@@ -0,0 +1,6 @@
+foundry-local-sdk>=0.5.1
+agent-framework-core==1.0.0b260130
+openai>=1.40.0
+python-dotenv>=1.0.0
+rich>=13.0.0
+flask>=3.0.0 diff --git a/samples/python/agent-framework/src/app/__init__.py b/samples/python/agent-framework/src/app/__init__.py new file mode 100644 index 00000000..58134b02 --- /dev/null +++ b/samples/python/agent-framework/src/app/__init__.py @@ -0,0 +1 @@ +# src/app — Agent Framework + Foundry Local demo package diff --git a/samples/python/agent-framework/src/app/__main__.py b/samples/python/agent-framework/src/app/__main__.py new file mode 100644 index 00000000..c9943511 --- /dev/null +++ b/samples/python/agent-framework/src/app/__main__.py @@ -0,0 +1,85 @@ +""" +CLI Entry Point +──────────────── +Run as: python -m src.app +""" + +from __future__ import annotations + +import argparse +import asyncio +import logging +import os + +from dotenv import load_dotenv +from rich.console import Console +from rich.logging import RichHandler + +load_dotenv() +console = Console() + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Agent Framework + Foundry Local — Multi-Agent Research Demo", + ) + parser.add_argument("question", nargs="?", help="Research question (CLI mode)") + parser.add_argument("--docs", default=os.getenv("DOCS_PATH", "./data"), help="Documents folder") + parser.add_argument("--model", default=os.getenv("MODEL_ALIAS", "qwen2.5-0.5b"), help="Model alias") + parser.add_argument("--mode", choices=["sequential", "full"], default="full", help="Orchestration mode") + parser.add_argument("--web", action="store_true", help="Start Flask web server") + parser.add_argument("--port", type=int, default=5000, help="Web server port") + parser.add_argument("--log-level", default=os.getenv("LOG_LEVEL", "INFO"), help="Logging level") + args = parser.parse_args() + + logging.basicConfig( + level=getattr(logging, args.log_level.upper(), logging.INFO), + format="%(message)s", + handlers=[RichHandler(rich_tracebacks=True)], + ) + + os.environ["DOCS_PATH"] = args.docs + os.environ["MODEL_ALIAS"] = args.model + + from .foundry_boot import 
FoundryLocalBootstrapper + + boot = FoundryLocalBootstrapper(alias=args.model) + conn = boot.bootstrap() + + if args.web or args.question is None: + # Web mode + from .web import create_app + app = create_app(conn) + console.print(f"\n[bold green]Server running at http://localhost:{args.port}[/]\n") + app.run(host="0.0.0.0", port=args.port, debug=False) + else: + # CLI mode + from .documents import load_documents + from .orchestrator import run_full_workflow, run_sequential + + docs = load_documents(args.docs) + console.print(f"[cyan]Loaded {docs.file_count} files → {len(docs.chunks)} chunks[/]\n") + + async def run_cli(): + if args.mode == "sequential": + gen = run_sequential(conn, docs, args.question) + else: + gen = run_full_workflow(conn, docs, args.question) + + async for evt in gen: + if evt["type"] == "step_start": + console.print(f"\n[yellow]▶ {evt['agent']}:[/] {evt.get('description', '')}") + elif evt["type"] == "step_done": + console.print(f"[green]✓ {evt['agent']}[/] ({evt.get('elapsed', '?')}s)") + console.print(evt.get("output", "")) + elif evt["type"] == "complete": + console.print("\n[bold green]═══ Final Report ═══[/]") + console.print(evt.get("report", "")) + elif evt["type"] == "error": + console.print(f"[red]✗ Error:[/] {evt.get('message', '')}") + + asyncio.run(run_cli()) + + +if __name__ == "__main__": + main() diff --git a/samples/python/agent-framework/src/app/agents.py b/samples/python/agent-framework/src/app/agents.py new file mode 100644 index 00000000..7a4fe4e1 --- /dev/null +++ b/samples/python/agent-framework/src/app/agents.py @@ -0,0 +1,116 @@ +""" +Agent Factories +──────────────── +Create specialised ChatAgents for the multi-agent research workflow. 
def extract_keywords(
    text: Annotated[str, Field(description="Text to extract keywords from")],
) -> str:
    """Extract the most frequently repeated keywords from text.

    Tokenizes on word characters (4+ letters only), keeps tokens seen at
    least twice, and returns up to the 10 most frequent as a comma list.
    Ties keep first-seen order because the sort is stable.
    """
    tokens = re.findall(r"\b\w{4,}\b", text.lower())
    counts: dict[str, int] = {}
    for token in tokens:
        counts[token] = counts.get(token, 0) + 1
    # Only words that repeat count as "keywords".
    repeated = [(word, n) for word, n in counts.items() if n >= 2]
    if not repeated:
        return "Keywords: (none detected)"
    # Stable sort: most frequent first, insertion order preserved on ties.
    repeated.sort(key=lambda pair: pair[1], reverse=True)
    return "Keywords: " + ", ".join(word for word, _ in repeated[:10])
+ ), + ) + + +def create_retriever(conn: FoundryConnection, documents_text: str) -> ChatAgent: + """Create the Retriever agent that finds relevant snippets from documents.""" + return ChatAgent( + chat_client=_make_client(conn), + name="Retriever", + instructions=( + "You are a document retriever. Given sub-tasks, search the documents " + "below and return relevant excerpts with [source: filename] citations.\n\n" + "─── DOCUMENTS ───\n" + documents_text + ), + ) + + +def create_critic(conn: FoundryConnection) -> ChatAgent: + """Create the Critic agent that reviews output for gaps.""" + return ChatAgent( + chat_client=_make_client(conn), + name="Critic", + instructions=( + "You are a research critic. Review the plan AND retrieved snippets.\n" + "List any gaps, contradictions, or missing sub-topics.\n" + "If nothing is missing, respond with exactly: NO_GAPS_FOUND" + ), + ) + + +def create_writer(conn: FoundryConnection) -> ChatAgent: + """Create the Writer agent that produces the final report.""" + return ChatAgent( + chat_client=_make_client(conn), + name="Writer", + instructions=( + "You are a technical writer. Synthesize the plan, retrieved snippets, " + "keywords, and critic feedback into a clear, well-structured report.\n" + "Include [source: filename] citations where applicable." + ), + ) + + +def create_tool_agent(conn: FoundryConnection) -> ChatAgent: + """Create the ToolAgent that uses word_count and extract_keywords tools.""" + return ChatAgent( + chat_client=_make_client(conn), + name="ToolAgent", + instructions=( + "You are a text analysis agent. Use the provided tools to count words " + "and extract keywords from the text you receive." 
+ ), + tools=[word_count, extract_keywords], + ) diff --git a/samples/python/agent-framework/src/app/demos/__init__.py b/samples/python/agent-framework/src/app/demos/__init__.py new file mode 100644 index 00000000..f5a7b94a --- /dev/null +++ b/samples/python/agent-framework/src/app/demos/__init__.py @@ -0,0 +1,20 @@ +# src/app/demos — Demo Modules +# Each demo showcases different MAF + Foundry Local capabilities + +from .weather_tools import WeatherDemo +from .math_agent import MathAgentDemo +from .sentiment_analyzer import SentimentDemo +from .code_reviewer import CodeReviewerDemo +from .multi_agent_debate import DebateDemo +from .registry import DEMO_REGISTRY, get_demo, list_demos + +__all__ = [ + "WeatherDemo", + "MathAgentDemo", + "SentimentDemo", + "CodeReviewerDemo", + "DebateDemo", + "DEMO_REGISTRY", + "get_demo", + "list_demos", +] diff --git a/samples/python/agent-framework/src/app/demos/code_reviewer.py b/samples/python/agent-framework/src/app/demos/code_reviewer.py new file mode 100644 index 00000000..23860448 --- /dev/null +++ b/samples/python/agent-framework/src/app/demos/code_reviewer.py @@ -0,0 +1,228 @@ +""" +Demo: Code Reviewer +─────────────────── +Demonstrates code analysis tools for reviewing code snippets. 
+""" + +from __future__ import annotations + +import re +from typing import Annotated + +from agent_framework import ChatAgent +from agent_framework.openai import OpenAIChatClient +from pydantic import Field + +from ..foundry_boot import FoundryConnection +from .registry import DemoInfo, register_demo + + +# ─── Tool Functions ────────────────────────────────────────────── + +def check_code_style( + code: Annotated[str, Field(description="Code snippet to check for style issues")], + language: Annotated[str, Field(description="Programming language (python, javascript, etc.)")] = "python", +) -> str: + """Check code for common style issues.""" + issues = [] + lines = code.split("\n") + for i, line in enumerate(lines, 1): + if len(line) > 100: + issues.append(f"Line {i}: Line too long ({len(line)} chars, max 100)") + if line != line.rstrip(): + issues.append(f"Line {i}: Trailing whitespace") + if "\t" in line and " " in line: + issues.append(f"Line {i}: Mixed tabs and spaces") + + if language.lower() == "python": + if "import *" in code: + issues.append("Avoid 'import *' \u2014 use explicit imports") + if re.search(r"except\s*:", code): + issues.append("Avoid bare 'except:' \u2014 catch specific exceptions") + if re.search(r"def\s+\w+\([^)]*=\s*\[\]", code) or re.search(r"def\s+\w+\([^)]*=\s*\{\}", code): + issues.append("Mutable default argument detected \u2014 use None instead") + + if language.lower() == "javascript": + if "var " in code: + issues.append("Consider using 'let' or 'const' instead of 'var'") + if "==" in code and "===" not in code: + issues.append("Consider using '===' for strict equality") + + if not issues: + return "\u2705 No style issues found!" 
+ return "Style issues found:\n \u2022 " + "\n \u2022 ".join(issues) + + +def analyze_complexity( + code: Annotated[str, Field(description="Code snippet to analyze")], +) -> str: + """Analyze code complexity metrics.""" + lines = code.split("\n") + total_lines = len(lines) + code_lines = sum(1 for line in lines if line.strip() and not line.strip().startswith("#")) + comment_lines = sum(1 for line in lines if line.strip().startswith("#")) + blank_lines = sum(1 for line in lines if not line.strip()) + + func_pattern = r"(?:def|function|async function|const\s+\w+\s*=\s*(?:async\s*)?\()" + functions = len(re.findall(func_pattern, code)) + classes = len(re.findall(r"class\s+\w+", code)) + + control_keywords = ["if", "elif", "else", "for", "while", "try", "except", "with", "case", "switch"] + branches = sum(len(re.findall(rf"\b{kw}\b", code)) for kw in control_keywords) + + if branches <= 5: + complexity = "Low" + elif branches <= 15: + complexity = "Medium" + else: + complexity = "High" + + return ( + f"Code Complexity Analysis:\n" + f" Lines: {total_lines} total ({code_lines} code, {comment_lines} comments, {blank_lines} blank)\n" + f" Functions/Methods: {functions}\n" + f" Classes: {classes}\n" + f" Branches: {branches}\n" + f" Estimated complexity: {complexity}" + ) + + +def find_potential_bugs( + code: Annotated[str, Field(description="Code snippet to scan for potential bugs")], +) -> str: + """Scan code for potential bugs and issues.""" + warnings = [] + if re.search(r"==\s*None", code): + warnings.append("Use 'is None' instead of '== None'") + if re.search(r"!=\s*None", code): + warnings.append("Use 'is not None' instead of '!= None'") + if re.search(r"print\s*\(", code): + warnings.append("Debug print statement found \u2014 remove before production") + if re.search(r"TODO|FIXME|HACK|XXX", code, re.IGNORECASE): + warnings.append("TODO/FIXME comment found \u2014 address before release") + if re.search(r"password\s*=\s*[\"'][^\"']+[\"']", code, re.IGNORECASE): + 
warnings.append("\u26a0\ufe0f CRITICAL: Hardcoded password detected!") + if re.search(r"api[_-]?key\s*=\s*[\"'][^\"']+[\"']", code, re.IGNORECASE): + warnings.append("\u26a0\ufe0f CRITICAL: Hardcoded API key detected!") + if "eval(" in code: + warnings.append("\u26a0\ufe0f eval() is dangerous \u2014 avoid if possible") + if "exec(" in code: + warnings.append("\u26a0\ufe0f exec() is dangerous \u2014 avoid if possible") + if re.search(r"except[^:]*:\s*\n\s*pass", code): + warnings.append("Empty except block found \u2014 handle or log the exception") + if not warnings: + return "\u2705 No obvious bugs or issues detected!" + return "Potential issues found:\n \u2022 " + "\n \u2022 ".join(warnings) + + +def suggest_improvements( + code: Annotated[str, Field(description="Code snippet to review for improvements")], +) -> str: + """Suggest code improvements and best practices.""" + suggestions = [] + func_pattern = r"def\s+\w+\s*\([^)]*\):\s*\n((?:\s+.*\n)*)" + for match in re.finditer(func_pattern, code): + body = match.group(1) + if body.count("\n") > 30: + suggestions.append("Consider breaking long functions into smaller ones (>30 lines)") + break + if re.search(r"def\s+\w+", code) and not re.search(r'""".*?"""', code, re.DOTALL): + suggestions.append("Add docstrings to functions for better documentation") + if re.search(r"def\s+\w+\s*\([^)]+\)", code): + if not re.search(r"def\s+\w+\s*\([^)]*:\s*\w+", code): + suggestions.append("Consider adding type hints for better code clarity") + if re.search(r"[=<>+\-*/]\s*\d{2,}(?!\d)", code): + suggestions.append("Extract magic numbers into named constants") + long_lines = sum(1 for line in code.split("\n") if len(line) > 80) + if long_lines > 3: + suggestions.append(f"Break up {long_lines} long lines for better readability") + if re.search(r"\b[a-z]\s*=", code): + suggestions.append("Use descriptive variable names instead of single letters") + if not suggestions: + return "\u2705 Code looks good! 
No major improvements suggested." + return "Suggested improvements:\n \u2022 " + "\n \u2022 ".join(suggestions) + + +def count_elements( + code: Annotated[str, Field(description="Code snippet to analyze")], +) -> str: + """Count code elements like variables, functions, loops, etc.""" + elements = { + "variables": len(re.findall(r"\b\w+\s*=\s*(?!=)", code)), + "functions": len(re.findall(r"\bdef\s+\w+", code)), + "classes": len(re.findall(r"\bclass\s+\w+", code)), + "if_statements": len(re.findall(r"\bif\s+", code)), + "for_loops": len(re.findall(r"\bfor\s+", code)), + "while_loops": len(re.findall(r"\bwhile\s+", code)), + "try_blocks": len(re.findall(r"\btry\s*:", code)), + "imports": len(re.findall(r"\bimport\s+", code)), + "returns": len(re.findall(r"\breturn\s+", code)), + "comments": len(re.findall(r"#.*$", code, re.MULTILINE)), + } + lines = ["Code element count:"] + for element, count in elements.items(): + if count > 0: + lines.append(f" {element.replace('_', ' ').title()}: {count}") + return "\n".join(lines) + + +# ─── Demo Class ────────────────────────────────────────────────── + +class CodeReviewerDemo: + def __init__(self, conn: FoundryConnection): + self.conn = conn + self.agent = self._create_agent() + + def _create_agent(self) -> ChatAgent: + client = OpenAIChatClient( + api_key=self.conn.api_key, + base_url=self.conn.endpoint, + model_id=self.conn.model_id, + ) + return ChatAgent( + chat_client=client, + name="CodeReviewer", + instructions=( + "You are a code review assistant. Use the provided tools to analyze code:\n\n" + " \u2022 check_code_style: Check for style issues\n" + " \u2022 analyze_complexity: Get complexity metrics\n" + " \u2022 find_potential_bugs: Scan for bugs and issues\n" + " \u2022 suggest_improvements: Get improvement suggestions\n" + " \u2022 count_elements: Count code elements\n\n" + "When given code to review, use multiple tools to provide a comprehensive " + "review. Summarize your findings clearly." 
+ ), + tools=[check_code_style, analyze_complexity, find_potential_bugs, suggest_improvements, count_elements], + ) + + async def run(self, prompt: str) -> dict: + import time + t0 = time.perf_counter() + result = await self.agent.run(prompt) + elapsed = time.perf_counter() - t0 + text = re.sub(r".*?\s*", "", str(result), flags=re.DOTALL).strip() + return { + "prompt": prompt, + "response": text, + "elapsed": round(elapsed, 2), + "tools_available": ["check_code_style", "analyze_complexity", "find_potential_bugs", "suggest_improvements", "count_elements"], + } + + +# ─── Register ──────────────────────────────────────────────────── + +async def run_code_review_demo(conn: FoundryConnection, prompt: str) -> dict: + demo = CodeReviewerDemo(conn) + return await demo.run(prompt) + + +register_demo(DemoInfo( + id="code_reviewer", + name="Code Reviewer", + description="Code analysis agent that checks style, complexity, potential bugs, and suggests improvements.", + icon="\ud83d\udc68\u200d\ud83d\udcbb", + category="Tool Calling", + runner=run_code_review_demo, + tags=["tools", "function-calling", "code-analysis", "single-agent"], + suggested_prompt="Review this Python code:\n\ndef calc(x,y,z):\n result = x + y\n if result == None:\n return 0\n return result / z", +)) diff --git a/samples/python/agent-framework/src/app/demos/math_agent.py b/samples/python/agent-framework/src/app/demos/math_agent.py new file mode 100644 index 00000000..f2ff00c9 --- /dev/null +++ b/samples/python/agent-framework/src/app/demos/math_agent.py @@ -0,0 +1,201 @@ +""" +Demo: Math Agent +──────────────── +Demonstrates calculation tools and step-by-step reasoning. 
def calculate(
    expression: Annotated[str, Field(description="Math expression to evaluate, e.g. '(5 + 3) * 2'")],
) -> str:
    """Safely evaluate a mathematical expression.

    Only digits, the four arithmetic operators, dot, parentheses, and
    spaces are accepted; anything else is rejected before evaluation, so
    the sandboxed eval cannot reference names or builtins.
    """
    # Reject any character outside the arithmetic whitelist up front.
    for ch in expression:
        if ch not in "0123456789+-*/.() ":
            return f"Error: Expression contains invalid characters. Use only numbers and operators: + - * / ( )"
    try:
        value = eval(expression, {"__builtins__": {}}, {})  # noqa: S307
        # Present whole-number floats as ints (e.g. 4.0 -> 4).
        if isinstance(value, float) and value.is_integer():
            value = int(value)
        return f"Result: {expression} = {value}"
    except Exception as e:
        return f"Error evaluating '{expression}': {e}"
Annotated[str, Field(description="Source unit (e.g., 'km', 'miles', 'kg', 'lbs', 'celsius', 'fahrenheit')")], + to_unit: Annotated[str, Field(description="Target unit")], +) -> str: + """Convert between common units.""" + conversions = { + ("km", "miles"): lambda x: x * 0.621371, + ("miles", "km"): lambda x: x * 1.60934, + ("kg", "lbs"): lambda x: x * 2.20462, + ("lbs", "kg"): lambda x: x * 0.453592, + ("celsius", "fahrenheit"): lambda x: (x * 9 / 5) + 32, + ("fahrenheit", "celsius"): lambda x: (x - 32) * 5 / 9, + ("meters", "feet"): lambda x: x * 3.28084, + ("feet", "meters"): lambda x: x * 0.3048, + ("liters", "gallons"): lambda x: x * 0.264172, + ("gallons", "liters"): lambda x: x * 3.78541, + } + key = (from_unit.lower(), to_unit.lower()) + if key not in conversions: + available = ", ".join(f"{f}\u2192{t}" for f, t in conversions) + return f"Conversion not supported. Available: {available}" + result = conversions[key](value) + return f"{value} {from_unit} = {result:.4f} {to_unit}" + + +def compound_interest( + principal: Annotated[float, Field(description="Initial investment amount")], + rate: Annotated[float, Field(description="Annual interest rate as percentage (e.g., 5 for 5%)")], + years: Annotated[int, Field(description="Number of years")], + compounds_per_year: Annotated[int, Field(description="Times interest compounds per year (1=annual, 12=monthly, 365=daily)")] = 12, +) -> str: + """Calculate compound interest.""" + r = rate / 100 + n = compounds_per_year + t = years + amount = principal * (1 + r / n) ** (n * t) + interest = amount - principal + return ( + f"Compound Interest Calculation:\n" + f" Principal: ${principal:,.2f}\n" + f" Rate: {rate}% per year\n" + f" Time: {years} years\n" + f" Compounds: {n}x per year\n" + f" \u2192 Final amount: ${amount:,.2f}\n" + f" \u2192 Interest earned: ${interest:,.2f}" + ) + + +def statistics( + numbers: Annotated[str, Field(description="Comma-separated list of numbers, e.g., '1, 2, 3, 4, 5'")], +) -> str: + 
"""Calculate basic statistics for a list of numbers.""" + try: + nums = [float(n.strip()) for n in numbers.split(",")] + except ValueError: + return "Error: Invalid number format. Use comma-separated numbers like '1, 2, 3'" + if not nums: + return "Error: No numbers provided" + n = len(nums) + mean = sum(nums) / n + sorted_nums = sorted(nums) + if n % 2 == 0: + median = (sorted_nums[n // 2 - 1] + sorted_nums[n // 2]) / 2 + else: + median = sorted_nums[n // 2] + variance = sum((x - mean) ** 2 for x in nums) / n + std_dev = math.sqrt(variance) + return ( + f"Statistics for {n} numbers:\n" + f" Sum: {sum(nums)}\n" + f" Mean: {mean:.2f}\n" + f" Median: {median:.2f}\n" + f" Min: {min(nums)}\n" + f" Max: {max(nums)}\n" + f" Range: {max(nums) - min(nums)}\n" + f" Std Dev: {std_dev:.2f}" + ) + + +# ─── Demo Class ────────────────────────────────────────────────── + +class MathAgentDemo: + def __init__(self, conn: FoundryConnection): + self.conn = conn + self.agent = self._create_agent() + + def _create_agent(self) -> ChatAgent: + client = OpenAIChatClient( + api_key=self.conn.api_key, + base_url=self.conn.endpoint, + model_id=self.conn.model_id, + ) + return ChatAgent( + chat_client=client, + name="MathAssistant", + instructions=( + "You are a precise math assistant. ALWAYS use the provided tools " + "for calculations \u2014 never compute in your head.\n\n" + "Available tools:\n" + " \u2022 calculate: Evaluate math expressions\n" + " \u2022 percentage: Calculate percentages\n" + " \u2022 percentage_change: Calculate % change between values\n" + " \u2022 convert_units: Convert between units\n" + " \u2022 compound_interest: Calculate investment growth\n" + " \u2022 statistics: Compute stats for a list of numbers\n\n" + "Show your work by using tools step-by-step for complex problems." 
+ ), + tools=[calculate, percentage, percentage_change, convert_units, compound_interest, statistics], + ) + + async def run(self, prompt: str) -> dict: + import time + t0 = time.perf_counter() + result = await self.agent.run(prompt) + elapsed = time.perf_counter() - t0 + text = re.sub(r".*?\s*", "", str(result), flags=re.DOTALL).strip() + return { + "prompt": prompt, + "response": text, + "elapsed": round(elapsed, 2), + "tools_available": ["calculate", "percentage", "percentage_change", "convert_units", "compound_interest", "statistics"], + } + + +# ─── Register ──────────────────────────────────────────────────── + +async def run_math_demo(conn: FoundryConnection, prompt: str) -> dict: + demo = MathAgentDemo(conn) + return await demo.run(prompt) + + +register_demo(DemoInfo( + id="math_agent", + name="Math Calculator", + description="Precise calculation agent with tools for arithmetic, percentages, unit conversions, compound interest, and statistics.", + icon="\ud83d\udd22", + category="Tool Calling", + runner=run_math_demo, + tags=["tools", "function-calling", "calculations", "single-agent"], + suggested_prompt="If I invest $10,000 at 7% annual interest compounded monthly for 15 years, how much will I have? Also convert that to euros assuming 1 USD = 0.92 EUR.", +)) diff --git a/samples/python/agent-framework/src/app/demos/multi_agent_debate.py b/samples/python/agent-framework/src/app/demos/multi_agent_debate.py new file mode 100644 index 00000000..c0ec7ddb --- /dev/null +++ b/samples/python/agent-framework/src/app/demos/multi_agent_debate.py @@ -0,0 +1,189 @@ +""" +Demo: Multi-Agent Debate +──────────────────────── +Demonstrates multi-agent orchestration with opposing viewpoints. +Three agents debate a topic: Proponent, Opponent, and Moderator. 
+""" + +from __future__ import annotations + +import re +import time +from dataclasses import dataclass + +from agent_framework import ChatAgent +from agent_framework.openai import OpenAIChatClient + +from ..foundry_boot import FoundryConnection +from .registry import DemoInfo, register_demo + + +# ─── Debate Participants ───────────────────────────────────────── + +def _make_client(conn: FoundryConnection) -> OpenAIChatClient: + return OpenAIChatClient( + api_key=conn.api_key, + base_url=conn.endpoint, + model_id=conn.model_id, + ) + + +def create_proponent(conn: FoundryConnection) -> ChatAgent: + """Create agent that argues FOR the topic.""" + return ChatAgent( + chat_client=_make_client(conn), + name="Proponent", + instructions=( + "You are a skilled debater arguing IN FAVOR of the given topic.\n\n" + "Rules:\n" + " \u2022 Present 2-3 strong arguments supporting the position\n" + " \u2022 Use logic, examples, and evidence\n" + " \u2022 Be persuasive but respectful\n" + " \u2022 Keep your response to 3-4 paragraphs max\n\n" + "Start with: 'I argue IN FAVOR because...'" + ), + ) + + +def create_opponent(conn: FoundryConnection) -> ChatAgent: + """Create agent that argues AGAINST the topic.""" + return ChatAgent( + chat_client=_make_client(conn), + name="Opponent", + instructions=( + "You are a skilled debater arguing AGAINST the given topic.\n\n" + "Rules:\n" + " \u2022 Present 2-3 strong counter-arguments\n" + " \u2022 Respond to the previous speaker's points where relevant\n" + " \u2022 Use logic, examples, and evidence\n" + " \u2022 Keep your response to 3-4 paragraphs max\n\n" + "Start with: 'I argue AGAINST because...'" + ), + ) + + +def create_moderator(conn: FoundryConnection) -> ChatAgent: + """Create moderator agent that summarizes the debate.""" + return ChatAgent( + chat_client=_make_client(conn), + name="Moderator", + instructions=( + "You are an impartial debate moderator.\n\n" + "Your job:\n" + " 1. Summarize the key points from BOTH sides\n" + " 2. 
Identify the strongest argument from each side\n" + " 3. Declare which side presented a more compelling case\n" + " 4. Explain your reasoning briefly\n\n" + "Be fair and objective. Format:\n" + " \u2022 FOR side summary: ...\n" + " \u2022 AGAINST side summary: ...\n" + " \u2022 Verdict: [FOR/AGAINST] wins because..." + ), + ) + + +# ─── Debate Results ────────────────────────────────────────────── + +@dataclass +class DebateRound: + speaker: str + position: str + argument: str + elapsed: float + + +# ─── Demo Class ────────────────────────────────────────────────── + +class DebateDemo: + def __init__(self, conn: FoundryConnection): + self.conn = conn + self.proponent = create_proponent(conn) + self.opponent = create_opponent(conn) + self.moderator = create_moderator(conn) + + async def _run_agent(self, agent: ChatAgent, prompt: str) -> tuple[str, float]: + t0 = time.perf_counter() + result = await agent.run(prompt) + elapsed = time.perf_counter() - t0 + text = re.sub(r".*?\s*", "", str(result), flags=re.DOTALL).strip() + return text, elapsed + + async def run(self, topic: str) -> dict: + t0_total = time.perf_counter() + rounds = [] + + # Round 1: Proponent opens + pro_prompt = f'Topic for debate: "{topic}"\n\nPresent your opening arguments IN FAVOR of this topic.' + pro_argument, pro_time = await self._run_agent(self.proponent, pro_prompt) + rounds.append(DebateRound("Proponent", "FOR", pro_argument, pro_time)) + + # Round 2: Opponent responds + opp_prompt = ( + f'Topic for debate: "{topic}"\n\n' + f"The speaker FOR this topic argued:\n{pro_argument}\n\n" + "Present your counter-arguments AGAINST this topic." 
+ ) + opp_argument, opp_time = await self._run_agent(self.opponent, opp_prompt) + rounds.append(DebateRound("Opponent", "AGAINST", opp_argument, opp_time)) + + # Final: Moderator verdict + mod_prompt = ( + f'Topic for debate: "{topic}"\n\n' + f"=== Arguments FOR ===\n{pro_argument}\n\n" + f"=== Arguments AGAINST ===\n{opp_argument}\n\n" + "Please summarize the debate and declare a winner." + ) + verdict_text, mod_time = await self._run_agent(self.moderator, mod_prompt) + rounds.append(DebateRound("Moderator", "VERDICT", verdict_text, mod_time)) + + total_time = time.perf_counter() - t0_total + + # Extract verdict + verdict = "TIE" + if "for wins" in verdict_text.lower() or "proponent wins" in verdict_text.lower(): + verdict = "FOR" + elif "against wins" in verdict_text.lower() or "opponent wins" in verdict_text.lower(): + verdict = "AGAINST" + + response_parts = [] + for r in rounds: + response_parts.append(f"=== {r.speaker} ({r.position}) ===\n{r.argument}") + response_text = "\n\n".join(response_parts) + if verdict != "TIE": + response_text += f"\n\nVerdict: {verdict}" + + return { + "response": response_text, + "topic": topic, + "rounds": [ + { + "speaker": r.speaker, + "position": r.position, + "argument": r.argument, + "elapsed": round(r.elapsed, 2), + } + for r in rounds + ], + "verdict": verdict, + "total_time": round(total_time, 2), + "agents_used": ["Proponent (FOR)", "Opponent (AGAINST)", "Moderator"], + } + + +# ─── Register ──────────────────────────────────────────────────── + +async def run_debate_demo(conn: FoundryConnection, prompt: str) -> dict: + demo = DebateDemo(conn) + return await demo.run(prompt) + + +register_demo(DemoInfo( + id="multi_agent_debate", + name="Multi-Agent Debate", + description="Three agents debate a topic: one argues FOR, one argues AGAINST, and a moderator declares a winner.", + icon="\ud83c\udfad", + category="Multi-Agent", + runner=run_debate_demo, + tags=["multi-agent", "orchestration", "sequential", "debate"], + 
suggested_prompt="Remote work should become the default for all knowledge workers", +)) diff --git a/samples/python/agent-framework/src/app/demos/registry.py b/samples/python/agent-framework/src/app/demos/registry.py new file mode 100644 index 00000000..1ceed6bd --- /dev/null +++ b/samples/python/agent-framework/src/app/demos/registry.py @@ -0,0 +1,42 @@ +""" +Demo Registry +───────────── +Central registry of all available demos with metadata. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any, Callable + + +@dataclass +class DemoInfo: + """Metadata for a demo.""" + id: str + name: str + description: str + icon: str + category: str + runner: Callable[..., Any] + tags: list[str] + suggested_prompt: str = "" + + +# Registry populated by each demo module +DEMO_REGISTRY: dict[str, DemoInfo] = {} + + +def register_demo(info: DemoInfo) -> None: + """Register a demo in the global registry.""" + DEMO_REGISTRY[info.id] = info + + +def get_demo(demo_id: str) -> DemoInfo | None: + """Get demo info by ID.""" + return DEMO_REGISTRY.get(demo_id) + + +def list_demos() -> list[DemoInfo]: + """List all demos with their metadata.""" + return list(DEMO_REGISTRY.values()) diff --git a/samples/python/agent-framework/src/app/demos/sentiment_analyzer.py b/samples/python/agent-framework/src/app/demos/sentiment_analyzer.py new file mode 100644 index 00000000..abee8526 --- /dev/null +++ b/samples/python/agent-framework/src/app/demos/sentiment_analyzer.py @@ -0,0 +1,246 @@ +""" +Demo: Sentiment Analyzer +──────────────────────── +Demonstrates text analysis tools for sentiment and emotion detection. 
+""" + +from __future__ import annotations + +import re +from collections import Counter +from typing import Annotated + +from agent_framework import ChatAgent +from agent_framework.openai import OpenAIChatClient +from pydantic import Field + +from ..foundry_boot import FoundryConnection +from .registry import DemoInfo, register_demo + +# ─── Lexicon ───────────────────────────────────────────────────── + +POSITIVE_WORDS = { + "good", "great", "excellent", "amazing", "wonderful", "fantastic", + "happy", "love", "best", "awesome", "brilliant", "perfect", + "beautiful", "outstanding", "superb", "delightful", "pleased", + "satisfied", "excited", "thankful", "grateful", "impressed", +} + +NEGATIVE_WORDS = { + "bad", "terrible", "awful", "horrible", "poor", "worst", + "sad", "hate", "disappointed", "angry", "frustrated", "annoyed", + "ugly", "boring", "waste", "useless", "broken", "failed", + "difficult", "confusing", "slow", "expensive", "problem", +} + +EMOTION_PATTERNS = { + "joy": ["happy", "excited", "delighted", "thrilled", "pleased", "love", "wonderful"], + "sadness": ["sad", "disappointed", "unhappy", "depressed", "lonely", "miss", "sorry"], + "anger": ["angry", "frustrated", "annoyed", "furious", "mad", "hate", "outraged"], + "fear": ["afraid", "scared", "worried", "anxious", "nervous", "terrified", "panic"], + "surprise": ["surprised", "amazed", "astonished", "shocked", "unexpected", "wow"], + "trust": ["trust", "believe", "reliable", "confident", "safe", "secure", "honest"], +} + + +# ─── Tool Functions ────────────────────────────────────────────── + +def analyze_sentiment( + text: Annotated[str, Field(description="The text to analyze for sentiment")], +) -> str: + """Analyze the overall sentiment of text.""" + words = re.findall(r"\b\w+\b", text.lower()) + pos_count = sum(1 for w in words if w in POSITIVE_WORDS) + neg_count = sum(1 for w in words if w in NEGATIVE_WORDS) + total = pos_count + neg_count + if total == 0: + sentiment, confidence, score = 
"neutral", 0.5, 0.0 + else: + score = (pos_count - neg_count) / total + if score > 0.2: + sentiment = "positive" + elif score < -0.2: + sentiment = "negative" + else: + sentiment = "neutral" + confidence = min(0.95, 0.5 + abs(score) * 0.5) + return ( + f"Sentiment Analysis:\n" + f" Overall: {sentiment.upper()}\n" + f" Score: {score:+.2f} (range: -1.0 to +1.0)\n" + f" Confidence: {confidence:.0%}\n" + f" Positive words found: {pos_count}\n" + f" Negative words found: {neg_count}" + ) + + +def detect_emotions( + text: Annotated[str, Field(description="The text to analyze for emotions")], +) -> str: + """Detect specific emotions present in the text.""" + words = set(re.findall(r"\b\w+\b", text.lower())) + detected = [] + for emotion, keywords in EMOTION_PATTERNS.items(): + matches = words.intersection(keywords) + if matches: + detected.append((emotion, len(matches), list(matches))) + if not detected: + return "No strong emotions detected in the text." + detected.sort(key=lambda x: x[1], reverse=True) + lines = ["Emotions detected:"] + for emotion, count, matches in detected: + intensity = "strong" if count >= 2 else "mild" + lines.append(f" \u2022 {emotion.title()} ({intensity}): triggered by '{', '.join(matches)}'") + return "\n".join(lines) + + +def extract_key_phrases( + text: Annotated[str, Field(description="The text to extract key phrases from")], +) -> str: + """Extract and rate important phrases from text.""" + sentences = re.split(r"[.!?]+", text) + results = [] + for sent in sentences: + sent = sent.strip() + if len(sent) < 10: + continue + words = re.findall(r"\b\w+\b", sent.lower()) + pos = sum(1 for w in words if w in POSITIVE_WORDS) + neg = sum(1 for w in words if w in NEGATIVE_WORDS) + if pos > neg: + rating = "positive" + elif neg > pos: + rating = "negative" + else: + rating = "neutral" + display = sent[:80] + "\u2026" if len(sent) > 80 else sent + results.append(f' [{rating:^8}] "{display}"') + if not results: + return "No significant phrases found." 
+ return "Key phrases:\n" + "\n".join(results[:5]) + + +def compare_sentiment( + text1: Annotated[str, Field(description="First text to compare")], + text2: Annotated[str, Field(description="Second text to compare")], +) -> str: + """Compare sentiment between two texts.""" + def score_text(text): + words = re.findall(r"\b\w+\b", text.lower()) + pos = sum(1 for w in words if w in POSITIVE_WORDS) + neg = sum(1 for w in words if w in NEGATIVE_WORDS) + total = pos + neg + return (pos - neg) / total if total > 0 else 0 + + s1 = score_text(text1) + s2 = score_text(text2) + + def label(s): + if s > 0.2: + return "positive" + if s < -0.2: + return "negative" + return "neutral" + + diff = abs(s1 - s2) + if diff < 0.1: + comparison = "Both texts have similar sentiment" + elif s1 > s2: + comparison = f"Text 1 is more positive (by {diff:.2f})" + else: + comparison = f"Text 2 is more positive (by {diff:.2f})" + return ( + f"Sentiment Comparison:\n" + f" Text 1: {label(s1)} ({s1:+.2f})\n" + f" Text 2: {label(s2)} ({s2:+.2f})\n" + f" \u2192 {comparison}" + ) + + +def word_frequency( + text: Annotated[str, Field(description="The text to analyze")], + top_n: Annotated[int, Field(description="Number of top words to return")] = 10, +) -> str: + """Get the most frequent meaningful words in text.""" + stopwords = { + "the", "a", "an", "is", "are", "was", "were", "be", "been", "being", + "have", "has", "had", "do", "does", "did", "will", "would", "could", + "should", "may", "might", "must", "shall", "can", "need", "dare", + "to", "of", "in", "for", "on", "with", "at", "by", "from", "up", + "about", "into", "over", "after", "it", "its", "this", "that", + "and", "but", "or", "nor", "so", "yet", "both", "either", "neither", + "i", "me", "my", "myself", "we", "our", "you", "your", "he", "she", + } + words = re.findall(r"\b\w+\b", text.lower()) + words = [w for w in words if w not in stopwords and len(w) > 2] + counter = Counter(words) + top = counter.most_common(top_n) + if not top: + 
return "No significant words found." + lines = [f"Top {min(top_n, len(top))} words:"] + for word, count in top: + bar = "\u2588" * min(count, 20) + lines.append(f" {word:15} {count:3} {bar}") + return "\n".join(lines) + + +# ─── Demo Class ────────────────────────────────────────────────── + +class SentimentDemo: + def __init__(self, conn: FoundryConnection): + self.conn = conn + self.agent = self._create_agent() + + def _create_agent(self) -> ChatAgent: + client = OpenAIChatClient( + api_key=self.conn.api_key, + base_url=self.conn.endpoint, + model_id=self.conn.model_id, + ) + return ChatAgent( + chat_client=client, + name="SentimentAnalyst", + instructions=( + "You are a text analysis expert. Use the provided tools to analyze text:\n\n" + " \u2022 analyze_sentiment: Get overall sentiment score\n" + " \u2022 detect_emotions: Find specific emotions\n" + " \u2022 extract_key_phrases: Find important phrases\n" + " \u2022 compare_sentiment: Compare two texts\n" + " \u2022 word_frequency: Find common words\n\n" + "When asked to analyze text, use the appropriate tool(s). " + "Summarize findings in plain language after using tools." 
+ ), + tools=[analyze_sentiment, detect_emotions, extract_key_phrases, compare_sentiment, word_frequency], + ) + + async def run(self, prompt: str) -> dict: + import time + t0 = time.perf_counter() + result = await self.agent.run(prompt) + elapsed = time.perf_counter() - t0 + text = re.sub(r".*?\s*", "", str(result), flags=re.DOTALL).strip() + return { + "prompt": prompt, + "response": text, + "elapsed": round(elapsed, 2), + "tools_available": ["analyze_sentiment", "detect_emotions", "extract_key_phrases", "compare_sentiment", "word_frequency"], + } + + +# ─── Register ──────────────────────────────────────────────────── + +async def run_sentiment_demo(conn: FoundryConnection, prompt: str) -> dict: + demo = SentimentDemo(conn) + return await demo.run(prompt) + + +register_demo(DemoInfo( + id="sentiment_analyzer", + name="Sentiment Analyzer", + description="Text analysis agent that detects sentiment, emotions, key phrases, and word frequency.", + icon="\ud83d\udcac", + category="Tool Calling", + runner=run_sentiment_demo, + tags=["tools", "function-calling", "text-analysis", "single-agent"], + suggested_prompt="Analyze this review: 'The product arrived quickly and the quality exceeded my expectations. However, the packaging was disappointing and customer support was slow to respond. Overall I'm satisfied but not thrilled.'", +)) diff --git a/samples/python/agent-framework/src/app/demos/weather_tools.py b/samples/python/agent-framework/src/app/demos/weather_tools.py new file mode 100644 index 00000000..89ab6579 --- /dev/null +++ b/samples/python/agent-framework/src/app/demos/weather_tools.py @@ -0,0 +1,177 @@ +""" +Demo: Weather Tools +─────────────────── +Demonstrates function/tool calling with multiple weather tools. 
+""" + +from __future__ import annotations + +import random +import re +from typing import Annotated + +from agent_framework import ChatAgent +from agent_framework.openai import OpenAIChatClient +from pydantic import Field + +from ..foundry_boot import FoundryConnection +from .registry import DemoInfo, register_demo + +# ─── Mock weather data ─────────────────────────────────────────── + +WEATHER_CONDITIONS = ["sunny", "cloudy", "rainy", "partly cloudy", "stormy", "foggy", "snowy"] +CITIES_DATA = { + "london": {"lat": 51.5, "country": "UK"}, + "new york": {"lat": 40.7, "country": "USA"}, + "tokyo": {"lat": 35.7, "country": "Japan"}, + "sydney": {"lat": -33.9, "country": "Australia"}, + "paris": {"lat": 48.9, "country": "France"}, + "seattle": {"lat": 47.6, "country": "USA"}, + "berlin": {"lat": 52.5, "country": "Germany"}, +} + + +def _mock_temp(city: str) -> int: + info = CITIES_DATA.get(city.lower(), {"lat": 45}) + base = 25 - abs(info["lat"] - 25) * 0.3 + return int(base + random.randint(-5, 5)) + + +# ─── Tool Functions ────────────────────────────────────────────── + +def get_current_weather( + city: Annotated[str, Field(description="Name of the city to get weather for")], +) -> str: + """Get the current weather conditions for a city.""" + city_lower = city.lower() + if city_lower not in CITIES_DATA: + return f"Weather data not available for '{city}'. Try: London, New York, Tokyo, Sydney, Paris, Seattle, Berlin." 
+ temp = _mock_temp(city) + condition = random.choice(WEATHER_CONDITIONS) + humidity = random.randint(30, 90) + wind = random.randint(5, 30) + return ( + f"Current weather in {city.title()}:\n" + f" \u2022 Temperature: {temp}\u00b0C\n" + f" \u2022 Condition: {condition}\n" + f" \u2022 Humidity: {humidity}%\n" + f" \u2022 Wind: {wind} km/h" + ) + + +def get_forecast( + city: Annotated[str, Field(description="Name of the city")], + days: Annotated[int, Field(description="Number of days (1-5)", ge=1, le=5)] = 3, +) -> str: + """Get a weather forecast for the next N days.""" + city_lower = city.lower() + if city_lower not in CITIES_DATA: + return f"Forecast not available for '{city}'." + lines = [f"Weather forecast for {city.title()} ({days} days):"] + for i in range(days): + temp = _mock_temp(city) + random.randint(-3, 3) + condition = random.choice(WEATHER_CONDITIONS) + lines.append(f" Day {i + 1}: {temp}\u00b0C, {condition}") + return "\n".join(lines) + + +def compare_weather( + city1: Annotated[str, Field(description="First city to compare")], + city2: Annotated[str, Field(description="Second city to compare")], +) -> str: + """Compare current weather between two cities.""" + temp1 = _mock_temp(city1) + temp2 = _mock_temp(city2) + cond1 = random.choice(WEATHER_CONDITIONS) + cond2 = random.choice(WEATHER_CONDITIONS) + diff = abs(temp1 - temp2) + warmer = city1 if temp1 > temp2 else city2 + return ( + f"Weather comparison:\n" + f" {city1.title()}: {temp1}\u00b0C, {cond1}\n" + f" {city2.title()}: {temp2}\u00b0C, {cond2}\n" + f" \u2192 {warmer.title()} is {diff}\u00b0C warmer" + ) + + +def recommend_activity( + city: Annotated[str, Field(description="City to get activity recommendations for")], +) -> str: + """Recommend outdoor activities based on current weather.""" + temp = _mock_temp(city) + condition = random.choice(WEATHER_CONDITIONS) + activities = [] + if "sunny" in condition or "partly" in condition: + activities.extend(["hiking", "picnic", "cycling", 
"sightseeing"]) + if "cloudy" in condition: + activities.extend(["museum visit", "walking tour", "photography"]) + if "rainy" in condition or "stormy" in condition: + activities.extend(["visit indoor attractions", "try local caf\u00e9s", "shopping"]) + if temp > 25: + activities.extend(["swimming", "beach"]) + if temp < 10: + activities.extend(["hot chocolate tour", "indoor sports"]) + return ( + f"Activity recommendations for {city.title()} ({temp}\u00b0C, {condition}):\n" + f" \u2022 {', '.join(activities[:4])}" + ) + + +# ─── Demo Class ────────────────────────────────────────────────── + +class WeatherDemo: + def __init__(self, conn: FoundryConnection): + self.conn = conn + self.agent = self._create_agent() + + def _create_agent(self) -> ChatAgent: + client = OpenAIChatClient( + api_key=self.conn.api_key, + base_url=self.conn.endpoint, + model_id=self.conn.model_id, + ) + return ChatAgent( + chat_client=client, + name="WeatherAssistant", + instructions=( + "You are a helpful weather assistant. Use the available tools to:\n" + " \u2022 Get current weather for cities\n" + " \u2022 Provide forecasts\n" + " \u2022 Compare weather between locations\n" + " \u2022 Recommend activities\n\n" + "Always use the tools when asked about weather. Be concise." 
+ ), + tools=[get_current_weather, get_forecast, compare_weather, recommend_activity], + ) + + async def run(self, prompt: str) -> dict: + import time + t0 = time.perf_counter() + result = await self.agent.run(prompt) + elapsed = time.perf_counter() - t0 + text = re.sub(r".*?\s*", "", str(result), flags=re.DOTALL).strip() + return { + "prompt": prompt, + "response": text, + "elapsed": round(elapsed, 2), + "tools_available": ["get_current_weather", "get_forecast", "compare_weather", "recommend_activity"], + } + + +# ─── Register ──────────────────────────────────────────────────── + +async def run_weather_demo(conn: FoundryConnection, prompt: str) -> dict: + demo = WeatherDemo(conn) + return await demo.run(prompt) + + +register_demo(DemoInfo( + id="weather_tools", + name="Weather Tools", + description="Multi-tool agent that provides weather information, forecasts, city comparisons, and activity recommendations.", + icon="\ud83c\udf24\ufe0f", + category="Tool Calling", + runner=run_weather_demo, + tags=["tools", "function-calling", "single-agent"], + suggested_prompt="What's the weather in Seattle and San Francisco? Compare them and recommend activities for the warmer city.", +)) diff --git a/samples/python/agent-framework/src/app/documents.py b/samples/python/agent-framework/src/app/documents.py new file mode 100644 index 00000000..166925a5 --- /dev/null +++ b/samples/python/agent-framework/src/app/documents.py @@ -0,0 +1,89 @@ +""" +Document Loader +──────────────── +Load and chunk local text/markdown files for the retriever agent. 
+""" + +from __future__ import annotations + +import logging +from dataclasses import dataclass, field +from pathlib import Path + +log = logging.getLogger(__name__) + +SUPPORTED_EXTENSIONS = {".txt", ".md", ".markdown"} + + +@dataclass +class DocumentChunk: + """A chunk of text from a source file.""" + source: str + text: str + index: int + + +@dataclass +class LoadedDocuments: + """All loaded document chunks and metadata.""" + chunks: list[DocumentChunk] = field(default_factory=list) + file_count: int = 0 + combined_text: str = "" + + +def load_documents( + docs_path: str, + max_chars_per_chunk: int = 2000, +) -> LoadedDocuments: + """Load all supported files from *docs_path* and split into chunks.""" + folder = Path(docs_path) + if not folder.is_dir(): + log.warning("Documents folder not found: %s", docs_path) + return LoadedDocuments() + + chunks: list[DocumentChunk] = [] + file_count = 0 + + for fp in sorted(folder.iterdir()): + if fp.suffix.lower() not in SUPPORTED_EXTENSIONS: + continue + try: + content = fp.read_text(encoding="utf-8") + except Exception as exc: + log.warning("Skipping %s: %s", fp.name, exc) + continue + + file_count += 1 + + # Split into chunks of roughly max_chars_per_chunk on line boundaries + lines = content.splitlines(keepends=True) + buf: list[str] = [] + buf_len = 0 + idx = 0 + + for line in lines: + if buf_len + len(line) > max_chars_per_chunk and buf: + chunks.append(DocumentChunk( + source=fp.name, + text="".join(buf), + index=idx, + )) + idx += 1 + buf = [] + buf_len = 0 + buf.append(line) + buf_len += len(line) + + if buf: + chunks.append(DocumentChunk( + source=fp.name, + text="".join(buf), + index=idx, + )) + + combined = "\n\n".join( + f"[{c.source} chunk {c.index}]\n{c.text}" for c in chunks + ) + + log.info("Loaded %d files → %d chunks", file_count, len(chunks)) + return LoadedDocuments(chunks=chunks, file_count=file_count, combined_text=combined) diff --git a/samples/python/agent-framework/src/app/foundry_boot.py 
b/samples/python/agent-framework/src/app/foundry_boot.py new file mode 100644 index 00000000..3bde3388 --- /dev/null +++ b/samples/python/agent-framework/src/app/foundry_boot.py @@ -0,0 +1,76 @@ +""" +Foundry Local Bootstrapper +─────────────────────────── +Manages Foundry Local service lifecycle: starts the service, +checks/downloads/loads the model, and returns connection info. +""" + +from __future__ import annotations + +import logging +import os +from dataclasses import dataclass + +from dotenv import load_dotenv +from foundry_local import FoundryLocalManager +from rich.console import Console + +load_dotenv() +log = logging.getLogger(__name__) +console = Console() + + +@dataclass +class FoundryConnection: + """Connection details returned after bootstrap.""" + endpoint: str + api_key: str + model_id: str + model_alias: str + + +class FoundryLocalBootstrapper: + """Bootstrap Foundry Local: start service → resolve model → download → load.""" + + def __init__(self, alias: str | None = None): + self.alias = alias or os.getenv("MODEL_ALIAS", "qwen2.5-0.5b") + + def bootstrap(self) -> FoundryConnection: + """Start Foundry Local and return a ready-to-use connection.""" + endpoint_override = os.getenv("FOUNDRY_ENDPOINT") + + if endpoint_override: + # External endpoint provided — skip local bootstrap + console.print(f"[cyan]Using external endpoint:[/] {endpoint_override}") + return FoundryConnection( + endpoint=endpoint_override, + api_key=os.getenv("FOUNDRY_API_KEY", "none"), + model_id=self.alias, + model_alias=self.alias, + ) + + console.print(f"[cyan]Bootstrapping Foundry Local with alias:[/] {self.alias}") + + # FoundryLocalManager(alias) auto-starts service + resolves model + manager = FoundryLocalManager(self.alias) + + endpoint = manager.endpoint + api_key = manager.api_key + + # List cached models to find the resolved variant + cached = manager.list_cached_models() + model_id = self.alias + for m in cached: + if self.alias in str(m): + model_id = str(m) + break 
+ + console.print(f"[green]✓ Foundry Local ready[/] endpoint={endpoint}") + log.info("Foundry Local ready: endpoint=%s model=%s", endpoint, model_id) + + return FoundryConnection( + endpoint=endpoint, + api_key=api_key, + model_id=model_id, + model_alias=self.alias, + ) diff --git a/samples/python/agent-framework/src/app/orchestrator.py b/samples/python/agent-framework/src/app/orchestrator.py new file mode 100644 index 00000000..dbea57d7 --- /dev/null +++ b/samples/python/agent-framework/src/app/orchestrator.py @@ -0,0 +1,182 @@ +""" +Orchestrator +───────────── +Three orchestration patterns for the multi-agent workflow: + 1. Sequential — Planner → Retriever → Critic ⇄ Retriever → Writer + 2. Concurrent — Retriever ‖ ToolAgent (fan-out) + 3. Full — Combines sequential + concurrent +""" + +from __future__ import annotations + +import asyncio +import logging +import re +import time +from typing import AsyncGenerator + +from .agents import ( + create_critic, + create_planner, + create_retriever, + create_tool_agent, + create_writer, +) +from .documents import LoadedDocuments +from .foundry_boot import FoundryConnection + +log = logging.getLogger(__name__) + +MAX_CRITIC_LOOPS = 2 + + +def _critic_found_gaps(critique: str) -> bool: + """Return True if the critic found gaps (i.e. 
didn't say NO_GAPS_FOUND).""" + return "NO_GAPS_FOUND" not in critique.upper().replace(" ", "") + + +# ─── Streaming helpers ─────────────────────────────────────────── + +StepEvent = dict # {"type": str, ...} + + +async def run_sequential( + conn: FoundryConnection, + docs: LoadedDocuments, + question: str, +) -> AsyncGenerator[StepEvent, None]: + """Sequential pipeline: Planner → Retriever → Critic → Writer.""" + + planner = create_planner(conn) + retriever = create_retriever(conn, docs.combined_text) + critic = create_critic(conn) + writer = create_writer(conn) + + # ── Planner ── + yield {"type": "step_start", "agent": "Planner", "description": "Breaking question into sub-tasks"} + t0 = time.perf_counter() + plan = await planner.run(question) + plan_text = str(plan) + elapsed = round(time.perf_counter() - t0, 2) + yield {"type": "step_done", "agent": "Planner", "output": plan_text, "elapsed": elapsed} + + # ── Retriever ── + yield {"type": "step_start", "agent": "Retriever", "description": "Searching documents"} + t0 = time.perf_counter() + snippets = await retriever.run(plan_text) + snippets_text = str(snippets) + elapsed = round(time.perf_counter() - t0, 2) + yield {"type": "step_done", "agent": "Retriever", "output": snippets_text, "elapsed": elapsed} + + # ── Critic loop ── + combined = f"Plan:\n{plan_text}\n\nRetrieved:\n{snippets_text}" + for loop in range(MAX_CRITIC_LOOPS): + yield {"type": "step_start", "agent": "Critic", "description": f"Reviewing for gaps (round {loop + 1})"} + t0 = time.perf_counter() + critique = await critic.run(combined) + critique_text = str(critique) + elapsed = round(time.perf_counter() - t0, 2) + yield {"type": "step_done", "agent": "Critic", "output": critique_text, "elapsed": elapsed} + + if not _critic_found_gaps(critique_text): + break + + # Re-retrieve with critic feedback + yield {"type": "step_start", "agent": "Retriever", "description": "Re-searching based on critic feedback"} + t0 = time.perf_counter() + snippets = 
await retriever.run(f"{plan_text}\n\nCritic feedback:\n{critique_text}") + snippets_text = str(snippets) + elapsed = round(time.perf_counter() - t0, 2) + yield {"type": "step_done", "agent": "Retriever", "output": snippets_text, "elapsed": elapsed} + + combined = f"Plan:\n{plan_text}\n\nRetrieved:\n{snippets_text}\n\nCritique:\n{critique_text}" + + # ── Writer ── + yield {"type": "step_start", "agent": "Writer", "description": "Synthesising final report"} + t0 = time.perf_counter() + report = await writer.run(combined) + report_text = str(report) + elapsed = round(time.perf_counter() - t0, 2) + yield {"type": "step_done", "agent": "Writer", "output": report_text, "elapsed": elapsed} + + yield {"type": "complete", "report": report_text} + + +async def run_concurrent_retrieval( + conn: FoundryConnection, + docs: LoadedDocuments, + plan_text: str, +) -> AsyncGenerator[StepEvent, None]: + """Concurrent fan-out: Retriever ‖ ToolAgent on the same plan.""" + + retriever = create_retriever(conn, docs.combined_text) + tool_agent = create_tool_agent(conn) + + yield {"type": "step_start", "agent": "Concurrent", "description": "Retriever + ToolAgent in parallel"} + t0 = time.perf_counter() + snippets_task = retriever.run(plan_text) + keywords_task = tool_agent.run(f"Analyze this text:\n{docs.combined_text[:3000]}") + snippets, keywords = await asyncio.gather(snippets_task, keywords_task) + elapsed = round(time.perf_counter() - t0, 2) + + yield { + "type": "step_done", + "agent": "Concurrent", + "output": f"**Retriever:**\n{snippets}\n\n**ToolAgent:**\n{keywords}", + "elapsed": elapsed, + } + + +async def run_full_workflow( + conn: FoundryConnection, + docs: LoadedDocuments, + question: str, +) -> AsyncGenerator[StepEvent, None]: + """Full hybrid: Sequential plan → Concurrent retrieve → Sequential critique + write.""" + + planner = create_planner(conn) + critic = create_critic(conn) + writer = create_writer(conn) + + # ── Planner (sequential) ── + yield {"type": "step_start", 
"agent": "Planner", "description": "Breaking question into sub-tasks"} + t0 = time.perf_counter() + plan = await planner.run(question) + plan_text = str(plan) + elapsed = round(time.perf_counter() - t0, 2) + yield {"type": "step_done", "agent": "Planner", "output": plan_text, "elapsed": elapsed} + + # ── Concurrent fan-out ── + snippets_text = "" + keywords_text = "" + async for evt in run_concurrent_retrieval(conn, docs, plan_text): + yield evt + if evt["type"] == "step_done" and evt["agent"] == "Concurrent": + # Parse out retriever/tool output + output = evt.get("output", "") + snippets_text = output + keywords_text = "" + + # ── Critic (sequential) ── + combined = f"Plan:\n{plan_text}\n\nRetrieved + Keywords:\n{snippets_text}" + for loop in range(MAX_CRITIC_LOOPS): + yield {"type": "step_start", "agent": "Critic", "description": f"Reviewing for gaps (round {loop + 1})"} + t0 = time.perf_counter() + critique = await critic.run(combined) + critique_text = str(critique) + elapsed = round(time.perf_counter() - t0, 2) + yield {"type": "step_done", "agent": "Critic", "output": critique_text, "elapsed": elapsed} + + if not _critic_found_gaps(critique_text): + break + combined += f"\n\nCritique:\n{critique_text}" + + # ── Writer (sequential) ── + yield {"type": "step_start", "agent": "Writer", "description": "Synthesising final report"} + t0 = time.perf_counter() + report = await writer.run(combined) + report_text = str(report) + elapsed = round(time.perf_counter() - t0, 2) + yield {"type": "step_done", "agent": "Writer", "output": report_text, "elapsed": elapsed} + + yield {"type": "complete", "report": report_text} diff --git a/samples/python/agent-framework/src/app/templates/index.html b/samples/python/agent-framework/src/app/templates/index.html new file mode 100644 index 00000000..ee81f780 --- /dev/null +++ b/samples/python/agent-framework/src/app/templates/index.html @@ -0,0 +1,628 @@ + + + + + + + + + Local Research & Synthesis Desk + + + +
+
+

🧠 Local Research & Synthesis Desk

+

Multi-Agent Orchestration • Microsoft Agent Framework + Foundry Local

+
+ Connecting… +
+
+ +
+ + + + +
+ + +
+
+
+ + +
+
+
+ + +
+ +
+
+
+
🗂 Planner
+ +
+ concurrent +
+
🔍 Retriever
+
🔧 ToolAgent
+
+
+ +
🧐 Critic
+ +
✍️ Writer
+
+
+
🗂 Planner
+ +
🔍 Retriever
+ +
🧐 Critic
+ +
✍️ Writer
+
+
+
+
📝
+

Ready to Research

+

Type a question above and click Run.

+
+
+
+

📋 Final Report

+
+
+
+ + +
+
+

🎮 MAF Tool Calling Demos

+

Explore different ways the Microsoft Agent Framework uses tool calling and multi-agent orchestration.

+
+

Loading demos...

+
+
+ 🔧 +
+
Select a Demo
+
category
+
+
+
Select a demo from above.
+
+
+ 💡 + Try this prompt: + +
+
+
+
+ + +
+
+
+
+ + +
+
+

Validate function/tool calling with Foundry Local + MAF.

+ +
+
+
+ + +
+
+

About This Demo

+

This demo shows multi-agent orchestration using two Microsoft technologies:

+
    +
  • Microsoft Agent Framework (MAF) — unified SDK for building AI agents with tool calling, orchestration patterns, and session management.
  • +
  • Foundry Local — on-device AI inference runtime. Models run on your GPU/NPU/CPU with no cloud connection needed.
  • +
+

Agents

+ + + + + + +
🗂 PlannerBreaks your question into 2-4 sub-tasks
🔍 RetrieverReads local files, extracts relevant snippets
🔧 ToolAgentComputes word counts, keyword extraction via function calling
🧐 CriticReviews for gaps and contradictions
✍️ WriterProduces the final report with citations
+

Orchestration Patterns

+

Sequential: Planner → Retriever → Critic → Writer.

+

Full: Sequential plan → Concurrent (Retriever + ToolAgent) → Critic → Writer.

+

References

+ +
+
+
+ + + + + diff --git a/samples/python/agent-framework/src/app/tool_demo.py b/samples/python/agent-framework/src/app/tool_demo.py new file mode 100644 index 00000000..738572d1 --- /dev/null +++ b/samples/python/agent-framework/src/app/tool_demo.py @@ -0,0 +1,97 @@ +""" +Tool Demo +────────── +Standalone validation: run tool-calling with Foundry Local to verify +that both direct invocation and LLM-driven function calling work. +""" + +from __future__ import annotations + +import asyncio +import time + +from agent_framework import ChatAgent +from agent_framework.openai import OpenAIChatClient +from rich.console import Console + +from .agents import extract_keywords, word_count +from .foundry_boot import FoundryConnection + +console = Console() + + +async def run_tool_demo(conn: FoundryConnection) -> list[dict]: + """Run direct + LLM-driven tool tests and return results.""" + results: list[dict] = [] + + # ── Direct function calls ──────────────────────────── + t0 = time.perf_counter() + wc = word_count("Foundry Local runs models on device") + results.append({ + "test": "Direct: word_count", + "result": wc, + "status": "pass" if "6" in wc else "fail", + "elapsed": round(time.perf_counter() - t0, 4), + }) + + t0 = time.perf_counter() + kw = extract_keywords("foundry foundry local local model model agent") + results.append({ + "test": "Direct: extract_keywords", + "result": kw, + "status": "pass" if "foundry" in kw.lower() else "fail", + "elapsed": round(time.perf_counter() - t0, 4), + }) + + # ── LLM-driven tool call ───────────────────────────── + client = OpenAIChatClient( + api_key=conn.api_key, + base_url=conn.endpoint, + model_id=conn.model_id, + ) + agent = ChatAgent( + chat_client=client, + name="ToolTester", + instructions="Use the provided tools to answer.", + tools=[word_count, extract_keywords], + ) + + t0 = time.perf_counter() + try: + result = await agent.run("Count the words in: 'hello world from foundry local'") + result_text = str(result) + 
results.append({ + "test": "LLM: word_count via agent", + "result": result_text, + "status": "pass" if any(c.isdigit() for c in result_text) else "fail", + "elapsed": round(time.perf_counter() - t0, 2), + }) + except Exception as exc: + results.append({ + "test": "LLM: word_count via agent", + "result": str(exc), + "status": "fail", + "elapsed": round(time.perf_counter() - t0, 2), + }) + + t0 = time.perf_counter() + try: + result = await agent.run( + "Extract keywords from: 'foundry foundry local local model model inference inference'" + ) + result_text = str(result) + results.append({ + "test": "LLM: extract_keywords via agent", + "result": result_text, + "status": "pass" if "foundry" in result_text.lower() or "keyword" in result_text.lower() else "fail", + "elapsed": round(time.perf_counter() - t0, 2), + }) + except Exception as exc: + results.append({ + "test": "LLM: extract_keywords via agent", + "result": str(exc), + "status": "fail", + "elapsed": round(time.perf_counter() - t0, 2), + }) + + return results diff --git a/samples/python/agent-framework/src/app/web.py b/samples/python/agent-framework/src/app/web.py new file mode 100644 index 00000000..5074e3b0 --- /dev/null +++ b/samples/python/agent-framework/src/app/web.py @@ -0,0 +1,179 @@ +""" +Flask Web Server +───────────────── +Serves the web UI and exposes API endpoints with SSE streaming +for real-time agent pipeline visualisation. 
+""" + +from __future__ import annotations + +import asyncio +import json +import logging +import os +import traceback + +from flask import Flask, Response, jsonify, render_template, request + +from .documents import load_documents +from .foundry_boot import FoundryConnection, FoundryLocalBootstrapper +from .orchestrator import run_full_workflow, run_sequential +from .tool_demo import run_tool_demo + +log = logging.getLogger(__name__) + +# ── Global state (set in create_app) ──────────────────────────── +_conn: FoundryConnection | None = None +_docs_path: str = "./data" +_docs = None + + +def create_app(conn: FoundryConnection | None = None) -> Flask: + """Create and configure the Flask application.""" + global _conn, _docs, _docs_path + + app = Flask(__name__, template_folder="templates") + + _docs_path = os.getenv("DOCS_PATH", "./data") + + if conn is not None: + _conn = conn + else: + boot = FoundryLocalBootstrapper() + _conn = boot.bootstrap() + + _docs = load_documents(_docs_path) + + # ── Routes ─────────────────────────────────────────── + + @app.route("/") + def index(): + return render_template("index.html") + + @app.route("/api/status") + def api_status(): + if _conn is None: + return jsonify({"status": "error", "message": "Not bootstrapped"}) + return jsonify({ + "status": "ok", + "model_alias": _conn.model_alias, + "model_id": _conn.model_id, + "endpoint": _conn.endpoint, + "documents": _docs.file_count if _docs else 0, + }) + + @app.route("/api/run", methods=["POST"]) + def api_run(): + """Run the research workflow and stream events via SSE.""" + if _conn is None: + return jsonify({"status": "error", "message": "Not bootstrapped"}), 503 + + data = request.get_json(silent=True) or {} + question = data.get("question", "").strip() + mode = data.get("mode", "full") + + if not question: + return jsonify({"status": "error", "message": "No question provided"}), 400 + + def generate(): + loop = asyncio.new_event_loop() + try: + if mode == "sequential": + gen = 
run_sequential(_conn, _docs, question) + else: + gen = run_full_workflow(_conn, _docs, question) + + async def drain(): + events = [] + async for evt in gen: + events.append(evt) + return events + + events = loop.run_until_complete(drain()) + for evt in events: + yield f"data: {json.dumps(evt)}\n\n" + except Exception as exc: + log.exception("Workflow error") + yield f"data: {json.dumps({'type': 'error', 'message': str(exc), 'traceback': traceback.format_exc()})}\n\n" + finally: + loop.close() + + return Response(generate(), mimetype="text/event-stream") + + @app.route("/api/tools", methods=["POST"]) + def api_tools(): + """Run the tool demo and return results.""" + if _conn is None: + return jsonify({"status": "error", "message": "Not bootstrapped"}), 503 + + loop = asyncio.new_event_loop() + try: + results = loop.run_until_complete(run_tool_demo(_conn)) + return jsonify({"status": "ok", "results": results}) + except Exception as exc: + log.exception("Tool demo error") + return jsonify({"status": "error", "message": str(exc)}), 500 + finally: + loop.close() + + @app.route("/api/documents") + def api_documents(): + return jsonify({ + "status": "ok", + "file_count": _docs.file_count if _docs else 0, + "chunk_count": len(_docs.chunks) if _docs else 0, + "files": list({c.source for c in _docs.chunks}) if _docs else [], + }) + + @app.route("/api/demos") + def api_demos(): + from .demos import list_demos + return jsonify({ + "status": "ok", + "demos": [ + { + "id": d.id, + "name": d.name, + "description": d.description, + "icon": d.icon, + "category": d.category, + "tags": d.tags, + "suggested_prompt": d.suggested_prompt, + } + for d in list_demos() + ], + }) + + @app.route("/api/demo//run", methods=["POST"]) + def api_demo_run(demo_id: str): + """Run a specific demo and stream results via SSE.""" + from .demos import get_demo + + if _conn is None: + return jsonify({"status": "error", "message": "Not bootstrapped"}), 503 + + demo = get_demo(demo_id) + if demo is None: + 
return jsonify({"status": "error", "message": f"Demo '{demo_id}' not found"}), 404 + + data = request.get_json(silent=True) or {} + prompt = data.get("prompt", "").strip() + if not prompt: + return jsonify({"status": "error", "message": "No prompt provided"}), 400 + + def generate(): + loop = asyncio.new_event_loop() + try: + yield f"data: {json.dumps({'type': 'step_start', 'agent': demo.name})}\n\n" + result = loop.run_until_complete(demo.runner(_conn, prompt)) + yield f"data: {json.dumps({'type': 'step_done', 'agent': demo.name, 'output': result.get('response', ''), 'elapsed': result.get('elapsed')})}\n\n" + yield f"data: {json.dumps({'type': 'complete', 'report': result.get('response', '')})}\n\n" + except Exception as exc: + log.exception("Demo error: %s", demo_id) + yield f"data: {json.dumps({'type': 'error', 'message': str(exc), 'traceback': traceback.format_exc()})}\n\n" + finally: + loop.close() + + return Response(generate(), mimetype="text/event-stream") + + return app diff --git a/samples/python/agent-framework/tests/test_smoke.py b/samples/python/agent-framework/tests/test_smoke.py new file mode 100644 index 00000000..9b86ec6e --- /dev/null +++ b/samples/python/agent-framework/tests/test_smoke.py @@ -0,0 +1,88 @@ +""" +Smoke Tests +──────────── +Quick tests to verify imports, document loading, and bootstrapper configuration. 
+""" + +from __future__ import annotations + +import os +from pathlib import Path + +import pytest + + +def test_imports(): + """All core modules can be imported.""" + from src.app.agents import create_planner, create_retriever, create_critic, create_writer, create_tool_agent + from src.app.documents import load_documents, LoadedDocuments + from src.app.foundry_boot import FoundryLocalBootstrapper, FoundryConnection + from src.app.orchestrator import run_sequential, run_full_workflow + from src.app.web import create_app + + +def test_document_loader(): + """Document loader reads data/ folder and produces chunks.""" + from src.app.documents import load_documents + + data_dir = Path(__file__).resolve().parent.parent / "data" + if not data_dir.is_dir(): + pytest.skip("data/ directory not found") + + docs = load_documents(str(data_dir)) + assert docs.file_count > 0, "Expected at least one document file" + assert len(docs.chunks) > 0, "Expected at least one chunk" + assert len(docs.combined_text) > 0, "Expected non-empty combined text" + + +def test_document_loader_missing_dir(): + """Document loader returns empty result for missing directory.""" + from src.app.documents import load_documents + + docs = load_documents("/nonexistent/path/nothing/here") + assert docs.file_count == 0 + assert len(docs.chunks) == 0 + + +def test_foundry_connection_dataclass(): + """FoundryConnection stores fields correctly.""" + from src.app.foundry_boot import FoundryConnection + + conn = FoundryConnection( + endpoint="http://localhost:5273", + api_key="test-key", + model_id="phi-4-mini-onnx-cpu", + model_alias="phi-4-mini", + ) + assert conn.endpoint == "http://localhost:5273" + assert conn.model_alias == "phi-4-mini" + + +def test_bootstrapper_uses_env_override(monkeypatch): + """Bootstrapper returns external endpoint when FOUNDRY_ENDPOINT is set.""" + from src.app.foundry_boot import FoundryLocalBootstrapper + + monkeypatch.setenv("FOUNDRY_ENDPOINT", "http://remote:8080/v1") + 
monkeypatch.setenv("FOUNDRY_API_KEY", "my-key") + + boot = FoundryLocalBootstrapper(alias="test-model") + conn = boot.bootstrap() + + assert conn.endpoint == "http://remote:8080/v1" + assert conn.api_key == "my-key" + assert conn.model_alias == "test-model" + + +def test_demo_registry(): + """Demo registry imports and has demos registered.""" + from src.app.demos.registry import list_demos, DEMO_REGISTRY + + # Import demo modules to trigger registration + import src.app.demos.weather_tools + import src.app.demos.math_agent + import src.app.demos.sentiment_analyzer + import src.app.demos.code_reviewer + import src.app.demos.multi_agent_debate + + demos = list_demos() + assert len(demos) >= 1, "Expected at least one registered demo" diff --git a/samples/python/functioncalling/README.md b/samples/python/functioncalling/README.md index 71048eae..44068fe1 100644 --- a/samples/python/functioncalling/README.md +++ b/samples/python/functioncalling/README.md @@ -2,10 +2,20 @@ This guide walks you through enabling function calling support in Foundry Local with Phi-4-mini. +## Features + +- **Cache-aware**: The notebook checks the local model cache before downloading — if the model is already cached, the download is skipped automatically. +- **Visual feedback**: Shows step-by-step status (service start → cache check → download/skip → load → ready) so you always know what's happening. +- **Parallel & single function calling**: Demonstrates both multi-tool and single-tool invocation patterns. + ## Prerequisites - Foundry Local version 0.5.100 or higher -- Access to modify model configuration files +- Python packages: `foundry-local-sdk`, `openai` + + ```bash + pip install foundry-local-sdk openai + ``` ## Setup Instructions @@ -40,11 +50,11 @@ foundry service restart ### Step 4: Test the Configuration -Run the provided [Notebook](./fl_tools..ipynb) to test and validate the function calling functionality. 
+Run the provided [Notebook](./fl_tools.ipynb) to test and validate the function calling functionality. ## Related Resources -- **Test Notebook**: [fl_tools.ipynb](./fl_tools..ipynb) +- **Test Notebook**: [fl_tools.ipynb](./fl_tools.ipynb) ## Notes diff --git a/samples/python/functioncalling/fl_tools.ipynb b/samples/python/functioncalling/fl_tools.ipynb index 0f9c76ed..2b226e2c 100644 --- a/samples/python/functioncalling/fl_tools.ipynb +++ b/samples/python/functioncalling/fl_tools.ipynb @@ -113,35 +113,44 @@ "id": "9335da67", "metadata": {}, "source": [ - "Define the model alias that will be used throughout this example:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "503f23fa", - "metadata": {}, - "outputs": [], - "source": [ - "alias = \"phi-4-mini\"" - ] - }, - { - "cell_type": "markdown", - "id": "5a9b1ecf", - "metadata": {}, - "source": [ - "Create a FoundryLocalManager instance using the specified model alias:" + "Initialize the Foundry Local manager and ensure the model is downloaded and loaded.\n", + "\n", + "The SDK checks the local cache first — if the model is already cached, it skips the download:" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "804611d5", "metadata": {}, "outputs": [], "source": [ - "manager = FoundryLocalManager(alias)" + "alias = \"phi-4-mini\"\n", + "\n", + "# Initialize manager without auto-bootstrapping so we can show each step\n", + "manager = FoundryLocalManager(bootstrap=False)\n", + "\n", + "# Start the Foundry Local service\n", + "print(\"Starting Foundry Local service...\")\n", + "manager.start_service()\n", + "print(\" ✓ Service is running\")\n", + "\n", + "# Check if the model is already cached\n", + "cached_models = manager.list_cached_models()\n", + "cached_ids = {m.id for m in cached_models}\n", + "model_info = manager.get_model_info(alias)\n", + "\n", + "if model_info.id in cached_ids:\n", + " print(f\" ✓ Model '{alias}' is already cached — skipping 
download\")\n", + "else:\n", + " print(f\" Downloading model '{alias}'...\")\n", + " manager.download_model(alias)\n", + " print(f\" ✓ Download complete\")\n", + "\n", + "# Load the model into memory\n", + "print(f\" Loading model '{alias}'...\")\n", + "manager.load_model(alias)\n", + "print(f\" ✓ Model loaded and ready\")" ] }, { diff --git a/samples/python/hello-foundry-local/README.md b/samples/python/hello-foundry-local/README.md index c7753a88..68c7cb12 100644 --- a/samples/python/hello-foundry-local/README.md +++ b/samples/python/hello-foundry-local/README.md @@ -2,6 +2,13 @@ This is a simple example of how to use the Foundry Local SDK to run a model locally and make requests to it. The example demonstrates how to set up the SDK, initialize a model, and make a request to the model. +## Features + +- **Cache-aware**: Checks the local model cache before downloading — if the model is already cached, the download is skipped automatically. +- **Visual feedback**: Shows step-by-step status (service start → cache check → download/skip → load → ready) so you always know what's happening. + +## Setup + Install the Foundry Local SDK and OpenAI packages using pip: ```bash @@ -11,7 +18,7 @@ pip install foundry-local-sdk openai > [!TIP] > We recommend using a virtual environment to manage your Python packages using `venv` or `conda` to avoid conflicts with other packages. 
-Run the application using Python: +## Run ```bash python src/app.py diff --git a/samples/python/hello-foundry-local/requirements.txt b/samples/python/hello-foundry-local/requirements.txt new file mode 100644 index 00000000..2a12d4eb --- /dev/null +++ b/samples/python/hello-foundry-local/requirements.txt @@ -0,0 +1,2 @@ +foundry-local-sdk>=0.5.1 +openai>=1.0.0 diff --git a/samples/python/hello-foundry-local/src/app.py b/samples/python/hello-foundry-local/src/app.py index 8bd21c62..4d47f5eb 100644 --- a/samples/python/hello-foundry-local/src/app.py +++ b/samples/python/hello-foundry-local/src/app.py @@ -1,6 +1,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. +import sys import openai from foundry_local import FoundryLocalManager @@ -8,26 +9,49 @@ # to your end-user's device. alias = "qwen2.5-coder-0.5b" -# Create a FoundryLocalManager instance. This will start the Foundry -# Local service if it is not already running and load the specified model. -manager = FoundryLocalManager(alias) +# Create a FoundryLocalManager instance without bootstrapping +# so we can show each step visually. +print("Initializing Foundry Local...") +manager = FoundryLocalManager(bootstrap=False) +manager.start_service() +print("✓ Service started") -# The remaining code uses the OpenAI Python SDK to interact with the local model. +# Check if the model is already cached (downloaded) +cached_models = manager.list_cached_models() +cached_ids = {m.id for m in cached_models} +model_info = manager.get_model_info(alias) +if model_info is None: + print(f"✗ Model \"{alias}\" not found in catalog") + sys.exit(1) -# Configure the client to use the local Foundry service +if model_info.id in cached_ids: + print(f"✓ Model \"{alias}\" ({model_info.id}) already cached — skipping download") +else: + print(f"Model \"{alias}\" not found in cache. 
Downloading {model_info.id}...") + manager.download_model(alias) + print(f"✓ Model downloaded") + +# Load the model into memory +print(f"Loading model {model_info.id}...") +manager.load_model(alias) +print("✓ Model loaded and ready") + +# Configure the OpenAI client to use the local Foundry service client = openai.OpenAI( base_url=manager.endpoint, api_key=manager.api_key, # API key is not required for local usage ) -# Set the model to use and generate a streaming response +# Generate a streaming response stream = client.chat.completions.create( - model=manager.get_model_info(alias).id, + model=model_info.id, messages=[{"role": "user", "content": "What is the golden ratio?"}], stream=True, ) # Print the streaming response +print("\nAssistant: ", end="") for chunk in stream: if chunk.choices[0].delta.content is not None: print(chunk.choices[0].delta.content, end="", flush=True) +print() diff --git a/samples/python/summarize/README.md b/samples/python/summarize/README.md index 9fa753d1..a944ecff 100644 --- a/samples/python/summarize/README.md +++ b/samples/python/summarize/README.md @@ -2,6 +2,12 @@ A simple command-line utility that uses Foundry Local to generate summaries of text files or direct text input. +## Features + +- **Cache-aware**: Checks the local model cache before downloading — if the model is already cached, the download is skipped automatically. +- **Visual feedback**: Shows step-by-step status (service start → cache check → download/skip → load → ready) so you always know what's happening. +- **Flexible model selection**: Use `--model` to pick a specific model alias, or let the script default to `phi-4-mini` (falls back to the first cached model if unavailable). + ## Setup 1. 
Install the required dependencies: diff --git a/samples/python/summarize/requirements.txt b/samples/python/summarize/requirements.txt index 7b37f256..e29653d7 100644 --- a/samples/python/summarize/requirements.txt +++ b/samples/python/summarize/requirements.txt @@ -1,3 +1,3 @@ openai>=1.0.0 python-dotenv>=0.19.0 -foundry-local-sdk>=0.3.1 +foundry-local-sdk>=0.5.1 diff --git a/samples/python/summarize/summarize.py b/samples/python/summarize/summarize.py index c2b00ba7..9b2a330a 100644 --- a/samples/python/summarize/summarize.py +++ b/samples/python/summarize/summarize.py @@ -42,28 +42,44 @@ def main(): parser.add_argument("--model", help="Model alias to use for summarization") args = parser.parse_args() - fl_manager = FoundryLocalManager() - + # Initialize Foundry Local without bootstrapping for visibility + print("Initializing Foundry Local...") + fl_manager = FoundryLocalManager(bootstrap=False) fl_manager.start_service() + print("✓ Service started") - model_list = fl_manager.list_cached_models() - - if not model_list: - print("No downloaded models available") - sys.exit(1) + # Check what's available in cache + cached_models = fl_manager.list_cached_models() + cached_ids = {m.id for m in cached_models} - # Select model based on alias or use first one if args.model: - selected_model = next((model for model in model_list if model.alias == args.model), None) - if selected_model: - model_name = selected_model.id + # User specified a model — check cache, download if needed + model_info = fl_manager.get_model_info(args.model) + if model_info is None: + print(f"✗ Model alias '{args.model}' not found in catalog") + sys.exit(1) + + if model_info.id in cached_ids: + print(f"✓ Model \"{args.model}\" ({model_info.id}) already cached — skipping download") else: - model_name = model_list[0].id - print(f"Model alias '{args.model}' not found, using default model: {model_name}") + print(f"Model \"{args.model}\" not in cache. 
Downloading {model_info.id}...") + fl_manager.download_model(args.model) + print("✓ Model downloaded") + + print(f"Loading model {model_info.id}...") + fl_manager.load_model(args.model) + model_name = model_info.id else: - model_name = model_list[0].id + # No model specified — use the first cached model, or fail + if not cached_models: + print("No downloaded models available. Run with --model to download one.") + sys.exit(1) + + model_name = cached_models[0].id + print(f"✓ Using cached model: {model_name}") + fl_manager.load_model(model_name) - print(f"Using model: {model_name}") + print(f"✓ Model loaded and ready\n") # Initialize OpenAI client client = OpenAI(base_url=fl_manager.endpoint, api_key=fl_manager.api_key) @@ -76,7 +92,7 @@ def main(): # Get and print summary summary = get_summary(text, client, model_name) - print("\nSummary:") + print("Summary:") print("-" * 50) print(summary) print("-" * 50) diff --git a/samples/rag/README.md b/samples/rag/README.md index 2225fd01..11cf0116 100644 --- a/samples/rag/README.md +++ b/samples/rag/README.md @@ -2,14 +2,14 @@ ## Overview -This guide demonstrates how to build a complete offline RAG (Retrieval-Augmented Generation) solution using Foundry Local, combining local embedding models with vector search capabilities for enhanced AI inference on edge devices. +This guide demonstrates how to build a complete offline RAG (Retrieval-Augmented Generation) solution using Foundry Local with the **Foundry Local C# SDK**, combining local embedding models with vector search capabilities for enhanced AI inference on edge devices. The SDK manages the full model lifecycle — cache checking, downloading, loading, and providing an OpenAI-compatible endpoint. 
## Prerequisites -- **Qdrant**: Local vector database installation +- **Qdrant**: Local vector database — `docker run -p 6333:6333 -p 6334:6334 qdrant/qdrant` - **.NET 8+**: Runtime environment - **.NET Interactive Notebook**: For development and testing -- **Foundry Local 0.5.100+**: Local AI model execution platform +- **Foundry Local**: Latest — see [foundrylocal.ai](https://foundrylocal.ai) ### Hardware Considerations @@ -45,9 +45,40 @@ Download and place these files in a `./jina/` directory: #r "nuget: Microsoft.SemanticKernel.Connectors.Onnx, 1.60.0-alpha" #r "nuget: Microsoft.SemanticKernel.Connectors.Qdrant, 1.60.0-preview" #r "nuget: Qdrant.Client, 1.14.1" +#r "nuget: Microsoft.AI.Foundry.Local" ``` -### 2. Kernel Configuration +### 2. SDK Initialization and Model Lifecycle + +```csharp +using Microsoft.AI.Foundry.Local; +using Microsoft.Extensions.Logging.Abstractions; + +// Initialize the SDK with web service support +await FoundryLocalManager.CreateAsync( + new Configuration + { + AppName = "rag-notebook", + Web = new Configuration.WebService { Urls = "http://127.0.0.1:0" } + }, + NullLogger.Instance); + +var manager = FoundryLocalManager.Instance; + +// Look up model by alias — SDK auto-selects the best variant +var catalog = await manager.GetCatalogAsync(); +var model = await catalog.GetModelAsync("qwen2.5-0.5b"); + +// Cache-aware download: only downloads on first run +if (!await model.IsCachedAsync()) + await model.DownloadAsync(progress => Console.Write($"\rDownload: {progress:F1}%")); + +await model.LoadAsync(); +await manager.StartWebServiceAsync(); +var endpoint = manager.Urls![0]; +``` + +### 3. 
Kernel Configuration ```csharp var builder = Kernel.CreateBuilder(); @@ -55,11 +86,11 @@ var builder = Kernel.CreateBuilder(); // Local embedding model builder.AddBertOnnxEmbeddingGenerator("./jina/model.onnx", "./jina/vocab.txt"); -// Foundry Local chat completion +// Foundry Local chat completion — endpoint and variant from SDK builder.AddOpenAIChatCompletion( - "qwen2.5-0.5b-instruct-generic-gpu", - new Uri("http://localhost:5273/v1"), - apiKey: "", + model.SelectedVariant.Id, + new Uri($"{endpoint}/v1"), + apiKey: "", serviceId: "qwen2.5-0.5b"); var kernel = builder.Build(); diff --git a/samples/rag/rag_foundrylocal_demo.ipynb b/samples/rag/rag_foundrylocal_demo.ipynb index d12cd5d1..9ae6be67 100644 --- a/samples/rag/rag_foundrylocal_demo.ipynb +++ b/samples/rag/rag_foundrylocal_demo.ipynb @@ -7,11 +7,13 @@ "source": [ "# Foundry Local RAG Implementation Guide\n", "\n", - "This notebook demonstrates how to build a Retrieval-Augmented Generation (RAG) system using Foundry Local with Semantic Kernel, ONNX embeddings, and Qdrant vector database.\n", + "This notebook demonstrates how to build a Retrieval-Augmented Generation (RAG) system using **Foundry Local** with the **Foundry Local C# SDK**, Semantic Kernel, ONNX embeddings, and Qdrant vector database.\n", + "\n", + "The Foundry Local SDK manages the model lifecycle (cache check, download, load) and provides an OpenAI-compatible endpoint for Semantic Kernel to use — no hardcoded URLs or variant IDs needed.\n", "\n", "## Package Installation\n", "\n", - "First, we install the required NuGet packages for Semantic Kernel and related components." + "First, we install the required NuGet packages." ] }, { @@ -88,43 +90,6 @@ "Installing the ONNX connector package which enables using ONNX models for embeddings generation in Semantic Kernel." 
] }, - { - "cell_type": "code", - "execution_count": 3, - "id": "bc62e7be", - "metadata": { - "language_info": { - "name": "polyglot-notebook" - }, - "polyglot_notebook": { - "kernelName": "csharp" - } - }, - "outputs": [ - { - "data": { - "text/html": [ - "
Installed Packages
  • Microsoft.SemanticKernel.Connectors.Onnx, 1.60.0-alpha
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "#r \"nuget: Microsoft.SemanticKernel.Connectors.Onnx, 1.60.0-alpha\"" - ] - }, - { - "cell_type": "markdown", - "id": "70bff756", - "metadata": {}, - "source": [ - "### Duplicate ONNX Connector Installation\n", - "\n", - "Note: This is a duplicate installation of the ONNX connector package (same as the previous cell)." - ] - }, { "cell_type": "code", "execution_count": 4, @@ -199,7 +164,27 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, + "id": "d051a66f", + "metadata": {}, + "outputs": [], + "source": [ + "#r \"nuget: Microsoft.AI.Foundry.Local\"" + ] + }, + { + "cell_type": "markdown", + "id": "e649c627", + "metadata": {}, + "source": [ + "### Install Foundry Local SDK\n", + "\n", + "Installing the Foundry Local C# SDK which manages model lifecycle — cache checking, downloading, loading, and providing an OpenAI-compatible endpoint." + ] + }, + { + "cell_type": "code", + "execution_count": null, "id": "6ab040e4", "metadata": { "language_info": { @@ -211,7 +196,10 @@ }, "outputs": [], "source": [ - "using Microsoft.SemanticKernel;" + "using Microsoft.SemanticKernel;\n", + "using Microsoft.AI.Foundry.Local;\n", + "using Microsoft.Extensions.Logging;\n", + "using Microsoft.Extensions.Logging.Abstractions;" ] }, { @@ -221,9 +209,9 @@ "source": [ "## Setup and Configuration\n", "\n", - "### Import Semantic Kernel\n", + "### Import Namespaces\n", "\n", - "Importing the core Semantic Kernel namespace to access the main functionality." + "Importing core Semantic Kernel, Foundry Local SDK, and logging namespaces." ] }, { @@ -253,6 +241,83 @@ "Creating a kernel builder instance which will be used to configure and build the Semantic Kernel with various services." 
] }, + { + "cell_type": "markdown", + "id": "9d88b6ee", + "metadata": {}, + "source": [ + "### Initialize Foundry Local SDK\n", + "\n", + "Initialize the Foundry Local SDK singleton with a web service configuration. The SDK will manage the model lifecycle and provide an OpenAI-compatible endpoint for Semantic Kernel." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4f039c8a", + "metadata": {}, + "outputs": [], + "source": [ + "// Initialize the Foundry Local SDK with web service support\n", + "await FoundryLocalManager.CreateAsync(\n", + " new Configuration\n", + " {\n", + " AppName = \"rag-notebook\",\n", + " Web = new Configuration.WebService { Urls = \"http://127.0.0.1:0\" } // port 0 = auto-assign\n", + " },\n", + " NullLogger.Instance);\n", + "\n", + "var manager = FoundryLocalManager.Instance;\n", + "Console.WriteLine(\"Foundry Local SDK initialized.\");" + ] + }, + { + "cell_type": "markdown", + "id": "9ff58f97", + "metadata": {}, + "source": [ + "### Model Lifecycle — Cache Check, Download, Load\n", + "\n", + "Look up the model by alias, check whether it is already cached locally, download if needed (with progress), then load into memory and start the web service." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "06ecda82", + "metadata": {}, + "outputs": [], + "source": [ + "// Look up the model by alias (SDK auto-selects the best variant for the hardware)\n", + "var modelAlias = \"qwen2.5-0.5b\";\n", + "var catalog = await manager.GetCatalogAsync();\n", + "var model = await catalog.GetModelAsync(modelAlias)\n", + " ?? 
throw new Exception($\"Model '{modelAlias}' not found in catalog.\");\n", + "\n", + "Console.WriteLine($\"Model: {model.Alias} — Variant: {model.SelectedVariant.Id}\");\n", + "\n", + "// Check if cached, download if needed\n", + "var isCached = await model.IsCachedAsync();\n", + "Console.WriteLine($\"Cached: {isCached}\");\n", + "\n", + "if (!isCached)\n", + "{\n", + " Console.WriteLine(\"Downloading model (first time only)...\");\n", + " await model.DownloadAsync(progress =>\n", + " Console.Write($\"\\rDownload: {progress:F1}% \"));\n", + " Console.WriteLine(\"\\nDownload complete.\");\n", + "}\n", + "\n", + "// Load the model into memory\n", + "await model.LoadAsync();\n", + "Console.WriteLine(\"Model loaded.\");\n", + "\n", + "// Start the web service — Semantic Kernel will connect to this endpoint\n", + "await manager.StartWebServiceAsync();\n", + "var endpoint = manager.Urls![0];\n", + "Console.WriteLine($\"Foundry Local endpoint: {endpoint}/v1\");" + ] + }, { "cell_type": "code", "execution_count": null, @@ -267,8 +332,10 @@ }, "outputs": [], "source": [ - "var embeddModelPath = \"Your Jinaai jina-embeddings-v2-base-en onnx model path\";\n", - "var embedVocab = \"Your Jinaai ina-embeddings-v2-base-en vocab file path\";" + "// Download from https://huggingface.co/jinaai/jina-embeddings-v2-base-en\n", + "// Place model.onnx and vocab.txt in a ./jina/ directory relative to this notebook\n", + "var embeddModelPath = \"./jina/model.onnx\";\n", + "var embedVocab = \"./jina/vocab.txt\";" ] }, { @@ -278,12 +345,14 @@ "source": [ "### Define Embedding Model Paths\n", "\n", - "Setting up file paths for the JINA embedding model files - the ONNX model file and vocabulary file needed for text embeddings." + "Setting up file paths for the JINA embedding model. 
Download both files from [HuggingFace](https://huggingface.co/jinaai/jina-embeddings-v2-base-en) and place them in a `./jina/` directory:\n", + "- `model.onnx` — [download](https://huggingface.co/jinaai/jina-embeddings-v2-base-en/resolve/main/model.onnx)\n", + "- `vocab.txt` — [download](https://huggingface.co/jinaai/jina-embeddings-v2-base-en/resolve/main/vocab.txt)" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "f48625de", "metadata": { "language_info": { @@ -296,7 +365,14 @@ "outputs": [], "source": [ "builder.AddBertOnnxEmbeddingGenerator(embeddModelPath, embedVocab);\n", - "builder.AddOpenAIChatCompletion(\"qwen2.5-0.5b-instruct-generic-gpu\", new Uri(\"http://localhost:5273/v1\"), apiKey: \"\", serviceId: \"qwen2.5-0.5b\");" + "\n", + "// Use the SDK-managed endpoint instead of a hardcoded URL.\n", + "// The model variant ID is obtained from the SDK (no hardcoded variant names).\n", + "builder.AddOpenAIChatCompletion(\n", + " model.SelectedVariant.Id,\n", + " new Uri($\"{endpoint}/v1\"),\n", + " apiKey: \"\",\n", + " serviceId: modelAlias);" ] }, { @@ -306,7 +382,7 @@ "source": [ "### Configure AI Services\n", "\n", - "Adding the BERT ONNX embedding generator and OpenAI-compatible chat completion service to the kernel builder. The chat service connects to a local Foundry Local instance running the Qwen2.5 model." + "Adding the BERT ONNX embedding generator for local embeddings and the OpenAI-compatible chat completion service. The chat endpoint and model variant ID are obtained from the Foundry Local SDK — no hardcoded URLs or variant names." 
] }, { @@ -764,7 +840,7 @@ }, "outputs": [], "source": [ - "var chatService = kernel.GetRequiredService(serviceKey: \"qwen2.5-0.5b\");\n", + "var chatService = kernel.GetRequiredService(serviceKey: modelAlias);\n", "var embeddingService = kernel.GetRequiredService>>();" ] }, @@ -782,7 +858,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "metadata": { "language_info": { "name": "polyglot-notebook" @@ -793,8 +869,10 @@ }, "outputs": [], "source": [ + "// Ensure Qdrant is running locally: docker run -p 6333:6333 -p 6334:6334 qdrant/qdrant\n", + "var qdrantEndpoint = \"http://localhost:6334\";\n", "var vectorStoreService = new VectorStoreService(\n", - " \"http://localhost:6334\",\n", + " qdrantEndpoint,\n", " \"\",\n", " \"demodocs\");\n", "\n", @@ -1014,6 +1092,28 @@ "\n", "Displaying the final answer generated by the RAG system, which should contain information about Foundry Local based on the ingested document." ] + }, + { + "cell_type": "markdown", + "id": "da1bc7e9", + "metadata": {}, + "source": [ + "## Cleanup\n", + "\n", + "Stop the web service and dispose of the Foundry Local SDK when done." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f8a3f84", + "metadata": {}, + "outputs": [], + "source": [ + "await manager.StopWebServiceAsync();\n", + "manager.Dispose();\n", + "Console.WriteLine(\"Foundry Local SDK cleaned up.\");" + ] } ], "metadata": { From 78206d0a8ab01c8d2a5b14417760c49dc91f3916 Mon Sep 17 00:00:00 2001 From: Lee Stott Date: Tue, 24 Mar 2026 16:43:41 -0700 Subject: [PATCH 02/13] Update --- .../Services/FoundryModelService.cs | 5 ++++- .../Services/TranscriptionService.cs | 9 +++++---- samples/js/local-cag/src/modelSelector.js | 11 ++++++----- samples/js/local-rag/src/chatEngine.js | 4 ++-- .../agent-framework/src/app/foundry_boot.py | 10 +++------- samples/python/agent-framework/src/app/web.py | 17 ++++++++--------- samples/python/summarize/summarize.py | 5 +++-- 7 files changed, 31 insertions(+), 30 deletions(-) diff --git a/samples/cs/whisper-transcription/Services/FoundryModelService.cs b/samples/cs/whisper-transcription/Services/FoundryModelService.cs index 97f34bbe..2b22c3c8 100644 --- a/samples/cs/whisper-transcription/Services/FoundryModelService.cs +++ b/samples/cs/whisper-transcription/Services/FoundryModelService.cs @@ -68,10 +68,13 @@ public async Task EnsureModelReadyAsync(Model model) if (!await model.IsCachedAsync()) { _logger.LogInformation("Model \"{ModelId}\" not cached — downloading...", model.Id); + var lastLoggedBucket = -1; await model.DownloadAsync(progress => { - if (progress % 10 == 0) + var bucket = (int)Math.Floor(progress / 10); + if (bucket > lastLoggedBucket) { + lastLoggedBucket = bucket; _logger.LogInformation("Download progress: {Progress:F0}%", progress); } }); diff --git a/samples/cs/whisper-transcription/Services/TranscriptionService.cs b/samples/cs/whisper-transcription/Services/TranscriptionService.cs index bef7001e..e533be1f 100644 --- a/samples/cs/whisper-transcription/Services/TranscriptionService.cs +++ b/samples/cs/whisper-transcription/Services/TranscriptionService.cs @@ 
-15,20 +15,21 @@ public TranscriptionService( _logger = logger; } - public async Task TranscribeAsync(string filePath, string? modelAlias = null) + public async Task TranscribeAsync(string filePath, string? modelAlias = null, + CancellationToken ct = default) { var model = await _modelService.GetModelAsync(modelAlias); await _modelService.EnsureModelReadyAsync(model); - var audioClient = await model.GetAudioClientAsync() + var audioClient = await model.GetAudioClientAsync(ct) ?? throw new InvalidOperationException("Failed to get audio client"); _logger.LogInformation("Transcribing \"{FilePath}\" with model {ModelId}", filePath, model.Id); // Use streaming transcription for real-time output var textParts = new List(); - var response = audioClient.TranscribeAudioStreamingAsync(filePath, CancellationToken.None); - await foreach (var chunk in response) + var response = audioClient.TranscribeAudioStreamingAsync(filePath, ct); + await foreach (var chunk in response.WithCancellation(ct)) { if (!string.IsNullOrEmpty(chunk.Text)) { diff --git a/samples/js/local-cag/src/modelSelector.js b/samples/js/local-cag/src/modelSelector.js index 36d98910..ba2c26a9 100644 --- a/samples/js/local-cag/src/modelSelector.js +++ b/samples/js/local-cag/src/modelSelector.js @@ -68,9 +68,10 @@ export async function selectBestModel(catalog, opts = {}) { // Filter to chat-completion models that fit within the RAM budget const candidates = []; for (const m of allModels) { - const info = m.selectedVariant?._modelInfo; - if (!info) continue; - if (info.task !== "chat-completion") continue; + // Use the public API: iterate model.variants and use variant.modelInfo + const variant = m.variants.find(v => v.modelInfo?.task === "chat-completion"); + if (!variant) continue; + const info = variant.modelInfo; if (SKIP_ALIASES.has(info.alias)) continue; if (info.fileSizeMb > budgetMb) { console.log(`[ModelSelector] skip ${info.alias} (${(info.fileSizeMb / 1024).toFixed(1)} GB > RAM budget)`); @@ -97,7 +98,7 
@@ export async function selectBestModel(catalog, opts = {}) { const qualityScore = rankIndex >= 0 ? (QUALITY_RANK.length - rankIndex) * 10 : 1; - const cacheBonus = info.cached ? 5 : 0; + const cacheBonus = model.isCached ? 5 : 0; const score = qualityScore + cacheBonus; return { model, info, score }; }); @@ -107,7 +108,7 @@ export async function selectBestModel(catalog, opts = {}) { const best = scored[0]; const reason = `auto-selected (${(best.info.fileSizeMb / 1024).toFixed(1)} GB, ` + - `${best.info.cached ? "cached" : "will download"}, ` + + `${best.model.isCached ? "cached" : "will download"}, ` + `rank ${scored.indexOf(best) + 1}/${scored.length})`; console.log(`[ModelSelector] Selected: ${best.info.alias} – ${reason}`); diff --git a/samples/js/local-rag/src/chatEngine.js b/samples/js/local-rag/src/chatEngine.js index 2d878839..0684a5a1 100644 --- a/samples/js/local-rag/src/chatEngine.js +++ b/samples/js/local-rag/src/chatEngine.js @@ -54,8 +54,8 @@ export class ChatEngine { if (!this.model.isCached) { this._emitStatus("download", `Downloading ${this.modelAlias}... This may take a few minutes on first run.`, 0); await this.model.download((progress) => { - const pct = Math.round(progress * 100); - this._emitStatus("download", `Downloading ${this.modelAlias}... ${pct}%`, progress); + const pct = Math.round(progress); + this._emitStatus("download", `Downloading ${this.modelAlias}... 
${pct}%`, progress / 100); }); this._emitStatus("download", `Download complete.`, 1); } else { diff --git a/samples/python/agent-framework/src/app/foundry_boot.py b/samples/python/agent-framework/src/app/foundry_boot.py index 3bde3388..78cf1c51 100644 --- a/samples/python/agent-framework/src/app/foundry_boot.py +++ b/samples/python/agent-framework/src/app/foundry_boot.py @@ -57,13 +57,9 @@ def bootstrap(self) -> FoundryConnection: endpoint = manager.endpoint api_key = manager.api_key - # List cached models to find the resolved variant - cached = manager.list_cached_models() - model_id = self.alias - for m in cached: - if self.alias in str(m): - model_id = str(m) - break + # Resolve alias to the actual model ID via the SDK's catalog API + model_info = manager.get_model_info(self.alias) + model_id = model_info.id if model_info else self.alias console.print(f"[green]✓ Foundry Local ready[/] endpoint={endpoint}") log.info("Foundry Local ready: endpoint=%s model=%s", endpoint, model_id) diff --git a/samples/python/agent-framework/src/app/web.py b/samples/python/agent-framework/src/app/web.py index 5074e3b0..05e5d737 100644 --- a/samples/python/agent-framework/src/app/web.py +++ b/samples/python/agent-framework/src/app/web.py @@ -83,15 +83,14 @@ def generate(): else: gen = run_full_workflow(_conn, _docs, question) - async def drain(): - events = [] - async for evt in gen: - events.append(evt) - return events - - events = loop.run_until_complete(drain()) - for evt in events: - yield f"data: {json.dumps(evt)}\n\n" + # Stream each event as it arrives instead of buffering + agen = gen.__aiter__() + while True: + try: + evt = loop.run_until_complete(agen.__anext__()) + yield f"data: {json.dumps(evt)}\n\n" + except StopAsyncIteration: + break except Exception as exc: log.exception("Workflow error") yield f"data: {json.dumps({'type': 'error', 'message': str(exc), 'traceback': traceback.format_exc()})}\n\n" diff --git a/samples/python/summarize/summarize.py 
b/samples/python/summarize/summarize.py index 9b2a330a..6c5943e8 100644 --- a/samples/python/summarize/summarize.py +++ b/samples/python/summarize/summarize.py @@ -75,9 +75,10 @@ def main(): print("No downloaded models available. Run with --model to download one.") sys.exit(1) + cached_alias = cached_models[0].alias model_name = cached_models[0].id - print(f"✓ Using cached model: {model_name}") - fl_manager.load_model(model_name) + print(f"✓ Using cached model: {cached_alias} ({model_name})") + fl_manager.load_model(cached_alias) print(f"✓ Model loaded and ready\n") From 0f5e7a9e1ffd1eada41d6b13c91e1e4ea0561f9a Mon Sep 17 00:00:00 2001 From: Lee Stott Date: Tue, 24 Mar 2026 16:50:53 -0700 Subject: [PATCH 03/13] fix: address review feedback - thread safety, README accuracy, TF-IDF claims - FoundryModelService.cs: add SemaphoreSlim for thread-safe InitializeAsync to prevent concurrent callers from double-initializing in ASP.NET - summarize/README.md: align docs with code (uses first cached model, not phi-4-mini default) - local-rag/README.md: replace 'TF-IDF' with 'term-frequency' throughout since the implementation uses raw term-frequency maps without IDF weighting --- .../Services/FoundryModelService.cs | 35 ++++++++++++------- samples/js/local-rag/README.md | 14 ++++---- samples/python/summarize/README.md | 2 +- 3 files changed, 31 insertions(+), 20 deletions(-) diff --git a/samples/cs/whisper-transcription/Services/FoundryModelService.cs b/samples/cs/whisper-transcription/Services/FoundryModelService.cs index 2b22c3c8..3c8cd5ba 100644 --- a/samples/cs/whisper-transcription/Services/FoundryModelService.cs +++ b/samples/cs/whisper-transcription/Services/FoundryModelService.cs @@ -8,6 +8,7 @@ public class FoundryModelService private readonly ILogger _logger; private readonly ILoggerFactory _loggerFactory; private readonly FoundryOptions _options; + private readonly SemaphoreSlim _initLock = new(1, 1); private bool _initialized; public FoundryModelService( @@ -24,20 
+25,30 @@ public async Task InitializeAsync() { if (_initialized) return; - _logger.LogInformation("Initializing Foundry Local Manager"); - var config = new Configuration + await _initLock.WaitAsync(); + try { - AppName = "WhisperTranscription", - LogLevel = Enum.TryParse( - _options.LogLevel, true, out var lvl) - ? lvl - : Microsoft.AI.Foundry.Local.LogLevel.Information, - }; + if (_initialized) return; - await FoundryLocalManager.CreateAsync(config, _loggerFactory.CreateLogger("FoundryLocal")); - var mgr = FoundryLocalManager.Instance; - await mgr.EnsureEpsDownloadedAsync(); - _initialized = true; + _logger.LogInformation("Initializing Foundry Local Manager"); + var config = new Configuration + { + AppName = "WhisperTranscription", + LogLevel = Enum.TryParse( + _options.LogLevel, true, out var lvl) + ? lvl + : Microsoft.AI.Foundry.Local.LogLevel.Information, + }; + + await FoundryLocalManager.CreateAsync(config, _loggerFactory.CreateLogger("FoundryLocal")); + var mgr = FoundryLocalManager.Instance; + await mgr.EnsureEpsDownloadedAsync(); + _initialized = true; + } + finally + { + _initLock.Release(); + } } public async Task GetModelAsync(string? aliasOrId = null) diff --git a/samples/js/local-rag/README.md b/samples/js/local-rag/README.md index 0b71e307..dfcdb9a1 100644 --- a/samples/js/local-rag/README.md +++ b/samples/js/local-rag/README.md @@ -4,7 +4,7 @@ A fully offline **Retrieval-Augmented Generation (RAG)** sample application that ## What is RAG? -RAG (Retrieval-Augmented Generation) **chunks documents, indexes them with TF-IDF vectors, and retrieves only the most relevant chunks** at query time — no cloud APIs, no embedding models, no external vector databases. This makes it ideal for: +RAG (Retrieval-Augmented Generation) **chunks documents, indexes them with term-frequency vectors, and retrieves only the most relevant chunks** via cosine similarity at query time — no cloud APIs, no embedding models, no external vector databases. 
This makes it ideal for: - **Large knowledge bases** — scales beyond what fits in a single prompt - **Offline / air-gapped** environments (e.g., field operations) @@ -26,12 +26,12 @@ RAG (Retrieval-Augmented Generation) **chunks documents, indexes them with TF-ID │ ┌──────┴───────┐ │ SQLite DB │ - │ (TF-IDF idx) │ + │ (TF vectors) │ └──────────────┘ ``` -1. **Ingest**: Documents in `docs/` are chunked (200 tokens, 25-token overlap) and stored in SQLite with TF-IDF vectors and an inverted index. -2. **Query**: Each user question is vectorised using TF-IDF, then cosine similarity finds the top-K most relevant chunks. +1. **Ingest**: Documents in `docs/` are chunked (200 tokens, 25-token overlap) and stored in SQLite with term-frequency vectors and an inverted index. +2. **Query**: Each user question is vectorised using term-frequency, then cosine similarity finds the top-K most relevant chunks. 3. **Prompt**: Retrieved chunks are injected into the system prompt with source citations. 4. **Inference**: Foundry Local runs the model in-process — no external HTTP server needed. 
@@ -107,7 +107,7 @@ local-rag/ ├── server.js # Express server with SSE status + chat + upload ├── chatEngine.js # RAG engine: SDK init, retrieval, inference ├── config.js # Configuration (model, chunking, paths) - ├── chunker.js # Document parsing, chunking, TF-IDF math + ├── chunker.js # Document parsing, chunking, term-frequency math ├── vectorStore.js # SQLite-backed vector store with inverted index ├── ingest.js # Batch document ingestion script └── prompts.js # System prompts (full + compact/edge mode) @@ -116,7 +116,7 @@ local-rag/ ## Key Features - **Cache-aware** — skips model download if already in the Foundry cache -- **TF-IDF vector search** — no embedding model needed; lightweight and fast +- **Term-frequency vector search** — no embedding model needed; lightweight and fast - **SQLite storage** — single-file database, no external services - **Runtime document upload** — add documents via the web UI without restarting - **Source citations** — each response shows which chunks were used and their relevance scores @@ -129,7 +129,7 @@ local-rag/ | Feature | RAG (this sample) | CAG | |---------|-------------------|-----| | Document loading | Chunked + indexed | All loaded at startup | -| Vector search | TF-IDF + cosine similarity | Keyword scoring | +| Vector search | Term-frequency + cosine similarity | Keyword scoring | | Storage | SQLite database | In-memory | | Knowledge base size | Any size | Small–medium | | Runtime upload | Yes | No | diff --git a/samples/python/summarize/README.md b/samples/python/summarize/README.md index a944ecff..5e1405f6 100644 --- a/samples/python/summarize/README.md +++ b/samples/python/summarize/README.md @@ -6,7 +6,7 @@ A simple command-line utility that uses Foundry Local to generate summaries of t - **Cache-aware**: Checks the local model cache before downloading — if the model is already cached, the download is skipped automatically. 
- **Visual feedback**: Shows step-by-step status (service start → cache check → download/skip → load → ready) so you always know what's happening. -- **Flexible model selection**: Use `--model` to pick a specific model alias, or let the script default to `phi-4-mini` (falls back to the first cached model if unavailable). +- **Flexible model selection**: Use `--model` to pick a specific model alias, or omit it to automatically use the first cached model. ## Setup From acf06fc15f28fe1596a42ee61de5d9f5bb84c2aa Mon Sep 17 00:00:00 2001 From: Lee Stott Date: Tue, 24 Mar 2026 17:06:09 -0700 Subject: [PATCH 04/13] =?UTF-8?q?fix:=20address=20round-3=20review=20issue?= =?UTF-8?q?s=20=E2=80=94=20env=20vars,=20event=20loop,=20CancellationToken?= =?UTF-8?q?,=20README=20accuracy?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../Services/FoundryModelService.cs | 8 ++++---- .../Services/TranscriptionService.cs | 2 +- samples/js/local-cag/src/config.js | 2 +- samples/js/local-rag/package.json | 2 +- samples/js/local-rag/src/config.js | 10 +++++----- samples/python/agent-framework/README.md | 2 +- samples/python/agent-framework/src/app/web.py | 2 ++ 7 files changed, 15 insertions(+), 13 deletions(-) diff --git a/samples/cs/whisper-transcription/Services/FoundryModelService.cs b/samples/cs/whisper-transcription/Services/FoundryModelService.cs index 3c8cd5ba..42ea828c 100644 --- a/samples/cs/whisper-transcription/Services/FoundryModelService.cs +++ b/samples/cs/whisper-transcription/Services/FoundryModelService.cs @@ -65,7 +65,7 @@ public async Task GetModelAsync(string? 
aliasOrId = null) return model; } - public async Task EnsureModelReadyAsync(Model model) + public async Task EnsureModelReadyAsync(Model model, CancellationToken ct = default) { // Prefer CPU variant var cpuVariant = model.Variants.FirstOrDefault( @@ -76,7 +76,7 @@ public async Task EnsureModelReadyAsync(Model model) } // Check cache and download if needed - if (!await model.IsCachedAsync()) + if (!await model.IsCachedAsync(ct)) { _logger.LogInformation("Model \"{ModelId}\" not cached — downloading...", model.Id); var lastLoggedBucket = -1; @@ -88,7 +88,7 @@ await model.DownloadAsync(progress => lastLoggedBucket = bucket; _logger.LogInformation("Download progress: {Progress:F0}%", progress); } - }); + }, ct); _logger.LogInformation("Model downloaded"); } else @@ -97,7 +97,7 @@ await model.DownloadAsync(progress => } _logger.LogInformation("Loading model \"{ModelId}\"...", model.Id); - await model.LoadAsync(); + await model.LoadAsync(ct); _logger.LogInformation("Model loaded and ready"); } } diff --git a/samples/cs/whisper-transcription/Services/TranscriptionService.cs b/samples/cs/whisper-transcription/Services/TranscriptionService.cs index e533be1f..f436aa3a 100644 --- a/samples/cs/whisper-transcription/Services/TranscriptionService.cs +++ b/samples/cs/whisper-transcription/Services/TranscriptionService.cs @@ -19,7 +19,7 @@ public async Task TranscribeAsync(string filePath, string? CancellationToken ct = default) { var model = await _modelService.GetModelAsync(modelAlias); - await _modelService.EnsureModelReadyAsync(model); + await _modelService.EnsureModelReadyAsync(model, ct); var audioClient = await model.GetAudioClientAsync(ct) ?? 
throw new InvalidOperationException("Failed to get audio client"); diff --git a/samples/js/local-cag/src/config.js b/samples/js/local-cag/src/config.js index 8c928df5..9133fad6 100644 --- a/samples/js/local-cag/src/config.js +++ b/samples/js/local-cag/src/config.js @@ -28,7 +28,7 @@ export const config = { // Server port: parseInt(process.env.PORT, 10) || 3000, - host: "127.0.0.1", + host: process.env.HOST || "127.0.0.1", // UI publicDir: path.join(ROOT, "public"), diff --git a/samples/js/local-rag/package.json b/samples/js/local-rag/package.json index 8c76bae9..247c4bb4 100644 --- a/samples/js/local-rag/package.json +++ b/samples/js/local-rag/package.json @@ -23,7 +23,7 @@ "gas-field", "support-agent", "sqlite", - "tfidf" + "term-frequency" ], "engines": { "node": ">=20.0.0" diff --git a/samples/js/local-rag/src/config.js b/samples/js/local-rag/src/config.js index 12b2937e..b53f08c1 100644 --- a/samples/js/local-rag/src/config.js +++ b/samples/js/local-rag/src/config.js @@ -7,18 +7,18 @@ const ROOT = path.resolve(__dirname, ".."); export const config = { // Model - model: "phi-3.5-mini", + model: process.env.FOUNDRY_MODEL || "phi-3.5-mini", // RAG docsDir: path.join(ROOT, "docs"), dbPath: path.join(ROOT, "data", "rag.db"), - chunkSize: 200, // tokens (approx) – kept small for NPU compatibility + chunkSize: 200, // tokens (approx) \u2013 kept small for NPU compatibility chunkOverlap: 25, // tokens overlap between chunks - topK: 3, // number of chunks to retrieve – limited for NPU context window + topK: 3, // number of chunks to retrieve \u2013 limited for NPU context window // Server - port: 3000, - host: "127.0.0.1", + port: parseInt(process.env.PORT, 10) || 3000, + host: process.env.HOST || "127.0.0.1", // UI publicDir: path.join(ROOT, "public"), diff --git a/samples/python/agent-framework/README.md b/samples/python/agent-framework/README.md index 23368e94..6802ff65 100644 --- a/samples/python/agent-framework/README.md +++ 
b/samples/python/agent-framework/README.md @@ -125,7 +125,7 @@ The smoke tests verify imports, document loading, the bootstrapper's environment | `ModuleNotFoundError: agent_framework` | `pip install agent-framework-core==1.0.0b260130` | | Model download hangs | Check network and ensure Foundry Local is on PATH | | `Connection refused` on port 5273 | Foundry Local service failed to start — run `foundry-local` manually to see errors | -| Flask port 5000 in use | Set `FLASK_PORT` env var or kill the conflicting process | +| Flask port 5000 in use | Use `--port ` flag or kill the conflicting process | ## License diff --git a/samples/python/agent-framework/src/app/web.py b/samples/python/agent-framework/src/app/web.py index 05e5d737..fc424bb2 100644 --- a/samples/python/agent-framework/src/app/web.py +++ b/samples/python/agent-framework/src/app/web.py @@ -77,6 +77,7 @@ def api_run(): def generate(): loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) try: if mode == "sequential": gen = run_sequential(_conn, _docs, question) @@ -95,6 +96,7 @@ def generate(): log.exception("Workflow error") yield f"data: {json.dumps({'type': 'error', 'message': str(exc), 'traceback': traceback.format_exc()})}\n\n" finally: + asyncio.set_event_loop(None) loop.close() return Response(generate(), mimetype="text/event-stream") From 050fbede49664050f2c19f9adf38d11862212a69 Mon Sep 17 00:00:00 2001 From: Lee Stott Date: Tue, 24 Mar 2026 19:54:15 -0700 Subject: [PATCH 05/13] update --- samples/cs/whisper-transcription/Program.cs | 7 ++++--- .../js/copilot-sdk-foundry-local/src/tool-calling.ts | 4 +++- samples/js/electron-chat-application/main.js | 8 ++++---- samples/js/local-cag/src/modelSelector.js | 4 ++++ samples/js/local-rag/src/chatEngine.js | 9 +++++---- samples/js/tool-calling-foundry-local/src/app.js | 2 +- samples/python/agent-framework/src/app/orchestrator.py | 8 ++------ samples/python/agent-framework/src/app/web.py | 10 +++++++--- 
samples/python/functioncalling/fl_tools.ipynb | 3 +++ 9 files changed, 33 insertions(+), 22 deletions(-) diff --git a/samples/cs/whisper-transcription/Program.cs b/samples/cs/whisper-transcription/Program.cs index f985f26c..5ae320a6 100644 --- a/samples/cs/whisper-transcription/Program.cs +++ b/samples/cs/whisper-transcription/Program.cs @@ -59,7 +59,8 @@ [FromServices] TranscriptionService svc, [FromForm] IFormFile file, [FromForm] string? model, - [FromForm] string? format) => + [FromForm] string? format, + CancellationToken ct) => { if (file is null || file.Length == 0) { @@ -70,12 +71,12 @@ var tmp = Path.Combine(Path.GetTempPath(), Guid.NewGuid() + Path.GetExtension(file.FileName)); await using (var fs = File.Create(tmp)) { - await file.CopyToAsync(fs); + await file.CopyToAsync(fs, ct); } try { - var result = await svc.TranscribeAsync(tmp, model); + var result = await svc.TranscribeAsync(tmp, model, ct); var outputFormat = format?.ToLowerInvariant() ?? "text"; return outputFormat switch { diff --git a/samples/js/copilot-sdk-foundry-local/src/tool-calling.ts b/samples/js/copilot-sdk-foundry-local/src/tool-calling.ts index 039849cf..96dad4af 100644 --- a/samples/js/copilot-sdk-foundry-local/src/tool-calling.ts +++ b/samples/js/copilot-sdk-foundry-local/src/tool-calling.ts @@ -60,7 +60,9 @@ function defineCalculateTool() { handler: async (args) => { try { // Only allow safe math characters and Math.* calls - const sanitized = args.expression.replace(/[^0-9+\-*/().,%\s]|Math\.\w+/g, (m) => + // Math\.\w+ must come first so "Math.sqrt" is matched as a token + // before the single-char class strips individual letters. + const sanitized = args.expression.replace(/Math\.\w+|[^0-9+\-*/().,%\s]/g, (m) => m.startsWith("Math.") ? 
m : "", ); const result = new Function(`"use strict"; return (${sanitized})`)(); diff --git a/samples/js/electron-chat-application/main.js b/samples/js/electron-chat-application/main.js index 22a1fc1d..935c1c45 100644 --- a/samples/js/electron-chat-application/main.js +++ b/samples/js/electron-chat-application/main.js @@ -91,11 +91,11 @@ ipcMain.handle('get-models', async () => { variants: m.variants.map(v => ({ id: v.id, alias: v.alias, - displayName: v.modelInfo.displayName || v.alias, + displayName: v.modelInfo?.displayName || v.alias, isCached: cachedIds.has(v.id), - fileSizeMb: v.modelInfo.fileSizeMb, - modelType: v.modelInfo.modelType, - publisher: v.modelInfo.publisher + fileSizeMb: v.modelInfo?.fileSizeMb, + modelType: v.modelInfo?.modelType, + publisher: v.modelInfo?.publisher })) })); diff --git a/samples/js/local-cag/src/modelSelector.js b/samples/js/local-cag/src/modelSelector.js index ba2c26a9..7f3f1e98 100644 --- a/samples/js/local-cag/src/modelSelector.js +++ b/samples/js/local-cag/src/modelSelector.js @@ -73,6 +73,10 @@ export async function selectBestModel(catalog, opts = {}) { if (!variant) continue; const info = variant.modelInfo; if (SKIP_ALIASES.has(info.alias)) continue; + if (info.fileSizeMb == null || info.fileSizeMb <= 0) { + console.log(`[ModelSelector] skip ${info.alias} (unknown file size)`); + continue; + } if (info.fileSizeMb > budgetMb) { console.log(`[ModelSelector] skip ${info.alias} (${(info.fileSizeMb / 1024).toFixed(1)} GB > RAM budget)`); continue; diff --git a/samples/js/local-rag/src/chatEngine.js b/samples/js/local-rag/src/chatEngine.js index 0684a5a1..21927abf 100644 --- a/samples/js/local-rag/src/chatEngine.js +++ b/samples/js/local-rag/src/chatEngine.js @@ -202,12 +202,13 @@ export class ChatEngine { }; // Yield text chunks from the SDK streaming callback buffer - while (!done || textChunks.length > 0) { - if (textChunks.length === 0 && !done) { + let head = 0; + while (!done || head < textChunks.length) { + if (head >= 
textChunks.length && !done) { await new Promise((r) => { resolve = r; }); } - while (textChunks.length > 0) { - const chunk = textChunks.shift(); + while (head < textChunks.length) { + const chunk = textChunks[head++]; const content = chunk.choices?.[0]?.delta?.content; if (content) { yield { type: "text", data: content }; diff --git a/samples/js/tool-calling-foundry-local/src/app.js b/samples/js/tool-calling-foundry-local/src/app.js index 57fc5d01..c9fe3f7f 100644 --- a/samples/js/tool-calling-foundry-local/src/app.js +++ b/samples/js/tool-calling-foundry-local/src/app.js @@ -22,7 +22,7 @@ async function runToolCallingExample() { console.log("Initializing Foundry Local SDK..."); manager = FoundryLocalManager.create({ appName: "FoundryLocalSample", - serviceEndpoint: "http://localhost:5000", + webServiceUrls: "http://localhost:5000", logLevel: "info" }); diff --git a/samples/python/agent-framework/src/app/orchestrator.py b/samples/python/agent-framework/src/app/orchestrator.py index dbea57d7..95886edf 100644 --- a/samples/python/agent-framework/src/app/orchestrator.py +++ b/samples/python/agent-framework/src/app/orchestrator.py @@ -148,17 +148,13 @@ async def run_full_workflow( # ── Concurrent fan-out ── snippets_text = "" - keywords_text = "" async for evt in run_concurrent_retrieval(conn, docs, plan_text): yield evt if evt["type"] == "step_done" and evt["agent"] == "Concurrent": - # Parse out retriever/tool output - output = evt.get("output", "") - snippets_text = output - keywords_text = "" + snippets_text = evt.get("output", "") # ── Critic (sequential) ── - combined = f"Plan:\n{plan_text}\n\nRetrieved + Keywords:\n{snippets_text}" + combined = f"Plan:\n{plan_text}\n\nRetrieved:\n{snippets_text}" for loop in range(MAX_CRITIC_LOOPS): yield {"type": "step_start", "agent": "Critic", "description": f"Reviewing for gaps (round {loop + 1})"} t0 = time.perf_counter() diff --git a/samples/python/agent-framework/src/app/web.py 
b/samples/python/agent-framework/src/app/web.py index fc424bb2..ee95fc0a 100644 --- a/samples/python/agent-framework/src/app/web.py +++ b/samples/python/agent-framework/src/app/web.py @@ -94,7 +94,7 @@ def generate(): break except Exception as exc: log.exception("Workflow error") - yield f"data: {json.dumps({'type': 'error', 'message': str(exc), 'traceback': traceback.format_exc()})}\n\n" + yield f"data: {json.dumps({'type': 'error', 'message': 'An internal error occurred. Check server logs for details.'})}\n\n" finally: asyncio.set_event_loop(None) loop.close() @@ -108,13 +108,15 @@ def api_tools(): return jsonify({"status": "error", "message": "Not bootstrapped"}), 503 loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) try: results = loop.run_until_complete(run_tool_demo(_conn)) return jsonify({"status": "ok", "results": results}) except Exception as exc: log.exception("Tool demo error") - return jsonify({"status": "error", "message": str(exc)}), 500 + return jsonify({"status": "error", "message": "An internal error occurred. Check server logs for details."}), 500 finally: + asyncio.set_event_loop(None) loop.close() @app.route("/api/documents") @@ -164,6 +166,7 @@ def api_demo_run(demo_id: str): def generate(): loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) try: yield f"data: {json.dumps({'type': 'step_start', 'agent': demo.name})}\n\n" result = loop.run_until_complete(demo.runner(_conn, prompt)) @@ -171,8 +174,9 @@ def generate(): yield f"data: {json.dumps({'type': 'complete', 'report': result.get('response', '')})}\n\n" except Exception as exc: log.exception("Demo error: %s", demo_id) - yield f"data: {json.dumps({'type': 'error', 'message': str(exc), 'traceback': traceback.format_exc()})}\n\n" + yield f"data: {json.dumps({'type': 'error', 'message': 'An internal error occurred. 
Check server logs for details.'})}\n\n" finally: + asyncio.set_event_loop(None) loop.close() return Response(generate(), mimetype="text/event-stream") diff --git a/samples/python/functioncalling/fl_tools.ipynb b/samples/python/functioncalling/fl_tools.ipynb index 2b226e2c..887f34be 100644 --- a/samples/python/functioncalling/fl_tools.ipynb +++ b/samples/python/functioncalling/fl_tools.ipynb @@ -140,6 +140,9 @@ "cached_ids = {m.id for m in cached_models}\n", "model_info = manager.get_model_info(alias)\n", "\n", + "if model_info is None:\n", + " raise ValueError(f\"Model '{alias}' not found in the catalog. Check the alias and try again.\")\n", + "\n", "if model_info.id in cached_ids:\n", " print(f\" ✓ Model '{alias}' is already cached — skipping download\")\n", "else:\n", From e373a2bd6b1f1eb13676d6aefa20fe16a42bfd73 Mon Sep 17 00:00:00 2001 From: Lee Stott Date: Tue, 24 Mar 2026 20:50:11 -0700 Subject: [PATCH 06/13] update --- samples/js/local-cag/README.md | 2 +- .../js/tool-calling-foundry-local/src/app.js | 3 ++- samples/python/agent-framework/src/app/web.py | 20 +++++++++++++++++-- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/samples/js/local-cag/README.md b/samples/js/local-cag/README.md index ece03869..d9772a0d 100644 --- a/samples/js/local-cag/README.md +++ b/samples/js/local-cag/README.md @@ -55,7 +55,7 @@ Set these environment variables (all optional): |----------|---------|-------------| | `FOUNDRY_MODEL` | *(auto-select)* | Force a specific model alias (e.g., `phi-3.5-mini`) | | `PORT` | `3000` | Server port | -| `HOST` | `localhost` | Server bind address | +| `HOST` | `127.0.0.1` | Server bind address | ## Adding Domain Documents diff --git a/samples/js/tool-calling-foundry-local/src/app.js b/samples/js/tool-calling-foundry-local/src/app.js index c9fe3f7f..20f56d9d 100644 --- a/samples/js/tool-calling-foundry-local/src/app.js +++ b/samples/js/tool-calling-foundry-local/src/app.js @@ -19,10 +19,11 @@ async function 
runToolCallingExample() { let model = null; try { + const webServiceUrl = process.env.FOUNDRY_SERVICE_URL || "http://127.0.0.1:0"; console.log("Initializing Foundry Local SDK..."); manager = FoundryLocalManager.create({ appName: "FoundryLocalSample", - webServiceUrls: "http://localhost:5000", + webServiceUrls: webServiceUrl, logLevel: "info" }); diff --git a/samples/python/agent-framework/src/app/web.py b/samples/python/agent-framework/src/app/web.py index ee95fc0a..31155cfe 100644 --- a/samples/python/agent-framework/src/app/web.py +++ b/samples/python/agent-framework/src/app/web.py @@ -99,7 +99,15 @@ def generate(): asyncio.set_event_loop(None) loop.close() - return Response(generate(), mimetype="text/event-stream") + return Response( + generate(), + mimetype="text/event-stream", + headers={ + "Cache-Control": "no-cache", + "Connection": "keep-alive", + "X-Accel-Buffering": "no", + }, + ) @app.route("/api/tools", methods=["POST"]) def api_tools(): @@ -179,6 +187,14 @@ def generate(): asyncio.set_event_loop(None) loop.close() - return Response(generate(), mimetype="text/event-stream") + return Response( + generate(), + mimetype="text/event-stream", + headers={ + "Cache-Control": "no-cache", + "Connection": "keep-alive", + "X-Accel-Buffering": "no", + }, + ) return app From 10d78b3948085af6267e0304669ef1a9d2ff3826 Mon Sep 17 00:00:00 2001 From: Lee Stott Date: Wed, 25 Mar 2026 07:26:47 -0700 Subject: [PATCH 07/13] Update --- samples/python/summarize/summarize.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/samples/python/summarize/summarize.py b/samples/python/summarize/summarize.py index 6c5943e8..f0cfe408 100644 --- a/samples/python/summarize/summarize.py +++ b/samples/python/summarize/summarize.py @@ -50,7 +50,6 @@ def main(): # Check what's available in cache cached_models = fl_manager.list_cached_models() - cached_ids = {m.id for m in cached_models} if args.model: # User specified a model — check cache, download if 
needed @@ -59,26 +58,29 @@ def main(): print(f"✗ Model alias '{args.model}' not found in catalog") sys.exit(1) - if model_info.id in cached_ids: - print(f"✓ Model \"{args.model}\" ({model_info.id}) already cached — skipping download") + # Check if *any* variant of this alias is already cached + cached_variant = next((m for m in cached_models if m.alias == args.model), None) + if cached_variant is not None: + print(f"✓ Model \"{args.model}\" ({cached_variant.id}) already cached — skipping download") + model_name = cached_variant.id else: print(f"Model \"{args.model}\" not in cache. Downloading {model_info.id}...") fl_manager.download_model(args.model) print("✓ Model downloaded") + model_name = model_info.id - print(f"Loading model {model_info.id}...") - fl_manager.load_model(args.model) - model_name = model_info.id + print(f"Loading model {model_name}...") + fl_manager.load_model(model_name) else: # No model specified — use the first cached model, or fail if not cached_models: print("No downloaded models available. 
Run with --model to download one.") sys.exit(1) - cached_alias = cached_models[0].alias model_name = cached_models[0].id - print(f"✓ Using cached model: {cached_alias} ({model_name})") - fl_manager.load_model(cached_alias) + print(f"✓ Using cached model: {cached_models[0].alias} ({model_name})") + # Load by model ID to guarantee we load the exact cached variant + fl_manager.load_model(model_name) print(f"✓ Model loaded and ready\n") From 26908ec52079fc893861e325b3b0a570a97a9654 Mon Sep 17 00:00:00 2001 From: Lee Stott Date: Wed, 25 Mar 2026 07:30:26 -0700 Subject: [PATCH 08/13] update --- .../Middleware/ErrorHandlingMiddleware.cs | 2 +- samples/python/hello-foundry-local/src/app.py | 18 +++++++++++------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/samples/cs/whisper-transcription/Middleware/ErrorHandlingMiddleware.cs b/samples/cs/whisper-transcription/Middleware/ErrorHandlingMiddleware.cs index e44fd2e9..01fe75e8 100644 --- a/samples/cs/whisper-transcription/Middleware/ErrorHandlingMiddleware.cs +++ b/samples/cs/whisper-transcription/Middleware/ErrorHandlingMiddleware.cs @@ -25,7 +25,7 @@ public async Task InvokeAsync(HttpContext context) _logger.LogError(ex, "Unhandled exception"); context.Response.StatusCode = (int)HttpStatusCode.InternalServerError; context.Response.ContentType = "application/json"; - var payload = JsonSerializer.Serialize(new { error = ex.Message }); + var payload = JsonSerializer.Serialize(new { error = "An unexpected error occurred." 
}); await context.Response.WriteAsync(payload); } } diff --git a/samples/python/hello-foundry-local/src/app.py b/samples/python/hello-foundry-local/src/app.py index 4d47f5eb..788bd5b1 100644 --- a/samples/python/hello-foundry-local/src/app.py +++ b/samples/python/hello-foundry-local/src/app.py @@ -18,22 +18,26 @@ # Check if the model is already cached (downloaded) cached_models = manager.list_cached_models() -cached_ids = {m.id for m in cached_models} model_info = manager.get_model_info(alias) if model_info is None: print(f"✗ Model \"{alias}\" not found in catalog") sys.exit(1) -if model_info.id in cached_ids: - print(f"✓ Model \"{alias}\" ({model_info.id}) already cached — skipping download") +# Check if *any* variant of this alias is already cached +cached_variant = next((m for m in cached_models if m.alias == alias), None) +if cached_variant is not None: + print(f"✓ Model \"{alias}\" ({cached_variant.id}) already cached — skipping download") + model_id = cached_variant.id else: print(f"Model \"{alias}\" not found in cache. Downloading {model_info.id}...") manager.download_model(alias) print(f"✓ Model downloaded") + model_id = model_info.id -# Load the model into memory -print(f"Loading model {model_info.id}...") -manager.load_model(alias) +# Load the model into memory — use the exact model ID to guarantee +# we load the variant that is actually cached. 
+print(f"Loading model {model_id}...") +manager.load_model(model_id) print("✓ Model loaded and ready") # Configure the OpenAI client to use the local Foundry service @@ -44,7 +48,7 @@ # Generate a streaming response stream = client.chat.completions.create( - model=model_info.id, + model=model_id, messages=[{"role": "user", "content": "What is the golden ratio?"}], stream=True, ) From 16cfcac1e39877ca15e430bab16ac4432d596969 Mon Sep 17 00:00:00 2001 From: Lee Stott Date: Wed, 25 Mar 2026 07:42:38 -0700 Subject: [PATCH 09/13] Update --- samples/js/local-cag/src/chatEngine.js | 5 +++- samples/js/local-rag/src/chatEngine.js | 18 +++++++------ samples/python/functioncalling/fl_tools.ipynb | 25 ++++++++++--------- 3 files changed, 27 insertions(+), 21 deletions(-) diff --git a/samples/js/local-cag/src/chatEngine.js b/samples/js/local-cag/src/chatEngine.js index 675edf10..5a8a02f2 100644 --- a/samples/js/local-cag/src/chatEngine.js +++ b/samples/js/local-cag/src/chatEngine.js @@ -210,7 +210,10 @@ export class ChatEngine { let index = 0; while (!done || index < chunks.length) { if (index < chunks.length) { - yield { type: "text", data: chunks[index++] }; + const text = chunks[index]; + chunks[index] = null; // release for GC + index++; + yield { type: "text", data: text }; } else { await new Promise((r) => { resolve = r; }); } diff --git a/samples/js/local-rag/src/chatEngine.js b/samples/js/local-rag/src/chatEngine.js index 21927abf..d640ffea 100644 --- a/samples/js/local-rag/src/chatEngine.js +++ b/samples/js/local-rag/src/chatEngine.js @@ -177,14 +177,17 @@ export class ChatEngine { // 3. Stream from the local model via the SDK's callback-based streaming this.chatClient.settings.maxTokens = this.compactMode ? 
512 : 1024; - // Buffer chunks from the callback and yield them as an async iterable + // Buffer extracted content strings from the callback and yield as an async iterable const textChunks = []; let resolve; let done = false; const streamPromise = this.chatClient.completeStreamingChat(messages, (chunk) => { - textChunks.push(chunk); - if (resolve) { resolve(); resolve = null; } + const content = chunk.choices?.[0]?.delta?.content; + if (content) { + textChunks.push(content); + if (resolve) { resolve(); resolve = null; } + } }).then(() => { done = true; if (resolve) { resolve(); resolve = null; } @@ -208,11 +211,10 @@ export class ChatEngine { await new Promise((r) => { resolve = r; }); } while (head < textChunks.length) { - const chunk = textChunks[head++]; - const content = chunk.choices?.[0]?.delta?.content; - if (content) { - yield { type: "text", data: content }; - } + const text = textChunks[head]; + textChunks[head] = null; // release for GC + head++; + yield { type: "text", data: text }; } } diff --git a/samples/python/functioncalling/fl_tools.ipynb b/samples/python/functioncalling/fl_tools.ipynb index 887f34be..27e35683 100644 --- a/samples/python/functioncalling/fl_tools.ipynb +++ b/samples/python/functioncalling/fl_tools.ipynb @@ -135,24 +135,25 @@ "manager.start_service()\n", "print(\" ✓ Service is running\")\n", "\n", - "# Check if the model is already cached\n", + "# Check if any variant of this alias is already cached\n", "cached_models = manager.list_cached_models()\n", - "cached_ids = {m.id for m in cached_models}\n", - "model_info = manager.get_model_info(alias)\n", + "cached_variant = next((m for m in cached_models if m.alias == alias), None)\n", "\n", - "if model_info is None:\n", - " raise ValueError(f\"Model '{alias}' not found in the catalog. 
Check the alias and try again.\")\n", - "\n", - "if model_info.id in cached_ids:\n", + "if cached_variant is not None:\n", + " model_id = cached_variant.id\n", " print(f\" ✓ Model '{alias}' is already cached — skipping download\")\n", "else:\n", + " model_info = manager.get_model_info(alias)\n", + " if model_info is None:\n", + " raise ValueError(f\"Model '{alias}' not found in the catalog. Check the alias and try again.\")\n", " print(f\" Downloading model '{alias}'...\")\n", " manager.download_model(alias)\n", + " model_id = model_info.id\n", " print(f\" ✓ Download complete\")\n", "\n", "# Load the model into memory\n", "print(f\" Loading model '{alias}'...\")\n", - "manager.load_model(alias)\n", + "manager.load_model(model_id)\n", "print(f\" ✓ Model loaded and ready\")" ] }, @@ -233,13 +234,13 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "8e616290", "metadata": {}, "outputs": [], "source": [ "stream = client.chat.completions.create(\n", - " model=manager.get_model_info(alias).id,\n", + " model=model_id,\n", " messages=[{\"role\": \"user\", \"content\": \"book flight ticket from Beijing to Paris(using airport code) in 2025-12-04 to 2025-12-10 , then book hotel from 2025-12-04 to 2025-12-10 in Paris\"}],\n", " tools=[{\"name\": \"booking_flight_tickets\", \"description\": \"booking flights\", \"parameters\": {\"origin_airport_code\": {\"description\": \"The name of Departure airport code\", \"type\": \"string\"}, \"destination_airport_code\": {\"description\": \"The name of Destination airport code\", \"type\": \"string\"}, \"departure_date\": {\"description\": \"The date of outbound flight\", \"type\": \"string\"}, \"return_date\": {\"description\": \"The date of return flight\", \"type\": \"string\"}}}, {\"name\": \"booking_hotels\", \"description\": \"booking hotel\", \"parameters\": {\"destination\": {\"description\": \"The name of the city\", \"type\": \"string\"}, \"check_in_date\": {\"description\": \"The date of 
check in\", \"type\": \"string\"}, \"checkout_date\": {\"description\": \"The date of check out\", \"type\": \"string\"}}}],\n", " temperature=0.00001,\n", @@ -295,13 +296,13 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "2d5ed823", "metadata": {}, "outputs": [], "source": [ "stream = client.chat.completions.create(\n", - " model=manager.get_model_info(alias).id,\n", + " model=model_id,\n", " messages=[{\"role\": \"user\", \"content\": \"What is the weather today in Paris?\"}],\n", " tools=[\n", " {\n", From bf1b5ca5e1ed420adb525d84d54e133a10349b01 Mon Sep 17 00:00:00 2001 From: Lee Stott Date: Wed, 25 Mar 2026 09:27:55 -0700 Subject: [PATCH 10/13] update --- .../agent-framework/src/app/demos/code_reviewer.py | 2 +- .../python/agent-framework/src/app/demos/math_agent.py | 2 +- .../src/app/demos/multi_agent_debate.py | 2 +- .../src/app/demos/sentiment_analyzer.py | 2 +- .../agent-framework/src/app/demos/weather_tools.py | 2 +- samples/python/agent-framework/src/app/web.py | 10 ++++++++-- 6 files changed, 13 insertions(+), 7 deletions(-) diff --git a/samples/python/agent-framework/src/app/demos/code_reviewer.py b/samples/python/agent-framework/src/app/demos/code_reviewer.py index 23860448..5d36b679 100644 --- a/samples/python/agent-framework/src/app/demos/code_reviewer.py +++ b/samples/python/agent-framework/src/app/demos/code_reviewer.py @@ -220,7 +220,7 @@ async def run_code_review_demo(conn: FoundryConnection, prompt: str) -> dict: id="code_reviewer", name="Code Reviewer", description="Code analysis agent that checks style, complexity, potential bugs, and suggests improvements.", - icon="\ud83d\udc68\u200d\ud83d\udcbb", + icon="👨‍💻", category="Tool Calling", runner=run_code_review_demo, tags=["tools", "function-calling", "code-analysis", "single-agent"], diff --git a/samples/python/agent-framework/src/app/demos/math_agent.py b/samples/python/agent-framework/src/app/demos/math_agent.py index f2ff00c9..0c97aa11 100644 --- 
a/samples/python/agent-framework/src/app/demos/math_agent.py +++ b/samples/python/agent-framework/src/app/demos/math_agent.py @@ -193,7 +193,7 @@ async def run_math_demo(conn: FoundryConnection, prompt: str) -> dict: id="math_agent", name="Math Calculator", description="Precise calculation agent with tools for arithmetic, percentages, unit conversions, compound interest, and statistics.", - icon="\ud83d\udd22", + icon="🔢", category="Tool Calling", runner=run_math_demo, tags=["tools", "function-calling", "calculations", "single-agent"], diff --git a/samples/python/agent-framework/src/app/demos/multi_agent_debate.py b/samples/python/agent-framework/src/app/demos/multi_agent_debate.py index c0ec7ddb..568bd6e8 100644 --- a/samples/python/agent-framework/src/app/demos/multi_agent_debate.py +++ b/samples/python/agent-framework/src/app/demos/multi_agent_debate.py @@ -181,7 +181,7 @@ async def run_debate_demo(conn: FoundryConnection, prompt: str) -> dict: id="multi_agent_debate", name="Multi-Agent Debate", description="Three agents debate a topic: one argues FOR, one argues AGAINST, and a moderator declares a winner.", - icon="\ud83c\udfad", + icon="🎭", category="Multi-Agent", runner=run_debate_demo, tags=["multi-agent", "orchestration", "sequential", "debate"], diff --git a/samples/python/agent-framework/src/app/demos/sentiment_analyzer.py b/samples/python/agent-framework/src/app/demos/sentiment_analyzer.py index abee8526..5671713f 100644 --- a/samples/python/agent-framework/src/app/demos/sentiment_analyzer.py +++ b/samples/python/agent-framework/src/app/demos/sentiment_analyzer.py @@ -238,7 +238,7 @@ async def run_sentiment_demo(conn: FoundryConnection, prompt: str) -> dict: id="sentiment_analyzer", name="Sentiment Analyzer", description="Text analysis agent that detects sentiment, emotions, key phrases, and word frequency.", - icon="\ud83d\udcac", + icon="💬", category="Tool Calling", runner=run_sentiment_demo, tags=["tools", "function-calling", "text-analysis", 
"single-agent"], diff --git a/samples/python/agent-framework/src/app/demos/weather_tools.py b/samples/python/agent-framework/src/app/demos/weather_tools.py index 89ab6579..24910085 100644 --- a/samples/python/agent-framework/src/app/demos/weather_tools.py +++ b/samples/python/agent-framework/src/app/demos/weather_tools.py @@ -169,7 +169,7 @@ async def run_weather_demo(conn: FoundryConnection, prompt: str) -> dict: id="weather_tools", name="Weather Tools", description="Multi-tool agent that provides weather information, forecasts, city comparisons, and activity recommendations.", - icon="\ud83c\udf24\ufe0f", + icon="🌤️", category="Tool Calling", runner=run_weather_demo, tags=["tools", "function-calling", "single-agent"], diff --git a/samples/python/agent-framework/src/app/web.py b/samples/python/agent-framework/src/app/web.py index 31155cfe..58949d30 100644 --- a/samples/python/agent-framework/src/app/web.py +++ b/samples/python/agent-framework/src/app/web.py @@ -69,7 +69,10 @@ def api_run(): return jsonify({"status": "error", "message": "Not bootstrapped"}), 503 data = request.get_json(silent=True) or {} - question = data.get("question", "").strip() + question = data.get("question", "") + if not isinstance(question, str): + return jsonify({"status": "error", "message": "question must be a string"}), 400 + question = question.strip() mode = data.get("mode", "full") if not question: @@ -168,7 +171,10 @@ def api_demo_run(demo_id: str): return jsonify({"status": "error", "message": f"Demo '{demo_id}' not found"}), 404 data = request.get_json(silent=True) or {} - prompt = data.get("prompt", "").strip() + prompt = data.get("prompt", "") + if not isinstance(prompt, str): + return jsonify({"status": "error", "message": "prompt must be a string"}), 400 + prompt = prompt.strip() if not prompt: return jsonify({"status": "error", "message": "No prompt provided"}), 400 From a60d8e01e7b813de4dff7e9c8af98eff4983bde4 Mon Sep 17 00:00:00 2001 From: Lee Stott Date: Wed, 25 Mar 2026 
09:49:48 -0700 Subject: [PATCH 11/13] update --- samples/cs/whisper-transcription/README.md | 1 + .../WhisperTranscription.csproj | 3 +- samples/cs/whisper-transcription/nuget.config | 2 +- samples/js/local-cag/src/chatEngine.js | 40 +++++++++++-------- samples/js/local-rag/src/chatEngine.js | 28 +++++++++---- 5 files changed, 48 insertions(+), 26 deletions(-) diff --git a/samples/cs/whisper-transcription/README.md b/samples/cs/whisper-transcription/README.md index 3d13b434..bb22721c 100644 --- a/samples/cs/whisper-transcription/README.md +++ b/samples/cs/whisper-transcription/README.md @@ -17,6 +17,7 @@ Based on the [FLWhisper](https://github.com/leestott/FLWhisper) project. - **Windows 10/11** (ARM64 or x64) - **.NET 9 SDK** — [Download here](https://dotnet.microsoft.com/download/dotnet/9.0) +- **Windows 11 SDK 10.0.26100.0 or newer** — required by the `Microsoft.AI.Foundry.Local.WinML` package used by this sample - **Foundry Local** — installed and on PATH ## Quick Start diff --git a/samples/cs/whisper-transcription/WhisperTranscription.csproj b/samples/cs/whisper-transcription/WhisperTranscription.csproj index f6d1d553..e4acba26 100644 --- a/samples/cs/whisper-transcription/WhisperTranscription.csproj +++ b/samples/cs/whisper-transcription/WhisperTranscription.csproj @@ -1,7 +1,8 @@ - net9.0-windows10.0.26100 + net9.0-windows10.0.26100.0 + 10.0.17763.0 enable enable ARM64;x64 diff --git a/samples/cs/whisper-transcription/nuget.config b/samples/cs/whisper-transcription/nuget.config index 5ae1c6b2..82e36490 100644 --- a/samples/cs/whisper-transcription/nuget.config +++ b/samples/cs/whisper-transcription/nuget.config @@ -10,7 +10,7 @@ - + diff --git a/samples/js/local-cag/src/chatEngine.js b/samples/js/local-cag/src/chatEngine.js index 5a8a02f2..bd45f0cf 100644 --- a/samples/js/local-cag/src/chatEngine.js +++ b/samples/js/local-cag/src/chatEngine.js @@ -181,38 +181,44 @@ export class ChatEngine { this.chatClient.settings.maxTokens = this.compactMode ? 
512 : 1024; - // Collect streamed chunks via callback and yield them + // Buffer streamed chunks in a compacting queue so memory tracks only unread data. const chunks = []; + let head = 0; let resolve; let done = false; + const notify = () => { + if (resolve) { + const wake = resolve; + resolve = null; + wake(); + } + }; + + const compactChunks = () => { + if (head > 0 && head * 2 >= chunks.length) { + chunks.splice(0, head); + head = 0; + } + }; + const promise = this.chatClient .completeStreamingChat(messages, (chunk) => { const content = chunk.choices?.[0]?.delta?.content; if (content) { chunks.push(content); - if (resolve) { - const r = resolve; - resolve = null; - r(); - } + notify(); } }) .then(() => { done = true; - if (resolve) { - const r = resolve; - resolve = null; - r(); - } + notify(); }); - let index = 0; - while (!done || index < chunks.length) { - if (index < chunks.length) { - const text = chunks[index]; - chunks[index] = null; // release for GC - index++; + while (!done || head < chunks.length) { + if (head < chunks.length) { + const text = chunks[head++]; + compactChunks(); yield { type: "text", data: text }; } else { await new Promise((r) => { resolve = r; }); diff --git a/samples/js/local-rag/src/chatEngine.js b/samples/js/local-rag/src/chatEngine.js index d640ffea..d60aebd2 100644 --- a/samples/js/local-rag/src/chatEngine.js +++ b/samples/js/local-rag/src/chatEngine.js @@ -177,20 +177,36 @@ export class ChatEngine { // 3. Stream from the local model via the SDK's callback-based streaming this.chatClient.settings.maxTokens = this.compactMode ? 512 : 1024; - // Buffer extracted content strings from the callback and yield as an async iterable + // Buffer extracted content strings in a compacting queue so memory tracks unread data. 
const textChunks = []; + let head = 0; let resolve; let done = false; + const notify = () => { + if (resolve) { + const wake = resolve; + resolve = null; + wake(); + } + }; + + const compactTextChunks = () => { + if (head > 0 && head * 2 >= textChunks.length) { + textChunks.splice(0, head); + head = 0; + } + }; + const streamPromise = this.chatClient.completeStreamingChat(messages, (chunk) => { const content = chunk.choices?.[0]?.delta?.content; if (content) { textChunks.push(content); - if (resolve) { resolve(); resolve = null; } + notify(); } }).then(() => { done = true; - if (resolve) { resolve(); resolve = null; } + notify(); }); // Yield sources metadata first @@ -205,15 +221,13 @@ export class ChatEngine { }; // Yield text chunks from the SDK streaming callback buffer - let head = 0; while (!done || head < textChunks.length) { if (head >= textChunks.length && !done) { await new Promise((r) => { resolve = r; }); } while (head < textChunks.length) { - const text = textChunks[head]; - textChunks[head] = null; // release for GC - head++; + const text = textChunks[head++]; + compactTextChunks(); yield { type: "text", data: text }; } } From 0a82b17230c26ce16a51d5f9da2595897cb36200 Mon Sep 17 00:00:00 2001 From: Lee Stott Date: Wed, 25 Mar 2026 09:52:39 -0700 Subject: [PATCH 12/13] update --- samples/python/functioncalling/fl_tools.ipynb | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/samples/python/functioncalling/fl_tools.ipynb b/samples/python/functioncalling/fl_tools.ipynb index 27e35683..bae0c182 100644 --- a/samples/python/functioncalling/fl_tools.ipynb +++ b/samples/python/functioncalling/fl_tools.ipynb @@ -135,24 +135,25 @@ "manager.start_service()\n", "print(\" ✓ Service is running\")\n", "\n", - "# Check if any variant of this alias is already cached\n", + "# Check if any cached variant already satisfies this alias.\n", "cached_models = manager.list_cached_models()\n", - "cached_variant = next((m for m in cached_models 
if m.alias == alias), None)\n", + "cached_variant = next((model for model in cached_models if model.alias == alias), None)\n", "\n", "if cached_variant is not None:\n", " model_id = cached_variant.id\n", - " print(f\" ✓ Model '{alias}' is already cached — skipping download\")\n", + " print(f\" ✓ Model '{alias}' is already cached as '{model_id}' — skipping download\")\n", "else:\n", " model_info = manager.get_model_info(alias)\n", " if model_info is None:\n", " raise ValueError(f\"Model '{alias}' not found in the catalog. Check the alias and try again.\")\n", + "\n", " print(f\" Downloading model '{alias}'...\")\n", - " manager.download_model(alias)\n", - " model_id = model_info.id\n", - " print(f\" ✓ Download complete\")\n", + " downloaded_model = manager.download_model(alias)\n", + " model_id = downloaded_model.id\n", + " print(f\" ✓ Download complete: cached '{model_id}'\")\n", "\n", - "# Load the model into memory\n", - "print(f\" Loading model '{alias}'...\")\n", + "# Load the exact cached variant into memory for deterministic behavior.\n", + "print(f\" Loading model '{model_id}'...\")\n", "manager.load_model(model_id)\n", "print(f\" ✓ Model loaded and ready\")" ] From c6c1cab867be6729cd17fd1ca3409b109f43c10d Mon Sep 17 00:00:00 2001 From: Lee Stott Date: Wed, 25 Mar 2026 10:17:14 -0700 Subject: [PATCH 13/13] update --- .../Middleware/ErrorHandlingMiddleware.cs | 19 ++++++++++++------- samples/cs/whisper-transcription/Program.cs | 17 ++++++++++++----- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/samples/cs/whisper-transcription/Middleware/ErrorHandlingMiddleware.cs b/samples/cs/whisper-transcription/Middleware/ErrorHandlingMiddleware.cs index 01fe75e8..fc14fbab 100644 --- a/samples/cs/whisper-transcription/Middleware/ErrorHandlingMiddleware.cs +++ b/samples/cs/whisper-transcription/Middleware/ErrorHandlingMiddleware.cs @@ -1,6 +1,3 @@ -using System.Net; -using System.Text.Json; - namespace WhisperTranscription; public class 
ErrorHandlingMiddleware @@ -23,10 +20,18 @@ public async Task InvokeAsync(HttpContext context) catch (Exception ex) { _logger.LogError(ex, "Unhandled exception"); - context.Response.StatusCode = (int)HttpStatusCode.InternalServerError; - context.Response.ContentType = "application/json"; - var payload = JsonSerializer.Serialize(new { error = "An unexpected error occurred." }); - await context.Response.WriteAsync(payload); + + if (context.Response.HasStarted) + { + throw; + } + + await Results.Problem( + statusCode: StatusCodes.Status500InternalServerError, + title: "Internal Server Error", + detail: "An unexpected error occurred.", + instance: context.Request.Path) + .ExecuteAsync(context); } } } diff --git a/samples/cs/whisper-transcription/Program.cs b/samples/cs/whisper-transcription/Program.cs index 5ae320a6..e47ef394 100644 --- a/samples/cs/whisper-transcription/Program.cs +++ b/samples/cs/whisper-transcription/Program.cs @@ -10,6 +10,7 @@ builder.Services.AddHealthChecks() .AddCheck("foundry"); builder.Services.AddEndpointsApiExplorer(); +builder.Services.AddProblemDetails(); builder.Services.AddSwaggerGen(); builder.Services.ConfigureHttpJsonOptions(options => @@ -64,7 +65,10 @@ { if (file is null || file.Length == 0) { - return Results.BadRequest(new { error = "No audio file provided" }); + return Results.Problem( + statusCode: 400, + title: "Invalid transcription request", + detail: "No audio file provided."); } // Save upload to temp file @@ -80,7 +84,7 @@ var outputFormat = format?.ToLowerInvariant() ?? 
"text"; return outputFormat switch { - "json" => Results.Ok(new { text = result.Text, model = result.ModelId }), + "json" => Results.Ok(new TranscriptionResponse(result.Text, result.ModelId)), _ => Results.Text(result.Text, "text/plain"), }; } @@ -90,10 +94,13 @@ } }).WithName("TranscribeAudio") .DisableAntiforgery() - .Produces(200) - .ProducesProblem(400) - .ProducesProblem(500); + .Produces(200, "application/json") + .Produces(200, "text/plain") + .ProducesProblem(400) + .ProducesProblem(500); app.MapFallbackToFile("index.html"); app.Run(); + +sealed record TranscriptionResponse(string Text, string Model);