From c045bf38eb964685458e0bbf8113260b8fc73f62 Mon Sep 17 00:00:00 2001 From: ruiren_microsoft Date: Thu, 5 Mar 2026 10:24:31 -0800 Subject: [PATCH 01/22] support audio streaming-csharp --- sdk_v2/cs/src/Detail/CoreInterop.cs | 188 +++++++++ sdk_v2/cs/src/Detail/ICoreInterop.cs | 23 + .../cs/src/Detail/JsonSerializationContext.cs | 3 + sdk_v2/cs/src/IModel.cs | 7 + sdk_v2/cs/src/Model.cs | 5 + sdk_v2/cs/src/ModelVariant.cs | 17 + .../OpenAI/AudioStreamTranscriptionTypes.cs | 65 +++ sdk_v2/cs/src/OpenAI/AudioStreamingClient.cs | 399 ++++++++++++++++++ .../AudioStreamingClientTests.cs | 221 ++++++++++ 9 files changed, 928 insertions(+) create mode 100644 sdk_v2/cs/src/OpenAI/AudioStreamTranscriptionTypes.cs create mode 100644 sdk_v2/cs/src/OpenAI/AudioStreamingClient.cs create mode 100644 sdk_v2/cs/test/FoundryLocal.Tests/AudioStreamingClientTests.cs diff --git a/sdk_v2/cs/src/Detail/CoreInterop.cs b/sdk_v2/cs/src/Detail/CoreInterop.cs index 8411473b..a178bdca 100644 --- a/sdk_v2/cs/src/Detail/CoreInterop.cs +++ b/sdk_v2/cs/src/Detail/CoreInterop.cs @@ -158,6 +158,28 @@ private static unsafe partial void CoreExecuteCommandWithCallback(RequestBuffer* nint callbackPtr, // NativeCallbackFn pointer nint userData); + // --- Audio streaming P/Invoke imports --- + + [LibraryImport(LibraryName, EntryPoint = "audio_stream_start")] + [UnmanagedCallConv(CallConvs = new[] { typeof(System.Runtime.CompilerServices.CallConvCdecl) })] + private static unsafe partial void CoreAudioStreamStart( + RequestBuffer* request, + ResponseBuffer* response, + nint callbackPtr, + nint userData); + + [LibraryImport(LibraryName, EntryPoint = "audio_stream_push")] + [UnmanagedCallConv(CallConvs = new[] { typeof(System.Runtime.CompilerServices.CallConvCdecl) })] + private static unsafe partial void CoreAudioStreamPush( + StreamingRequestBuffer* request, + ResponseBuffer* response); + + [LibraryImport(LibraryName, EntryPoint = "audio_stream_stop")] + [UnmanagedCallConv(CallConvs = new[] { 
typeof(System.Runtime.CompilerServices.CallConvCdecl) })] + private static unsafe partial void CoreAudioStreamStop( + RequestBuffer* request, + ResponseBuffer* response); + // helper to capture exceptions in callbacks internal class CallbackHelper { @@ -331,4 +353,170 @@ public Task ExecuteCommandWithCallbackAsync(string commandName, CoreIn return Task.Run(() => ExecuteCommandWithCallback(commandName, commandInput, callback), ct); } + // --- Audio streaming managed implementations --- + + public AudioStreamSession StartAudioStream(CoreInteropRequest request, CallbackFn transcriptionCallback) + { + try + { + var commandInputJson = request.ToJson(); + byte[] commandBytes = System.Text.Encoding.UTF8.GetBytes("audio_stream_start"); + byte[] inputBytes = System.Text.Encoding.UTF8.GetBytes(commandInputJson); + + IntPtr commandPtr = Marshal.AllocHGlobal(commandBytes.Length); + Marshal.Copy(commandBytes, 0, commandPtr, commandBytes.Length); + + IntPtr inputPtr = Marshal.AllocHGlobal(inputBytes.Length); + Marshal.Copy(inputBytes, 0, inputPtr, inputBytes.Length); + + var reqBuf = new RequestBuffer + { + Command = commandPtr, + CommandLength = commandBytes.Length, + Data = inputPtr, + DataLength = inputBytes.Length + }; + + ResponseBuffer response = default; + + var helper = new CallbackHelper(transcriptionCallback); + var funcPtr = Marshal.GetFunctionPointerForDelegate(handleCallbackDelegate); + var helperHandle = GCHandle.Alloc(helper); + var helperPtr = GCHandle.ToIntPtr(helperHandle); + + try + { + unsafe + { + CoreAudioStreamStart(&reqBuf, &response, funcPtr, helperPtr); + } + } + catch + { + // Free on failure — native core never saw the handle + helperHandle.Free(); + throw; + } + finally + { + Marshal.FreeHGlobal(commandPtr); + Marshal.FreeHGlobal(inputPtr); + } + + // Marshal response inline (matching existing ExecuteCommandImpl pattern) + Response result = new(); + if (response.Data != IntPtr.Zero && response.DataLength > 0) + { + byte[] managedResponse = new 
byte[response.DataLength]; + Marshal.Copy(response.Data, managedResponse, 0, response.DataLength); + result.Data = System.Text.Encoding.UTF8.GetString(managedResponse); + } + if (response.Error != IntPtr.Zero && response.ErrorLength > 0) + { + result.Error = Marshal.PtrToStringUTF8(response.Error, response.ErrorLength)!; + } + Marshal.FreeHGlobal(response.Data); + Marshal.FreeHGlobal(response.Error); + + // Return the GCHandle alongside the response — caller is responsible for + // keeping it alive during the session and freeing it in StopAudioStream. + return new AudioStreamSession(result, helperHandle); + } + catch (Exception ex) when (ex is not OperationCanceledException) + { + throw new FoundryLocalException("Error executing audio_stream_start", ex, _logger); + } + } + + public Response PushAudioData(CoreInteropRequest request, ReadOnlyMemory audioData) + { + try + { + var commandInputJson = request.ToJson(); + byte[] commandBytes = System.Text.Encoding.UTF8.GetBytes("audio_stream_push"); + byte[] inputBytes = System.Text.Encoding.UTF8.GetBytes(commandInputJson); + + IntPtr commandPtr = Marshal.AllocHGlobal(commandBytes.Length); + Marshal.Copy(commandBytes, 0, commandPtr, commandBytes.Length); + + IntPtr inputPtr = Marshal.AllocHGlobal(inputBytes.Length); + Marshal.Copy(inputBytes, 0, inputPtr, inputBytes.Length); + + // Pin the managed audio data so GC won't move it during the native call + using var audioHandle = audioData.Pin(); + + unsafe + { + var reqBuf = new StreamingRequestBuffer + { + Command = commandPtr, + CommandLength = commandBytes.Length, + Data = inputPtr, + DataLength = inputBytes.Length, + BinaryData = (nint)audioHandle.Pointer, + BinaryDataLength = audioData.Length + }; + + ResponseBuffer response = default; + + try + { + CoreAudioStreamPush(&reqBuf, &response); + } + finally + { + Marshal.FreeHGlobal(commandPtr); + Marshal.FreeHGlobal(inputPtr); + } + + // Marshal response inline + Response result = new(); + if (response.Data != IntPtr.Zero 
&& response.DataLength > 0) + { + byte[] managedResponse = new byte[response.DataLength]; + Marshal.Copy(response.Data, managedResponse, 0, response.DataLength); + result.Data = System.Text.Encoding.UTF8.GetString(managedResponse); + } + if (response.Error != IntPtr.Zero && response.ErrorLength > 0) + { + result.Error = Marshal.PtrToStringUTF8(response.Error, response.ErrorLength)!; + } + Marshal.FreeHGlobal(response.Data); + Marshal.FreeHGlobal(response.Error); + + return result; + } + } + catch (Exception ex) when (ex is not OperationCanceledException) + { + throw new FoundryLocalException("Error executing audio_stream_push", ex, _logger); + } + } + + public Response StopAudioStream(CoreInteropRequest request, GCHandle callbackHandle) + { + try + { + var result = ExecuteCommand("audio_stream_stop", request); + + // Free the GCHandle that was keeping the callback delegate alive. + // After this point, the native core must not invoke the callback. + if (callbackHandle.IsAllocated) + { + callbackHandle.Free(); + } + + return result; + } + catch (Exception ex) when (ex is not OperationCanceledException) + { + // Still free the handle on failure to avoid leaks + if (callbackHandle.IsAllocated) + { + callbackHandle.Free(); + } + throw new FoundryLocalException("Error executing audio_stream_stop", ex, _logger); + } + } + } diff --git a/sdk_v2/cs/src/Detail/ICoreInterop.cs b/sdk_v2/cs/src/Detail/ICoreInterop.cs index 1fff9dde..cd342ce5 100644 --- a/sdk_v2/cs/src/Detail/ICoreInterop.cs +++ b/sdk_v2/cs/src/Detail/ICoreInterop.cs @@ -51,4 +51,27 @@ Task ExecuteCommandAsync(string commandName, CoreInteropRequest? comma Task ExecuteCommandWithCallbackAsync(string commandName, CoreInteropRequest? commandInput, CallbackFn callback, CancellationToken? 
ct = null); + + // --- Audio streaming session support --- + + [StructLayout(LayoutKind.Sequential)] + protected unsafe struct StreamingRequestBuffer + { + public nint Command; + public int CommandLength; + public nint Data; // JSON params + public int DataLength; + public nint BinaryData; // raw PCM audio bytes + public int BinaryDataLength; + } + + /// + /// Returned by StartAudioStream. Holds the session handle and the GCHandle + /// that must remain alive for the callback lifetime. + /// + internal record AudioStreamSession(Response Response, GCHandle CallbackHandle); + + AudioStreamSession StartAudioStream(CoreInteropRequest request, CallbackFn transcriptionCallback); + Response PushAudioData(CoreInteropRequest request, ReadOnlyMemory audioData); + Response StopAudioStream(CoreInteropRequest request, GCHandle callbackHandle); } diff --git a/sdk_v2/cs/src/Detail/JsonSerializationContext.cs b/sdk_v2/cs/src/Detail/JsonSerializationContext.cs index 894f9454..3cc079f3 100644 --- a/sdk_v2/cs/src/Detail/JsonSerializationContext.cs +++ b/sdk_v2/cs/src/Detail/JsonSerializationContext.cs @@ -33,6 +33,9 @@ namespace Microsoft.AI.Foundry.Local.Detail; [JsonSerializable(typeof(IList))] [JsonSerializable(typeof(PropertyDefinition))] [JsonSerializable(typeof(IList))] +// --- NEW: Audio streaming types --- +[JsonSerializable(typeof(AudioStreamTranscriptionResult))] +[JsonSerializable(typeof(CoreErrorResponse))] [JsonSourceGenerationOptions(DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull, WriteIndented = false)] internal partial class JsonSerializationContext : JsonSerializerContext diff --git a/sdk_v2/cs/src/IModel.cs b/sdk_v2/cs/src/IModel.cs index c3acba61..20eca014 100644 --- a/sdk_v2/cs/src/IModel.cs +++ b/sdk_v2/cs/src/IModel.cs @@ -67,4 +67,11 @@ Task DownloadAsync(Action? downloadProgress = null, /// Optional cancellation token. /// OpenAI.AudioClient Task GetAudioClientAsync(CancellationToken? 
ct = null); + + /// + /// Get a real-time audio streaming client for ASR. + /// + /// Optional cancellation token. + /// OpenAIAudioStreamingClient for real-time transcription. + Task GetAudioStreamingClientAsync(CancellationToken? ct = null); } diff --git a/sdk_v2/cs/src/Model.cs b/sdk_v2/cs/src/Model.cs index 83bcef69..ffe8bb1c 100644 --- a/sdk_v2/cs/src/Model.cs +++ b/sdk_v2/cs/src/Model.cs @@ -114,6 +114,11 @@ public async Task GetAudioClientAsync(CancellationToken? ct = return await SelectedVariant.GetAudioClientAsync(ct).ConfigureAwait(false); } + public async Task GetAudioStreamingClientAsync(CancellationToken? ct = null) + { + return await SelectedVariant.GetAudioStreamingClientAsync(ct).ConfigureAwait(false); + } + public async Task UnloadAsync(CancellationToken? ct = null) { await SelectedVariant.UnloadAsync(ct).ConfigureAwait(false); diff --git a/sdk_v2/cs/src/ModelVariant.cs b/sdk_v2/cs/src/ModelVariant.cs index 6ca7cda7..d5285c1c 100644 --- a/sdk_v2/cs/src/ModelVariant.cs +++ b/sdk_v2/cs/src/ModelVariant.cs @@ -190,4 +190,21 @@ private async Task GetAudioClientImplAsync(CancellationToken? return new OpenAIAudioClient(Id); } + + public async Task GetAudioStreamingClientAsync(CancellationToken? ct = null) + { + return await Utils.CallWithExceptionHandling(() => GetAudioStreamingClientImplAsync(ct), + "Error getting audio streaming client for model", _logger) + .ConfigureAwait(false); + } + + private async Task GetAudioStreamingClientImplAsync(CancellationToken? ct = null) + { + if (!await IsLoadedAsync(ct)) + { + throw new FoundryLocalException($"Model {Id} is not loaded. 
Call LoadAsync first."); + } + + return new OpenAIAudioStreamingClient(Id); + } } diff --git a/sdk_v2/cs/src/OpenAI/AudioStreamTranscriptionTypes.cs b/sdk_v2/cs/src/OpenAI/AudioStreamTranscriptionTypes.cs new file mode 100644 index 00000000..7736cb47 --- /dev/null +++ b/sdk_v2/cs/src/OpenAI/AudioStreamTranscriptionTypes.cs @@ -0,0 +1,65 @@ +namespace Microsoft.AI.Foundry.Local; + +using System.Text.Json; +using System.Text.Json.Serialization; +using Microsoft.AI.Foundry.Local.Detail; + +public record AudioStreamTranscriptionResult +{ + /// Whether this is a partial (interim) or final result for this segment. + [JsonPropertyName("is_final")] + public bool IsFinal { get; init; } + + /// The transcribed text. + [JsonPropertyName("text")] + public string Text { get; init; } = string.Empty; + + /// Start time offset of this segment in the audio stream (seconds). + [JsonPropertyName("start_time")] + public double? StartTime { get; init; } + + /// End time offset of this segment in the audio stream (seconds). + [JsonPropertyName("end_time")] + public double? EndTime { get; init; } + + /// Confidence score (0.0 - 1.0) if available. + [JsonPropertyName("confidence")] + public float? Confidence { get; init; } + + internal static AudioStreamTranscriptionResult FromJson(string json) + { + return JsonSerializer.Deserialize(json, + JsonSerializationContext.Default.AudioStreamTranscriptionResult) + ?? throw new FoundryLocalException("Failed to deserialize AudioStreamTranscriptionResult"); + } +} + +internal record CoreErrorResponse +{ + [JsonPropertyName("code")] + public string Code { get; init; } = ""; + + [JsonPropertyName("message")] + public string Message { get; init; } = ""; + + [JsonPropertyName("isTransient")] + public bool IsTransient { get; init; } + + /// + /// Attempt to parse a native error string as structured JSON. + /// Returns null if the error is not valid JSON or doesn't match the schema, + /// which should be treated as a permanent/unknown error. 
+ /// + internal static CoreErrorResponse? TryParse(string errorString) + { + try + { + return JsonSerializer.Deserialize(errorString, + JsonSerializationContext.Default.CoreErrorResponse); + } + catch + { + return null; // unstructured error — treat as permanent + } + } +} \ No newline at end of file diff --git a/sdk_v2/cs/src/OpenAI/AudioStreamingClient.cs b/sdk_v2/cs/src/OpenAI/AudioStreamingClient.cs new file mode 100644 index 00000000..27e1bbea --- /dev/null +++ b/sdk_v2/cs/src/OpenAI/AudioStreamingClient.cs @@ -0,0 +1,399 @@ +// -------------------------------------------------------------------------------------------------------------------- +// +// Copyright (c) Microsoft. All rights reserved. +// +// -------------------------------------------------------------------------------------------------------------------- + +namespace Microsoft.AI.Foundry.Local; + +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Globalization; +using System.Threading.Channels; +using Microsoft.AI.Foundry.Local.Detail; +using Microsoft.Extensions.Logging; + + +/// +/// Client for real-time audio streaming ASR (Automatic Speech Recognition). +/// Audio data from a microphone (or other source) is pushed in as PCM chunks, +/// and partial transcription results are returned as an async stream. +/// +/// Thread safety: PushAudioDataAsync can be called from any thread (including high-frequency +/// audio callbacks). Pushes are internally serialized via a bounded channel to prevent +/// unbounded memory growth and ensure ordering. +/// + + +public sealed class OpenAIAudioStreamingClient : IAsyncDisposable +{ + private readonly string _modelId; + private readonly ICoreInterop _coreInterop = FoundryLocalManager.Instance.CoreInterop; + private readonly ILogger _logger = FoundryLocalManager.Instance.Logger; + + // Session state — protected by _lock + private readonly AsyncLock _lock = new(); + private string? 
_sessionHandle; + private GCHandle _callbackHandle; + private bool _started; + private bool _stopped; + + // Output channel: native callback writes, user reads via GetTranscriptionStream + private Channel? _outputChannel; + + // Internal push queue: user writes audio chunks, background loop drains to native core. + // Bounded to prevent unbounded memory growth if native core is slower than real-time. + private Channel>? _pushChannel; + private Task? _pushLoopTask; + + // Dedicated CTS for the push loop — decoupled from StartAsync's caller token. + // Cancelled only during StopAsync/DisposeAsync to allow clean drain. + private CancellationTokenSource? _sessionCts; + + // Stored as a field so the delegate is not garbage collected while native core holds a reference. + private ICoreInterop.CallbackFn? _transcriptionCallback; + + // Snapshot of settings captured at StartAsync — prevents mutation after session starts. + private StreamingAudioSettings? _activeSettings; + + /// + /// Audio format settings for the streaming session. + /// Must be configured before calling . + /// Settings are frozen once the session starts. + /// + public record StreamingAudioSettings + { + /// PCM sample rate in Hz. Default: 16000. + public int SampleRate { get; set; } = 16000; + + /// Number of audio channels. Default: 1 (mono). + public int Channels { get; set; } = 1; + + /// Bits per sample. Default: 16. + public int BitsPerSample { get; set; } = 16; + + /// Optional BCP-47 language hint (e.g., "en", "zh"). + public string? Language { get; set; } + + /// + /// Maximum number of audio chunks buffered in the internal push queue. + /// If the queue is full, PushAudioDataAsync will asynchronously wait. + /// Default: 100 (~3 seconds of audio at typical chunk sizes). 
+ /// + public int PushQueueCapacity { get; set; } = 100; + + internal StreamingAudioSettings Snapshot() => this with { }; // record copy + } + + public StreamingAudioSettings Settings { get; } = new(); + + internal OpenAIAudioStreamingClient(string modelId) + { + _modelId = modelId; + } + + /// + /// Start a real-time audio streaming session. + /// Must be called before or . + /// Settings are frozen after this call. + /// + /// Cancellation token. + public async Task StartAsync(CancellationToken ct = default) + { + using var disposable = await _lock.LockAsync().ConfigureAwait(false); + + if (_started) + { + throw new FoundryLocalException("Streaming session already started. Call StopAsync first."); + } + + // Freeze settings + _activeSettings = Settings.Snapshot(); + + _outputChannel = Channel.CreateUnbounded( + new UnboundedChannelOptions + { + SingleWriter = true, // only the native callback writes + SingleReader = true, + AllowSynchronousContinuations = true + }); + + _pushChannel = Channel.CreateBounded>( + new BoundedChannelOptions(_activeSettings.PushQueueCapacity) + { + SingleReader = true, // only the push loop reads + SingleWriter = false, // multiple threads may push audio data + FullMode = BoundedChannelFullMode.Wait + }); + + var request = new CoreInteropRequest + { + Params = new Dictionary + { + { "Model", _modelId }, + { "SampleRate", _activeSettings.SampleRate.ToString(CultureInfo.InvariantCulture) }, + { "Channels", _activeSettings.Channels.ToString(CultureInfo.InvariantCulture) }, + { "BitsPerSample", _activeSettings.BitsPerSample.ToString(CultureInfo.InvariantCulture) }, + } + }; + + if (_activeSettings.Language != null) + { + request.Params["Language"] = _activeSettings.Language; + } + + // Store the callback as a field so the delegate is rooted for the session lifetime. 
+ _transcriptionCallback = (callbackData) => + { + try + { + var result = AudioStreamTranscriptionResult.FromJson(callbackData); + // TryWrite always succeeds on unbounded channels + _outputChannel.Writer.TryWrite(result); + } + catch (Exception ex) + { + _logger.LogError(ex, "Error processing audio stream transcription callback"); + _outputChannel.Writer.TryComplete( + new FoundryLocalException("Error processing audio streaming callback.", ex, _logger)); + } + }; + + // StartAudioStream is synchronous (P/Invoke) — run on thread pool + var session = await Task.Run( + () => _coreInterop.StartAudioStream(request, _transcriptionCallback), ct) + .ConfigureAwait(false); + + if (session.Response.Error != null) + { + // Free handle on failure + if (session.CallbackHandle.IsAllocated) + { + session.CallbackHandle.Free(); + } + _outputChannel.Writer.TryComplete(); + throw new FoundryLocalException( + $"Error starting audio stream session: {session.Response.Error}", _logger); + } + + _sessionHandle = session.Response.Data + ?? throw new FoundryLocalException("Native core did not return a session handle.", _logger); + _callbackHandle = session.CallbackHandle; + _started = true; + _stopped = false; + + // Use a dedicated CTS for the push loop — NOT the caller's ct. +#pragma warning disable IDISP003 // Dispose previous before re-assigning + _sessionCts = new CancellationTokenSource(); +#pragma warning restore IDISP003 +#pragma warning disable IDISP013 // Await in using + _pushLoopTask = Task.Run(() => PushLoopAsync(_sessionCts.Token), CancellationToken.None); +#pragma warning restore IDISP013 + } + + /// + /// Push a chunk of raw PCM audio data to the streaming session. + /// Can be called from any thread (including audio device callbacks). + /// Chunks are internally queued and serialized to the native core. + /// + /// Raw PCM audio bytes matching the configured format. + /// Cancellation token. 
+ public async ValueTask PushAudioDataAsync(ReadOnlyMemory pcmData, CancellationToken ct = default) + { + if (!_started || _stopped) + { + throw new FoundryLocalException("No active streaming session. Call StartAsync first."); + } + + // Copy the data to avoid issues if the caller reuses the buffer (e.g. NAudio reuses e.Buffer) + var copy = new byte[pcmData.Length]; + pcmData.CopyTo(copy); + + await _pushChannel!.Writer.WriteAsync(copy, ct).ConfigureAwait(false); + } + + /// + /// Internal loop that drains the push queue and sends chunks to native core one at a time. + /// Implements retry for transient native errors and terminates the session on permanent failures. + /// + private async Task PushLoopAsync(CancellationToken ct) + { + const int maxRetries = 3; + var initialRetryDelay = TimeSpan.FromMilliseconds(50); + + try + { + await foreach (var audioData in _pushChannel!.Reader.ReadAllAsync(ct).ConfigureAwait(false)) + { + var request = new CoreInteropRequest + { + Params = new Dictionary { { "SessionHandle", _sessionHandle! } } + }; + + var pushed = false; + for (int attempt = 0; attempt <= maxRetries && !pushed; attempt++) + { + var response = _coreInterop.PushAudioData(request, audioData); + + if (response.Error == null) + { + pushed = true; + continue; + } + + // Parse structured error to determine transient vs permanent + var errorInfo = CoreErrorResponse.TryParse(response.Error); + + if (errorInfo?.IsTransient == true && attempt < maxRetries) + { + var delay = initialRetryDelay * Math.Pow(2, attempt); + _logger.LogWarning( + "Transient push error (attempt {Attempt}/{Max}): {Code}. Retrying in {Delay}ms", + attempt + 1, maxRetries, errorInfo.Code, delay.TotalMilliseconds); + await Task.Delay(delay, ct).ConfigureAwait(false); + continue; + } + + // Permanent error or retries exhausted — terminate the session + var fatalEx = new FoundryLocalException( + $"Push failed permanently (code={errorInfo?.Code ?? 
"UNKNOWN"}): {response.Error}", + _logger); + _logger.LogError("Terminating push loop due to permanent push failure: {Error}", + response.Error); + _outputChannel?.Writer.TryComplete(fatalEx); + return; // exit push loop + } + } + } + catch (OperationCanceledException) + { + // Expected on cancellation — push loop exits cleanly + } + catch (Exception ex) + { + _logger.LogError(ex, "Push loop terminated with unexpected error"); + _outputChannel?.Writer.TryComplete( + new FoundryLocalException("Push loop terminated unexpectedly.", ex, _logger)); + } + } + + /// + /// Get the async stream of transcription results. + /// Results arrive as the native ASR engine processes audio data. + /// + /// Cancellation token. + /// Async enumerable of transcription results. + public async IAsyncEnumerable GetTranscriptionStream( + [EnumeratorCancellation] CancellationToken ct = default) + { + if (_outputChannel == null) + { + throw new FoundryLocalException("No active streaming session. Call StartAsync first."); + } + + await foreach (var item in _outputChannel.Reader.ReadAllAsync(ct).ConfigureAwait(false)) + { + yield return item; + } + } + + /// + /// Signal end-of-audio and stop the streaming session. + /// Any remaining buffered audio in the push queue will be drained to native core first. + /// Final results are delivered through before it completes. + /// + /// Cancellation token. + public async Task StopAsync(CancellationToken ct = default) + { + using var disposable = await _lock.LockAsync().ConfigureAwait(false); + + if (!_started || _stopped) + { + return; // already stopped or never started + } + + _stopped = true; + + // 1. Complete the push channel so the push loop drains remaining items and exits + _pushChannel?.Writer.TryComplete(); + + // 2. Wait for the push loop to finish draining + if (_pushLoopTask != null) + { + await _pushLoopTask.ConfigureAwait(false); + } + + // 3. Cancel the session CTS (no-op if push loop already exited) + _sessionCts?.Cancel(); + + // 4. 
Tell native core to flush and finalize. + // This MUST happen even if ct is cancelled — otherwise native session leaks. + var request = new CoreInteropRequest + { + Params = new Dictionary { { "SessionHandle", _sessionHandle! } } + }; + + ICoreInterop.Response? response = null; + try + { + response = await Task.Run( + () => _coreInterop.StopAudioStream(request, _callbackHandle), ct) + .ConfigureAwait(false); + } + catch (OperationCanceledException) when (ct.IsCancellationRequested) + { + // ct fired, but we MUST still stop the native session to avoid a leak. + _logger.LogWarning("StopAsync cancelled — performing best-effort native session stop."); + try + { + response = await Task.Run( + () => _coreInterop.StopAudioStream(request, _callbackHandle)) + .ConfigureAwait(false); + } + catch (Exception cleanupEx) + { + _logger.LogError(cleanupEx, "Best-effort native session stop failed."); + } + + throw; // Re-throw the cancellation after cleanup + } + finally + { + _sessionHandle = null; + _transcriptionCallback = null; + _started = false; + _sessionCts?.Dispose(); + _sessionCts = null; + + // 5. 
Complete the output channel AFTER StopAudioStream returns + _outputChannel?.Writer.TryComplete(); + } + + if (response?.Error != null) + { + throw new FoundryLocalException( + $"Error stopping audio stream session: {response.Error}", _logger); + } + } + + public async ValueTask DisposeAsync() + { + try + { + if (_started && !_stopped) + { + await StopAsync().ConfigureAwait(false); + } + } + catch (Exception ex) + { + // DisposeAsync must never throw — log and swallow + _logger.LogWarning(ex, "Error during DisposeAsync cleanup."); + } + finally + { + _sessionCts?.Dispose(); + _lock.Dispose(); + } + } +} \ No newline at end of file diff --git a/sdk_v2/cs/test/FoundryLocal.Tests/AudioStreamingClientTests.cs b/sdk_v2/cs/test/FoundryLocal.Tests/AudioStreamingClientTests.cs new file mode 100644 index 00000000..3a0e2ef7 --- /dev/null +++ b/sdk_v2/cs/test/FoundryLocal.Tests/AudioStreamingClientTests.cs @@ -0,0 +1,221 @@ +// -------------------------------------------------------------------------------------------------------------------- +// +// Copyright (c) Microsoft. All rights reserved. +// +// -------------------------------------------------------------------------------------------------------------------- + +namespace Microsoft.AI.Foundry.Local.Tests; + +using System.Threading.Tasks; +using Microsoft.AI.Foundry.Local.Detail; + +/// +/// Unit tests for audio streaming types and settings. +/// These test the serialization, deserialization, and settings behavior +/// without requiring the native library or a loaded model. 
+/// +internal sealed class AudioStreamingClientTests +{ + // --- AudioStreamTranscriptionResult deserialization tests --- + + [Test] + public async Task AudioStreamTranscriptionResult_FromJson_FinalResult_AllFields() + { + var json = """{"text":"hello world","is_final":true,"start_time":0.0,"end_time":1.5,"confidence":0.95}"""; + + var result = AudioStreamTranscriptionResult.FromJson(json); + + await Assert.That(result).IsNotNull(); + await Assert.That(result.Text).IsEqualTo("hello world"); + await Assert.That(result.IsFinal).IsTrue(); + await Assert.That(result.StartTime).IsEqualTo(0.0); + await Assert.That(result.EndTime).IsEqualTo(1.5); + await Assert.That(result.Confidence).IsEqualTo(0.95f); + } + + [Test] + public async Task AudioStreamTranscriptionResult_FromJson_PartialResult_OptionalFieldsNull() + { + var json = """{"text":"hel","is_final":false}"""; + + var result = AudioStreamTranscriptionResult.FromJson(json); + + await Assert.That(result).IsNotNull(); + await Assert.That(result.Text).IsEqualTo("hel"); + await Assert.That(result.IsFinal).IsFalse(); + await Assert.That(result.StartTime).IsNull(); + await Assert.That(result.EndTime).IsNull(); + await Assert.That(result.Confidence).IsNull(); + } + + [Test] + public async Task AudioStreamTranscriptionResult_FromJson_EmptyText() + { + var json = """{"text":"","is_final":false}"""; + + var result = AudioStreamTranscriptionResult.FromJson(json); + + await Assert.That(result).IsNotNull(); + await Assert.That(result.Text).IsEqualTo(string.Empty); + await Assert.That(result.IsFinal).IsFalse(); + } + + [Test] + public async Task AudioStreamTranscriptionResult_FromJson_InvalidJson_Throws() + { + FoundryLocalException? 
caught = null; + try + { + AudioStreamTranscriptionResult.FromJson("not valid json"); + } + catch (FoundryLocalException ex) + { + caught = ex; + } + catch (System.Text.Json.JsonException) + { + // Also acceptable — JsonSerializer may throw before our wrapper + caught = new FoundryLocalException("json parse error"); + } + + await Assert.That(caught).IsNotNull(); + } + + [Test] + public async Task AudioStreamTranscriptionResult_FromJson_EmptyJson_Throws() + { + FoundryLocalException? caught = null; + try + { + AudioStreamTranscriptionResult.FromJson(""); + } + catch (FoundryLocalException ex) + { + caught = ex; + } + catch (System.Text.Json.JsonException) + { + caught = new FoundryLocalException("json parse error"); + } + + await Assert.That(caught).IsNotNull(); + } + + // --- CoreErrorResponse parsing tests --- + + [Test] + public async Task CoreErrorResponse_TryParse_TransientError_Succeeds() + { + var json = """{"code":"ASR_BACKEND_OVERLOADED","message":"try again later","isTransient":true}"""; + + var error = CoreErrorResponse.TryParse(json); + + await Assert.That(error).IsNotNull(); + await Assert.That(error!.Code).IsEqualTo("ASR_BACKEND_OVERLOADED"); + await Assert.That(error.Message).IsEqualTo("try again later"); + await Assert.That(error.IsTransient).IsTrue(); + } + + [Test] + public async Task CoreErrorResponse_TryParse_PermanentError_Succeeds() + { + var json = """{"code":"ASR_SESSION_NOT_FOUND","message":"session gone","isTransient":false}"""; + + var error = CoreErrorResponse.TryParse(json); + + await Assert.That(error).IsNotNull(); + await Assert.That(error!.Code).IsEqualTo("ASR_SESSION_NOT_FOUND"); + await Assert.That(error.IsTransient).IsFalse(); + } + + [Test] + public async Task CoreErrorResponse_TryParse_InvalidJson_ReturnsNull() + { + var error = CoreErrorResponse.TryParse("not json at all"); + + await Assert.That(error).IsNull(); + } + + [Test] + public async Task CoreErrorResponse_TryParse_EmptyString_ReturnsNull() + { + var error = 
CoreErrorResponse.TryParse(""); + + await Assert.That(error).IsNull(); + } + + [Test] + public async Task CoreErrorResponse_TryParse_ValidJsonWrongShape_ReturnsDefaultValues() + { + // Valid JSON but no matching fields — should deserialize with defaults + var json = """{"unrelated":"field"}"""; + + var error = CoreErrorResponse.TryParse(json); + + await Assert.That(error).IsNotNull(); + await Assert.That(error!.Code).IsEqualTo(""); + await Assert.That(error.IsTransient).IsFalse(); + } + + // --- StreamingAudioSettings tests --- + + [Test] + public async Task StreamingAudioSettings_Defaults_AreCorrect() + { + var settings = new OpenAIAudioStreamingClient.StreamingAudioSettings(); + + await Assert.That(settings.SampleRate).IsEqualTo(16000); + await Assert.That(settings.Channels).IsEqualTo(1); + await Assert.That(settings.BitsPerSample).IsEqualTo(16); + await Assert.That(settings.Language).IsNull(); + await Assert.That(settings.PushQueueCapacity).IsEqualTo(100); + } + + [Test] + public async Task StreamingAudioSettings_Snapshot_IsIndependentCopy() + { + var settings = new OpenAIAudioStreamingClient.StreamingAudioSettings + { + SampleRate = 44100, + Channels = 2, + BitsPerSample = 32, + Language = "zh", + PushQueueCapacity = 50 + }; + + var snapshot = settings.Snapshot(); + + // Modify original after snapshot + settings.SampleRate = 8000; + settings.Channels = 1; + settings.Language = "fr"; + settings.PushQueueCapacity = 200; + + // Snapshot should retain original values + await Assert.That(snapshot.SampleRate).IsEqualTo(44100); + await Assert.That(snapshot.Channels).IsEqualTo(2); + await Assert.That(snapshot.BitsPerSample).IsEqualTo(32); + await Assert.That(snapshot.Language).IsEqualTo("zh"); + await Assert.That(snapshot.PushQueueCapacity).IsEqualTo(50); + } + + [Test] + public async Task StreamingAudioSettings_Snapshot_DoesNotAffectOriginal() + { + var settings = new OpenAIAudioStreamingClient.StreamingAudioSettings + { + SampleRate = 16000, + Language = "en" + }; + 
+ var snapshot = settings.Snapshot(); + + // Modify snapshot + snapshot.SampleRate = 48000; + snapshot.Language = "de"; + + // Original should be unaffected + await Assert.That(settings.SampleRate).IsEqualTo(16000); + await Assert.That(settings.Language).IsEqualTo("en"); + } +} From 9a1578c54802ba0094eba54766cc6ecf50a4b1af Mon Sep 17 00:00:00 2001 From: ruiren_microsoft Date: Thu, 5 Mar 2026 11:21:13 -0800 Subject: [PATCH 02/22] support audio streaming-js --- sdk_v2/js/src/imodel.ts | 8 + sdk_v2/js/src/index.ts | 2 + sdk_v2/js/src/model.ts | 9 + sdk_v2/js/src/modelVariant.ts | 9 + sdk_v2/js/src/openai/audioStreamingClient.ts | 440 +++++++++++++++++++ sdk_v2/js/src/openai/audioStreamingTypes.ts | 52 +++ 6 files changed, 520 insertions(+) create mode 100644 sdk_v2/js/src/openai/audioStreamingClient.ts create mode 100644 sdk_v2/js/src/openai/audioStreamingTypes.ts diff --git a/sdk_v2/js/src/imodel.ts b/sdk_v2/js/src/imodel.ts index 5797ce3b..7268fa1d 100644 --- a/sdk_v2/js/src/imodel.ts +++ b/sdk_v2/js/src/imodel.ts @@ -1,5 +1,6 @@ import { ChatClient } from './openai/chatClient.js'; import { AudioClient } from './openai/audioClient.js'; +import { AudioStreamingClient } from './openai/audioStreamingClient.js'; export interface IModel { get id(): string; @@ -15,4 +16,11 @@ export interface IModel { createChatClient(): ChatClient; createAudioClient(): AudioClient; + + /** + * Creates an AudioStreamingClient for real-time audio streaming ASR. + * The model must be loaded before calling this method. + * @returns An AudioStreamingClient instance. 
+ */ + createAudioStreamingClient(): AudioStreamingClient; } diff --git a/sdk_v2/js/src/index.ts b/sdk_v2/js/src/index.ts index 1af50af8..4061084e 100644 --- a/sdk_v2/js/src/index.ts +++ b/sdk_v2/js/src/index.ts @@ -6,6 +6,8 @@ export { ModelVariant } from './modelVariant.js'; export type { IModel } from './imodel.js'; export { ChatClient, ChatClientSettings } from './openai/chatClient.js'; export { AudioClient, AudioClientSettings } from './openai/audioClient.js'; +export { AudioStreamingClient, StreamingAudioSettings } from './openai/audioStreamingClient.js'; +export type { AudioStreamTranscriptionResult, CoreErrorResponse } from './openai/audioStreamingTypes.js'; export { ModelLoadManager } from './detail/modelLoadManager.js'; /** @internal */ export { CoreInterop } from './detail/coreInterop.js'; diff --git a/sdk_v2/js/src/model.ts b/sdk_v2/js/src/model.ts index c2848524..679de121 100644 --- a/sdk_v2/js/src/model.ts +++ b/sdk_v2/js/src/model.ts @@ -1,6 +1,7 @@ import { ModelVariant } from './modelVariant.js'; import { ChatClient } from './openai/chatClient.js'; import { AudioClient } from './openai/audioClient.js'; +import { AudioStreamingClient } from './openai/audioStreamingClient.js'; import { IModel } from './imodel.js'; /** @@ -146,4 +147,12 @@ export class Model implements IModel { public createAudioClient(): AudioClient { return this.selectedVariant.createAudioClient(); } + + /** + * Creates an AudioStreamingClient for real-time audio streaming ASR. + * @returns An AudioStreamingClient instance. 
+ */ + public createAudioStreamingClient(): AudioStreamingClient { + return this.selectedVariant.createAudioStreamingClient(); + } } diff --git a/sdk_v2/js/src/modelVariant.ts b/sdk_v2/js/src/modelVariant.ts index 7c8b8023..b69f0a45 100644 --- a/sdk_v2/js/src/modelVariant.ts +++ b/sdk_v2/js/src/modelVariant.ts @@ -3,6 +3,7 @@ import { ModelLoadManager } from './detail/modelLoadManager.js'; import { ModelInfo } from './types.js'; import { ChatClient } from './openai/chatClient.js'; import { AudioClient } from './openai/audioClient.js'; +import { AudioStreamingClient } from './openai/audioStreamingClient.js'; import { IModel } from './imodel.js'; /** @@ -127,4 +128,12 @@ export class ModelVariant implements IModel { public createAudioClient(): AudioClient { return new AudioClient(this._modelInfo.id, this.coreInterop); } + + /** + * Creates an AudioStreamingClient for real-time audio streaming ASR. + * @returns An AudioStreamingClient instance. + */ + public createAudioStreamingClient(): AudioStreamingClient { + return new AudioStreamingClient(this._modelInfo.id, this.coreInterop); + } } diff --git a/sdk_v2/js/src/openai/audioStreamingClient.ts b/sdk_v2/js/src/openai/audioStreamingClient.ts new file mode 100644 index 00000000..f8213161 --- /dev/null +++ b/sdk_v2/js/src/openai/audioStreamingClient.ts @@ -0,0 +1,440 @@ +import { CoreInterop } from '../detail/coreInterop.js'; +import { AudioStreamTranscriptionResult, tryParseCoreError } from './audioStreamingTypes.js'; + +/** + * Audio format settings for a streaming session. + * Must be configured before calling start(). + * Settings are frozen once the session starts. + */ +export class StreamingAudioSettings { + /** PCM sample rate in Hz. Default: 16000. */ + sampleRate: number = 16000; + /** Number of audio channels. Default: 1 (mono). */ + channels: number = 1; + /** Bits per sample. Default: 16. */ + bitsPerSample: number = 16; + /** Optional BCP-47 language hint (e.g., "en", "zh"). 
*/ + language?: string; + /** Maximum number of audio chunks buffered in the internal push queue. Default: 100. */ + pushQueueCapacity: number = 100; + + /** @internal Create a frozen copy of these settings. */ + snapshot(): StreamingAudioSettings { + const copy = new StreamingAudioSettings(); + copy.sampleRate = this.sampleRate; + copy.channels = this.channels; + copy.bitsPerSample = this.bitsPerSample; + copy.language = this.language; + copy.pushQueueCapacity = this.pushQueueCapacity; + return Object.freeze(copy) as StreamingAudioSettings; + } +} + +/** + * Internal async queue that acts like C#'s Channel. + * Supports a single consumer reading via async iteration and multiple producers writing. + * @internal + */ +class AsyncQueue { + private queue: T[] = []; + private waitingResolve: ((value: IteratorResult) => void) | null = null; + private completed = false; + private completionError: Error | null = null; + private maxCapacity: number; + private backpressureResolve: (() => void) | null = null; + + constructor(maxCapacity: number = Infinity) { + this.maxCapacity = maxCapacity; + } + + /** Push an item. If at capacity, waits until space is available. */ + async write(item: T): Promise { + if (this.completed) { + throw new Error('Cannot write to a completed queue.'); + } + + // If someone is waiting to read, deliver directly + if (this.waitingResolve) { + const resolve = this.waitingResolve; + this.waitingResolve = null; + resolve({ value: item, done: false }); + return; + } + + // If at capacity, wait for space + if (this.queue.length >= this.maxCapacity) { + await new Promise((resolve) => { + this.backpressureResolve = resolve; + }); + } + + this.queue.push(item); + } + + /** Push an item synchronously (no backpressure wait). Used by native callbacks. 
*/ + tryWrite(item: T): boolean { + if (this.completed) return false; + + if (this.waitingResolve) { + const resolve = this.waitingResolve; + this.waitingResolve = null; + resolve({ value: item, done: false }); + return true; + } + + this.queue.push(item); + return true; + } + + /** Signal that no more items will be written. */ + complete(error?: Error): void { + if (this.completed) return; + this.completed = true; + this.completionError = error ?? null; + + // Release backpressure waiter + if (this.backpressureResolve) { + this.backpressureResolve(); + this.backpressureResolve = null; + } + + // Wake up any waiting reader + if (this.waitingResolve) { + const resolve = this.waitingResolve; + this.waitingResolve = null; + if (this.completionError) { + // Can't reject through iterator result — reader will get done:true + // and the error is surfaced via the completionError property + } + resolve({ value: undefined as any, done: true }); + } + } + + get error(): Error | null { + return this.completionError; + } + + /** Async iterator for consuming items. */ + async *[Symbol.asyncIterator](): AsyncGenerator { + while (true) { + // Release backpressure if queue drained below capacity + if (this.backpressureResolve && this.queue.length < this.maxCapacity) { + const resolve = this.backpressureResolve; + this.backpressureResolve = null; + resolve(); + } + + if (this.queue.length > 0) { + yield this.queue.shift()!; + continue; + } + + if (this.completed) { + if (this.completionError) { + throw this.completionError; + } + return; + } + + // Wait for next item or completion + const result = await new Promise>((resolve) => { + this.waitingResolve = resolve; + }); + + if (result.done) { + if (this.completionError) { + throw this.completionError; + } + return; + } + + yield result.value; + } + } +} + +/** + * Client for real-time audio streaming ASR (Automatic Speech Recognition). 
+ * Audio data from a microphone (or other source) is pushed in as PCM chunks, + * and partial transcription results are returned as an async iterable. + * + * Thread safety: pushAudioData() can be called from any context. + * Pushes are internally queued and serialized to native core one at a time. + * + * Mirrors the C# OpenAIAudioStreamingClient. + */ +export class AudioStreamingClient { + private modelId: string; + private coreInterop: CoreInterop; + + // Session state + private sessionHandle: string | null = null; + private started = false; + private stopped = false; + + // Output queue: native callback writes, user reads via getTranscriptionStream() + private outputQueue: AsyncQueue | null = null; + + // Internal push queue: user writes audio chunks, push loop drains to native core + private pushQueue: AsyncQueue | null = null; + private pushLoopPromise: Promise | null = null; + + // Frozen settings snapshot + private activeSettings: StreamingAudioSettings | null = null; + + // Abort controller for the push loop — decoupled from caller's signal + private sessionAbortController: AbortController | null = null; + + // Whether native callback has been registered (for tracking) + private nativeCallbackRegistered = false; + + /** + * Configuration settings for the streaming session. + * Must be configured before calling start(). Settings are frozen after start(). + */ + public settings = new StreamingAudioSettings(); + + /** + * @internal + * Restricted to internal use. Users should create clients via Model.createAudioStreamingClient(). + */ + constructor(modelId: string, coreInterop: CoreInterop) { + this.modelId = modelId; + this.coreInterop = coreInterop; + } + + /** + * Start a real-time audio streaming session. + * Must be called before pushAudioData() or getTranscriptionStream(). + * Settings are frozen after this call. + */ + public async start(): Promise { + if (this.started) { + throw new Error('Streaming session already started. 
Call stop() first.'); + } + + // Freeze settings + this.activeSettings = this.settings.snapshot(); + + this.outputQueue = new AsyncQueue(); + this.pushQueue = new AsyncQueue(this.activeSettings.pushQueueCapacity); + + const params: Record = { + Model: this.modelId, + SampleRate: this.activeSettings.sampleRate.toString(), + Channels: this.activeSettings.channels.toString(), + BitsPerSample: this.activeSettings.bitsPerSample.toString(), + }; + + if (this.activeSettings.language) { + params['Language'] = this.activeSettings.language; + } + + // Start session via native core with a callback for transcription results. + // executeCommandStreaming registers a callback and calls the native function async. + // For audio_stream_start, the native function returns immediately (non-blocking) + // and invokes the callback on a native thread whenever partial results are ready. + // + // However, the current CoreInterop.executeCommandStreaming wraps the call in + // execute_command_with_callback which blocks until the command completes. + // For audio streaming, we need the start command to return immediately. + // We use executeCommand (synchronous) for start, and the callback is registered + // by the native core during that call. + // + // NOTE: This matches the C# pattern where StartAudioStream is synchronous and + // the callback is registered during the P/Invoke call. The JS koffi FFI works + // similarly — the native function registers our callback pointer and returns. + + try { + const response = this.coreInterop.executeCommand("audio_stream_start", { + Params: params + }); + + this.sessionHandle = response; + if (!this.sessionHandle) { + throw new Error('Native core did not return a session handle.'); + } + } catch (error) { + this.outputQueue.complete(); + throw new Error( + `Error starting audio stream session: ${error instanceof Error ? 
error.message : String(error)}`, + { cause: error } + ); + } + + this.started = true; + this.stopped = false; + + // Start the background push loop + this.sessionAbortController = new AbortController(); + this.pushLoopPromise = this.pushLoop(); + } + + /** + * Push a chunk of raw PCM audio data to the streaming session. + * Can be called from any context. Chunks are internally queued + * and serialized to native core one at a time. + * + * @param pcmData - Raw PCM audio bytes matching the configured format. + */ + public async pushAudioData(pcmData: Uint8Array): Promise { + if (!this.started || this.stopped) { + throw new Error('No active streaming session. Call start() first.'); + } + + // Copy the buffer to avoid issues if the caller reuses it + const copy = new Uint8Array(pcmData.length); + copy.set(pcmData); + + await this.pushQueue!.write(copy); + } + + /** + * Internal loop that drains the push queue and sends chunks to native core one at a time. + * Implements retry for transient native errors and terminates on permanent failures. + * @internal + */ + private async pushLoop(): Promise { + const maxRetries = 3; + const initialRetryDelayMs = 50; + + try { + for await (const audioData of this.pushQueue!) { + // Check if aborted + if (this.sessionAbortController?.signal.aborted) { + break; + } + + let pushed = false; + for (let attempt = 0; attempt <= maxRetries && !pushed; attempt++) { + try { + // Send audio data to native core. + // The native core receives the session handle and audio details via JSON params. + this.coreInterop.executeCommand("audio_stream_push", { + Params: { + SessionHandle: this.sessionHandle!, + AudioDataLength: audioData.length.toString() + } + }); + pushed = true; + } catch (error) { + const errorMsg = error instanceof Error ? 
error.message : String(error); + const errorInfo = tryParseCoreError(errorMsg); + + if (errorInfo?.isTransient && attempt < maxRetries) { + const delay = initialRetryDelayMs * Math.pow(2, attempt); + console.warn( + `Transient push error (attempt ${attempt + 1}/${maxRetries}): ${errorInfo.code}. Retrying in ${delay}ms` + ); + await new Promise(resolve => setTimeout(resolve, delay)); + continue; + } + + // Permanent error or retries exhausted + const fatalError = new Error( + `Push failed permanently (code=${errorInfo?.code ?? 'UNKNOWN'}): ${errorMsg}`, + { cause: error } + ); + console.error('Terminating push loop due to permanent push failure:', errorMsg); + this.outputQueue?.complete(fatalError); + return; + } + } + } + } catch (error) { + if (this.sessionAbortController?.signal.aborted) { + // Expected on cancellation + return; + } + const err = error instanceof Error ? error : new Error(String(error)); + console.error('Push loop terminated with unexpected error:', err.message); + this.outputQueue?.complete(new Error('Push loop terminated unexpectedly.', { cause: err })); + } + } + + /** + * Get the async iterable of transcription results. + * Results arrive as the native ASR engine processes audio data. + * + * Usage: + * ```ts + * for await (const result of client.getTranscriptionStream()) { + * console.log(result.text); + * } + * ``` + */ + public async *getTranscriptionStream(): AsyncGenerator { + if (!this.outputQueue) { + throw new Error('No active streaming session. Call start() first.'); + } + + for await (const item of this.outputQueue) { + yield item; + } + } + + /** + * Signal end-of-audio and stop the streaming session. + * Any remaining buffered audio in the push queue will be drained to native core first. + * Final results are delivered through getTranscriptionStream() before it completes. + */ + public async stop(): Promise { + if (!this.started || this.stopped) { + return; // already stopped or never started + } + + this.stopped = true; + + // 1. 
Complete the push queue so the push loop drains remaining items and exits + this.pushQueue?.complete(); + + // 2. Wait for the push loop to finish draining + if (this.pushLoopPromise) { + await this.pushLoopPromise; + } + + // 3. Abort the session (no-op if push loop already exited) + this.sessionAbortController?.abort(); + + // 4. Tell native core to flush and finalize + let stopError: Error | null = null; + try { + this.coreInterop.executeCommand("audio_stream_stop", { + Params: { SessionHandle: this.sessionHandle! } + }); + } catch (error) { + stopError = error instanceof Error ? error : new Error(String(error)); + console.error('Error stopping audio stream session:', stopError.message); + } + + // 5. Clean up state + this.sessionHandle = null; + this.started = false; + this.sessionAbortController = null; + + // 6. Complete the output queue AFTER the native stop so final callbacks are captured + this.outputQueue?.complete(); + + if (stopError) { + throw new Error( + `Error stopping audio stream session: ${stopError.message}`, + { cause: stopError } + ); + } + } + + /** + * Dispose the client and stop any active session. + * Safe to call multiple times. + */ + public async dispose(): Promise { + try { + if (this.started && !this.stopped) { + await this.stop(); + } + } catch (error) { + // dispose must not throw — log and swallow + console.warn('Error during dispose cleanup:', error instanceof Error ? error.message : String(error)); + } + } +} diff --git a/sdk_v2/js/src/openai/audioStreamingTypes.ts b/sdk_v2/js/src/openai/audioStreamingTypes.ts new file mode 100644 index 00000000..ced58e10 --- /dev/null +++ b/sdk_v2/js/src/openai/audioStreamingTypes.ts @@ -0,0 +1,52 @@ +/** + * Types for real-time audio streaming transcription results and structured errors. + * Mirrors the C# AudioStreamTranscriptionResult and CoreErrorResponse. + */ + +/** + * A transcription result from a real-time audio streaming session. 
+ */ +export interface AudioStreamTranscriptionResult { + /** Whether this is a partial (interim) or final result for this segment. */ + is_final: boolean; + /** The transcribed text. */ + text: string; + /** Start time offset of this segment in the audio stream (seconds). */ + start_time?: number | null; + /** End time offset of this segment in the audio stream (seconds). */ + end_time?: number | null; + /** Confidence score (0.0 - 1.0) if available. */ + confidence?: number | null; +} + +/** + * Structured error response from native core audio streaming commands. + * Used by the push loop to distinguish transient vs permanent failures. + * @internal + */ +export interface CoreErrorResponse { + /** Machine-readable error code. */ + code: string; + /** Human-readable error message. */ + message: string; + /** Whether this error is transient and may succeed on retry. */ + isTransient: boolean; +} + +/** + * Attempt to parse a native error string as a structured CoreErrorResponse. + * Returns null if the error is not valid JSON or doesn't match the schema, + * which should be treated as a permanent/unknown error. 
+ * @internal + */ +export function tryParseCoreError(errorString: string): CoreErrorResponse | null { + try { + const parsed = JSON.parse(errorString); + if (typeof parsed.code === 'string' && typeof parsed.isTransient === 'boolean') { + return parsed as CoreErrorResponse; + } + return null; + } catch { + return null; // unstructured error — treat as permanent + } +} From 397093637f243e8029b0e14b4540ade1b4ae2310 Mon Sep 17 00:00:00 2001 From: ruiren_microsoft Date: Thu, 5 Mar 2026 13:49:49 -0800 Subject: [PATCH 03/22] delete dll mock test --- .../AudioStreamingClientTests.cs | 221 ------------------ 1 file changed, 221 deletions(-) delete mode 100644 sdk_v2/cs/test/FoundryLocal.Tests/AudioStreamingClientTests.cs diff --git a/sdk_v2/cs/test/FoundryLocal.Tests/AudioStreamingClientTests.cs b/sdk_v2/cs/test/FoundryLocal.Tests/AudioStreamingClientTests.cs deleted file mode 100644 index 3a0e2ef7..00000000 --- a/sdk_v2/cs/test/FoundryLocal.Tests/AudioStreamingClientTests.cs +++ /dev/null @@ -1,221 +0,0 @@ -// -------------------------------------------------------------------------------------------------------------------- -// -// Copyright (c) Microsoft. All rights reserved. -// -// -------------------------------------------------------------------------------------------------------------------- - -namespace Microsoft.AI.Foundry.Local.Tests; - -using System.Threading.Tasks; -using Microsoft.AI.Foundry.Local.Detail; - -/// -/// Unit tests for audio streaming types and settings. -/// These test the serialization, deserialization, and settings behavior -/// without requiring the native library or a loaded model. 
-/// -internal sealed class AudioStreamingClientTests -{ - // --- AudioStreamTranscriptionResult deserialization tests --- - - [Test] - public async Task AudioStreamTranscriptionResult_FromJson_FinalResult_AllFields() - { - var json = """{"text":"hello world","is_final":true,"start_time":0.0,"end_time":1.5,"confidence":0.95}"""; - - var result = AudioStreamTranscriptionResult.FromJson(json); - - await Assert.That(result).IsNotNull(); - await Assert.That(result.Text).IsEqualTo("hello world"); - await Assert.That(result.IsFinal).IsTrue(); - await Assert.That(result.StartTime).IsEqualTo(0.0); - await Assert.That(result.EndTime).IsEqualTo(1.5); - await Assert.That(result.Confidence).IsEqualTo(0.95f); - } - - [Test] - public async Task AudioStreamTranscriptionResult_FromJson_PartialResult_OptionalFieldsNull() - { - var json = """{"text":"hel","is_final":false}"""; - - var result = AudioStreamTranscriptionResult.FromJson(json); - - await Assert.That(result).IsNotNull(); - await Assert.That(result.Text).IsEqualTo("hel"); - await Assert.That(result.IsFinal).IsFalse(); - await Assert.That(result.StartTime).IsNull(); - await Assert.That(result.EndTime).IsNull(); - await Assert.That(result.Confidence).IsNull(); - } - - [Test] - public async Task AudioStreamTranscriptionResult_FromJson_EmptyText() - { - var json = """{"text":"","is_final":false}"""; - - var result = AudioStreamTranscriptionResult.FromJson(json); - - await Assert.That(result).IsNotNull(); - await Assert.That(result.Text).IsEqualTo(string.Empty); - await Assert.That(result.IsFinal).IsFalse(); - } - - [Test] - public async Task AudioStreamTranscriptionResult_FromJson_InvalidJson_Throws() - { - FoundryLocalException? 
caught = null; - try - { - AudioStreamTranscriptionResult.FromJson("not valid json"); - } - catch (FoundryLocalException ex) - { - caught = ex; - } - catch (System.Text.Json.JsonException) - { - // Also acceptable — JsonSerializer may throw before our wrapper - caught = new FoundryLocalException("json parse error"); - } - - await Assert.That(caught).IsNotNull(); - } - - [Test] - public async Task AudioStreamTranscriptionResult_FromJson_EmptyJson_Throws() - { - FoundryLocalException? caught = null; - try - { - AudioStreamTranscriptionResult.FromJson(""); - } - catch (FoundryLocalException ex) - { - caught = ex; - } - catch (System.Text.Json.JsonException) - { - caught = new FoundryLocalException("json parse error"); - } - - await Assert.That(caught).IsNotNull(); - } - - // --- CoreErrorResponse parsing tests --- - - [Test] - public async Task CoreErrorResponse_TryParse_TransientError_Succeeds() - { - var json = """{"code":"ASR_BACKEND_OVERLOADED","message":"try again later","isTransient":true}"""; - - var error = CoreErrorResponse.TryParse(json); - - await Assert.That(error).IsNotNull(); - await Assert.That(error!.Code).IsEqualTo("ASR_BACKEND_OVERLOADED"); - await Assert.That(error.Message).IsEqualTo("try again later"); - await Assert.That(error.IsTransient).IsTrue(); - } - - [Test] - public async Task CoreErrorResponse_TryParse_PermanentError_Succeeds() - { - var json = """{"code":"ASR_SESSION_NOT_FOUND","message":"session gone","isTransient":false}"""; - - var error = CoreErrorResponse.TryParse(json); - - await Assert.That(error).IsNotNull(); - await Assert.That(error!.Code).IsEqualTo("ASR_SESSION_NOT_FOUND"); - await Assert.That(error.IsTransient).IsFalse(); - } - - [Test] - public async Task CoreErrorResponse_TryParse_InvalidJson_ReturnsNull() - { - var error = CoreErrorResponse.TryParse("not json at all"); - - await Assert.That(error).IsNull(); - } - - [Test] - public async Task CoreErrorResponse_TryParse_EmptyString_ReturnsNull() - { - var error = 
CoreErrorResponse.TryParse(""); - - await Assert.That(error).IsNull(); - } - - [Test] - public async Task CoreErrorResponse_TryParse_ValidJsonWrongShape_ReturnsDefaultValues() - { - // Valid JSON but no matching fields — should deserialize with defaults - var json = """{"unrelated":"field"}"""; - - var error = CoreErrorResponse.TryParse(json); - - await Assert.That(error).IsNotNull(); - await Assert.That(error!.Code).IsEqualTo(""); - await Assert.That(error.IsTransient).IsFalse(); - } - - // --- StreamingAudioSettings tests --- - - [Test] - public async Task StreamingAudioSettings_Defaults_AreCorrect() - { - var settings = new OpenAIAudioStreamingClient.StreamingAudioSettings(); - - await Assert.That(settings.SampleRate).IsEqualTo(16000); - await Assert.That(settings.Channels).IsEqualTo(1); - await Assert.That(settings.BitsPerSample).IsEqualTo(16); - await Assert.That(settings.Language).IsNull(); - await Assert.That(settings.PushQueueCapacity).IsEqualTo(100); - } - - [Test] - public async Task StreamingAudioSettings_Snapshot_IsIndependentCopy() - { - var settings = new OpenAIAudioStreamingClient.StreamingAudioSettings - { - SampleRate = 44100, - Channels = 2, - BitsPerSample = 32, - Language = "zh", - PushQueueCapacity = 50 - }; - - var snapshot = settings.Snapshot(); - - // Modify original after snapshot - settings.SampleRate = 8000; - settings.Channels = 1; - settings.Language = "fr"; - settings.PushQueueCapacity = 200; - - // Snapshot should retain original values - await Assert.That(snapshot.SampleRate).IsEqualTo(44100); - await Assert.That(snapshot.Channels).IsEqualTo(2); - await Assert.That(snapshot.BitsPerSample).IsEqualTo(32); - await Assert.That(snapshot.Language).IsEqualTo("zh"); - await Assert.That(snapshot.PushQueueCapacity).IsEqualTo(50); - } - - [Test] - public async Task StreamingAudioSettings_Snapshot_DoesNotAffectOriginal() - { - var settings = new OpenAIAudioStreamingClient.StreamingAudioSettings - { - SampleRate = 16000, - Language = "en" - }; - 
- var snapshot = settings.Snapshot(); - - // Modify snapshot - snapshot.SampleRate = 48000; - snapshot.Language = "de"; - - // Original should be unaffected - await Assert.That(settings.SampleRate).IsEqualTo(16000); - await Assert.That(settings.Language).IsEqualTo("en"); - } -} From ef2e9e04e6be1f9e2320df0fe757fffaedc20ba9 Mon Sep 17 00:00:00 2001 From: ruiren_microsoft Date: Thu, 5 Mar 2026 15:51:16 -0800 Subject: [PATCH 04/22] update core api --- sdk_v2/cs/src/Detail/CoreInterop.cs | 119 ++----------------- sdk_v2/cs/src/Detail/ICoreInterop.cs | 10 +- sdk_v2/cs/src/OpenAI/AudioStreamingClient.cs | 44 ++----- 3 files changed, 19 insertions(+), 154 deletions(-) diff --git a/sdk_v2/cs/src/Detail/CoreInterop.cs b/sdk_v2/cs/src/Detail/CoreInterop.cs index a178bdca..7def104f 100644 --- a/sdk_v2/cs/src/Detail/CoreInterop.cs +++ b/sdk_v2/cs/src/Detail/CoreInterop.cs @@ -160,26 +160,12 @@ private static unsafe partial void CoreExecuteCommandWithCallback(RequestBuffer* // --- Audio streaming P/Invoke imports --- - [LibraryImport(LibraryName, EntryPoint = "audio_stream_start")] + [LibraryImport(LibraryName, EntryPoint = "execute_command_with_binary")] [UnmanagedCallConv(CallConvs = new[] { typeof(System.Runtime.CompilerServices.CallConvCdecl) })] - private static unsafe partial void CoreAudioStreamStart( - RequestBuffer* request, - ResponseBuffer* response, - nint callbackPtr, - nint userData); - - [LibraryImport(LibraryName, EntryPoint = "audio_stream_push")] - [UnmanagedCallConv(CallConvs = new[] { typeof(System.Runtime.CompilerServices.CallConvCdecl) })] - private static unsafe partial void CoreAudioStreamPush( + private static unsafe partial void CoreExecuteCommandWithBinary( StreamingRequestBuffer* request, ResponseBuffer* response); - [LibraryImport(LibraryName, EntryPoint = "audio_stream_stop")] - [UnmanagedCallConv(CallConvs = new[] { typeof(System.Runtime.CompilerServices.CallConvCdecl) })] - private static unsafe partial void CoreAudioStreamStop( - RequestBuffer* 
request, - ResponseBuffer* response); - // helper to capture exceptions in callbacks internal class CallbackHelper { @@ -355,77 +341,10 @@ public Task ExecuteCommandWithCallbackAsync(string commandName, CoreIn // --- Audio streaming managed implementations --- - public AudioStreamSession StartAudioStream(CoreInteropRequest request, CallbackFn transcriptionCallback) + public Response StartAudioStream(CoreInteropRequest request) { - try - { - var commandInputJson = request.ToJson(); - byte[] commandBytes = System.Text.Encoding.UTF8.GetBytes("audio_stream_start"); - byte[] inputBytes = System.Text.Encoding.UTF8.GetBytes(commandInputJson); - - IntPtr commandPtr = Marshal.AllocHGlobal(commandBytes.Length); - Marshal.Copy(commandBytes, 0, commandPtr, commandBytes.Length); - - IntPtr inputPtr = Marshal.AllocHGlobal(inputBytes.Length); - Marshal.Copy(inputBytes, 0, inputPtr, inputBytes.Length); - - var reqBuf = new RequestBuffer - { - Command = commandPtr, - CommandLength = commandBytes.Length, - Data = inputPtr, - DataLength = inputBytes.Length - }; - - ResponseBuffer response = default; - - var helper = new CallbackHelper(transcriptionCallback); - var funcPtr = Marshal.GetFunctionPointerForDelegate(handleCallbackDelegate); - var helperHandle = GCHandle.Alloc(helper); - var helperPtr = GCHandle.ToIntPtr(helperHandle); - - try - { - unsafe - { - CoreAudioStreamStart(&reqBuf, &response, funcPtr, helperPtr); - } - } - catch - { - // Free on failure — native core never saw the handle - helperHandle.Free(); - throw; - } - finally - { - Marshal.FreeHGlobal(commandPtr); - Marshal.FreeHGlobal(inputPtr); - } - - // Marshal response inline (matching existing ExecuteCommandImpl pattern) - Response result = new(); - if (response.Data != IntPtr.Zero && response.DataLength > 0) - { - byte[] managedResponse = new byte[response.DataLength]; - Marshal.Copy(response.Data, managedResponse, 0, response.DataLength); - result.Data = System.Text.Encoding.UTF8.GetString(managedResponse); - } - 
if (response.Error != IntPtr.Zero && response.ErrorLength > 0) - { - result.Error = Marshal.PtrToStringUTF8(response.Error, response.ErrorLength)!; - } - Marshal.FreeHGlobal(response.Data); - Marshal.FreeHGlobal(response.Error); - - // Return the GCHandle alongside the response — caller is responsible for - // keeping it alive during the session and freeing it in StopAudioStream. - return new AudioStreamSession(result, helperHandle); - } - catch (Exception ex) when (ex is not OperationCanceledException) - { - throw new FoundryLocalException("Error executing audio_stream_start", ex, _logger); - } + // Uses existing execute_command entry point with "audio_stream_start" command + return ExecuteCommand("audio_stream_start", request); } public Response PushAudioData(CoreInteropRequest request, ReadOnlyMemory audioData) @@ -461,7 +380,7 @@ public Response PushAudioData(CoreInteropRequest request, ReadOnlyMemory a try { - CoreAudioStreamPush(&reqBuf, &response); + CoreExecuteCommandWithBinary(&reqBuf, &response); } finally { @@ -493,30 +412,10 @@ public Response PushAudioData(CoreInteropRequest request, ReadOnlyMemory a } } - public Response StopAudioStream(CoreInteropRequest request, GCHandle callbackHandle) + public Response StopAudioStream(CoreInteropRequest request) { - try - { - var result = ExecuteCommand("audio_stream_stop", request); - - // Free the GCHandle that was keeping the callback delegate alive. - // After this point, the native core must not invoke the callback. 
- if (callbackHandle.IsAllocated) - { - callbackHandle.Free(); - } - - return result; - } - catch (Exception ex) when (ex is not OperationCanceledException) - { - // Still free the handle on failure to avoid leaks - if (callbackHandle.IsAllocated) - { - callbackHandle.Free(); - } - throw new FoundryLocalException("Error executing audio_stream_stop", ex, _logger); - } + // Uses existing execute_command entry point with "audio_stream_stop" command + return ExecuteCommand("audio_stream_stop", request); } } diff --git a/sdk_v2/cs/src/Detail/ICoreInterop.cs b/sdk_v2/cs/src/Detail/ICoreInterop.cs index cd342ce5..b493dfb7 100644 --- a/sdk_v2/cs/src/Detail/ICoreInterop.cs +++ b/sdk_v2/cs/src/Detail/ICoreInterop.cs @@ -65,13 +65,7 @@ protected unsafe struct StreamingRequestBuffer public int BinaryDataLength; } - /// - /// Returned by StartAudioStream. Holds the session handle and the GCHandle - /// that must remain alive for the callback lifetime. - /// - internal record AudioStreamSession(Response Response, GCHandle CallbackHandle); - - AudioStreamSession StartAudioStream(CoreInteropRequest request, CallbackFn transcriptionCallback); + Response StartAudioStream(CoreInteropRequest request); Response PushAudioData(CoreInteropRequest request, ReadOnlyMemory audioData); - Response StopAudioStream(CoreInteropRequest request, GCHandle callbackHandle); + Response StopAudioStream(CoreInteropRequest request); } diff --git a/sdk_v2/cs/src/OpenAI/AudioStreamingClient.cs b/sdk_v2/cs/src/OpenAI/AudioStreamingClient.cs index 27e1bbea..303362e3 100644 --- a/sdk_v2/cs/src/OpenAI/AudioStreamingClient.cs +++ b/sdk_v2/cs/src/OpenAI/AudioStreamingClient.cs @@ -34,7 +34,6 @@ public sealed class OpenAIAudioStreamingClient : IAsyncDisposable // Session state — protected by _lock private readonly AsyncLock _lock = new(); private string? 
_sessionHandle; - private GCHandle _callbackHandle; private bool _started; private bool _stopped; @@ -50,9 +49,6 @@ public sealed class OpenAIAudioStreamingClient : IAsyncDisposable // Cancelled only during StopAsync/DisposeAsync to allow clean drain. private CancellationTokenSource? _sessionCts; - // Stored as a field so the delegate is not garbage collected while native core holds a reference. - private ICoreInterop.CallbackFn? _transcriptionCallback; - // Snapshot of settings captured at StartAsync — prevents mutation after session starts. private StreamingAudioSettings? _activeSettings; @@ -142,43 +138,20 @@ public async Task StartAsync(CancellationToken ct = default) request.Params["Language"] = _activeSettings.Language; } - // Store the callback as a field so the delegate is rooted for the session lifetime. - _transcriptionCallback = (callbackData) => - { - try - { - var result = AudioStreamTranscriptionResult.FromJson(callbackData); - // TryWrite always succeeds on unbounded channels - _outputChannel.Writer.TryWrite(result); - } - catch (Exception ex) - { - _logger.LogError(ex, "Error processing audio stream transcription callback"); - _outputChannel.Writer.TryComplete( - new FoundryLocalException("Error processing audio streaming callback.", ex, _logger)); - } - }; - - // StartAudioStream is synchronous (P/Invoke) — run on thread pool - var session = await Task.Run( - () => _coreInterop.StartAudioStream(request, _transcriptionCallback), ct) + // StartAudioStream uses existing execute_command entry point — synchronous P/Invoke + var response = await Task.Run( + () => _coreInterop.StartAudioStream(request), ct) .ConfigureAwait(false); - if (session.Response.Error != null) + if (response.Error != null) { - // Free handle on failure - if (session.CallbackHandle.IsAllocated) - { - session.CallbackHandle.Free(); - } _outputChannel.Writer.TryComplete(); throw new FoundryLocalException( - $"Error starting audio stream session: {session.Response.Error}", _logger); + 
$"Error starting audio stream session: {response.Error}", _logger); } - _sessionHandle = session.Response.Data + _sessionHandle = response.Data ?? throw new FoundryLocalException("Native core did not return a session handle.", _logger); - _callbackHandle = session.CallbackHandle; _started = true; _stopped = false; @@ -337,7 +310,7 @@ public async Task StopAsync(CancellationToken ct = default) try { response = await Task.Run( - () => _coreInterop.StopAudioStream(request, _callbackHandle), ct) + () => _coreInterop.StopAudioStream(request), ct) .ConfigureAwait(false); } catch (OperationCanceledException) when (ct.IsCancellationRequested) @@ -347,7 +320,7 @@ public async Task StopAsync(CancellationToken ct = default) try { response = await Task.Run( - () => _coreInterop.StopAudioStream(request, _callbackHandle)) + () => _coreInterop.StopAudioStream(request)) .ConfigureAwait(false); } catch (Exception cleanupEx) @@ -360,7 +333,6 @@ public async Task StopAsync(CancellationToken ct = default) finally { _sessionHandle = null; - _transcriptionCallback = null; _started = false; _sessionCts?.Dispose(); _sessionCts = null; From 535b73596b567cbd90e73ee40060ddc9e5b643d8 Mon Sep 17 00:00:00 2001 From: ruiren_microsoft Date: Tue, 10 Mar 2026 18:09:38 -0700 Subject: [PATCH 05/22] update sdk --- sdk_v2/cs/src/AssemblyInfo.cs | 1 + sdk_v2/cs/src/Detail/CoreInterop.cs | 136 ++++- sdk_v2/cs/src/IModel.cs | 7 - sdk_v2/cs/src/Model.cs | 5 - sdk_v2/cs/src/ModelVariant.cs | 17 - sdk_v2/cs/src/OpenAI/AudioClient.cs | 124 +---- .../OpenAI/AudioStreamTranscriptionTypes.cs | 13 +- sdk_v2/cs/src/OpenAI/AudioStreamingClient.cs | 68 ++- .../cs/test/FoundryLocal.Tests/ModelTests.cs | 2 + sdk_v2/cs/test/FoundryLocal.Tests/Utils.cs | 500 +++--------------- 10 files changed, 249 insertions(+), 624 deletions(-) diff --git a/sdk_v2/cs/src/AssemblyInfo.cs b/sdk_v2/cs/src/AssemblyInfo.cs index 9bebe71b..987f9de6 100644 --- a/sdk_v2/cs/src/AssemblyInfo.cs +++ b/sdk_v2/cs/src/AssemblyInfo.cs @@ -7,4 +7,5 
@@ using System.Runtime.CompilerServices; [assembly: InternalsVisibleTo("Microsoft.AI.Foundry.Local.Tests")] +[assembly: InternalsVisibleTo("AudioStreamTest")] [assembly: InternalsVisibleTo("DynamicProxyGenAssembly2")] // for Mock of ICoreInterop diff --git a/sdk_v2/cs/src/Detail/CoreInterop.cs b/sdk_v2/cs/src/Detail/CoreInterop.cs index 7def104f..e4c88e9b 100644 --- a/sdk_v2/cs/src/Detail/CoreInterop.cs +++ b/sdk_v2/cs/src/Detail/CoreInterop.cs @@ -160,12 +160,24 @@ private static unsafe partial void CoreExecuteCommandWithCallback(RequestBuffer* // --- Audio streaming P/Invoke imports --- - [LibraryImport(LibraryName, EntryPoint = "execute_command_with_binary")] + [LibraryImport(LibraryName, EntryPoint = "audio_stream_start")] [UnmanagedCallConv(CallConvs = new[] { typeof(System.Runtime.CompilerServices.CallConvCdecl) })] - private static unsafe partial void CoreExecuteCommandWithBinary( + private static unsafe partial void CoreAudioStreamStart( + RequestBuffer* request, + ResponseBuffer* response); + + [LibraryImport(LibraryName, EntryPoint = "audio_stream_push")] + [UnmanagedCallConv(CallConvs = new[] { typeof(System.Runtime.CompilerServices.CallConvCdecl) })] + private static unsafe partial void CoreAudioStreamPush( StreamingRequestBuffer* request, ResponseBuffer* response); + [LibraryImport(LibraryName, EntryPoint = "audio_stream_stop")] + [UnmanagedCallConv(CallConvs = new[] { typeof(System.Runtime.CompilerServices.CallConvCdecl) })] + private static unsafe partial void CoreAudioStreamStop( + RequestBuffer* request, + ResponseBuffer* response); + // helper to capture exceptions in callbacks internal class CallbackHelper { @@ -339,12 +351,71 @@ public Task ExecuteCommandWithCallbackAsync(string commandName, CoreIn return Task.Run(() => ExecuteCommandWithCallback(commandName, commandInput, callback), ct); } + /// + /// Marshal a ResponseBuffer from unmanaged memory into a managed Response and free the unmanaged memory. 
+ /// + private Response MarshalResponse(ResponseBuffer response) + { + Response result = new(); + + if (response.Data != IntPtr.Zero && response.DataLength > 0) + { + byte[] managedResponse = new byte[response.DataLength]; + Marshal.Copy(response.Data, managedResponse, 0, response.DataLength); + result.Data = System.Text.Encoding.UTF8.GetString(managedResponse); + } + + if (response.Error != IntPtr.Zero && response.ErrorLength > 0) + { + result.Error = Marshal.PtrToStringUTF8(response.Error, response.ErrorLength)!; + } + + Marshal.FreeHGlobal(response.Data); + Marshal.FreeHGlobal(response.Error); + + return result; + } + // --- Audio streaming managed implementations --- public Response StartAudioStream(CoreInteropRequest request) { - // Uses existing execute_command entry point with "audio_stream_start" command - return ExecuteCommand("audio_stream_start", request); + try + { + var commandInputJson = request.ToJson(); + byte[] inputBytes = System.Text.Encoding.UTF8.GetBytes(commandInputJson); + + IntPtr inputPtr = Marshal.AllocHGlobal(inputBytes.Length); + Marshal.Copy(inputBytes, 0, inputPtr, inputBytes.Length); + + unsafe + { + var reqBuf = new RequestBuffer + { + Command = IntPtr.Zero, + CommandLength = 0, + Data = inputPtr, + DataLength = inputBytes.Length + }; + + ResponseBuffer response = default; + + try + { + CoreAudioStreamStart(&reqBuf, &response); + } + finally + { + Marshal.FreeHGlobal(inputPtr); + } + + return MarshalResponse(response); + } + } + catch (Exception ex) when (ex is not OperationCanceledException) + { + throw new FoundryLocalException("Error executing audio_stream_start", ex, _logger); + } } public Response PushAudioData(CoreInteropRequest request, ReadOnlyMemory audioData) @@ -380,7 +451,7 @@ public Response PushAudioData(CoreInteropRequest request, ReadOnlyMemory a try { - CoreExecuteCommandWithBinary(&reqBuf, &response); + CoreAudioStreamPush(&reqBuf, &response); } finally { @@ -388,22 +459,7 @@ public Response 
PushAudioData(CoreInteropRequest request, ReadOnlyMemory a Marshal.FreeHGlobal(inputPtr); } - // Marshal response inline - Response result = new(); - if (response.Data != IntPtr.Zero && response.DataLength > 0) - { - byte[] managedResponse = new byte[response.DataLength]; - Marshal.Copy(response.Data, managedResponse, 0, response.DataLength); - result.Data = System.Text.Encoding.UTF8.GetString(managedResponse); - } - if (response.Error != IntPtr.Zero && response.ErrorLength > 0) - { - result.Error = Marshal.PtrToStringUTF8(response.Error, response.ErrorLength)!; - } - Marshal.FreeHGlobal(response.Data); - Marshal.FreeHGlobal(response.Error); - - return result; + return MarshalResponse(response); } } catch (Exception ex) when (ex is not OperationCanceledException) @@ -414,8 +470,42 @@ public Response PushAudioData(CoreInteropRequest request, ReadOnlyMemory a public Response StopAudioStream(CoreInteropRequest request) { - // Uses existing execute_command entry point with "audio_stream_stop" command - return ExecuteCommand("audio_stream_stop", request); + try + { + var commandInputJson = request.ToJson(); + byte[] inputBytes = System.Text.Encoding.UTF8.GetBytes(commandInputJson); + + IntPtr inputPtr = Marshal.AllocHGlobal(inputBytes.Length); + Marshal.Copy(inputBytes, 0, inputPtr, inputBytes.Length); + + unsafe + { + var reqBuf = new RequestBuffer + { + Command = IntPtr.Zero, + CommandLength = 0, + Data = inputPtr, + DataLength = inputBytes.Length + }; + + ResponseBuffer response = default; + + try + { + CoreAudioStreamStop(&reqBuf, &response); + } + finally + { + Marshal.FreeHGlobal(inputPtr); + } + + return MarshalResponse(response); + } + } + catch (Exception ex) when (ex is not OperationCanceledException) + { + throw new FoundryLocalException("Error executing audio_stream_stop", ex, _logger); + } } } diff --git a/sdk_v2/cs/src/IModel.cs b/sdk_v2/cs/src/IModel.cs index 20eca014..c3acba61 100644 --- a/sdk_v2/cs/src/IModel.cs +++ b/sdk_v2/cs/src/IModel.cs @@ -67,11 
+67,4 @@ Task DownloadAsync(Action? downloadProgress = null, /// Optional cancellation token. /// OpenAI.AudioClient Task GetAudioClientAsync(CancellationToken? ct = null); - - /// - /// Get a real-time audio streaming client for ASR. - /// - /// Optional cancellation token. - /// OpenAIAudioStreamingClient for real-time transcription. - Task GetAudioStreamingClientAsync(CancellationToken? ct = null); } diff --git a/sdk_v2/cs/src/Model.cs b/sdk_v2/cs/src/Model.cs index ffe8bb1c..83bcef69 100644 --- a/sdk_v2/cs/src/Model.cs +++ b/sdk_v2/cs/src/Model.cs @@ -114,11 +114,6 @@ public async Task GetAudioClientAsync(CancellationToken? ct = return await SelectedVariant.GetAudioClientAsync(ct).ConfigureAwait(false); } - public async Task GetAudioStreamingClientAsync(CancellationToken? ct = null) - { - return await SelectedVariant.GetAudioStreamingClientAsync(ct).ConfigureAwait(false); - } - public async Task UnloadAsync(CancellationToken? ct = null) { await SelectedVariant.UnloadAsync(ct).ConfigureAwait(false); diff --git a/sdk_v2/cs/src/ModelVariant.cs b/sdk_v2/cs/src/ModelVariant.cs index d5285c1c..6ca7cda7 100644 --- a/sdk_v2/cs/src/ModelVariant.cs +++ b/sdk_v2/cs/src/ModelVariant.cs @@ -190,21 +190,4 @@ private async Task GetAudioClientImplAsync(CancellationToken? return new OpenAIAudioClient(Id); } - - public async Task GetAudioStreamingClientAsync(CancellationToken? ct = null) - { - return await Utils.CallWithExceptionHandling(() => GetAudioStreamingClientImplAsync(ct), - "Error getting audio streaming client for model", _logger) - .ConfigureAwait(false); - } - - private async Task GetAudioStreamingClientImplAsync(CancellationToken? ct = null) - { - if (!await IsLoadedAsync(ct)) - { - throw new FoundryLocalException($"Model {Id} is not loaded. 
Call LoadAsync first."); - } - - return new OpenAIAudioStreamingClient(Id); - } } diff --git a/sdk_v2/cs/src/OpenAI/AudioClient.cs b/sdk_v2/cs/src/OpenAI/AudioClient.cs index 5475185c..1f44996b 100644 --- a/sdk_v2/cs/src/OpenAI/AudioClient.cs +++ b/sdk_v2/cs/src/OpenAI/AudioClient.cs @@ -6,9 +6,6 @@ namespace Microsoft.AI.Foundry.Local; -using System.Runtime.CompilerServices; -using System.Threading.Channels; - using Betalgo.Ranul.OpenAI.ObjectModels.RequestModels; using Betalgo.Ranul.OpenAI.ObjectModels.ResponseModels; @@ -46,6 +43,16 @@ public record AudioSettings /// public AudioSettings Settings { get; } = new(); + /// + /// Create a real-time streaming transcription session. + /// Audio data is pushed in as PCM chunks and transcription results are returned as an async stream. + /// + /// A streaming session that must be disposed when done. + public AudioTranscriptionStreamSession CreateStreamingSession() + { + return new AudioTranscriptionStreamSession(_modelId); + } + /// /// Transcribe audio from a file. /// @@ -63,28 +70,6 @@ public async Task TranscribeAudioAsync(string .ConfigureAwait(false); } - /// - /// Transcribe audio from a file with streamed output. - /// - /// - /// Path to file containing audio recording. - /// Supported formats: mp3 - /// - /// Cancellation token. - /// An asynchronous enumerable of transcription responses. - public async IAsyncEnumerable TranscribeAudioStreamingAsync( - string audioFilePath, [EnumeratorCancellation] CancellationToken ct) - { - var enumerable = Utils.CallWithExceptionHandling( - () => TranscribeAudioStreamingImplAsync(audioFilePath, ct), - "Error during streaming audio transcription.", _logger).ConfigureAwait(false); - - await foreach (var item in enumerable) - { - yield return item; - } - } - private async Task TranscribeAudioImplAsync(string audioFilePath, CancellationToken? 
ct) { @@ -107,93 +92,4 @@ private async Task TranscribeAudioImplAsync(st return output; } - - private async IAsyncEnumerable TranscribeAudioStreamingImplAsync( - string audioFilePath, [EnumeratorCancellation] CancellationToken ct) - { - var openaiRequest = AudioTranscriptionCreateRequestExtended.FromUserInput(_modelId, audioFilePath, Settings); - - var request = new CoreInteropRequest - { - Params = new Dictionary - { - { "OpenAICreateRequest", openaiRequest.ToJson() }, - } - }; - - var channel = Channel.CreateUnbounded( - new UnboundedChannelOptions - { - SingleWriter = true, - SingleReader = true, - AllowSynchronousContinuations = true - }); - - // The callback will push ChatResponse objects into the channel. - // The channel reader will return the values to the user. - // This setup prevents the user from blocking the thread generating the responses. - _ = Task.Run(async () => - { - try - { - var failed = false; - - var res = await _coreInterop.ExecuteCommandWithCallbackAsync( - "audio_transcribe", - request, - async (callbackData) => - { - try - { - if (!failed) - { - var audioCompletion = callbackData.ToAudioTranscription(_logger); - await channel.Writer.WriteAsync(audioCompletion); - } - } - catch (Exception ex) - { - // propagate exception to reader - channel.Writer.TryComplete( - new FoundryLocalException( - "Error processing streaming audio transcription callback data.", ex, _logger)); - failed = true; - } - }, - ct - ).ConfigureAwait(false); - - // If the native layer returned an error (e.g. missing audio file, invalid model) - // without invoking any callbacks, propagate it so the caller sees an exception - // instead of an empty stream. 
- if (res.Error != null) - { - channel.Writer.TryComplete( - new FoundryLocalException($"Error from audio_transcribe command: {res.Error}", _logger)); - return; - } - - // use TryComplete as an exception in the callback may have already closed the channel - _ = channel.Writer.TryComplete(); - } - // Ignore cancellation exceptions so we don't convert them into errors - catch (Exception ex) when (ex is not OperationCanceledException) - { - channel.Writer.TryComplete( - new FoundryLocalException("Error executing streaming chat completion.", ex, _logger)); - } - catch (OperationCanceledException) - { - // Complete the channel on cancellation but don't turn it into an error - channel.Writer.TryComplete(); - } - }, ct); - - // Start reading from the channel as items arrive. - // This will continue until ExecuteCommandWithCallbackAsync completes and closes the channel. - await foreach (var item in channel.Reader.ReadAllAsync(ct)) - { - yield return item; - } - } } diff --git a/sdk_v2/cs/src/OpenAI/AudioStreamTranscriptionTypes.cs b/sdk_v2/cs/src/OpenAI/AudioStreamTranscriptionTypes.cs index 7736cb47..02c4169e 100644 --- a/sdk_v2/cs/src/OpenAI/AudioStreamTranscriptionTypes.cs +++ b/sdk_v2/cs/src/OpenAI/AudioStreamTranscriptionTypes.cs @@ -6,11 +6,20 @@ namespace Microsoft.AI.Foundry.Local; public record AudioStreamTranscriptionResult { - /// Whether this is a partial (interim) or final result for this segment. + /// + /// Whether this is a final or partial (interim) result. + /// - Nemotron models always return true (every result is final). + /// - Other models (e.g., Azure Embedded) may return false for interim + /// hypotheses that will be replaced by a subsequent final result. + /// [JsonPropertyName("is_final")] public bool IsFinal { get; init; } - /// The transcribed text. + /// + /// Newly transcribed text from this audio chunk only (incremental hypothesis). 
+ /// This is NOT the full accumulated transcript — each result contains only + /// the text decoded from the most recent audio chunk. + /// [JsonPropertyName("text")] public string Text { get; init; } = string.Empty; diff --git a/sdk_v2/cs/src/OpenAI/AudioStreamingClient.cs b/sdk_v2/cs/src/OpenAI/AudioStreamingClient.cs index 303362e3..f0a1904d 100644 --- a/sdk_v2/cs/src/OpenAI/AudioStreamingClient.cs +++ b/sdk_v2/cs/src/OpenAI/AudioStreamingClient.cs @@ -15,17 +15,19 @@ namespace Microsoft.AI.Foundry.Local; /// -/// Client for real-time audio streaming ASR (Automatic Speech Recognition). +/// Session for real-time audio streaming ASR (Automatic Speech Recognition). /// Audio data from a microphone (or other source) is pushed in as PCM chunks, -/// and partial transcription results are returned as an async stream. +/// and transcription results are returned as an async stream. /// -/// Thread safety: PushAudioDataAsync can be called from any thread (including high-frequency +/// Created via . +/// +/// Thread safety: PushAudioAsync can be called from any thread (including high-frequency /// audio callbacks). Pushes are internally serialized via a bounded channel to prevent /// unbounded memory growth and ensure ordering. /// -public sealed class OpenAIAudioStreamingClient : IAsyncDisposable +public sealed class AudioTranscriptionStreamSession : IAsyncDisposable { private readonly string _modelId; private readonly ICoreInterop _coreInterop = FoundryLocalManager.Instance.CoreInterop; @@ -50,14 +52,14 @@ public sealed class OpenAIAudioStreamingClient : IAsyncDisposable private CancellationTokenSource? _sessionCts; // Snapshot of settings captured at StartAsync — prevents mutation after session starts. - private StreamingAudioSettings? _activeSettings; + private AudioStreamTranscriptionOptions? _activeSettings; /// /// Audio format settings for the streaming session. /// Must be configured before calling . /// Settings are frozen once the session starts. 
/// - public record StreamingAudioSettings + public record AudioStreamTranscriptionOptions { /// PCM sample rate in Hz. Default: 16000. public int SampleRate { get; set; } = 16000; @@ -65,32 +67,29 @@ public record StreamingAudioSettings /// Number of audio channels. Default: 1 (mono). public int Channels { get; set; } = 1; - /// Bits per sample. Default: 16. - public int BitsPerSample { get; set; } = 16; - /// Optional BCP-47 language hint (e.g., "en", "zh"). public string? Language { get; set; } /// /// Maximum number of audio chunks buffered in the internal push queue. - /// If the queue is full, PushAudioDataAsync will asynchronously wait. + /// If the queue is full, AppendAsync will asynchronously wait. /// Default: 100 (~3 seconds of audio at typical chunk sizes). /// public int PushQueueCapacity { get; set; } = 100; - internal StreamingAudioSettings Snapshot() => this with { }; // record copy + internal AudioStreamTranscriptionOptions Snapshot() => this with { }; // record copy } - public StreamingAudioSettings Settings { get; } = new(); + public AudioStreamTranscriptionOptions Settings { get; } = new(); - internal OpenAIAudioStreamingClient(string modelId) + internal AudioTranscriptionStreamSession(string modelId) { _modelId = modelId; } /// /// Start a real-time audio streaming session. - /// Must be called before or . + /// Must be called before or . /// Settings are frozen after this call. /// /// Cancellation token. @@ -129,7 +128,6 @@ public async Task StartAsync(CancellationToken ct = default) { "Model", _modelId }, { "SampleRate", _activeSettings.SampleRate.ToString(CultureInfo.InvariantCulture) }, { "Channels", _activeSettings.Channels.ToString(CultureInfo.InvariantCulture) }, - { "BitsPerSample", _activeSettings.BitsPerSample.ToString(CultureInfo.InvariantCulture) }, } }; @@ -171,7 +169,7 @@ public async Task StartAsync(CancellationToken ct = default) /// /// Raw PCM audio bytes matching the configured format. /// Cancellation token. 
- public async ValueTask PushAudioDataAsync(ReadOnlyMemory pcmData, CancellationToken ct = default) + public async ValueTask AppendAsync(ReadOnlyMemory pcmData, CancellationToken ct = default) { if (!_started || _stopped) { @@ -211,6 +209,25 @@ private async Task PushLoopAsync(CancellationToken ct) if (response.Error == null) { pushed = true; + + // Parse transcription result from push response and surface it + if (!string.IsNullOrEmpty(response.Data)) + { + try + { + var transcription = AudioStreamTranscriptionResult.FromJson(response.Data); + if (!string.IsNullOrEmpty(transcription.Text)) + { + _outputChannel?.Writer.TryWrite(transcription); + } + } + catch (Exception parseEx) + { + // Non-fatal: log and continue if response isn't a transcription result + _logger.LogDebug(parseEx, "Could not parse push response as transcription result"); + } + } + continue; } @@ -332,12 +349,29 @@ public async Task StopAsync(CancellationToken ct = default) } finally { + // Parse final transcription from stop response before completing the channel + if (response?.Data != null) + { + try + { + var finalResult = AudioStreamTranscriptionResult.FromJson(response.Data); + if (!string.IsNullOrEmpty(finalResult.Text)) + { + _outputChannel?.Writer.TryWrite(finalResult); + } + } + catch (Exception parseEx) + { + _logger.LogDebug(parseEx, "Could not parse stop response as transcription result"); + } + } + _sessionHandle = null; _started = false; _sessionCts?.Dispose(); _sessionCts = null; - // 5. 
Complete the output channel AFTER StopAudioStream returns + // Complete the output channel AFTER writing final result _outputChannel?.Writer.TryComplete(); } diff --git a/sdk_v2/cs/test/FoundryLocal.Tests/ModelTests.cs b/sdk_v2/cs/test/FoundryLocal.Tests/ModelTests.cs index b5a49657..0e2ea1dc 100644 --- a/sdk_v2/cs/test/FoundryLocal.Tests/ModelTests.cs +++ b/sdk_v2/cs/test/FoundryLocal.Tests/ModelTests.cs @@ -52,3 +52,5 @@ public async Task GetLastestVersion_Works() await Assert.That(latestB).IsEqualTo(variants[1]); } } + + diff --git a/sdk_v2/cs/test/FoundryLocal.Tests/Utils.cs b/sdk_v2/cs/test/FoundryLocal.Tests/Utils.cs index 55808da9..6da59baf 100644 --- a/sdk_v2/cs/test/FoundryLocal.Tests/Utils.cs +++ b/sdk_v2/cs/test/FoundryLocal.Tests/Utils.cs @@ -1,452 +1,74 @@ -// -------------------------------------------------------------------------------------------------------------------- -// -// Copyright (c) Microsoft. All rights reserved. -// -// -------------------------------------------------------------------------------------------------------------------- - -namespace Microsoft.AI.Foundry.Local.Tests; - -using System; -using System.Collections.Generic; -using System.Runtime.CompilerServices; -using System.Text.Json; - -using Microsoft.AI.Foundry.Local.Detail; -using Microsoft.Extensions.Configuration; +using Microsoft.AI.Foundry.Local; using Microsoft.Extensions.Logging; -using Microsoft.VisualStudio.TestPlatform.TestHost; +var loggerFactory = LoggerFactory.Create(b => b.AddConsole().SetMinimumLevel(LogLevel.Debug)); +var logger = loggerFactory.CreateLogger("AudioStreamTest"); -using Moq; +// Point to the directory containing Core + ORT DLLs +var corePath = @"C:\Users\ruiren\Desktop\audio-stream-test\Microsoft.AI.Foundry.Local.Core.dll"; -internal static class Utils +var config = new Configuration { - internal struct TestCatalogInfo - { - internal readonly List TestCatalog { get; } - internal readonly string ModelListJson { get; } - - internal 
TestCatalogInfo(bool includeCuda) - { - - TestCatalog = Utils.BuildTestCatalog(includeCuda); - ModelListJson = JsonSerializer.Serialize(TestCatalog, JsonSerializationContext.Default.ListModelInfo); - } - } - - internal static readonly TestCatalogInfo TestCatalog = new(true); - - [Before(Assembly)] - public static void AssemblyInit(AssemblyHookContext _) - { - using var loggerFactory = LoggerFactory.Create(builder => - { - builder - .AddConsole() - .SetMinimumLevel(LogLevel.Debug); - }); - - ILogger logger = loggerFactory.CreateLogger(); - - // Read configuration from appsettings.Test.json - logger.LogDebug("Reading configuration from appsettings.Test.json"); - var configuration = new ConfigurationBuilder() - .SetBasePath(Directory.GetCurrentDirectory()) - .AddJsonFile("appsettings.Test.json", optional: true, reloadOnChange: false) - .Build(); - - var testModelCacheDirName = "test-data-shared"; - string testDataSharedPath; - if (Path.IsPathRooted(testModelCacheDirName) || - testModelCacheDirName.Contains(Path.DirectorySeparatorChar) || - testModelCacheDirName.Contains(Path.AltDirectorySeparatorChar)) - { - // It's a relative or complete filepath, resolve from current directory - testDataSharedPath = Path.GetFullPath(testModelCacheDirName); - } - else - { - // It's just a directory name, combine with repo root parent - testDataSharedPath = Path.GetFullPath(Path.Combine(GetRepoRoot(), "..", testModelCacheDirName)); - } - - logger.LogInformation("Using test model cache directory: {testDataSharedPath}", testDataSharedPath); - - if (!Directory.Exists(testDataSharedPath)) - { - throw new DirectoryNotFoundException($"Test model cache directory does not exist: {testDataSharedPath}"); - - } - - var config = new Configuration - { - AppName = "FoundryLocalSdkTest", - LogLevel = Local.LogLevel.Debug, - Web = new Configuration.WebService - { - Urls = "http://127.0.0.1:0" - }, - ModelCacheDir = testDataSharedPath, - LogsDir = Path.Combine(GetRepoRoot(), "sdk_v2", "cs", "logs") - 
}; - - // Initialize the singleton instance. - FoundryLocalManager.CreateAsync(config, logger).GetAwaiter().GetResult(); - - // standalone instance for testing individual components that skips the 'initialize' command - CoreInterop = new CoreInterop(logger); - } - - internal static ICoreInterop CoreInterop { get; private set; } = default!; - - internal static Mock CreateCapturingLoggerMock(List sink) - { - var mock = new Mock(); - mock.Setup(x => x.Log( - It.IsAny(), - It.IsAny(), - It.IsAny(), - It.IsAny(), - (Func)It.IsAny())) - .Callback((LogLevel level, EventId id, object state, Exception? ex, Delegate formatter) => - { - var message = formatter.DynamicInvoke(state, ex) as string; - sink.Add($"{level}: {message}"); - }); - - return mock; - } - - internal sealed record InteropCommandInterceptInfo - { - public string CommandName { get; init; } = default!; - public string? CommandInput { get; init; } - public string ResponseData { get; init; } = default!; - public string? ResponseError { get; init; } - } - - internal static Mock CreateCoreInteropWithIntercept(ICoreInterop coreInterop, - List intercepts) - { - var mock = new Mock(); - var interceptNames = new HashSet(StringComparer.InvariantCulture); - - foreach (var intercept in intercepts) - { - if (!interceptNames.Add(intercept.CommandName)) - { - throw new ArgumentException($"Duplicate intercept for command {intercept.CommandName}"); - } - - mock.Setup(x => x.ExecuteCommand(It.Is(s => s == intercept.CommandName), It.IsAny())) - .Returns(new ICoreInterop.Response - { - Data = intercept.ResponseData, - Error = intercept.ResponseError - }); - - mock.Setup(x => x.ExecuteCommandAsync(It.Is(s => s == intercept.CommandName), - It.IsAny(), - It.IsAny())) - .ReturnsAsync(new ICoreInterop.Response - { - Data = intercept.ResponseData, - Error = intercept.ResponseError - }); - } - - mock.Setup(x => x.ExecuteCommand(It.Is(s => !interceptNames.Contains(s)), - It.IsAny())) - .Returns((string commandName, CoreInteropRequest? 
commandInput) => - coreInterop.ExecuteCommand(commandName, commandInput)); - - mock.Setup(x => x.ExecuteCommandAsync(It.Is(s => !interceptNames.Contains(s)), - It.IsAny(), - It.IsAny())) - .Returns((string commandName, CoreInteropRequest? commandInput, CancellationToken? ct) => - coreInterop.ExecuteCommandAsync(commandName, commandInput, ct)); - - return mock; - } - - internal static bool IsRunningInCI() + AppName = "AudioStreamTest", + LogLevel = Microsoft.AI.Foundry.Local.LogLevel.Debug, + AdditionalSettings = new Dictionary { - var azureDevOps = Environment.GetEnvironmentVariable("TF_BUILD"); - var githubActions = Environment.GetEnvironmentVariable("GITHUB_ACTIONS"); - var isCI = string.Equals(azureDevOps, "True", StringComparison.OrdinalIgnoreCase) || - string.Equals(githubActions, "true", StringComparison.OrdinalIgnoreCase); - - return isCI; + { "FoundryLocalCorePath", corePath } } +}; - private static List BuildTestCatalog(bool includeCuda = true) - { - // Mirrors MOCK_CATALOG_DATA ordering and fields (Python tests) - var common = new - { - ProviderType = "AzureFoundry", - Version = 1, - ModelType = "ONNX", - PromptTemplate = (PromptTemplate?)null, - Publisher = "Microsoft", - Task = "chat-completion", - FileSizeMb = 10403, - ModelSettings = new ModelSettings { Parameters = [] }, - SupportsToolCalling = false, - License = "MIT", - LicenseDescription = "License…", - MaxOutputTokens = 1024L, - MinFLVersion = "1.0.0", - }; - - var list = new List - { - // model-1 generic-gpu, generic-cpu:2, generic-cpu:1 - new() - { - Id = "model-1-generic-gpu:1", - Name = "model-1-generic-gpu", - DisplayName = "model-1-generic-gpu", - Uri = "azureml://registries/azureml/models/model-1-generic-gpu/versions/1", - Runtime = new Runtime { DeviceType = DeviceType.GPU, ExecutionProvider = "WebGpuExecutionProvider" }, - Alias = "model-1", - // ParentModelUri = "azureml://registries/azureml/models/model-1/versions/1", - ProviderType = common.ProviderType, Version = common.Version, 
ModelType = common.ModelType, - PromptTemplate = common.PromptTemplate, Publisher = common.Publisher, Task = common.Task, - FileSizeMb = common.FileSizeMb, ModelSettings = common.ModelSettings, - SupportsToolCalling = common.SupportsToolCalling, License = common.License, - LicenseDescription = common.LicenseDescription, MaxOutputTokens = common.MaxOutputTokens, - MinFLVersion = common.MinFLVersion - }, - new() - { - Id = "model-1-generic-cpu:2", - Name = "model-1-generic-cpu", - DisplayName = "model-1-generic-cpu", - Uri = "azureml://registries/azureml/models/model-1-generic-cpu/versions/2", - Runtime = new Runtime { DeviceType = DeviceType.CPU, ExecutionProvider = "CPUExecutionProvider" }, - Alias = "model-1", - // ParentModelUri = "azureml://registries/azureml/models/model-1/versions/2", - ProviderType = common.ProviderType, - Version = common.Version, ModelType = common.ModelType, - PromptTemplate = common.PromptTemplate, - Publisher = common.Publisher, Task = common.Task, - FileSizeMb = common.FileSizeMb - 10, // smaller so default chosen in test that sorts on this - ModelSettings = common.ModelSettings, - SupportsToolCalling = common.SupportsToolCalling, - License = common.License, - LicenseDescription = common.LicenseDescription, - MaxOutputTokens = common.MaxOutputTokens, - MinFLVersion = common.MinFLVersion - }, - new() - { - Id = "model-1-generic-cpu:1", - Name = "model-1-generic-cpu", - DisplayName = "model-1-generic-cpu", - Uri = "azureml://registries/azureml/models/model-1-generic-cpu/versions/1", - Runtime = new Runtime { DeviceType = DeviceType.CPU, ExecutionProvider = "CPUExecutionProvider" }, - Alias = "model-1", - //ParentModelUri = "azureml://registries/azureml/models/model-1/versions/1", - ProviderType = common.ProviderType, - Version = common.Version, - ModelType = common.ModelType, - PromptTemplate = common.PromptTemplate, - Publisher = common.Publisher, Task = common.Task, - FileSizeMb = common.FileSizeMb, - ModelSettings = 
common.ModelSettings, - SupportsToolCalling = common.SupportsToolCalling, - License = common.License, - LicenseDescription = common.LicenseDescription, - MaxOutputTokens = common.MaxOutputTokens, - MinFLVersion = common.MinFLVersion - }, +Console.WriteLine("=== Initializing FoundryLocalManager ==="); +await FoundryLocalManager.CreateAsync(config, logger); +var manager = FoundryLocalManager.Instance; - // model-2 npu:2, npu:1, generic-cpu:1 - new() - { - Id = "model-2-npu:2", - Name = "model-2-npu", - DisplayName = "model-2-npu", - Uri = "azureml://registries/azureml/models/model-2-npu/versions/2", - Runtime = new Runtime { DeviceType = DeviceType.NPU, ExecutionProvider = "QNNExecutionProvider" }, - Alias = "model-2", - //ParentModelUri = "azureml://registries/azureml/models/model-2/versions/2", - ProviderType = common.ProviderType, - Version = common.Version, ModelType = common.ModelType, - PromptTemplate = common.PromptTemplate, - Publisher = common.Publisher, Task = common.Task, - FileSizeMb = common.FileSizeMb, - ModelSettings = common.ModelSettings, - SupportsToolCalling = common.SupportsToolCalling, - License = common.License, - LicenseDescription = common.LicenseDescription, - MaxOutputTokens = common.MaxOutputTokens, - MinFLVersion = common.MinFLVersion - }, - new() - { - Id = "model-2-npu:1", - Name = "model-2-npu", - DisplayName = "model-2-npu", - Uri = "azureml://registries/azureml/models/model-2-npu/versions/1", - Runtime = new Runtime { DeviceType = DeviceType.NPU, ExecutionProvider = "QNNExecutionProvider" }, - Alias = "model-2", - //ParentModelUri = "azureml://registries/azureml/models/model-2/versions/1", - ProviderType = common.ProviderType, - Version = common.Version, ModelType = common.ModelType, - PromptTemplate = common.PromptTemplate, - Publisher = common.Publisher, Task = common.Task, - FileSizeMb = common.FileSizeMb, - ModelSettings = common.ModelSettings, - SupportsToolCalling = common.SupportsToolCalling, - License = common.License, - 
LicenseDescription = common.LicenseDescription, - MaxOutputTokens = common.MaxOutputTokens, - MinFLVersion = common.MinFLVersion - }, - new() - { - Id = "model-2-generic-cpu:1", - Name = "model-2-generic-cpu", - DisplayName = "model-2-generic-cpu", - Uri = "azureml://registries/azureml/models/model-2-generic-cpu/versions/1", - Runtime = new Runtime { DeviceType = DeviceType.CPU, ExecutionProvider = "CPUExecutionProvider" }, - Alias = "model-2", - //ParentModelUri = "azureml://registries/azureml/models/model-2/versions/1", - ProviderType = common.ProviderType, - Version = common.Version, ModelType = common.ModelType, - PromptTemplate = common.PromptTemplate, - Publisher = common.Publisher, Task = common.Task, - FileSizeMb = common.FileSizeMb, - ModelSettings = common.ModelSettings, - SupportsToolCalling = common.SupportsToolCalling, - License = common.License, - LicenseDescription = common.LicenseDescription, - MaxOutputTokens = common.MaxOutputTokens, - MinFLVersion = common.MinFLVersion - }, - }; +Console.WriteLine("=== Getting Catalog ==="); +var catalog = await manager.GetCatalogAsync(); +var models = await catalog.ListModelsAsync(); +Console.WriteLine($"Found {models.Count} models"); - // model-3 cuda-gpu (optional), generic-gpu, generic-cpu - if (includeCuda) - { - list.Add(new ModelInfo - { - Id = "model-3-cuda-gpu:1", - Name = "model-3-cuda-gpu", - DisplayName = "model-3-cuda-gpu", - Uri = "azureml://registries/azureml/models/model-3-cuda-gpu/versions/1", - Runtime = new Runtime { DeviceType = DeviceType.GPU, ExecutionProvider = "CUDAExecutionProvider" }, - Alias = "model-3", - //ParentModelUri = "azureml://registries/azureml/models/model-3/versions/1", - ProviderType = common.ProviderType, - Version = common.Version, - ModelType = common.ModelType, - PromptTemplate = common.PromptTemplate, - Publisher = common.Publisher, - Task = common.Task, - FileSizeMb = common.FileSizeMb, - ModelSettings = common.ModelSettings, - SupportsToolCalling = 
common.SupportsToolCalling, - License = common.License, - LicenseDescription = common.LicenseDescription, - MaxOutputTokens = common.MaxOutputTokens, - MinFLVersion = common.MinFLVersion - }); - } - - list.AddRange(new[] - { - new ModelInfo - { - Id = "model-3-generic-gpu:1", - Name = "model-3-generic-gpu", - DisplayName = "model-3-generic-gpu", - Uri = "azureml://registries/azureml/models/model-3-generic-gpu/versions/1", - Runtime = new Runtime { DeviceType = DeviceType.GPU, ExecutionProvider = "WebGpuExecutionProvider" }, - Alias = "model-3", - //ParentModelUri = "azureml://registries/azureml/models/model-3/versions/1", - ProviderType = common.ProviderType, - Version = common.Version, ModelType = common.ModelType, - PromptTemplate = common.PromptTemplate, - Publisher = common.Publisher, Task = common.Task, - FileSizeMb = common.FileSizeMb, - ModelSettings = common.ModelSettings, - SupportsToolCalling = common.SupportsToolCalling, - License = common.License, - LicenseDescription = common.LicenseDescription, - MaxOutputTokens = common.MaxOutputTokens, - MinFLVersion = common.MinFLVersion - }, - new ModelInfo - { - Id = "model-3-generic-cpu:1", - Name = "model-3-generic-cpu", - DisplayName = "model-3-generic-cpu", - Uri = "azureml://registries/azureml/models/model-3-generic-cpu/versions/1", - Runtime = new Runtime { DeviceType = DeviceType.CPU, ExecutionProvider = "CPUExecutionProvider" }, - Alias = "model-3", - //ParentModelUri = "azureml://registries/azureml/models/model-3/versions/1", - ProviderType = common.ProviderType, - Version = common.Version, - ModelType = common.ModelType, - PromptTemplate = common.PromptTemplate, - Publisher = common.Publisher, Task = common.Task, - FileSizeMb = common.FileSizeMb, - ModelSettings = common.ModelSettings, - SupportsToolCalling = common.SupportsToolCalling, - License = common.License, - LicenseDescription = common.LicenseDescription, - MaxOutputTokens = common.MaxOutputTokens, - MinFLVersion = common.MinFLVersion - } - }); 
- - // model-4 generic-gpu (nullable prompt) - list.Add(new ModelInfo - { - Id = "model-4-generic-gpu:1", - Name = "model-4-generic-gpu", - DisplayName = "model-4-generic-gpu", - Uri = "azureml://registries/azureml/models/model-4-generic-gpu/versions/1", - Runtime = new Runtime { DeviceType = DeviceType.GPU, ExecutionProvider = "WebGpuExecutionProvider" }, - Alias = "model-4", - //ParentModelUri = "azureml://registries/azureml/models/model-4/versions/1", - ProviderType = common.ProviderType, - Version = common.Version, - ModelType = common.ModelType, - PromptTemplate = null, - Publisher = common.Publisher, - Task = common.Task, - FileSizeMb = common.FileSizeMb, - ModelSettings = common.ModelSettings, - SupportsToolCalling = common.SupportsToolCalling, - License = common.License, - LicenseDescription = common.LicenseDescription, - MaxOutputTokens = common.MaxOutputTokens, - MinFLVersion = common.MinFLVersion - }); - - return list; - } - - private static string GetSourceFilePath([CallerFilePath] string path = "") => path; - - // Gets the root directory of the foundry-local-sdk repository by finding the .git directory. - private static string GetRepoRoot() - { - var sourceFile = GetSourceFilePath(); - var dir = new DirectoryInfo(Path.GetDirectoryName(sourceFile)!); +// Find and load a whisper model +var model = await catalog.GetModelAsync("whisper-tiny"); +if (model == null) +{ + Console.WriteLine("whisper-tiny not found. 
Available models:"); + foreach (var m in models) + Console.WriteLine($" - {m.Alias}"); + return; +} - while (dir != null) - { - if (Directory.Exists(Path.Combine(dir.FullName, ".git"))) - return dir.FullName; +Console.WriteLine($"=== Downloading {model.Alias} ==="); +await model.DownloadAsync(p => Console.Write($"\r Progress: {p:F1}%")); +Console.WriteLine(); + +Console.WriteLine($"=== Loading {model.Alias} ==="); +await model.LoadAsync(); +Console.WriteLine("Model loaded."); + +Console.WriteLine("=== Creating streaming session ==="); +var audioClient = await model.GetAudioClientAsync(); +var streamingClient = audioClient.CreateStreamingSession(); +streamingClient.Settings.SampleRate = 16000; +streamingClient.Settings.Channels = 1; +streamingClient.Settings.BitsPerSample = 16; +streamingClient.Settings.Language = "en"; + +Console.WriteLine("=== Starting streaming session ==="); +await streamingClient.StartAsync(); +Console.WriteLine("Session started!"); + +// Push some fake PCM data (silence — 100ms at 16kHz 16-bit mono = 3200 bytes) +var fakePcm = new byte[3200]; +Console.WriteLine("=== Pushing audio chunks ==="); +for (int i = 0; i < 5; i++) +{ + await streamingClient.AppendAsync(fakePcm); + Console.WriteLine($" Pushed chunk {i + 1}"); +} - dir = dir.Parent; - } +Console.WriteLine("=== Stopping session ==="); +await streamingClient.StopAsync(); +Console.WriteLine("Session stopped."); - throw new InvalidOperationException("Could not find git repository root from test file location"); - } -} +Console.WriteLine("=== Unloading model ==="); +await model.UnloadAsync(); +Console.WriteLine("Done! 
All plumbing works end-to-end."); \ No newline at end of file From f5bd9162c30928435e7bd5e39876313c378a9ad0 Mon Sep 17 00:00:00 2001 From: ruiren_microsoft Date: Thu, 12 Mar 2026 18:52:41 -0700 Subject: [PATCH 06/22] update the api --- sdk_v2/cs/src/Detail/CoreInterop.cs | 86 ++++------------------------- 1 file changed, 12 insertions(+), 74 deletions(-) diff --git a/sdk_v2/cs/src/Detail/CoreInterop.cs b/sdk_v2/cs/src/Detail/CoreInterop.cs index e4c88e9b..c5eba7ec 100644 --- a/sdk_v2/cs/src/Detail/CoreInterop.cs +++ b/sdk_v2/cs/src/Detail/CoreInterop.cs @@ -158,7 +158,12 @@ private static unsafe partial void CoreExecuteCommandWithCallback(RequestBuffer* nint callbackPtr, // NativeCallbackFn pointer nint userData); - // --- Audio streaming P/Invoke imports --- + [LibraryImport(LibraryName, EntryPoint = "execute_command_with_binary")] + [UnmanagedCallConv(CallConvs = new[] { typeof(System.Runtime.CompilerServices.CallConvCdecl) })] + private static unsafe partial void CoreExecuteCommandWithBinary(StreamingRequestBuffer* nativeRequest, + ResponseBuffer* nativeResponse); + + // --- Audio streaming P/Invoke imports (kept for future dedicated entry points) --- [LibraryImport(LibraryName, EntryPoint = "audio_stream_start")] [UnmanagedCallConv(CallConvs = new[] { typeof(System.Runtime.CompilerServices.CallConvCdecl) })] @@ -377,45 +382,13 @@ private Response MarshalResponse(ResponseBuffer response) } // --- Audio streaming managed implementations --- + // Route through the existing execute_command / execute_command_with_binary entry points. + // The Core handles audio_stream_start / audio_stream_stop as command cases in ExecuteCommandManaged, + // and audio_stream_push as a command case in ExecuteCommandWithBinaryManaged. 
public Response StartAudioStream(CoreInteropRequest request) { - try - { - var commandInputJson = request.ToJson(); - byte[] inputBytes = System.Text.Encoding.UTF8.GetBytes(commandInputJson); - - IntPtr inputPtr = Marshal.AllocHGlobal(inputBytes.Length); - Marshal.Copy(inputBytes, 0, inputPtr, inputBytes.Length); - - unsafe - { - var reqBuf = new RequestBuffer - { - Command = IntPtr.Zero, - CommandLength = 0, - Data = inputPtr, - DataLength = inputBytes.Length - }; - - ResponseBuffer response = default; - - try - { - CoreAudioStreamStart(&reqBuf, &response); - } - finally - { - Marshal.FreeHGlobal(inputPtr); - } - - return MarshalResponse(response); - } - } - catch (Exception ex) when (ex is not OperationCanceledException) - { - throw new FoundryLocalException("Error executing audio_stream_start", ex, _logger); - } + return ExecuteCommand("audio_stream_start", request); } public Response PushAudioData(CoreInteropRequest request, ReadOnlyMemory audioData) @@ -451,7 +424,7 @@ public Response PushAudioData(CoreInteropRequest request, ReadOnlyMemory a try { - CoreAudioStreamPush(&reqBuf, &response); + CoreExecuteCommandWithBinary(&reqBuf, &response); } finally { @@ -470,42 +443,7 @@ public Response PushAudioData(CoreInteropRequest request, ReadOnlyMemory a public Response StopAudioStream(CoreInteropRequest request) { - try - { - var commandInputJson = request.ToJson(); - byte[] inputBytes = System.Text.Encoding.UTF8.GetBytes(commandInputJson); - - IntPtr inputPtr = Marshal.AllocHGlobal(inputBytes.Length); - Marshal.Copy(inputBytes, 0, inputPtr, inputBytes.Length); - - unsafe - { - var reqBuf = new RequestBuffer - { - Command = IntPtr.Zero, - CommandLength = 0, - Data = inputPtr, - DataLength = inputBytes.Length - }; - - ResponseBuffer response = default; - - try - { - CoreAudioStreamStop(&reqBuf, &response); - } - finally - { - Marshal.FreeHGlobal(inputPtr); - } - - return MarshalResponse(response); - } - } - catch (Exception ex) when (ex is not 
OperationCanceledException) - { - throw new FoundryLocalException("Error executing audio_stream_stop", ex, _logger); - } + return ExecuteCommand("audio_stream_stop", request); } } From 6d067e086ed8c294006f961585233d0421e4da2f Mon Sep 17 00:00:00 2001 From: ruiren_microsoft Date: Fri, 13 Mar 2026 12:15:55 -0700 Subject: [PATCH 07/22] rename LiveAudioTranscription --- .../cs/src/Detail/JsonSerializationContext.cs | 2 +- sdk_v2/cs/src/OpenAI/AudioClient.cs | 4 ++-- ...ent.cs => LiveAudioTranscriptionClient.cs} | 24 +++++++++---------- ...ypes.cs => LiveAudioTranscriptionTypes.cs} | 8 +++---- 4 files changed, 19 insertions(+), 19 deletions(-) rename sdk_v2/cs/src/OpenAI/{AudioStreamingClient.cs => LiveAudioTranscriptionClient.cs} (94%) rename sdk_v2/cs/src/OpenAI/{AudioStreamTranscriptionTypes.cs => LiveAudioTranscriptionTypes.cs} (91%) diff --git a/sdk_v2/cs/src/Detail/JsonSerializationContext.cs b/sdk_v2/cs/src/Detail/JsonSerializationContext.cs index 3cc079f3..9ca3f539 100644 --- a/sdk_v2/cs/src/Detail/JsonSerializationContext.cs +++ b/sdk_v2/cs/src/Detail/JsonSerializationContext.cs @@ -34,7 +34,7 @@ namespace Microsoft.AI.Foundry.Local.Detail; [JsonSerializable(typeof(PropertyDefinition))] [JsonSerializable(typeof(IList))] // --- NEW: Audio streaming types --- -[JsonSerializable(typeof(AudioStreamTranscriptionResult))] +[JsonSerializable(typeof(LiveAudioTranscriptionResult))] [JsonSerializable(typeof(CoreErrorResponse))] [JsonSourceGenerationOptions(DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull, WriteIndented = false)] diff --git a/sdk_v2/cs/src/OpenAI/AudioClient.cs b/sdk_v2/cs/src/OpenAI/AudioClient.cs index 1f44996b..e2088901 100644 --- a/sdk_v2/cs/src/OpenAI/AudioClient.cs +++ b/sdk_v2/cs/src/OpenAI/AudioClient.cs @@ -48,9 +48,9 @@ public record AudioSettings /// Audio data is pushed in as PCM chunks and transcription results are returned as an async stream. /// /// A streaming session that must be disposed when done. 
- public AudioTranscriptionStreamSession CreateStreamingSession() + public LiveAudioTranscriptionSession CreateLiveTranscriptionSession() { - return new AudioTranscriptionStreamSession(_modelId); + return new LiveAudioTranscriptionSession(_modelId); } /// diff --git a/sdk_v2/cs/src/OpenAI/AudioStreamingClient.cs b/sdk_v2/cs/src/OpenAI/LiveAudioTranscriptionClient.cs similarity index 94% rename from sdk_v2/cs/src/OpenAI/AudioStreamingClient.cs rename to sdk_v2/cs/src/OpenAI/LiveAudioTranscriptionClient.cs index f0a1904d..0c9e6477 100644 --- a/sdk_v2/cs/src/OpenAI/AudioStreamingClient.cs +++ b/sdk_v2/cs/src/OpenAI/LiveAudioTranscriptionClient.cs @@ -19,7 +19,7 @@ namespace Microsoft.AI.Foundry.Local; /// Audio data from a microphone (or other source) is pushed in as PCM chunks, /// and transcription results are returned as an async stream. /// -/// Created via . +/// Created via . /// /// Thread safety: PushAudioAsync can be called from any thread (including high-frequency /// audio callbacks). Pushes are internally serialized via a bounded channel to prevent @@ -27,7 +27,7 @@ namespace Microsoft.AI.Foundry.Local; /// -public sealed class AudioTranscriptionStreamSession : IAsyncDisposable +public sealed class LiveAudioTranscriptionSession : IAsyncDisposable { private readonly string _modelId; private readonly ICoreInterop _coreInterop = FoundryLocalManager.Instance.CoreInterop; @@ -40,7 +40,7 @@ public sealed class AudioTranscriptionStreamSession : IAsyncDisposable private bool _stopped; // Output channel: native callback writes, user reads via GetTranscriptionStream - private Channel? _outputChannel; + private Channel? _outputChannel; // Internal push queue: user writes audio chunks, background loop drains to native core. // Bounded to prevent unbounded memory growth if native core is slower than real-time. @@ -52,14 +52,14 @@ public sealed class AudioTranscriptionStreamSession : IAsyncDisposable private CancellationTokenSource? 
_sessionCts; // Snapshot of settings captured at StartAsync — prevents mutation after session starts. - private AudioStreamTranscriptionOptions? _activeSettings; + private LiveAudioTranscriptionOptions? _activeSettings; /// /// Audio format settings for the streaming session. /// Must be configured before calling . /// Settings are frozen once the session starts. /// - public record AudioStreamTranscriptionOptions + public record LiveAudioTranscriptionOptions { /// PCM sample rate in Hz. Default: 16000. public int SampleRate { get; set; } = 16000; @@ -77,12 +77,12 @@ public record AudioStreamTranscriptionOptions /// public int PushQueueCapacity { get; set; } = 100; - internal AudioStreamTranscriptionOptions Snapshot() => this with { }; // record copy + internal LiveAudioTranscriptionOptions Snapshot() => this with { }; // record copy } - public AudioStreamTranscriptionOptions Settings { get; } = new(); + public LiveAudioTranscriptionOptions Settings { get; } = new(); - internal AudioTranscriptionStreamSession(string modelId) + internal LiveAudioTranscriptionSession(string modelId) { _modelId = modelId; } @@ -105,7 +105,7 @@ public async Task StartAsync(CancellationToken ct = default) // Freeze settings _activeSettings = Settings.Snapshot(); - _outputChannel = Channel.CreateUnbounded( + _outputChannel = Channel.CreateUnbounded( new UnboundedChannelOptions { SingleWriter = true, // only the native callback writes @@ -215,7 +215,7 @@ private async Task PushLoopAsync(CancellationToken ct) { try { - var transcription = AudioStreamTranscriptionResult.FromJson(response.Data); + var transcription = LiveAudioTranscriptionResult.FromJson(response.Data); if (!string.IsNullOrEmpty(transcription.Text)) { _outputChannel?.Writer.TryWrite(transcription); @@ -273,7 +273,7 @@ private async Task PushLoopAsync(CancellationToken ct) /// /// Cancellation token. /// Async enumerable of transcription results. 
- public async IAsyncEnumerable GetTranscriptionStream( + public async IAsyncEnumerable GetTranscriptionStream( [EnumeratorCancellation] CancellationToken ct = default) { if (_outputChannel == null) @@ -354,7 +354,7 @@ public async Task StopAsync(CancellationToken ct = default) { try { - var finalResult = AudioStreamTranscriptionResult.FromJson(response.Data); + var finalResult = LiveAudioTranscriptionResult.FromJson(response.Data); if (!string.IsNullOrEmpty(finalResult.Text)) { _outputChannel?.Writer.TryWrite(finalResult); diff --git a/sdk_v2/cs/src/OpenAI/AudioStreamTranscriptionTypes.cs b/sdk_v2/cs/src/OpenAI/LiveAudioTranscriptionTypes.cs similarity index 91% rename from sdk_v2/cs/src/OpenAI/AudioStreamTranscriptionTypes.cs rename to sdk_v2/cs/src/OpenAI/LiveAudioTranscriptionTypes.cs index 02c4169e..33820836 100644 --- a/sdk_v2/cs/src/OpenAI/AudioStreamTranscriptionTypes.cs +++ b/sdk_v2/cs/src/OpenAI/LiveAudioTranscriptionTypes.cs @@ -4,7 +4,7 @@ namespace Microsoft.AI.Foundry.Local; using System.Text.Json.Serialization; using Microsoft.AI.Foundry.Local.Detail; -public record AudioStreamTranscriptionResult +public record LiveAudioTranscriptionResult { /// /// Whether this is a final or partial (interim) result. @@ -35,11 +35,11 @@ public record AudioStreamTranscriptionResult [JsonPropertyName("confidence")] public float? Confidence { get; init; } - internal static AudioStreamTranscriptionResult FromJson(string json) + internal static LiveAudioTranscriptionResult FromJson(string json) { return JsonSerializer.Deserialize(json, - JsonSerializationContext.Default.AudioStreamTranscriptionResult) - ?? throw new FoundryLocalException("Failed to deserialize AudioStreamTranscriptionResult"); + JsonSerializationContext.Default.LiveAudioTranscriptionResult) + ?? 
throw new FoundryLocalException("Failed to deserialize LiveAudioTranscriptionResult"); } } From 6dee740b1770c3abd8602572e8874f9169e526db Mon Sep 17 00:00:00 2001 From: ruiren_microsoft Date: Fri, 13 Mar 2026 13:20:51 -0700 Subject: [PATCH 08/22] fix: add missing using directives for EnumeratorCancellation and Channel --- sdk_v2/cs/src/OpenAI/AudioClient.cs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sdk_v2/cs/src/OpenAI/AudioClient.cs b/sdk_v2/cs/src/OpenAI/AudioClient.cs index 1a402ca6..e7529284 100644 --- a/sdk_v2/cs/src/OpenAI/AudioClient.cs +++ b/sdk_v2/cs/src/OpenAI/AudioClient.cs @@ -6,6 +6,8 @@ namespace Microsoft.AI.Foundry.Local; +using System.Runtime.CompilerServices; +using System.Threading.Channels; using Betalgo.Ranul.OpenAI.ObjectModels.RequestModels; using Betalgo.Ranul.OpenAI.ObjectModels.ResponseModels; From b89e1bd285c328e6091d7beaf47cc9de27603b67 Mon Sep 17 00:00:00 2001 From: ruiren_microsoft Date: Fri, 13 Mar 2026 13:26:38 -0700 Subject: [PATCH 09/22] update test --- sdk_v2/cs/test/FoundryLocal.Tests/Utils.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk_v2/cs/test/FoundryLocal.Tests/Utils.cs b/sdk_v2/cs/test/FoundryLocal.Tests/Utils.cs index 6da59baf..6b71921a 100644 --- a/sdk_v2/cs/test/FoundryLocal.Tests/Utils.cs +++ b/sdk_v2/cs/test/FoundryLocal.Tests/Utils.cs @@ -46,7 +46,7 @@ Console.WriteLine("=== Creating streaming session ==="); var audioClient = await model.GetAudioClientAsync(); -var streamingClient = audioClient.CreateStreamingSession(); +var streamingClient = audioClient.CreateLiveTranscriptionSession(); streamingClient.Settings.SampleRate = 16000; streamingClient.Settings.Channels = 1; streamingClient.Settings.BitsPerSample = 16; From eb9f282ff3be8403dbd06dc368f2a874668c24d6 Mon Sep 17 00:00:00 2001 From: ruiren_microsoft Date: Tue, 17 Mar 2026 20:42:16 -0700 Subject: [PATCH 10/22] e2e test --- .../LiveAudioTranscription.csproj | 30 ++++ samples/cs/LiveAudioTranscription/Program.cs | 169 
++++++++++++++++++ samples/cs/LiveAudioTranscription/README.md | 143 +++++++++++++++ 3 files changed, 342 insertions(+) create mode 100644 samples/cs/LiveAudioTranscription/LiveAudioTranscription.csproj create mode 100644 samples/cs/LiveAudioTranscription/Program.cs create mode 100644 samples/cs/LiveAudioTranscription/README.md diff --git a/samples/cs/LiveAudioTranscription/LiveAudioTranscription.csproj b/samples/cs/LiveAudioTranscription/LiveAudioTranscription.csproj new file mode 100644 index 00000000..a816d2ba --- /dev/null +++ b/samples/cs/LiveAudioTranscription/LiveAudioTranscription.csproj @@ -0,0 +1,30 @@ + + + + + + + + + + + + + Exe + net9.0 + win-x64 + enable + enable + + + + + + + + + + + diff --git a/samples/cs/LiveAudioTranscription/Program.cs b/samples/cs/LiveAudioTranscription/Program.cs new file mode 100644 index 00000000..c0ecee95 --- /dev/null +++ b/samples/cs/LiveAudioTranscription/Program.cs @@ -0,0 +1,169 @@ +// Live Audio Transcription — Foundry Local SDK Example +// +// Demonstrates real-time microphone-to-text using: +// SDK (FoundryLocalManager) → Core (NativeAOT DLL) → onnxruntime-genai (StreamingProcessor) +// +// Prerequisites: +// 1. Nemotron ASR model downloaded to a local cache folder +// 2. Microsoft.AI.Foundry.Local.Core.dll (built from neutron-server with GenAI 0.13.0+) +// 3. onnxruntime-genai.dll + onnxruntime.dll + onnxruntime_providers_shared.dll (native GenAI) +// +// Usage: +// dotnet run -- [model-cache-dir] +// dotnet run -- C:\path\to\models + +using Microsoft.AI.Foundry.Local; +using Microsoft.Extensions.Logging; +using NAudio.Wave; + +// Parse model cache directory from args or use default +var modelCacheDir = args.Length > 0 + ? 
args[0] + : Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData), + "FoundryLocal", "models"); + +var coreDllPath = Path.Combine(AppContext.BaseDirectory, "Microsoft.AI.Foundry.Local.Core.dll"); + +var loggerFactory = LoggerFactory.Create(b => b.AddConsole().SetMinimumLevel(Microsoft.Extensions.Logging.LogLevel.Information)); +var logger = loggerFactory.CreateLogger("LiveAudioTranscription"); + +Console.WriteLine("==========================================================="); +Console.WriteLine(" Foundry Local -- Live Audio Transcription Demo"); +Console.WriteLine("==========================================================="); +Console.WriteLine(); +Console.WriteLine($" Model cache: {modelCacheDir}"); +Console.WriteLine($" Core DLL: {coreDllPath} (exists: {File.Exists(coreDllPath)})"); +Console.WriteLine(); + +try +{ + // === Step 1: Initialize Foundry Local SDK === + Console.WriteLine("[1/5] Initializing Foundry Local SDK..."); + var config = new Configuration + { + AppName = "LiveAudioTranscription", + LogLevel = Microsoft.AI.Foundry.Local.LogLevel.Information, + ModelCacheDir = modelCacheDir, + AdditionalSettings = new Dictionary + { + { "FoundryLocalCorePath", coreDllPath } + } + }; + + await FoundryLocalManager.CreateAsync(config, logger); + Console.WriteLine(" SDK initialized."); + + // === Step 2: Find and load the nemotron ASR model === + Console.WriteLine("[2/5] Loading nemotron model..."); + var catalog = await FoundryLocalManager.Instance.GetCatalogAsync(); + var model = await catalog.GetModelAsync("nemotron"); + + if (model == null) + { + Console.WriteLine("ERROR: 'nemotron' not found in catalog."); + Console.WriteLine($" Ensure the model is downloaded to: {modelCacheDir}"); + Console.WriteLine(" The folder should contain genai_config.json, encoder.onnx, decoder.onnx, etc."); + return; + } + + Console.WriteLine($" Found model: {model.Alias}"); + await model.LoadAsync(); + Console.WriteLine(" Model loaded."); + + // === 
Step 3: Create live transcription session === + Console.WriteLine("[3/5] Creating live transcription session..."); + var audioClient = await model.GetAudioClientAsync(); + var session = audioClient.CreateLiveTranscriptionSession(); + session.Settings.SampleRate = 16000; + session.Settings.Channels = 1; + session.Settings.Language = "en"; + + await session.StartAsync(); + Console.WriteLine(" Session started (SDK -> Core -> GenAI pipeline active)."); + + // === Step 4: Set up microphone + transcription reader === + Console.WriteLine("[4/5] Setting up microphone..."); + + // Background task reads transcription results as they arrive + var readTask = Task.Run(async () => + { + try + { + await foreach (var result in session.GetTranscriptionStream()) + { + if (result.IsFinal) + { + Console.WriteLine(); + Console.WriteLine($" [FINAL] {result.Text}"); + Console.Out.Flush(); + } + else if (!string.IsNullOrEmpty(result.Text)) + { + Console.ForegroundColor = ConsoleColor.Cyan; + Console.Write(result.Text); + Console.ResetColor(); + Console.Out.Flush(); + } + } + } + catch (OperationCanceledException) { } + }); + + // Microphone capture via NAudio + using var waveIn = new WaveInEvent + { + WaveFormat = new WaveFormat(rate: 16000, bits: 16, channels: 1), + BufferMilliseconds = 100 + }; + + int totalChunks = 0; + long totalBytes = 0; + + waveIn.DataAvailable += (sender, e) => + { + if (e.BytesRecorded > 0) + { + _ = session.AppendAsync(new ReadOnlyMemory(e.Buffer, 0, e.BytesRecorded)); + totalChunks++; + totalBytes += e.BytesRecorded; + } + }; + + // === Step 5: Record === + Console.WriteLine(); + Console.WriteLine("==========================================================="); + Console.WriteLine(" LIVE TRANSCRIPTION ACTIVE"); + Console.WriteLine(" Speak into your microphone."); + Console.WriteLine(" Transcription appears in real-time (cyan text)."); + Console.WriteLine(" Press ENTER to stop recording."); + 
Console.WriteLine("==========================================================="); + Console.WriteLine(); + + waveIn.StartRecording(); + Console.ReadLine(); + waveIn.StopRecording(); + + var totalSeconds = totalBytes / (16000.0 * 2); + Console.WriteLine($"\n Recording: {totalSeconds:F1}s | {totalChunks} chunks | {totalBytes / 1024} KB"); + + // Stop session (flushes remaining audio through the pipeline) + Console.WriteLine("\n[5/5] Stopping session..."); + await session.StopAsync(); + await readTask; + + // Unload model + await model.UnloadAsync(); + + Console.WriteLine(); + Console.WriteLine("==========================================================="); + Console.WriteLine(" Demo complete!"); + Console.WriteLine(" Pipeline: Mic -> NAudio -> SDK -> Core -> GenAI -> Text"); + Console.WriteLine("==========================================================="); +} +catch (Exception ex) +{ + Console.WriteLine($"\nERROR: {ex.Message}"); + if (ex.InnerException != null) + Console.WriteLine($"Inner: {ex.InnerException.Message}"); + Console.WriteLine($"\n{ex.StackTrace}"); +} diff --git a/samples/cs/LiveAudioTranscription/README.md b/samples/cs/LiveAudioTranscription/README.md new file mode 100644 index 00000000..f4897524 --- /dev/null +++ b/samples/cs/LiveAudioTranscription/README.md @@ -0,0 +1,143 @@ +# Live Audio Transcription Demo + +Real-time microphone-to-text using Foundry Local SDK, Core, and onnxruntime-genai. + +## Architecture + +``` +Microphone (NAudio, 16kHz/16-bit/mono) + | + v +Foundry Local SDK (C#) + | AppendAsync(pcmBytes) + v +Foundry Local Core (NativeAOT DLL) + | AppendAudioChunk -> CommitTranscription + v +onnxruntime-genai (StreamingProcessor + Generator) + | RNNT encoder + decoder + v +Live transcription text +``` + +## Prerequisites + +1. **Windows x64** with a microphone +2. **.NET 9.0 SDK** installed +3. **Nemotron ASR model** downloaded locally +4. 
**Native DLLs** (4 files — see Setup below) + +## Setup (Step by Step) + +### Step 1: Get the native DLLs + +You need 4 DLLs placed in this project folder: + +| DLL | Source | +|-----|--------| +| `Microsoft.AI.Foundry.Local.Core.dll` | Built from neutron-server (`dotnet publish` with NativeAOT) | +| `onnxruntime-genai.dll` | Built from onnxruntime-genai (Nenad's StreamingProcessor branch) | +| `onnxruntime.dll` | Comes with the Core publish output | +| `onnxruntime_providers_shared.dll` | Comes with the Core publish output | + +**Option A: From CI artifacts** +- Download the Core DLL from the neutron-server CI pipeline artifacts +- Download the GenAI native DLLs from the onnxruntime-genai pipeline artifacts + +**Option B: From a teammate** +- Ask for the 4 DLLs from someone who has already built them + +Copy all 4 DLLs to this folder (`samples/cs/LiveAudioTranscription/`). + +### Step 2: Get the Nemotron model + +The model should be in a folder with this structure: +``` +models/ + nemotron/ + genai_config.json + encoder.onnx + decoder.onnx + joint.onnx + tokenizer.json + vocab.txt +``` + +### Step 3: Build + +```powershell +cd samples/cs/LiveAudioTranscription +dotnet build -c Debug +``` + +### Step 4: Copy native DLLs to output (if not auto-copied) + +```powershell +Copy-Item onnxruntime-genai.dll bin\Debug\net9.0\win-x64\ -Force +Copy-Item onnxruntime.dll bin\Debug\net9.0\win-x64\ -Force +Copy-Item onnxruntime_providers_shared.dll bin\Debug\net9.0\win-x64\ -Force +Copy-Item Microsoft.AI.Foundry.Local.Core.dll bin\Debug\net9.0\win-x64\ -Force +``` + +### Step 5: Run + +```powershell +# Default model cache location +dotnet run -c Debug --no-build + +# Or specify model cache directory +dotnet run -c Debug --no-build -- C:\path\to\models +``` + +### Step 6: Speak! 
+ +- The app will show `LIVE TRANSCRIPTION ACTIVE` +- Speak into your microphone +- Text appears in **cyan** as you speak +- Press **ENTER** to stop + +## Expected Output + +``` +=========================================================== + Foundry Local -- Live Audio Transcription Demo +=========================================================== + +[1/5] Initializing Foundry Local SDK... + SDK initialized. +[2/5] Loading nemotron model... + Found model: nemotron + Model loaded. +[3/5] Creating live transcription session... + Session started (SDK -> Core -> GenAI pipeline active). +[4/5] Setting up microphone... + +=========================================================== + LIVE TRANSCRIPTION ACTIVE + Speak into your microphone. + Transcription appears in real-time (cyan text). + Press ENTER to stop recording. +=========================================================== + +Hello this is a demo of live audio transcription running entirely on device + [FINAL] Hello this is a demo of live audio transcription running entirely on device + + Recording: 15.2s | 152 chunks | 475 KB + +[5/5] Stopping session... + +=========================================================== + Demo complete! 
+ Pipeline: Mic -> NAudio -> SDK -> Core -> GenAI -> Text +=========================================================== +``` + +## Troubleshooting + +| Error | Fix | +|-------|-----| +| `Core DLL not found` | Copy `Microsoft.AI.Foundry.Local.Core.dll` to project folder | +| `nemotron not found in catalog` | Check `ModelCacheDir` points to folder containing `nemotron/` with `genai_config.json` | +| `OgaStreamingProcessor not found` | The `onnxruntime-genai.dll` is old — rebuild from Nenad's branch or get from CI | +| `No microphone` | Ensure a mic is connected and set as default recording device | +| `num_mels unknown` | Fix `genai_config.json` — ASR params must be at model level, not nested under `speech` | From 5e981195fde7704d91f18423fea29da52a545657 Mon Sep 17 00:00:00 2001 From: ruiren_microsoft Date: Tue, 17 Mar 2026 20:49:26 -0700 Subject: [PATCH 11/22] update for test --- samples/cs/LiveAudioTranscription/LiveAudioTranscription.csproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/cs/LiveAudioTranscription/LiveAudioTranscription.csproj b/samples/cs/LiveAudioTranscription/LiveAudioTranscription.csproj index a816d2ba..fb1a95a3 100644 --- a/samples/cs/LiveAudioTranscription/LiveAudioTranscription.csproj +++ b/samples/cs/LiveAudioTranscription/LiveAudioTranscription.csproj @@ -1,7 +1,7 @@ - + From d2e35138c2530b640d96ec7f78c08a688c82ad53 Mon Sep 17 00:00:00 2001 From: Copilot <198982749+Copilot@users.noreply.github.com> Date: Fri, 20 Mar 2026 10:36:56 -0700 Subject: [PATCH 12/22] Fix C# SDK audio streaming PR: namespace corrections, restored public API, sample restructure (#538) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Resolves all 23 review comments on the live audio transcription PR (`ruiren/audio-streaming-support-sdk`), including merge conflict resolution. Covers namespace fixes, a removed-but-needed public method, test file restoration, and sample reorganization. 
## SDK fixes (`sdk_v2/cs/src/`) - **`OpenAI/AudioClient.cs`**: Restored `TranscribeAudioStreamingAsync` public method — was accidentally removed; `AudioTranscriptionExample` depends on it - **`OpenAI/LiveAudioTranscriptionClient.cs`** + **`LiveAudioTranscriptionTypes.cs`**: Changed namespace `Microsoft.AI.Foundry.Local` → `Microsoft.AI.Foundry.Local.OpenAI` (consistent with `ToolCallingExtensions.cs`, `AudioTranscriptionRequestResponseTypes.cs`); added required `using Microsoft.AI.Foundry.Local;` - **`OpenAI/LiveAudioTranscriptionClient.cs`**: Removed unused `using System.Runtime.InteropServices` (would fail build with `TreatWarningsAsErrors=true`); fixed XML doc `PushAudioAsync` → `AppendAsync`; removed leftover `#pragma warning disable` directives; cleaned up double blank lines - **`OpenAI/LiveAudioTranscriptionTypes.cs`**: Removed `Confidence` property — not populated by any code path - **`AssemblyInfo.cs`**: Removed `InternalsVisibleTo("AudioStreamTest")` — local dev artifact, not for shipped SDK ## Test fix (`sdk_v2/cs/test/`) - **`Utils.cs`**: Restored original `Microsoft.AI.Foundry.Local.Tests.Utils` class from main — file was completely overwritten with a top-level executable test script, breaking all existing tests that reference `Utils.CoreInterop`, `Utils.IsRunningInCI`, etc. ## Sample restructure (`samples/cs/`) - Removed standalone `samples/cs/LiveAudioTranscription/` (csproj, Program.cs, README) - Added `samples/cs/GettingStarted/src/LiveAudioTranscriptionExample/Program.cs` — follows `HelloFoundryLocalSdk` pattern using `Utils.GetAppLogger()`, `Utils.RunWithSpinner()`, `catalog.GetModelAsync()`; removed hardcoded DLL paths, model cache dir override, `BitsPerSample=16` (property doesn't exist), and debug diagnostics - Added cross-platform and Windows `.csproj` files under `GettingStarted/cross-platform/` and `GettingStarted/windows/` matching the structure of `AudioTranscriptionExample` > [!WARNING] > >
> Firewall rules blocked me from connecting to one or more addresses (expand for details) > > #### I tried to connect to the following addresses, but was blocked by firewall rules: > > - `0t3vsblobprodcus362.vsblob.vsassets.io` > - Triggering command: `/usr/bin/dotnet dotnet restore --no-dependencies /tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/B2063432E236EB2499F756DC7AEAC028/missingpackages_workingdir --packages /tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/missingpackages /p:DisableImplicitNuGetFallbackFolder=true --verbosity normal --configfile /tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/nugetconfig/nuget.config --force ng/emptyFakeDotnetRoot ing/emptyFakeDotnetRoot` (dns block) > - `1javsblobprodcus364.vsblob.vsassets.io` > - Triggering command: `/usr/bin/dotnet dotnet restore --no-dependencies /home/REDACTED/work/Foundry-Local/Foundry-Local/sdk_v2/cs/Microsoft.AI.Foundry.Local.SDK.sln --packages /tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/packages /p:DisableImplicitNuGetFallbackFolder=true --verbosity normal /p:TargetFrameworkRootPath=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot /p:NetCoreTargetingPackRoot=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot /p:AllowMissingPrunePackageData=true` (dns block) > - Triggering command: `/usr/bin/dotnet dotnet restore --no-dependencies /tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/CDD8923456756250B6AF4E42CA6F8DFB/missingpackages_workingdir --packages /tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/missingpackages /p:DisableImplicitNuGetFallbackFolder=true --verbosity normal --configfile /tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/nugetconfig/nuget.config --force ng/emptyFakeDotnetRoot ing/emptyFakeDotnetRoot` (dns block) > - `1s1vsblobprodcus386.vsblob.vsassets.io` > - Triggering command: `/usr/bin/dotnet dotnet restore --no-dependencies 
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/EFEB4E95C962CAA7DA01DE9B7C9E5F4D/missingpackages_workingdir --packages /tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/missingpackages /p:DisableImplicitNuGetFallbackFolder=true --verbosity normal --configfile /tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/nugetconfig/nuget.config --force` (dns block) > - `4zjvsblobprodcus390.vsblob.vsassets.io` > - Triggering command: `/usr/bin/dotnet dotnet restore --no-dependencies /tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/EFEB4E95C962CAA7DA01DE9B7C9E5F4D/missingpackages_workingdir --packages /tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/missingpackages /p:DisableImplicitNuGetFallbackFolder=true --verbosity normal --configfile /tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/nugetconfig/nuget.config --force` (dns block) > - Triggering command: `/usr/bin/dotnet dotnet restore --no-dependencies /tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/79820580DC01B1F2024CE1D67DCA3751/missingpackages_workingdir --packages /tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/missingpackages /p:DisableImplicitNuGetFallbackFolder=true --verbosity normal --configfile /tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/nugetconfig/nuget.config --force ng/emptyFakeDotnetRoot ing/emptyFakeDotnetRoot` (dns block) > - `51yvsblobprodcus36.vsblob.vsassets.io` > - Triggering command: `/usr/bin/dotnet dotnet restore --no-dependencies /home/REDACTED/work/Foundry-Local/Foundry-Local/sdk_v2/cs/Microsoft.AI.Foundry.Local.SDK.sln --packages /tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/packages /p:DisableImplicitNuGetFallbackFolder=true --verbosity normal /p:TargetFrameworkRootPath=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot /p:NetCoreTargetingPackRoot=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot /p:AllowMissingPrunePackageData=true` (dns block) > - Triggering command: 
`/usr/bin/dotnet dotnet restore --no-dependencies /home/REDACTED/work/Foundry-Local/Foundry-Local/sdk_v2/cs/src/Microsoft.AI.Foundry.Local.csproj --packages /tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/packages /p:DisableImplicitNuGetFallbackFolder=true --verbosity normal /p:TargetFrameworkRootPath=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot /p:NetCoreTargetingPackRoot=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot /p:AllowMissingPrunePackageData=true` (dns block) > - Triggering command: `/usr/bin/dotnet dotnet restore --no-dependencies /tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/CDD8923456756250B6AF4E42CA6F8DFB/missingpackages_workingdir --packages /tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/missingpackages /p:DisableImplicitNuGetFallbackFolder=true --verbosity normal --configfile /tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/nugetconfig/nuget.config --force ng/emptyFakeDotnetRoot ing/emptyFakeDotnetRoot` (dns block) > - `80zvsblobprodcus35.vsblob.vsassets.io` > - Triggering command: `/usr/bin/dotnet dotnet restore --no-dependencies /tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/EFEB4E95C962CAA7DA01DE9B7C9E5F4D/missingpackages_workingdir --packages /tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/missingpackages /p:DisableImplicitNuGetFallbackFolder=true --verbosity normal --configfile /tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/nugetconfig/nuget.config --force` (dns block) > - `aiinfra.pkgs.visualstudio.com` > - Triggering command: `/opt/hostedtoolcache/CodeQL/2.24.3/x64/codeql/csharp/tools/linux64/Semmle.Autobuild.CSharp /opt/hostedtoolcache/CodeQL/2.24.3/x64/codeql/csharp/tools/linux64/Semmle.Autobuild.CSharp` (dns block) > - Triggering command: `/usr/bin/dotnet dotnet restore --no-dependencies /home/REDACTED/work/Foundry-Local/Foundry-Local/samples/cs/GettingStarted/cross-platform/FoundrySamplesXPlatform.sln --packages 
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/packages /p:DisableImplicitNuGetFallbackFolder=true --verbosity normal /p:TargetFrameworkRootPath=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot /p:NetCoreTargetingPackRoot=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot /p:AllowMissingPrunePackageData=true` (dns block) > - Triggering command: `/usr/bin/dotnet dotnet restore --no-dependencies /home/REDACTED/work/Foundry-Local/Foundry-Local/samples/cs/GettingStarted/cross-platform/AudioTranscriptionExample/AudioTranscriptionExample.csproj --packages /tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/packages /p:DisableImplicitNuGetFallbackFolder=true --verbosity normal /p:TargetFrameworkRootPath=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot /p:NetCoreTargetingPackRoot=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot /p:AllowMissingPrunePackageData=true` (dns block) > - `c50vsblobprodcus330.vsblob.vsassets.io` > - Triggering command: `/usr/bin/dotnet dotnet restore --no-dependencies /home/REDACTED/work/Foundry-Local/Foundry-Local/sdk_v2/cs/Microsoft.AI.Foundry.Local.SDK.sln --packages /tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/packages /p:DisableImplicitNuGetFallbackFolder=true --verbosity normal /p:TargetFrameworkRootPath=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot /p:NetCoreTargetingPackRoot=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot /p:AllowMissingPrunePackageData=true` (dns block) > - Triggering command: `/usr/bin/dotnet dotnet restore --no-dependencies /home/REDACTED/work/Foundry-Local/Foundry-Local/sdk_v2/cs/test/FoundryLocal.Tests/Microsoft.AI.Foundry.Local.Tests.csproj --packages /tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/packages /p:DisableImplicitNuGetFallbackFolder=true --verbosity normal 
/p:TargetFrameworkRootPath=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot /p:NetCoreTargetingPackRoot=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot /p:AllowMissingPrunePackageData=true` (dns block) > - `frdvsblobprodcus327.vsblob.vsassets.io` > - Triggering command: `/usr/bin/dotnet dotnet restore --no-dependencies /home/REDACTED/work/Foundry-Local/Foundry-Local/sdk_v2/cs/test/FoundryLocal.Tests/Microsoft.AI.Foundry.Local.Tests.csproj --packages /tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/packages /p:DisableImplicitNuGetFallbackFolder=true --verbosity normal /p:TargetFrameworkRootPath=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot /p:NetCoreTargetingPackRoot=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot /p:AllowMissingPrunePackageData=true` (dns block) > - `i1qvsblobprodcus353.vsblob.vsassets.io` > - Triggering command: `/usr/bin/dotnet dotnet restore --no-dependencies /home/REDACTED/work/Foundry-Local/Foundry-Local/sdk_v2/cs/Microsoft.AI.Foundry.Local.SDK.sln --packages /tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/packages /p:DisableImplicitNuGetFallbackFolder=true --verbosity normal /p:TargetFrameworkRootPath=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot /p:NetCoreTargetingPackRoot=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot /p:AllowMissingPrunePackageData=true` (dns block) > - Triggering command: `/usr/bin/dotnet dotnet restore --no-dependencies /home/REDACTED/work/Foundry-Local/Foundry-Local/sdk_v2/cs/test/FoundryLocal.Tests/Microsoft.AI.Foundry.Local.Tests.csproj --packages /tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/packages /p:DisableImplicitNuGetFallbackFolder=true --verbosity normal /p:TargetFrameworkRootPath=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot 
/p:NetCoreTargetingPackRoot=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot /p:AllowMissingPrunePackageData=true` (dns block) > - `imzvsblobprodcus368.vsblob.vsassets.io` > - Triggering command: `/usr/bin/dotnet dotnet restore --no-dependencies /home/REDACTED/work/Foundry-Local/Foundry-Local/sdk_v2/cs/Microsoft.AI.Foundry.Local.SDK.sln --packages /tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/packages /p:DisableImplicitNuGetFallbackFolder=true --verbosity normal /p:TargetFrameworkRootPath=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot /p:NetCoreTargetingPackRoot=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot /p:AllowMissingPrunePackageData=true` (dns block) > - Triggering command: `/usr/bin/dotnet dotnet restore --no-dependencies /home/REDACTED/work/Foundry-Local/Foundry-Local/sdk_v2/cs/src/Microsoft.AI.Foundry.Local.csproj --packages /tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/packages /p:DisableImplicitNuGetFallbackFolder=true --verbosity normal /p:TargetFrameworkRootPath=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot /p:NetCoreTargetingPackRoot=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot /p:AllowMissingPrunePackageData=true` (dns block) > - `k0ivsblobprodcus356.vsblob.vsassets.io` > - Triggering command: `/usr/bin/dotnet dotnet restore --no-dependencies /tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/B2063432E236EB2499F756DC7AEAC028/missingpackages_workingdir --packages /tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/missingpackages /p:DisableImplicitNuGetFallbackFolder=true --verbosity normal --configfile /tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/nugetconfig/nuget.config --force ng/emptyFakeDotnetRoot ing/emptyFakeDotnetRoot` (dns block) > - `kxqvsblobprodcus376.vsblob.vsassets.io` > - Triggering command: `/usr/bin/dotnet dotnet restore --no-dependencies 
/home/REDACTED/work/Foundry-Local/Foundry-Local/sdk_v2/cs/Microsoft.AI.Foundry.Local.SDK.sln --packages /tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/packages /p:DisableImplicitNuGetFallbackFolder=true --verbosity normal /p:TargetFrameworkRootPath=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot /p:NetCoreTargetingPackRoot=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot /p:AllowMissingPrunePackageData=true` (dns block) > - Triggering command: `/usr/bin/dotnet dotnet restore --no-dependencies /home/REDACTED/work/Foundry-Local/Foundry-Local/sdk_v2/cs/test/FoundryLocal.Tests/Microsoft.AI.Foundry.Local.Tests.csproj --packages /tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/packages /p:DisableImplicitNuGetFallbackFolder=true --verbosity normal /p:TargetFrameworkRootPath=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot /p:NetCoreTargetingPackRoot=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot /p:AllowMissingPrunePackageData=true` (dns block) > - `m16vsblobprodcus374.vsblob.vsassets.io` > - Triggering command: `/usr/bin/dotnet dotnet restore --no-dependencies /tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/EFEB4E95C962CAA7DA01DE9B7C9E5F4D/missingpackages_workingdir --packages /tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/missingpackages /p:DisableImplicitNuGetFallbackFolder=true --verbosity normal --configfile /tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/nugetconfig/nuget.config --force` (dns block) > - `s8mvsblobprodcus38.vsblob.vsassets.io` > - Triggering command: `/usr/bin/dotnet dotnet restore --no-dependencies /home/REDACTED/work/Foundry-Local/Foundry-Local/sdk_v2/cs/Microsoft.AI.Foundry.Local.SDK.sln --packages /tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/packages /p:DisableImplicitNuGetFallbackFolder=true --verbosity normal 
/p:TargetFrameworkRootPath=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot /p:NetCoreTargetingPackRoot=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot /p:AllowMissingPrunePackageData=true` (dns block) > - Triggering command: `/usr/bin/dotnet dotnet restore --no-dependencies /home/REDACTED/work/Foundry-Local/Foundry-Local/sdk_v2/cs/src/Microsoft.AI.Foundry.Local.csproj --packages /tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/packages /p:DisableImplicitNuGetFallbackFolder=true --verbosity normal /p:TargetFrameworkRootPath=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot /p:NetCoreTargetingPackRoot=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot /p:AllowMissingPrunePackageData=true` (dns block) > - Triggering command: `/usr/bin/dotnet dotnet restore --no-dependencies /tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/EFEB4E95C962CAA7DA01DE9B7C9E5F4D/missingpackages_workingdir --packages /tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/missingpackages /p:DisableImplicitNuGetFallbackFolder=true --verbosity normal --configfile /tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/nugetconfig/nuget.config --force` (dns block) > - `se1vsblobprodcus349.vsblob.vsassets.io` > - Triggering command: `/usr/bin/dotnet dotnet restore --no-dependencies /home/REDACTED/work/Foundry-Local/Foundry-Local/sdk_v2/cs/Microsoft.AI.Foundry.Local.SDK.sln --packages /tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/packages /p:DisableImplicitNuGetFallbackFolder=true --verbosity normal /p:TargetFrameworkRootPath=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot /p:NetCoreTargetingPackRoot=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot /p:AllowMissingPrunePackageData=true` (dns block) > - Triggering command: `/usr/bin/dotnet dotnet restore --no-dependencies 
/home/REDACTED/work/Foundry-Local/Foundry-Local/sdk_v2/cs/src/Microsoft.AI.Foundry.Local.csproj --packages /tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/packages /p:DisableImplicitNuGetFallbackFolder=true --verbosity normal /p:TargetFrameworkRootPath=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot /p:NetCoreTargetingPackRoot=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot /p:AllowMissingPrunePackageData=true` (dns block) > - Triggering command: `/usr/bin/dotnet dotnet restore --no-dependencies /tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/63E6685CBF8FE43B2889F9BB97016C00/missingpackages_workingdir --packages /tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/missingpackages /p:DisableImplicitNuGetFallbackFolder=true --verbosity normal --configfile /tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/nugetconfig/nuget.config --force` (dns block) > > If you need me to access, download, or install something from one of these locations, you can either: > > - Configure [Actions setup steps](https://gh.io/copilot/actions-setup-steps) to set up my environment, which run before the firewall is enabled > - Add the appropriate URLs or hosts to the custom allowlist in this repository's [Copilot coding agent settings](https://github.com/microsoft/Foundry-Local/settings/copilot/coding_agent) (admins only) > >
Original prompt ## Context PR #485 (branch `ruiren/audio-streaming-support-sdk` targeting `main`) in microsoft/Foundry-Local adds live audio transcription streaming support to the Foundry Local C# SDK. It currently has merge conflicts with `main` and 23 review comments from Copilot bot and @kunal-vaishnavi that all need to be resolved. ## Task 1: Merge main branch and resolve conflicts The PR's `mergeable_state` is "dirty". Merge `main` into `ruiren/audio-streaming-support-sdk` and resolve all conflicts, ensuring the PR author's new code is preserved while incorporating any changes from main. ## Task 2: Resolve ALL of the following review comments ### SDK Source Code Fixes: 1. **`sdk/cs/src/Detail/JsonSerializationContext.cs`**: The file is in namespace `Microsoft.AI.Foundry.Local.Detail` but references `LiveAudioTranscriptionResult` and `CoreErrorResponse` which will be in namespace `Microsoft.AI.Foundry.Local.OpenAI` (see fixes #4 and #5 below). Add a `using Microsoft.AI.Foundry.Local.OpenAI;` statement (this using may already exist from main, just ensure the types resolve correctly after the namespace change). 2. **`sdk/cs/src/OpenAI/AudioClient.cs`**: The public `TranscribeAudioStreamingAsync(...)` method was removed in the PR but the private `TranscribeAudioStreamingImplAsync(...)` still exists. **Restore the public `TranscribeAudioStreamingAsync` method** that wraps the private impl. This is used by speech-to-text models like Whisper and must NOT be removed. The original version from main is: ```csharp public async IAsyncEnumerable<AudioCreateTranscriptionResponse> TranscribeAudioStreamingAsync( string audioFilePath, [EnumeratorCancellation] CancellationToken ct) { var enumerable = Utils.CallWithExceptionHandling( () => TranscribeAudioStreamingImplAsync(audioFilePath, ct), "Error during streaming audio transcription.", _logger).ConfigureAwait(false); await foreach (var item in enumerable) { yield return item; } } ``` 3.
**`sdk/cs/src/OpenAI/LiveAudioTranscriptionClient.cs`**: - Remove `using System.Runtime.InteropServices;` — it is unused and `TreatWarningsAsErrors=true` means this will cause CS8019 build failure. - Fix the XML doc comment that says "Thread safety: PushAudioAsync can be called from any thread" — change it to reference `AppendAsync` instead of `PushAudioAsync`. - Remove `#pragma warning disable` directives if they are not necessary. The reviewer asked why they're needed — they appear to be from development and should be removed for a clean PR. 4. **`sdk/cs/src/OpenAI/LiveAudioTranscriptionTypes.cs`**: - Change namespace from `Microsoft.AI.Foundry.Local` to `Microsoft.AI.Foundry.Local.OpenAI` (since the file is in the OpenAI folder, it should match the folder-based namespace convention used by the rest of the codebase). - Remove the `Confidence` property from `LiveAudioTranscriptionResult` if it is not being calculated/populated. The reviewer asked and it appears not to be calculated. 5. **`sdk/cs/src/OpenAI/LiveAudioTranscriptionClient.cs`**: - Also change namespace from `Microsoft.AI.Foundry.Local` to `Microsoft.AI.Foundry.Local.OpenAI` (same reason as above — the file is in the OpenAI folder). 6. **`sdk/cs/src/Microsoft.AI.Foundry.Local.csproj`**: Remove the `InternalsVisibleTo("AudioStreamTest")` attribute/assembly attribute. This was only needed for local experimentation and should not be in the shipped SDK. 7. **Remove trailing blank lines** in any files that have extra trailing blank lines added by this PR. ### Test File Fix: 8. **`sdk/cs/test/FoundryLocal.Tests/Utils.cs`**: This file was completely rewritten in the PR with top-level executable code and a hardcoded Core DLL path. It must be **restored to its original content from main**. 
The original file defines the `Microsoft.AI.Foundry.Local.Tests.Utils` helper class with `TestCatalogInfo`, `AssemblyInit`, `CoreInterop`, `CreateCapturingLoggerMock`, `CreateCoreInteropWithIntercept`, `IsRunningInCI`, `BuildTestCatalog`, `GetRepoRoot` etc. Multiple tests reference `Utils.*` (e.g., `Utils.CoreInterop`, `Utils.IsRunningInCI`), so the test project won't compile without it. Restore it to match the version on `main` exactly. ### Sample Restructuring: 9. **Move the sample from `samples/cs/LiveAudioTranscription/`** to `samples/cs/GettingStarted/src/LiveAudioTranscriptionExample/`. The sample Program.cs should be placed there. 10. **Remove the standalone `samples/cs/LiveAudioTranscription/` directory** entirely (including the README.md in it — reviewer says it's good for internal docs but these samples are public-facing, and the existing GettingStarted README covers it). 11. **Create cross-platform `.csproj`** at `samples/cs/GettingStarted/cross-platform/LiveAudioTranscriptionExample/LiveAudioTranscriptionExample.csproj` following the format of the existing cross-platform AudioTranscriptionExample: ```xml Exe<...
*This pull request was created from Copilot chat.* > --- 🔒 GitHub Advanced Security automatically protects Copilot coding agent pull requests. You can protect all pull requests by enabling Advanced Security for your repositories. [Learn more about Advanced Security.](https://gh.io/cca-advanced-security) --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: rui-ren <15321482+rui-ren@users.noreply.github.com> --- .../LiveAudioTranscriptionExample.csproj | 32 ++ .../LiveAudioTranscriptionExample/Program.cs | 105 ++++ .../LiveAudioTranscriptionExample.csproj | 30 ++ .../LiveAudioTranscription.csproj | 30 -- samples/cs/LiveAudioTranscription/Program.cs | 169 ------ samples/cs/LiveAudioTranscription/README.md | 143 ----- sdk_v2/cs/src/AssemblyInfo.cs | 1 - sdk_v2/cs/src/OpenAI/AudioClient.cs | 13 + .../OpenAI/LiveAudioTranscriptionClient.cs | 13 +- .../src/OpenAI/LiveAudioTranscriptionTypes.cs | 7 +- sdk_v2/cs/test/FoundryLocal.Tests/Utils.cs | 499 +++++++++++++++--- 11 files changed, 623 insertions(+), 419 deletions(-) create mode 100644 samples/cs/GettingStarted/cross-platform/LiveAudioTranscriptionExample/LiveAudioTranscriptionExample.csproj create mode 100644 samples/cs/GettingStarted/src/LiveAudioTranscriptionExample/Program.cs create mode 100644 samples/cs/GettingStarted/windows/LiveAudioTranscriptionExample/LiveAudioTranscriptionExample.csproj delete mode 100644 samples/cs/LiveAudioTranscription/LiveAudioTranscription.csproj delete mode 100644 samples/cs/LiveAudioTranscription/Program.cs delete mode 100644 samples/cs/LiveAudioTranscription/README.md diff --git a/samples/cs/GettingStarted/cross-platform/LiveAudioTranscriptionExample/LiveAudioTranscriptionExample.csproj b/samples/cs/GettingStarted/cross-platform/LiveAudioTranscriptionExample/LiveAudioTranscriptionExample.csproj new file mode 100644 index 00000000..ad6086f5 --- /dev/null +++ 
b/samples/cs/GettingStarted/cross-platform/LiveAudioTranscriptionExample/LiveAudioTranscriptionExample.csproj @@ -0,0 +1,32 @@ + + + + Exe + net9.0 + enable + enable + + + + $(NETCoreSdkRuntimeIdentifier) + + + + + + + + + + + + + + + + + + + + + diff --git a/samples/cs/GettingStarted/src/LiveAudioTranscriptionExample/Program.cs b/samples/cs/GettingStarted/src/LiveAudioTranscriptionExample/Program.cs new file mode 100644 index 00000000..d6e812e3 --- /dev/null +++ b/samples/cs/GettingStarted/src/LiveAudioTranscriptionExample/Program.cs @@ -0,0 +1,105 @@ +// Live Audio Transcription — Foundry Local SDK Example +// +// Demonstrates real-time microphone-to-text using: +// SDK (FoundryLocalManager) → Core (NativeAOT DLL) → onnxruntime-genai (StreamingProcessor) + +using Microsoft.AI.Foundry.Local; +using NAudio.Wave; + +Console.WriteLine("==========================================================="); +Console.WriteLine(" Foundry Local -- Live Audio Transcription Demo"); +Console.WriteLine("==========================================================="); +Console.WriteLine(); + +var config = new Configuration +{ + AppName = "foundry_local_samples", + LogLevel = Microsoft.AI.Foundry.Local.LogLevel.Information +}; + +await FoundryLocalManager.CreateAsync(config, Utils.GetAppLogger()); +var mgr = FoundryLocalManager.Instance; + +await Utils.RunWithSpinner("Registering execution providers", mgr.EnsureEpsDownloadedAsync()); + +var catalog = await mgr.GetCatalogAsync(); + +var model = await catalog.GetModelAsync("nemotron") ?? 
throw new Exception("Model \"nemotron\" not found in catalog"); + +await model.DownloadAsync(progress => +{ + Console.Write($"\rDownloading model: {progress:F2}%"); + if (progress >= 100f) + { + Console.WriteLine(); + } +}); + +Console.Write($"Loading model {model.Id}..."); +await model.LoadAsync(); +Console.WriteLine("done."); + +var audioClient = await model.GetAudioClientAsync(); +var session = audioClient.CreateLiveTranscriptionSession(); +session.Settings.SampleRate = 16000; +session.Settings.Channels = 1; +session.Settings.Language = "en"; + +await session.StartAsync(); +Console.WriteLine(" Session started"); + +var readTask = Task.Run(async () => +{ + try + { + await foreach (var result in session.GetTranscriptionStream()) + { + if (result.IsFinal) + { + Console.WriteLine(); + Console.WriteLine($" [FINAL] {result.Text}"); + Console.Out.Flush(); + } + else if (!string.IsNullOrEmpty(result.Text)) + { + Console.ForegroundColor = ConsoleColor.Cyan; + Console.Write(result.Text); + Console.ResetColor(); + Console.Out.Flush(); + } + } + } + catch (OperationCanceledException) { } +}); + +using var waveIn = new WaveInEvent +{ + WaveFormat = new WaveFormat(rate: 16000, bits: 16, channels: 1), + BufferMilliseconds = 100 +}; + +waveIn.DataAvailable += (sender, e) => +{ + if (e.BytesRecorded > 0) + { + _ = session.AppendAsync(new ReadOnlyMemory<byte>(e.Buffer, 0, e.BytesRecorded)); + } +}; + +Console.WriteLine(); +Console.WriteLine("==========================================================="); +Console.WriteLine(" LIVE TRANSCRIPTION ACTIVE"); +Console.WriteLine(" Speak into your microphone."); +Console.WriteLine(" Transcription appears in real-time (cyan text)."); +Console.WriteLine(" Press ENTER to stop recording."); +Console.WriteLine("==========================================================="); +Console.WriteLine(); + +waveIn.StartRecording(); +Console.ReadLine(); +waveIn.StopRecording(); + +await session.StopAsync(); +await readTask; + +await model.UnloadAsync(); diff
--git a/samples/cs/GettingStarted/windows/LiveAudioTranscriptionExample/LiveAudioTranscriptionExample.csproj b/samples/cs/GettingStarted/windows/LiveAudioTranscriptionExample/LiveAudioTranscriptionExample.csproj new file mode 100644 index 00000000..b4489af2 --- /dev/null +++ b/samples/cs/GettingStarted/windows/LiveAudioTranscriptionExample/LiveAudioTranscriptionExample.csproj @@ -0,0 +1,30 @@ + + + + Exe + enable + enable + + net9.0-windows10.0.26100 + false + ARM64;x64 + None + false + + + + $(NETCoreSdkRuntimeIdentifier) + + + + + + + + + + + + + + diff --git a/samples/cs/LiveAudioTranscription/LiveAudioTranscription.csproj b/samples/cs/LiveAudioTranscription/LiveAudioTranscription.csproj deleted file mode 100644 index fb1a95a3..00000000 --- a/samples/cs/LiveAudioTranscription/LiveAudioTranscription.csproj +++ /dev/null @@ -1,30 +0,0 @@ - - - - - - - - - - - - - Exe - net9.0 - win-x64 - enable - enable - - - - - - - - - - - diff --git a/samples/cs/LiveAudioTranscription/Program.cs b/samples/cs/LiveAudioTranscription/Program.cs deleted file mode 100644 index c0ecee95..00000000 --- a/samples/cs/LiveAudioTranscription/Program.cs +++ /dev/null @@ -1,169 +0,0 @@ -// Live Audio Transcription — Foundry Local SDK Example -// -// Demonstrates real-time microphone-to-text using: -// SDK (FoundryLocalManager) → Core (NativeAOT DLL) → onnxruntime-genai (StreamingProcessor) -// -// Prerequisites: -// 1. Nemotron ASR model downloaded to a local cache folder -// 2. Microsoft.AI.Foundry.Local.Core.dll (built from neutron-server with GenAI 0.13.0+) -// 3. onnxruntime-genai.dll + onnxruntime.dll + onnxruntime_providers_shared.dll (native GenAI) -// -// Usage: -// dotnet run -- [model-cache-dir] -// dotnet run -- C:\path\to\models - -using Microsoft.AI.Foundry.Local; -using Microsoft.Extensions.Logging; -using NAudio.Wave; - -// Parse model cache directory from args or use default -var modelCacheDir = args.Length > 0 - ? 
args[0] - : Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData), - "FoundryLocal", "models"); - -var coreDllPath = Path.Combine(AppContext.BaseDirectory, "Microsoft.AI.Foundry.Local.Core.dll"); - -var loggerFactory = LoggerFactory.Create(b => b.AddConsole().SetMinimumLevel(Microsoft.Extensions.Logging.LogLevel.Information)); -var logger = loggerFactory.CreateLogger("LiveAudioTranscription"); - -Console.WriteLine("==========================================================="); -Console.WriteLine(" Foundry Local -- Live Audio Transcription Demo"); -Console.WriteLine("==========================================================="); -Console.WriteLine(); -Console.WriteLine($" Model cache: {modelCacheDir}"); -Console.WriteLine($" Core DLL: {coreDllPath} (exists: {File.Exists(coreDllPath)})"); -Console.WriteLine(); - -try -{ - // === Step 1: Initialize Foundry Local SDK === - Console.WriteLine("[1/5] Initializing Foundry Local SDK..."); - var config = new Configuration - { - AppName = "LiveAudioTranscription", - LogLevel = Microsoft.AI.Foundry.Local.LogLevel.Information, - ModelCacheDir = modelCacheDir, - AdditionalSettings = new Dictionary - { - { "FoundryLocalCorePath", coreDllPath } - } - }; - - await FoundryLocalManager.CreateAsync(config, logger); - Console.WriteLine(" SDK initialized."); - - // === Step 2: Find and load the nemotron ASR model === - Console.WriteLine("[2/5] Loading nemotron model..."); - var catalog = await FoundryLocalManager.Instance.GetCatalogAsync(); - var model = await catalog.GetModelAsync("nemotron"); - - if (model == null) - { - Console.WriteLine("ERROR: 'nemotron' not found in catalog."); - Console.WriteLine($" Ensure the model is downloaded to: {modelCacheDir}"); - Console.WriteLine(" The folder should contain genai_config.json, encoder.onnx, decoder.onnx, etc."); - return; - } - - Console.WriteLine($" Found model: {model.Alias}"); - await model.LoadAsync(); - Console.WriteLine(" Model loaded."); - - // === 
Step 3: Create live transcription session === - Console.WriteLine("[3/5] Creating live transcription session..."); - var audioClient = await model.GetAudioClientAsync(); - var session = audioClient.CreateLiveTranscriptionSession(); - session.Settings.SampleRate = 16000; - session.Settings.Channels = 1; - session.Settings.Language = "en"; - - await session.StartAsync(); - Console.WriteLine(" Session started (SDK -> Core -> GenAI pipeline active)."); - - // === Step 4: Set up microphone + transcription reader === - Console.WriteLine("[4/5] Setting up microphone..."); - - // Background task reads transcription results as they arrive - var readTask = Task.Run(async () => - { - try - { - await foreach (var result in session.GetTranscriptionStream()) - { - if (result.IsFinal) - { - Console.WriteLine(); - Console.WriteLine($" [FINAL] {result.Text}"); - Console.Out.Flush(); - } - else if (!string.IsNullOrEmpty(result.Text)) - { - Console.ForegroundColor = ConsoleColor.Cyan; - Console.Write(result.Text); - Console.ResetColor(); - Console.Out.Flush(); - } - } - } - catch (OperationCanceledException) { } - }); - - // Microphone capture via NAudio - using var waveIn = new WaveInEvent - { - WaveFormat = new WaveFormat(rate: 16000, bits: 16, channels: 1), - BufferMilliseconds = 100 - }; - - int totalChunks = 0; - long totalBytes = 0; - - waveIn.DataAvailable += (sender, e) => - { - if (e.BytesRecorded > 0) - { - _ = session.AppendAsync(new ReadOnlyMemory(e.Buffer, 0, e.BytesRecorded)); - totalChunks++; - totalBytes += e.BytesRecorded; - } - }; - - // === Step 5: Record === - Console.WriteLine(); - Console.WriteLine("==========================================================="); - Console.WriteLine(" LIVE TRANSCRIPTION ACTIVE"); - Console.WriteLine(" Speak into your microphone."); - Console.WriteLine(" Transcription appears in real-time (cyan text)."); - Console.WriteLine(" Press ENTER to stop recording."); - 
Console.WriteLine("==========================================================="); - Console.WriteLine(); - - waveIn.StartRecording(); - Console.ReadLine(); - waveIn.StopRecording(); - - var totalSeconds = totalBytes / (16000.0 * 2); - Console.WriteLine($"\n Recording: {totalSeconds:F1}s | {totalChunks} chunks | {totalBytes / 1024} KB"); - - // Stop session (flushes remaining audio through the pipeline) - Console.WriteLine("\n[5/5] Stopping session..."); - await session.StopAsync(); - await readTask; - - // Unload model - await model.UnloadAsync(); - - Console.WriteLine(); - Console.WriteLine("==========================================================="); - Console.WriteLine(" Demo complete!"); - Console.WriteLine(" Pipeline: Mic -> NAudio -> SDK -> Core -> GenAI -> Text"); - Console.WriteLine("==========================================================="); -} -catch (Exception ex) -{ - Console.WriteLine($"\nERROR: {ex.Message}"); - if (ex.InnerException != null) - Console.WriteLine($"Inner: {ex.InnerException.Message}"); - Console.WriteLine($"\n{ex.StackTrace}"); -} diff --git a/samples/cs/LiveAudioTranscription/README.md b/samples/cs/LiveAudioTranscription/README.md deleted file mode 100644 index f4897524..00000000 --- a/samples/cs/LiveAudioTranscription/README.md +++ /dev/null @@ -1,143 +0,0 @@ -# Live Audio Transcription Demo - -Real-time microphone-to-text using Foundry Local SDK, Core, and onnxruntime-genai. - -## Architecture - -``` -Microphone (NAudio, 16kHz/16-bit/mono) - | - v -Foundry Local SDK (C#) - | AppendAsync(pcmBytes) - v -Foundry Local Core (NativeAOT DLL) - | AppendAudioChunk -> CommitTranscription - v -onnxruntime-genai (StreamingProcessor + Generator) - | RNNT encoder + decoder - v -Live transcription text -``` - -## Prerequisites - -1. **Windows x64** with a microphone -2. **.NET 9.0 SDK** installed -3. **Nemotron ASR model** downloaded locally -4. 
**Native DLLs** (4 files — see Setup below) - -## Setup (Step by Step) - -### Step 1: Get the native DLLs - -You need 4 DLLs placed in this project folder: - -| DLL | Source | -|-----|--------| -| `Microsoft.AI.Foundry.Local.Core.dll` | Built from neutron-server (`dotnet publish` with NativeAOT) | -| `onnxruntime-genai.dll` | Built from onnxruntime-genai (Nenad's StreamingProcessor branch) | -| `onnxruntime.dll` | Comes with the Core publish output | -| `onnxruntime_providers_shared.dll` | Comes with the Core publish output | - -**Option A: From CI artifacts** -- Download the Core DLL from the neutron-server CI pipeline artifacts -- Download the GenAI native DLLs from the onnxruntime-genai pipeline artifacts - -**Option B: From a teammate** -- Ask for the 4 DLLs from someone who has already built them - -Copy all 4 DLLs to this folder (`samples/cs/LiveAudioTranscription/`). - -### Step 2: Get the Nemotron model - -The model should be in a folder with this structure: -``` -models/ - nemotron/ - genai_config.json - encoder.onnx - decoder.onnx - joint.onnx - tokenizer.json - vocab.txt -``` - -### Step 3: Build - -```powershell -cd samples/cs/LiveAudioTranscription -dotnet build -c Debug -``` - -### Step 4: Copy native DLLs to output (if not auto-copied) - -```powershell -Copy-Item onnxruntime-genai.dll bin\Debug\net9.0\win-x64\ -Force -Copy-Item onnxruntime.dll bin\Debug\net9.0\win-x64\ -Force -Copy-Item onnxruntime_providers_shared.dll bin\Debug\net9.0\win-x64\ -Force -Copy-Item Microsoft.AI.Foundry.Local.Core.dll bin\Debug\net9.0\win-x64\ -Force -``` - -### Step 5: Run - -```powershell -# Default model cache location -dotnet run -c Debug --no-build - -# Or specify model cache directory -dotnet run -c Debug --no-build -- C:\path\to\models -``` - -### Step 6: Speak! 
- -- The app will show `LIVE TRANSCRIPTION ACTIVE` -- Speak into your microphone -- Text appears in **cyan** as you speak -- Press **ENTER** to stop - -## Expected Output - -``` -=========================================================== - Foundry Local -- Live Audio Transcription Demo -=========================================================== - -[1/5] Initializing Foundry Local SDK... - SDK initialized. -[2/5] Loading nemotron model... - Found model: nemotron - Model loaded. -[3/5] Creating live transcription session... - Session started (SDK -> Core -> GenAI pipeline active). -[4/5] Setting up microphone... - -=========================================================== - LIVE TRANSCRIPTION ACTIVE - Speak into your microphone. - Transcription appears in real-time (cyan text). - Press ENTER to stop recording. -=========================================================== - -Hello this is a demo of live audio transcription running entirely on device - [FINAL] Hello this is a demo of live audio transcription running entirely on device - - Recording: 15.2s | 152 chunks | 475 KB - -[5/5] Stopping session... - -=========================================================== - Demo complete! 
- Pipeline: Mic -> NAudio -> SDK -> Core -> GenAI -> Text -=========================================================== -``` - -## Troubleshooting - -| Error | Fix | -|-------|-----| -| `Core DLL not found` | Copy `Microsoft.AI.Foundry.Local.Core.dll` to project folder | -| `nemotron not found in catalog` | Check `ModelCacheDir` points to folder containing `nemotron/` with `genai_config.json` | -| `OgaStreamingProcessor not found` | The `onnxruntime-genai.dll` is old — rebuild from Nenad's branch or get from CI | -| `No microphone` | Ensure a mic is connected and set as default recording device | -| `num_mels unknown` | Fix `genai_config.json` — ASR params must be at model level, not nested under `speech` | diff --git a/sdk_v2/cs/src/AssemblyInfo.cs b/sdk_v2/cs/src/AssemblyInfo.cs index 987f9de6..9bebe71b 100644 --- a/sdk_v2/cs/src/AssemblyInfo.cs +++ b/sdk_v2/cs/src/AssemblyInfo.cs @@ -7,5 +7,4 @@ using System.Runtime.CompilerServices; [assembly: InternalsVisibleTo("Microsoft.AI.Foundry.Local.Tests")] -[assembly: InternalsVisibleTo("AudioStreamTest")] [assembly: InternalsVisibleTo("DynamicProxyGenAssembly2")] // for Mock of ICoreInterop diff --git a/sdk_v2/cs/src/OpenAI/AudioClient.cs b/sdk_v2/cs/src/OpenAI/AudioClient.cs index e7529284..1986c330 100644 --- a/sdk_v2/cs/src/OpenAI/AudioClient.cs +++ b/sdk_v2/cs/src/OpenAI/AudioClient.cs @@ -96,6 +96,19 @@ private async Task TranscribeAudioImplAsync(st } + public async IAsyncEnumerable TranscribeAudioStreamingAsync( + string audioFilePath, [EnumeratorCancellation] CancellationToken ct) + { + var enumerable = Utils.CallWithExceptionHandling( + () => TranscribeAudioStreamingImplAsync(audioFilePath, ct), + "Error during streaming audio transcription.", _logger).ConfigureAwait(false); + + await foreach (var item in enumerable) + { + yield return item; + } + } + private async IAsyncEnumerable TranscribeAudioStreamingImplAsync( string audioFilePath, [EnumeratorCancellation] CancellationToken ct) { diff --git 
a/sdk_v2/cs/src/OpenAI/LiveAudioTranscriptionClient.cs b/sdk_v2/cs/src/OpenAI/LiveAudioTranscriptionClient.cs index 0c9e6477..39eb1683 100644 --- a/sdk_v2/cs/src/OpenAI/LiveAudioTranscriptionClient.cs +++ b/sdk_v2/cs/src/OpenAI/LiveAudioTranscriptionClient.cs @@ -4,16 +4,15 @@ // // -------------------------------------------------------------------------------------------------------------------- -namespace Microsoft.AI.Foundry.Local; +namespace Microsoft.AI.Foundry.Local.OpenAI; using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; using System.Globalization; using System.Threading.Channels; +using Microsoft.AI.Foundry.Local; using Microsoft.AI.Foundry.Local.Detail; using Microsoft.Extensions.Logging; - /// /// Session for real-time audio streaming ASR (Automatic Speech Recognition). /// Audio data from a microphone (or other source) is pushed in as PCM chunks, @@ -21,12 +20,11 @@ namespace Microsoft.AI.Foundry.Local; /// /// Created via . /// -/// Thread safety: PushAudioAsync can be called from any thread (including high-frequency +/// Thread safety: AppendAsync can be called from any thread (including high-frequency /// audio callbacks). Pushes are internally serialized via a bounded channel to prevent /// unbounded memory growth and ensure ordering. /// - public sealed class LiveAudioTranscriptionSession : IAsyncDisposable { private readonly string _modelId; @@ -153,13 +151,8 @@ public async Task StartAsync(CancellationToken ct = default) _started = true; _stopped = false; - // Use a dedicated CTS for the push loop — NOT the caller's ct. 
-#pragma warning disable IDISP003 // Dispose previous before re-assigning _sessionCts = new CancellationTokenSource(); -#pragma warning restore IDISP003 -#pragma warning disable IDISP013 // Await in using _pushLoopTask = Task.Run(() => PushLoopAsync(_sessionCts.Token), CancellationToken.None); -#pragma warning restore IDISP013 } /// diff --git a/sdk_v2/cs/src/OpenAI/LiveAudioTranscriptionTypes.cs b/sdk_v2/cs/src/OpenAI/LiveAudioTranscriptionTypes.cs index 33820836..ef0f9edc 100644 --- a/sdk_v2/cs/src/OpenAI/LiveAudioTranscriptionTypes.cs +++ b/sdk_v2/cs/src/OpenAI/LiveAudioTranscriptionTypes.cs @@ -1,7 +1,8 @@ -namespace Microsoft.AI.Foundry.Local; +namespace Microsoft.AI.Foundry.Local.OpenAI; using System.Text.Json; using System.Text.Json.Serialization; +using Microsoft.AI.Foundry.Local; using Microsoft.AI.Foundry.Local.Detail; public record LiveAudioTranscriptionResult @@ -31,10 +32,6 @@ public record LiveAudioTranscriptionResult [JsonPropertyName("end_time")] public double? EndTime { get; init; } - /// Confidence score (0.0 - 1.0) if available. - [JsonPropertyName("confidence")] - public float? Confidence { get; init; } - internal static LiveAudioTranscriptionResult FromJson(string json) { return JsonSerializer.Deserialize(json, diff --git a/sdk_v2/cs/test/FoundryLocal.Tests/Utils.cs b/sdk_v2/cs/test/FoundryLocal.Tests/Utils.cs index 6b71921a..d64a98b7 100644 --- a/sdk_v2/cs/test/FoundryLocal.Tests/Utils.cs +++ b/sdk_v2/cs/test/FoundryLocal.Tests/Utils.cs @@ -1,74 +1,451 @@ -using Microsoft.AI.Foundry.Local; +// -------------------------------------------------------------------------------------------------------------------- +// +// Copyright (c) Microsoft. All rights reserved. 
+// +// -------------------------------------------------------------------------------------------------------------------- + +namespace Microsoft.AI.Foundry.Local.Tests; + +using System; +using System.Collections.Generic; +using System.Runtime.CompilerServices; +using System.Text.Json; + +using Microsoft.AI.Foundry.Local.Detail; +using Microsoft.Extensions.Configuration; using Microsoft.Extensions.Logging; -var loggerFactory = LoggerFactory.Create(b => b.AddConsole().SetMinimumLevel(LogLevel.Debug)); -var logger = loggerFactory.CreateLogger("AudioStreamTest"); +using Microsoft.VisualStudio.TestPlatform.TestHost; -// Point to the directory containing Core + ORT DLLs -var corePath = @"C:\Users\ruiren\Desktop\audio-stream-test\Microsoft.AI.Foundry.Local.Core.dll"; +using Moq; -var config = new Configuration +internal static class Utils { - AppName = "AudioStreamTest", - LogLevel = Microsoft.AI.Foundry.Local.LogLevel.Debug, - AdditionalSettings = new Dictionary + internal struct TestCatalogInfo { - { "FoundryLocalCorePath", corePath } + internal readonly List TestCatalog { get; } + internal readonly string ModelListJson { get; } + + internal TestCatalogInfo(bool includeCuda) + { + + TestCatalog = Utils.BuildTestCatalog(includeCuda); + ModelListJson = JsonSerializer.Serialize(TestCatalog, JsonSerializationContext.Default.ListModelInfo); + } } -}; -Console.WriteLine("=== Initializing FoundryLocalManager ==="); -await FoundryLocalManager.CreateAsync(config, logger); -var manager = FoundryLocalManager.Instance; + internal static readonly TestCatalogInfo TestCatalog = new(true); -Console.WriteLine("=== Getting Catalog ==="); -var catalog = await manager.GetCatalogAsync(); -var models = await catalog.ListModelsAsync(); -Console.WriteLine($"Found {models.Count} models"); + [Before(Assembly)] + public static void AssemblyInit(AssemblyHookContext _) + { + using var loggerFactory = LoggerFactory.Create(builder => + { + builder + .AddConsole() + 
.SetMinimumLevel(LogLevel.Debug); + }); -// Find and load a whisper model -var model = await catalog.GetModelAsync("whisper-tiny"); -if (model == null) -{ - Console.WriteLine("whisper-tiny not found. Available models:"); - foreach (var m in models) - Console.WriteLine($" - {m.Alias}"); - return; -} + ILogger logger = loggerFactory.CreateLogger(); -Console.WriteLine($"=== Downloading {model.Alias} ==="); -await model.DownloadAsync(p => Console.Write($"\r Progress: {p:F1}%")); -Console.WriteLine(); - -Console.WriteLine($"=== Loading {model.Alias} ==="); -await model.LoadAsync(); -Console.WriteLine("Model loaded."); - -Console.WriteLine("=== Creating streaming session ==="); -var audioClient = await model.GetAudioClientAsync(); -var streamingClient = audioClient.CreateLiveTranscriptionSession(); -streamingClient.Settings.SampleRate = 16000; -streamingClient.Settings.Channels = 1; -streamingClient.Settings.BitsPerSample = 16; -streamingClient.Settings.Language = "en"; - -Console.WriteLine("=== Starting streaming session ==="); -await streamingClient.StartAsync(); -Console.WriteLine("Session started!"); - -// Push some fake PCM data (silence — 100ms at 16kHz 16-bit mono = 3200 bytes) -var fakePcm = new byte[3200]; -Console.WriteLine("=== Pushing audio chunks ==="); -for (int i = 0; i < 5; i++) -{ - await streamingClient.AppendAsync(fakePcm); - Console.WriteLine($" Pushed chunk {i + 1}"); -} + // Read configuration from appsettings.Test.json + logger.LogDebug("Reading configuration from appsettings.Test.json"); + var configuration = new ConfigurationBuilder() + .SetBasePath(Directory.GetCurrentDirectory()) + .AddJsonFile("appsettings.Test.json", optional: true, reloadOnChange: false) + .Build(); + + var testModelCacheDirName = "test-data-shared"; + string testDataSharedPath; + if (Path.IsPathRooted(testModelCacheDirName) || + testModelCacheDirName.Contains(Path.DirectorySeparatorChar) || + testModelCacheDirName.Contains(Path.AltDirectorySeparatorChar)) + { + // It's a 
relative or complete filepath, resolve from current directory + testDataSharedPath = Path.GetFullPath(testModelCacheDirName); + } + else + { + // It's just a directory name, combine with repo root parent + testDataSharedPath = Path.GetFullPath(Path.Combine(GetRepoRoot(), "..", testModelCacheDirName)); + } + + logger.LogInformation("Using test model cache directory: {testDataSharedPath}", testDataSharedPath); + + if (!Directory.Exists(testDataSharedPath)) + { + throw new DirectoryNotFoundException($"Test model cache directory does not exist: {testDataSharedPath}"); + + } + + var config = new Configuration + { + AppName = "FoundryLocalSdkTest", + LogLevel = Local.LogLevel.Debug, + Web = new Configuration.WebService + { + Urls = "http://127.0.0.1:0" + }, + ModelCacheDir = testDataSharedPath + }; + + // Initialize the singleton instance. + FoundryLocalManager.CreateAsync(config, logger).GetAwaiter().GetResult(); + + // standalone instance for testing individual components that skips the 'initialize' command + CoreInterop = new CoreInterop(logger); + } + + internal static ICoreInterop CoreInterop { get; private set; } = default!; + + internal static Mock CreateCapturingLoggerMock(List sink) + { + var mock = new Mock(); + mock.Setup(x => x.Log( + It.IsAny(), + It.IsAny(), + It.IsAny(), + It.IsAny(), + (Func)It.IsAny())) + .Callback((LogLevel level, EventId id, object state, Exception? ex, Delegate formatter) => + { + var message = formatter.DynamicInvoke(state, ex) as string; + sink.Add($"{level}: {message}"); + }); + + return mock; + } + + internal sealed record InteropCommandInterceptInfo + { + public string CommandName { get; init; } = default!; + public string? CommandInput { get; init; } + public string ResponseData { get; init; } = default!; + public string? 
ResponseError { get; init; } + } + + internal static Mock CreateCoreInteropWithIntercept(ICoreInterop coreInterop, + List intercepts) + { + var mock = new Mock(); + var interceptNames = new HashSet(StringComparer.InvariantCulture); + + foreach (var intercept in intercepts) + { + if (!interceptNames.Add(intercept.CommandName)) + { + throw new ArgumentException($"Duplicate intercept for command {intercept.CommandName}"); + } + + mock.Setup(x => x.ExecuteCommand(It.Is(s => s == intercept.CommandName), It.IsAny())) + .Returns(new ICoreInterop.Response + { + Data = intercept.ResponseData, + Error = intercept.ResponseError + }); + + mock.Setup(x => x.ExecuteCommandAsync(It.Is(s => s == intercept.CommandName), + It.IsAny(), + It.IsAny())) + .ReturnsAsync(new ICoreInterop.Response + { + Data = intercept.ResponseData, + Error = intercept.ResponseError + }); + } + + mock.Setup(x => x.ExecuteCommand(It.Is(s => !interceptNames.Contains(s)), + It.IsAny())) + .Returns((string commandName, CoreInteropRequest? commandInput) => + coreInterop.ExecuteCommand(commandName, commandInput)); + + mock.Setup(x => x.ExecuteCommandAsync(It.Is(s => !interceptNames.Contains(s)), + It.IsAny(), + It.IsAny())) + .Returns((string commandName, CoreInteropRequest? commandInput, CancellationToken? 
ct) => + coreInterop.ExecuteCommandAsync(commandName, commandInput, ct)); + + return mock; + } + + internal static bool IsRunningInCI() + { + var azureDevOps = Environment.GetEnvironmentVariable("TF_BUILD"); + var githubActions = Environment.GetEnvironmentVariable("GITHUB_ACTIONS"); + var isCI = string.Equals(azureDevOps, "True", StringComparison.OrdinalIgnoreCase) || + string.Equals(githubActions, "true", StringComparison.OrdinalIgnoreCase); + + return isCI; + } + + private static List BuildTestCatalog(bool includeCuda = true) + { + // Mirrors MOCK_CATALOG_DATA ordering and fields (Python tests) + var common = new + { + ProviderType = "AzureFoundry", + Version = 1, + ModelType = "ONNX", + PromptTemplate = (PromptTemplate?)null, + Publisher = "Microsoft", + Task = "chat-completion", + FileSizeMb = 10403, + ModelSettings = new ModelSettings { Parameters = [] }, + SupportsToolCalling = false, + License = "MIT", + LicenseDescription = "License…", + MaxOutputTokens = 1024L, + MinFLVersion = "1.0.0", + }; -Console.WriteLine("=== Stopping session ==="); -await streamingClient.StopAsync(); -Console.WriteLine("Session stopped."); + var list = new List + { + // model-1 generic-gpu, generic-cpu:2, generic-cpu:1 + new() + { + Id = "model-1-generic-gpu:1", + Name = "model-1-generic-gpu", + DisplayName = "model-1-generic-gpu", + Uri = "azureml://registries/azureml/models/model-1-generic-gpu/versions/1", + Runtime = new Runtime { DeviceType = DeviceType.GPU, ExecutionProvider = "WebGpuExecutionProvider" }, + Alias = "model-1", + // ParentModelUri = "azureml://registries/azureml/models/model-1/versions/1", + ProviderType = common.ProviderType, Version = common.Version, ModelType = common.ModelType, + PromptTemplate = common.PromptTemplate, Publisher = common.Publisher, Task = common.Task, + FileSizeMb = common.FileSizeMb, ModelSettings = common.ModelSettings, + SupportsToolCalling = common.SupportsToolCalling, License = common.License, + LicenseDescription = 
common.LicenseDescription, MaxOutputTokens = common.MaxOutputTokens, + MinFLVersion = common.MinFLVersion + }, + new() + { + Id = "model-1-generic-cpu:2", + Name = "model-1-generic-cpu", + DisplayName = "model-1-generic-cpu", + Uri = "azureml://registries/azureml/models/model-1-generic-cpu/versions/2", + Runtime = new Runtime { DeviceType = DeviceType.CPU, ExecutionProvider = "CPUExecutionProvider" }, + Alias = "model-1", + // ParentModelUri = "azureml://registries/azureml/models/model-1/versions/2", + ProviderType = common.ProviderType, + Version = common.Version, ModelType = common.ModelType, + PromptTemplate = common.PromptTemplate, + Publisher = common.Publisher, Task = common.Task, + FileSizeMb = common.FileSizeMb - 10, // smaller so default chosen in test that sorts on this + ModelSettings = common.ModelSettings, + SupportsToolCalling = common.SupportsToolCalling, + License = common.License, + LicenseDescription = common.LicenseDescription, + MaxOutputTokens = common.MaxOutputTokens, + MinFLVersion = common.MinFLVersion + }, + new() + { + Id = "model-1-generic-cpu:1", + Name = "model-1-generic-cpu", + DisplayName = "model-1-generic-cpu", + Uri = "azureml://registries/azureml/models/model-1-generic-cpu/versions/1", + Runtime = new Runtime { DeviceType = DeviceType.CPU, ExecutionProvider = "CPUExecutionProvider" }, + Alias = "model-1", + //ParentModelUri = "azureml://registries/azureml/models/model-1/versions/1", + ProviderType = common.ProviderType, + Version = common.Version, + ModelType = common.ModelType, + PromptTemplate = common.PromptTemplate, + Publisher = common.Publisher, Task = common.Task, + FileSizeMb = common.FileSizeMb, + ModelSettings = common.ModelSettings, + SupportsToolCalling = common.SupportsToolCalling, + License = common.License, + LicenseDescription = common.LicenseDescription, + MaxOutputTokens = common.MaxOutputTokens, + MinFLVersion = common.MinFLVersion + }, -Console.WriteLine("=== Unloading model ==="); -await model.UnloadAsync(); 
-Console.WriteLine("Done! All plumbing works end-to-end."); \ No newline at end of file + // model-2 npu:2, npu:1, generic-cpu:1 + new() + { + Id = "model-2-npu:2", + Name = "model-2-npu", + DisplayName = "model-2-npu", + Uri = "azureml://registries/azureml/models/model-2-npu/versions/2", + Runtime = new Runtime { DeviceType = DeviceType.NPU, ExecutionProvider = "QNNExecutionProvider" }, + Alias = "model-2", + //ParentModelUri = "azureml://registries/azureml/models/model-2/versions/2", + ProviderType = common.ProviderType, + Version = common.Version, ModelType = common.ModelType, + PromptTemplate = common.PromptTemplate, + Publisher = common.Publisher, Task = common.Task, + FileSizeMb = common.FileSizeMb, + ModelSettings = common.ModelSettings, + SupportsToolCalling = common.SupportsToolCalling, + License = common.License, + LicenseDescription = common.LicenseDescription, + MaxOutputTokens = common.MaxOutputTokens, + MinFLVersion = common.MinFLVersion + }, + new() + { + Id = "model-2-npu:1", + Name = "model-2-npu", + DisplayName = "model-2-npu", + Uri = "azureml://registries/azureml/models/model-2-npu/versions/1", + Runtime = new Runtime { DeviceType = DeviceType.NPU, ExecutionProvider = "QNNExecutionProvider" }, + Alias = "model-2", + //ParentModelUri = "azureml://registries/azureml/models/model-2/versions/1", + ProviderType = common.ProviderType, + Version = common.Version, ModelType = common.ModelType, + PromptTemplate = common.PromptTemplate, + Publisher = common.Publisher, Task = common.Task, + FileSizeMb = common.FileSizeMb, + ModelSettings = common.ModelSettings, + SupportsToolCalling = common.SupportsToolCalling, + License = common.License, + LicenseDescription = common.LicenseDescription, + MaxOutputTokens = common.MaxOutputTokens, + MinFLVersion = common.MinFLVersion + }, + new() + { + Id = "model-2-generic-cpu:1", + Name = "model-2-generic-cpu", + DisplayName = "model-2-generic-cpu", + Uri = 
"azureml://registries/azureml/models/model-2-generic-cpu/versions/1", + Runtime = new Runtime { DeviceType = DeviceType.CPU, ExecutionProvider = "CPUExecutionProvider" }, + Alias = "model-2", + //ParentModelUri = "azureml://registries/azureml/models/model-2/versions/1", + ProviderType = common.ProviderType, + Version = common.Version, ModelType = common.ModelType, + PromptTemplate = common.PromptTemplate, + Publisher = common.Publisher, Task = common.Task, + FileSizeMb = common.FileSizeMb, + ModelSettings = common.ModelSettings, + SupportsToolCalling = common.SupportsToolCalling, + License = common.License, + LicenseDescription = common.LicenseDescription, + MaxOutputTokens = common.MaxOutputTokens, + MinFLVersion = common.MinFLVersion + }, + }; + + // model-3 cuda-gpu (optional), generic-gpu, generic-cpu + if (includeCuda) + { + list.Add(new ModelInfo + { + Id = "model-3-cuda-gpu:1", + Name = "model-3-cuda-gpu", + DisplayName = "model-3-cuda-gpu", + Uri = "azureml://registries/azureml/models/model-3-cuda-gpu/versions/1", + Runtime = new Runtime { DeviceType = DeviceType.GPU, ExecutionProvider = "CUDAExecutionProvider" }, + Alias = "model-3", + //ParentModelUri = "azureml://registries/azureml/models/model-3/versions/1", + ProviderType = common.ProviderType, + Version = common.Version, + ModelType = common.ModelType, + PromptTemplate = common.PromptTemplate, + Publisher = common.Publisher, + Task = common.Task, + FileSizeMb = common.FileSizeMb, + ModelSettings = common.ModelSettings, + SupportsToolCalling = common.SupportsToolCalling, + License = common.License, + LicenseDescription = common.LicenseDescription, + MaxOutputTokens = common.MaxOutputTokens, + MinFLVersion = common.MinFLVersion + }); + } + + list.AddRange(new[] + { + new ModelInfo + { + Id = "model-3-generic-gpu:1", + Name = "model-3-generic-gpu", + DisplayName = "model-3-generic-gpu", + Uri = "azureml://registries/azureml/models/model-3-generic-gpu/versions/1", + Runtime = new Runtime { DeviceType = 
DeviceType.GPU, ExecutionProvider = "WebGpuExecutionProvider" }, + Alias = "model-3", + //ParentModelUri = "azureml://registries/azureml/models/model-3/versions/1", + ProviderType = common.ProviderType, + Version = common.Version, ModelType = common.ModelType, + PromptTemplate = common.PromptTemplate, + Publisher = common.Publisher, Task = common.Task, + FileSizeMb = common.FileSizeMb, + ModelSettings = common.ModelSettings, + SupportsToolCalling = common.SupportsToolCalling, + License = common.License, + LicenseDescription = common.LicenseDescription, + MaxOutputTokens = common.MaxOutputTokens, + MinFLVersion = common.MinFLVersion + }, + new ModelInfo + { + Id = "model-3-generic-cpu:1", + Name = "model-3-generic-cpu", + DisplayName = "model-3-generic-cpu", + Uri = "azureml://registries/azureml/models/model-3-generic-cpu/versions/1", + Runtime = new Runtime { DeviceType = DeviceType.CPU, ExecutionProvider = "CPUExecutionProvider" }, + Alias = "model-3", + //ParentModelUri = "azureml://registries/azureml/models/model-3/versions/1", + ProviderType = common.ProviderType, + Version = common.Version, + ModelType = common.ModelType, + PromptTemplate = common.PromptTemplate, + Publisher = common.Publisher, Task = common.Task, + FileSizeMb = common.FileSizeMb, + ModelSettings = common.ModelSettings, + SupportsToolCalling = common.SupportsToolCalling, + License = common.License, + LicenseDescription = common.LicenseDescription, + MaxOutputTokens = common.MaxOutputTokens, + MinFLVersion = common.MinFLVersion + } + }); + + // model-4 generic-gpu (nullable prompt) + list.Add(new ModelInfo + { + Id = "model-4-generic-gpu:1", + Name = "model-4-generic-gpu", + DisplayName = "model-4-generic-gpu", + Uri = "azureml://registries/azureml/models/model-4-generic-gpu/versions/1", + Runtime = new Runtime { DeviceType = DeviceType.GPU, ExecutionProvider = "WebGpuExecutionProvider" }, + Alias = "model-4", + //ParentModelUri = "azureml://registries/azureml/models/model-4/versions/1", + 
ProviderType = common.ProviderType, + Version = common.Version, + ModelType = common.ModelType, + PromptTemplate = null, + Publisher = common.Publisher, + Task = common.Task, + FileSizeMb = common.FileSizeMb, + ModelSettings = common.ModelSettings, + SupportsToolCalling = common.SupportsToolCalling, + License = common.License, + LicenseDescription = common.LicenseDescription, + MaxOutputTokens = common.MaxOutputTokens, + MinFLVersion = common.MinFLVersion + }); + + return list; + } + + private static string GetSourceFilePath([CallerFilePath] string path = "") => path; + + // Gets the root directory of the foundry-local-sdk repository by finding the .git directory. + private static string GetRepoRoot() + { + var sourceFile = GetSourceFilePath(); + var dir = new DirectoryInfo(Path.GetDirectoryName(sourceFile)!); + + while (dir != null) + { + if (Directory.Exists(Path.Combine(dir.FullName, ".git"))) + return dir.FullName; + + dir = dir.Parent; + } + + throw new InvalidOperationException("Could not find git repository root from test file location"); + } +} From 0cac7f3b4f9492979e04d8398e87b9535c04c1ec Mon Sep 17 00:00:00 2001 From: ruiren_microsoft Date: Sun, 22 Mar 2026 16:59:48 -0700 Subject: [PATCH 13/22] update response type --- sdk/cs/README.md | 60 ++++++++++++++++++ sdk/cs/src/Detail/JsonSerializationContext.cs | 6 +- .../OpenAI/LiveAudioTranscriptionClient.cs | 10 +-- .../src/OpenAI/LiveAudioTranscriptionTypes.cs | 63 ++++++++++++++----- 4 files changed, 117 insertions(+), 22 deletions(-) diff --git a/sdk/cs/README.md b/sdk/cs/README.md index f58e41e0..48f37d05 100644 --- a/sdk/cs/README.md +++ b/sdk/cs/README.md @@ -233,6 +233,64 @@ audioClient.Settings.Language = "en"; audioClient.Settings.Temperature = 0.0f; ``` +### Live Audio Transcription (Real-Time Streaming) + +For real-time microphone-to-text transcription, use `CreateLiveTranscriptionSession()`. Audio is pushed as raw PCM chunks and transcription results stream back as an `IAsyncEnumerable`. 
+ +The streaming result type (`LiveAudioTranscriptionResponse`) extends `AudioCreateTranscriptionResponse` from the Betalgo OpenAI SDK, so it's compatible with the file-based transcription output format while adding streaming-specific fields. + +```csharp +var audioClient = await model.GetAudioClientAsync(); +var session = audioClient.CreateLiveTranscriptionSession(); + +// Configure audio format (must be set before StartAsync) +session.Settings.SampleRate = 16000; +session.Settings.Channels = 1; +session.Settings.Language = "en"; + +await session.StartAsync(); + +// Push audio from a microphone callback (thread-safe) +waveIn.DataAvailable += (sender, e) => +{ + _ = session.AppendAsync(new ReadOnlyMemory<byte>(e.Buffer, 0, e.BytesRecorded)); +}; + +// Read transcription results as they arrive +await foreach (var result in session.GetTranscriptionStream()) +{ + // result inherits from AudioCreateTranscriptionResponse + // - result.Text — incremental transcribed text (per chunk, not accumulated) + // - result.IsFinal — true for final results, false for interim hypotheses + // - result.Segments — segment-level timing data (Start/End in seconds) + // - result.Language — language code + Console.Write(result.Text); +} + +await session.StopAsync(); +``` + +#### Output Type + +| Field | Type | Description | +|-------|------|-------------| +| `Text` | `string` | Transcribed text from this audio chunk (inherited from `AudioCreateTranscriptionResponse`) | +| `IsFinal` | `bool` | Whether this is a final or interim result. Nemotron always returns `true`. | +| `Language` | `string` | Language code (inherited) | +| `Duration` | `float` | Audio duration in seconds (inherited) | +| `Segments` | `List<Segment>` | Segment timing with `Start`/`End` offsets (inherited) | +| `Words` | `List<WordSegment>` | Word-level timing (inherited, when available) | + +#### Session Lifecycle + +| Method | Description | +|--------|-------------| +| `StartAsync()` | Initialize the streaming session. 
Settings are frozen after this call. | +| `AppendAsync(pcmData)` | Push a chunk of raw PCM audio. Thread-safe (bounded internal queue). | +| `GetTranscriptionStream()` | Async enumerable of transcription results. | +| `StopAsync()` | Signal end-of-audio, flush remaining audio, and clean up. | +| `DisposeAsync()` | Calls `StopAsync` if needed. Use `await using` for automatic cleanup. | + ### Web Service Start an OpenAI-compatible REST endpoint for use by external tools or processes: @@ -297,6 +355,8 @@ Key types: | [`ModelVariant`](./docs/api/microsoft.ai.foundry.local.modelvariant.md) | Specific model variant (hardware/quantization) | | [`OpenAIChatClient`](./docs/api/microsoft.ai.foundry.local.openaichatclient.md) | Chat completions (sync + streaming) | | [`OpenAIAudioClient`](./docs/api/microsoft.ai.foundry.local.openaiaudioclient.md) | Audio transcription (sync + streaming) | +| [`LiveAudioTranscriptionSession`](./docs/api/microsoft.ai.foundry.local.openai.liveaudiotranscriptionsession.md) | Real-time audio streaming session | +| [`LiveAudioTranscriptionResponse`](./docs/api/microsoft.ai.foundry.local.openai.liveaudiotranscriptionresponse.md) | Streaming transcription result (extends `AudioCreateTranscriptionResponse`) | | [`ModelInfo`](./docs/api/microsoft.ai.foundry.local.modelinfo.md) | Full model metadata record | ## Tests diff --git a/sdk/cs/src/Detail/JsonSerializationContext.cs b/sdk/cs/src/Detail/JsonSerializationContext.cs index 9ca3f539..ea5f5c21 100644 --- a/sdk/cs/src/Detail/JsonSerializationContext.cs +++ b/sdk/cs/src/Detail/JsonSerializationContext.cs @@ -33,9 +33,11 @@ namespace Microsoft.AI.Foundry.Local.Detail; [JsonSerializable(typeof(IList))] [JsonSerializable(typeof(PropertyDefinition))] [JsonSerializable(typeof(IList))] -// --- NEW: Audio streaming types --- -[JsonSerializable(typeof(LiveAudioTranscriptionResult))] +// --- Audio streaming types --- +[JsonSerializable(typeof(LiveAudioTranscriptionResponse))] 
+[JsonSerializable(typeof(LiveAudioTranscriptionRaw))] [JsonSerializable(typeof(CoreErrorResponse))] +[JsonSerializable(typeof(AudioCreateTranscriptionResponse.Segment))] [JsonSourceGenerationOptions(DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull, WriteIndented = false)] internal partial class JsonSerializationContext : JsonSerializerContext diff --git a/sdk/cs/src/OpenAI/LiveAudioTranscriptionClient.cs b/sdk/cs/src/OpenAI/LiveAudioTranscriptionClient.cs index 39eb1683..453eb23f 100644 --- a/sdk/cs/src/OpenAI/LiveAudioTranscriptionClient.cs +++ b/sdk/cs/src/OpenAI/LiveAudioTranscriptionClient.cs @@ -38,7 +38,7 @@ public sealed class LiveAudioTranscriptionSession : IAsyncDisposable private bool _stopped; // Output channel: native callback writes, user reads via GetTranscriptionStream - private Channel? _outputChannel; + private Channel? _outputChannel; // Internal push queue: user writes audio chunks, background loop drains to native core. // Bounded to prevent unbounded memory growth if native core is slower than real-time. @@ -103,7 +103,7 @@ public async Task StartAsync(CancellationToken ct = default) // Freeze settings _activeSettings = Settings.Snapshot(); - _outputChannel = Channel.CreateUnbounded( + _outputChannel = Channel.CreateUnbounded( new UnboundedChannelOptions { SingleWriter = true, // only the native callback writes @@ -208,7 +208,7 @@ private async Task PushLoopAsync(CancellationToken ct) { try { - var transcription = LiveAudioTranscriptionResult.FromJson(response.Data); + var transcription = LiveAudioTranscriptionResponse.FromJson(response.Data); if (!string.IsNullOrEmpty(transcription.Text)) { _outputChannel?.Writer.TryWrite(transcription); @@ -266,7 +266,7 @@ private async Task PushLoopAsync(CancellationToken ct) /// /// Cancellation token. /// Async enumerable of transcription results. 
- public async IAsyncEnumerable GetTranscriptionStream( + public async IAsyncEnumerable GetTranscriptionStream( [EnumeratorCancellation] CancellationToken ct = default) { if (_outputChannel == null) @@ -347,7 +347,7 @@ public async Task StopAsync(CancellationToken ct = default) { try { - var finalResult = LiveAudioTranscriptionResult.FromJson(response.Data); + var finalResult = LiveAudioTranscriptionResponse.FromJson(response.Data); if (!string.IsNullOrEmpty(finalResult.Text)) { _outputChannel?.Writer.TryWrite(finalResult); diff --git a/sdk/cs/src/OpenAI/LiveAudioTranscriptionTypes.cs b/sdk/cs/src/OpenAI/LiveAudioTranscriptionTypes.cs index ef0f9edc..21a2c5a3 100644 --- a/sdk/cs/src/OpenAI/LiveAudioTranscriptionTypes.cs +++ b/sdk/cs/src/OpenAI/LiveAudioTranscriptionTypes.cs @@ -2,10 +2,16 @@ namespace Microsoft.AI.Foundry.Local.OpenAI; using System.Text.Json; using System.Text.Json.Serialization; +using Betalgo.Ranul.OpenAI.ObjectModels.ResponseModels; using Microsoft.AI.Foundry.Local; using Microsoft.AI.Foundry.Local.Detail; -public record LiveAudioTranscriptionResult +/// +/// Transcription result for real-time audio streaming sessions. +/// Extends to provide a consistent +/// output format with file-based transcription, while adding streaming-specific fields. +/// +public class LiveAudioTranscriptionResponse : AudioCreateTranscriptionResponse { /// /// Whether this is a final or partial (interim) result. @@ -16,28 +22,55 @@ public record LiveAudioTranscriptionResult [JsonPropertyName("is_final")] public bool IsFinal { get; init; } - /// - /// Newly transcribed text from this audio chunk only (incremental hypothesis). - /// This is NOT the full accumulated transcript — each result contains only - /// the text decoded from the most recent audio chunk. 
- /// + internal static LiveAudioTranscriptionResponse FromJson(string json) + { + // Deserialize the core's JSON (which has is_final, text, start_time, end_time) + // into an intermediate record, then map to the response type. + var raw = JsonSerializer.Deserialize(json, + JsonSerializationContext.Default.LiveAudioTranscriptionRaw) + ?? throw new FoundryLocalException("Failed to deserialize live audio transcription result"); + + var response = new LiveAudioTranscriptionResponse + { + Text = raw.Text, + IsFinal = raw.IsFinal, + }; + + // Map start_time/end_time into a Segment for OpenAI-compatible output + if (raw.StartTime.HasValue || raw.EndTime.HasValue) + { + response.Segments = + [ + new Segment + { + Start = (float)(raw.StartTime ?? 0), + End = (float)(raw.EndTime ?? 0), + Text = raw.Text + } + ]; + } + + return response; + } +} + +/// +/// Internal raw deserialization target matching the Core's JSON format. +/// Mapped to in FromJson. +/// +internal record LiveAudioTranscriptionRaw +{ + [JsonPropertyName("is_final")] + public bool IsFinal { get; init; } + [JsonPropertyName("text")] public string Text { get; init; } = string.Empty; - /// Start time offset of this segment in the audio stream (seconds). [JsonPropertyName("start_time")] public double? StartTime { get; init; } - /// End time offset of this segment in the audio stream (seconds). [JsonPropertyName("end_time")] public double? EndTime { get; init; } - - internal static LiveAudioTranscriptionResult FromJson(string json) - { - return JsonSerializer.Deserialize(json, - JsonSerializationContext.Default.LiveAudioTranscriptionResult) - ?? 
throw new FoundryLocalException("Failed to deserialize LiveAudioTranscriptionResult"); - } } internal record CoreErrorResponse From 06dc45c4988b2cf2204ed3a2a780fa4a28a3eaa8 Mon Sep 17 00:00:00 2001 From: ruiren_microsoft Date: Sun, 22 Mar 2026 17:20:36 -0700 Subject: [PATCH 14/22] fix nenad --- sdk/cs/src/OpenAI/AudioClient.cs | 47 ++++++------ .../OpenAI/LiveAudioTranscriptionClient.cs | 74 +++++++------------ 2 files changed, 50 insertions(+), 71 deletions(-) diff --git a/sdk/cs/src/OpenAI/AudioClient.cs b/sdk/cs/src/OpenAI/AudioClient.cs index 1986c330..cccc3ee4 100644 --- a/sdk/cs/src/OpenAI/AudioClient.cs +++ b/sdk/cs/src/OpenAI/AudioClient.cs @@ -45,16 +45,6 @@ public record AudioSettings /// public AudioSettings Settings { get; } = new(); - /// - /// Create a real-time streaming transcription session. - /// Audio data is pushed in as PCM chunks and transcription results are returned as an async stream. - /// - /// A streaming session that must be disposed when done. - public LiveAudioTranscriptionSession CreateLiveTranscriptionSession() - { - return new LiveAudioTranscriptionSession(_modelId); - } - /// /// Transcribe audio from a file. /// @@ -72,6 +62,29 @@ public async Task TranscribeAudioAsync(string .ConfigureAwait(false); } + public async IAsyncEnumerable TranscribeAudioStreamingAsync( + string audioFilePath, [EnumeratorCancellation] CancellationToken ct) + { + var enumerable = Utils.CallWithExceptionHandling( + () => TranscribeAudioStreamingImplAsync(audioFilePath, ct), + "Error during streaming audio transcription.", _logger).ConfigureAwait(false); + + await foreach (var item in enumerable) + { + yield return item; + } + } + + /// + /// Create a real-time streaming transcription session. + /// Audio data is pushed in as PCM chunks and transcription results are returned as an async stream. + /// + /// A streaming session that must be disposed when done. 
+ public LiveAudioTranscriptionSession CreateLiveTranscriptionSession() + { + return new LiveAudioTranscriptionSession(_modelId); + } + private async Task TranscribeAudioImplAsync(string audioFilePath, CancellationToken? ct) { @@ -95,20 +108,6 @@ private async Task TranscribeAudioImplAsync(st return output; } - - public async IAsyncEnumerable TranscribeAudioStreamingAsync( - string audioFilePath, [EnumeratorCancellation] CancellationToken ct) - { - var enumerable = Utils.CallWithExceptionHandling( - () => TranscribeAudioStreamingImplAsync(audioFilePath, ct), - "Error during streaming audio transcription.", _logger).ConfigureAwait(false); - - await foreach (var item in enumerable) - { - yield return item; - } - } - private async IAsyncEnumerable TranscribeAudioStreamingImplAsync( string audioFilePath, [EnumeratorCancellation] CancellationToken ct) { diff --git a/sdk/cs/src/OpenAI/LiveAudioTranscriptionClient.cs b/sdk/cs/src/OpenAI/LiveAudioTranscriptionClient.cs index 453eb23f..d2b42b8f 100644 --- a/sdk/cs/src/OpenAI/LiveAudioTranscriptionClient.cs +++ b/sdk/cs/src/OpenAI/LiveAudioTranscriptionClient.cs @@ -178,13 +178,10 @@ public async ValueTask AppendAsync(ReadOnlyMemory pcmData, CancellationTok /// /// Internal loop that drains the push queue and sends chunks to native core one at a time. - /// Implements retry for transient native errors and terminates the session on permanent failures. + /// Terminates the session on any native error. /// private async Task PushLoopAsync(CancellationToken ct) { - const int maxRetries = 3; - var initialRetryDelay = TimeSpan.FromMilliseconds(50); - try { await foreach (var audioData in _pushChannel!.Reader.ReadAllAsync(ct).ConfigureAwait(false)) @@ -194,57 +191,36 @@ private async Task PushLoopAsync(CancellationToken ct) Params = new Dictionary { { "SessionHandle", _sessionHandle! 
} } }; - var pushed = false; - for (int attempt = 0; attempt <= maxRetries && !pushed; attempt++) + var response = _coreInterop.PushAudioData(request, audioData); + + if (response.Error != null) { - var response = _coreInterop.PushAudioData(request, audioData); + var errorInfo = CoreErrorResponse.TryParse(response.Error); + var fatalEx = new FoundryLocalException( + $"Push failed (code={errorInfo?.Code ?? "UNKNOWN"}): {response.Error}", + _logger); + _logger.LogError("Terminating push loop due to push failure: {Error}", + response.Error); + _outputChannel?.Writer.TryComplete(fatalEx); + return; + } - if (response.Error == null) + // Parse transcription result from push response and surface it + if (!string.IsNullOrEmpty(response.Data)) + { + try { - pushed = true; - - // Parse transcription result from push response and surface it - if (!string.IsNullOrEmpty(response.Data)) + var transcription = LiveAudioTranscriptionResponse.FromJson(response.Data); + if (!string.IsNullOrEmpty(transcription.Text)) { - try - { - var transcription = LiveAudioTranscriptionResponse.FromJson(response.Data); - if (!string.IsNullOrEmpty(transcription.Text)) - { - _outputChannel?.Writer.TryWrite(transcription); - } - } - catch (Exception parseEx) - { - // Non-fatal: log and continue if response isn't a transcription result - _logger.LogDebug(parseEx, "Could not parse push response as transcription result"); - } + _outputChannel?.Writer.TryWrite(transcription); } - - continue; } - - // Parse structured error to determine transient vs permanent - var errorInfo = CoreErrorResponse.TryParse(response.Error); - - if (errorInfo?.IsTransient == true && attempt < maxRetries) + catch (Exception parseEx) { - var delay = initialRetryDelay * Math.Pow(2, attempt); - _logger.LogWarning( - "Transient push error (attempt {Attempt}/{Max}): {Code}. 
Retrying in {Delay}ms", - attempt + 1, maxRetries, errorInfo.Code, delay.TotalMilliseconds); - await Task.Delay(delay, ct).ConfigureAwait(false); - continue; + // Non-fatal: log and continue if response isn't a transcription result + _logger.LogDebug(parseEx, "Could not parse push response as transcription result"); } - - // Permanent error or retries exhausted — terminate the session - var fatalEx = new FoundryLocalException( - $"Push failed permanently (code={errorInfo?.Code ?? "UNKNOWN"}): {response.Error}", - _logger); - _logger.LogError("Terminating push loop due to permanent push failure: {Error}", - response.Error); - _outputChannel?.Writer.TryComplete(fatalEx); - return; // exit push loop } } } @@ -375,6 +351,10 @@ public async Task StopAsync(CancellationToken ct = default) } } + /// + /// Dispose the streaming session. Calls if the session is still active. + /// Safe to call multiple times. + /// public async ValueTask DisposeAsync() { try From 709788ced058b0e65d14dce0aa0534e786e36ab7 Mon Sep 17 00:00:00 2001 From: ruiren_microsoft Date: Tue, 24 Mar 2026 10:14:19 -0700 Subject: [PATCH 15/22] add unitest --- .../OpenAI/LiveAudioTranscriptionClient.cs | 3 + .../src/OpenAI/LiveAudioTranscriptionTypes.cs | 2 +- .../LiveAudioTranscriptionTests.cs | 162 ++++++++++++++++++ 3 files changed, 166 insertions(+), 1 deletion(-) create mode 100644 sdk/cs/test/FoundryLocal.Tests/LiveAudioTranscriptionTests.cs diff --git a/sdk/cs/src/OpenAI/LiveAudioTranscriptionClient.cs b/sdk/cs/src/OpenAI/LiveAudioTranscriptionClient.cs index d2b42b8f..4b4b6d9a 100644 --- a/sdk/cs/src/OpenAI/LiveAudioTranscriptionClient.cs +++ b/sdk/cs/src/OpenAI/LiveAudioTranscriptionClient.cs @@ -151,8 +151,11 @@ public async Task StartAsync(CancellationToken ct = default) _started = true; _stopped = false; + _sessionCts?.Dispose(); _sessionCts = new CancellationTokenSource(); +#pragma warning disable IDISP013 // Await in using — Task.Run is intentionally fire-and-forget here _pushLoopTask = 
Task.Run(() => PushLoopAsync(_sessionCts.Token), CancellationToken.None); +#pragma warning restore IDISP013 } /// diff --git a/sdk/cs/src/OpenAI/LiveAudioTranscriptionTypes.cs b/sdk/cs/src/OpenAI/LiveAudioTranscriptionTypes.cs index 21a2c5a3..c9650232 100644 --- a/sdk/cs/src/OpenAI/LiveAudioTranscriptionTypes.cs +++ b/sdk/cs/src/OpenAI/LiveAudioTranscriptionTypes.cs @@ -11,7 +11,7 @@ namespace Microsoft.AI.Foundry.Local.OpenAI; /// Extends to provide a consistent /// output format with file-based transcription, while adding streaming-specific fields. /// -public class LiveAudioTranscriptionResponse : AudioCreateTranscriptionResponse +public record LiveAudioTranscriptionResponse : AudioCreateTranscriptionResponse { /// /// Whether this is a final or partial (interim) result. diff --git a/sdk/cs/test/FoundryLocal.Tests/LiveAudioTranscriptionTests.cs b/sdk/cs/test/FoundryLocal.Tests/LiveAudioTranscriptionTests.cs new file mode 100644 index 00000000..ae768fe6 --- /dev/null +++ b/sdk/cs/test/FoundryLocal.Tests/LiveAudioTranscriptionTests.cs @@ -0,0 +1,162 @@ +// -------------------------------------------------------------------------------------------------------------------- +// +// Copyright (c) Microsoft. All rights reserved. 
+// +// -------------------------------------------------------------------------------------------------------------------- + +namespace Microsoft.AI.Foundry.Local.Tests; + +using System.Text.Json; +using Microsoft.AI.Foundry.Local.Detail; +using Microsoft.AI.Foundry.Local.OpenAI; + +internal sealed class LiveAudioTranscriptionTests +{ + // --- LiveAudioTranscriptionResponse.FromJson tests --- + + [Test] + public async Task FromJson_ParsesTextAndIsFinal() + { + var json = """{"is_final":true,"text":"hello world","start_time":null,"end_time":null}"""; + + var result = LiveAudioTranscriptionResponse.FromJson(json); + + await Assert.That(result.Text).IsEqualTo("hello world"); + await Assert.That(result.IsFinal).IsTrue(); + await Assert.That(result.Segments).IsNull(); + } + + [Test] + public async Task FromJson_MapsTimingToSegments() + { + var json = """{"is_final":false,"text":"partial","start_time":1.5,"end_time":3.0}"""; + + var result = LiveAudioTranscriptionResponse.FromJson(json); + + await Assert.That(result.Text).IsEqualTo("partial"); + await Assert.That(result.IsFinal).IsFalse(); + await Assert.That(result.Segments).IsNotNull(); + await Assert.That(result.Segments!.Count).IsEqualTo(1); + await Assert.That(result.Segments[0].Start).IsEqualTo(1.5f); + await Assert.That(result.Segments[0].End).IsEqualTo(3.0f); + await Assert.That(result.Segments[0].Text).IsEqualTo("partial"); + } + + [Test] + public async Task FromJson_EmptyText_ParsesSuccessfully() + { + var json = """{"is_final":true,"text":"","start_time":null,"end_time":null}"""; + + var result = LiveAudioTranscriptionResponse.FromJson(json); + + await Assert.That(result.Text).IsEqualTo(""); + await Assert.That(result.IsFinal).IsTrue(); + } + + [Test] + public async Task FromJson_OnlyStartTime_CreatesSegment() + { + var json = """{"is_final":true,"text":"word","start_time":2.0,"end_time":null}"""; + + var result = LiveAudioTranscriptionResponse.FromJson(json); + + await 
Assert.That(result.Segments).IsNotNull(); + await Assert.That(result.Segments!.Count).IsEqualTo(1); + await Assert.That(result.Segments[0].Start).IsEqualTo(2.0f); + await Assert.That(result.Segments[0].End).IsEqualTo(0f); + } + + [Test] + public async Task FromJson_InvalidJson_Throws() + { + var ex = Assert.Throws(() => + LiveAudioTranscriptionResponse.FromJson("not valid json")); + await Assert.That(ex).IsNotNull(); + } + + [Test] + public async Task FromJson_InheritsFromAudioCreateTranscriptionResponse() + { + var json = """{"is_final":true,"text":"test","start_time":null,"end_time":null}"""; + + var result = LiveAudioTranscriptionResponse.FromJson(json); + + // Verify it's assignable to the base type + Betalgo.Ranul.OpenAI.ObjectModels.ResponseModels.AudioCreateTranscriptionResponse baseRef = result; + await Assert.That(baseRef.Text).IsEqualTo("test"); + } + + // --- LiveAudioTranscriptionOptions tests --- + + [Test] + public async Task Options_DefaultValues() + { + var options = new LiveAudioTranscriptionSession.LiveAudioTranscriptionOptions(); + + await Assert.That(options.SampleRate).IsEqualTo(16000); + await Assert.That(options.Channels).IsEqualTo(1); + await Assert.That(options.Language).IsNull(); + await Assert.That(options.PushQueueCapacity).IsEqualTo(100); + } + + // --- CoreErrorResponse tests --- + + [Test] + public async Task CoreErrorResponse_TryParse_ValidJson() + { + var json = """{"code":"ASR_SESSION_NOT_FOUND","message":"Session not found","isTransient":false}"""; + + var error = CoreErrorResponse.TryParse(json); + + await Assert.That(error).IsNotNull(); + await Assert.That(error!.Code).IsEqualTo("ASR_SESSION_NOT_FOUND"); + await Assert.That(error.Message).IsEqualTo("Session not found"); + await Assert.That(error.IsTransient).IsFalse(); + } + + [Test] + public async Task CoreErrorResponse_TryParse_InvalidJson_ReturnsNull() + { + var result = CoreErrorResponse.TryParse("not json"); + await Assert.That(result).IsNull(); + } + + [Test] + public 
async Task CoreErrorResponse_TryParse_TransientError() + { + var json = """{"code":"BUSY","message":"Model busy","isTransient":true}"""; + + var error = CoreErrorResponse.TryParse(json); + + await Assert.That(error).IsNotNull(); + await Assert.That(error!.IsTransient).IsTrue(); + } + + // --- Session state guard tests --- + + [Test] + public async Task AppendAsync_BeforeStart_Throws() + { + var session = new LiveAudioTranscriptionSession("test-model"); + var data = new ReadOnlyMemory(new byte[100]); + + var ex = Assert.ThrowsAsync( + async () => await session.AppendAsync(data)); + await Assert.That(ex).IsNotNull(); + } + + [Test] + public async Task GetTranscriptionStream_BeforeStart_Throws() + { + var session = new LiveAudioTranscriptionSession("test-model"); + + var ex = Assert.ThrowsAsync(async () => + { + await foreach (var _ in session.GetTranscriptionStream()) + { + // should not reach here + } + }); + await Assert.That(ex).IsNotNull(); + } +} From 24aacb1ed160b676575ca98d9e992d0b1c0ac384 Mon Sep 17 00:00:00 2001 From: ruiren_microsoft Date: Tue, 24 Mar 2026 10:30:40 -0700 Subject: [PATCH 16/22] update the ci core package --- sdk/cs/src/Microsoft.AI.Foundry.Local.csproj | 4 ++-- sdk/cs/src/OpenAI/AudioClient.cs | 9 +++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/sdk/cs/src/Microsoft.AI.Foundry.Local.csproj b/sdk/cs/src/Microsoft.AI.Foundry.Local.csproj index 905f9652..dc600e28 100644 --- a/sdk/cs/src/Microsoft.AI.Foundry.Local.csproj +++ b/sdk/cs/src/Microsoft.AI.Foundry.Local.csproj @@ -99,8 +99,8 @@ $(FoundryLocalCoreVersion) - 0.9.0.8-rc3 - 0.9.0.8-rc3 + 0.9.0 + 0.9.0 True diff --git a/sdk/cs/src/OpenAI/AudioClient.cs b/sdk/cs/src/OpenAI/AudioClient.cs index cccc3ee4..a8cbc1d7 100644 --- a/sdk/cs/src/OpenAI/AudioClient.cs +++ b/sdk/cs/src/OpenAI/AudioClient.cs @@ -62,6 +62,15 @@ public async Task TranscribeAudioAsync(string .ConfigureAwait(false); } + /// + /// Transcribe audio from a file with streamed output. 
+ /// + /// + /// Path to file containing audio recording. + /// Supported formats: mp3 + /// + /// Cancellation token. + /// An asynchronous enumerable of transcription responses. public async IAsyncEnumerable TranscribeAudioStreamingAsync( string audioFilePath, [EnumeratorCancellation] CancellationToken ct) { From eeb34b874e95181576c248ca8edaf162cac21e45 Mon Sep 17 00:00:00 2001 From: ruiren_microsoft Date: Tue, 24 Mar 2026 10:39:00 -0700 Subject: [PATCH 17/22] update the ci core package --- .../LiveAudioTranscriptionTests.cs | 32 +++++++++++++------ sdk/cs/test/FoundryLocal.Tests/ModelTests.cs | 4 +-- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/sdk/cs/test/FoundryLocal.Tests/LiveAudioTranscriptionTests.cs b/sdk/cs/test/FoundryLocal.Tests/LiveAudioTranscriptionTests.cs index ae768fe6..b29ecd77 100644 --- a/sdk/cs/test/FoundryLocal.Tests/LiveAudioTranscriptionTests.cs +++ b/sdk/cs/test/FoundryLocal.Tests/LiveAudioTranscriptionTests.cs @@ -69,7 +69,7 @@ public async Task FromJson_OnlyStartTime_CreatesSegment() [Test] public async Task FromJson_InvalidJson_Throws() { - var ex = Assert.Throws(() => + var ex = Assert.Throws(() => LiveAudioTranscriptionResponse.FromJson("not valid json")); await Assert.That(ex).IsNotNull(); } @@ -137,26 +137,40 @@ public async Task CoreErrorResponse_TryParse_TransientError() [Test] public async Task AppendAsync_BeforeStart_Throws() { - var session = new LiveAudioTranscriptionSession("test-model"); + await using var session = new LiveAudioTranscriptionSession("test-model"); var data = new ReadOnlyMemory(new byte[100]); - var ex = Assert.ThrowsAsync( - async () => await session.AppendAsync(data)); - await Assert.That(ex).IsNotNull(); + FoundryLocalException? 
caught = null; + try + { + await session.AppendAsync(data); + } + catch (FoundryLocalException ex) + { + caught = ex; + } + + await Assert.That(caught).IsNotNull(); } [Test] public async Task GetTranscriptionStream_BeforeStart_Throws() { - var session = new LiveAudioTranscriptionSession("test-model"); + await using var session = new LiveAudioTranscriptionSession("test-model"); - var ex = Assert.ThrowsAsync(async () => + FoundryLocalException? caught = null; + try { await foreach (var _ in session.GetTranscriptionStream()) { // should not reach here } - }); - await Assert.That(ex).IsNotNull(); + } + catch (FoundryLocalException ex) + { + caught = ex; + } + + await Assert.That(caught).IsNotNull(); } } diff --git a/sdk/cs/test/FoundryLocal.Tests/ModelTests.cs b/sdk/cs/test/FoundryLocal.Tests/ModelTests.cs index 0e2ea1dc..1f49560d 100644 --- a/sdk/cs/test/FoundryLocal.Tests/ModelTests.cs +++ b/sdk/cs/test/FoundryLocal.Tests/ModelTests.cs @@ -51,6 +51,4 @@ public async Task GetLastestVersion_Works() var latestB = model.GetLatestVersion(variants[2]); await Assert.That(latestB).IsEqualTo(variants[1]); } -} - - +} \ No newline at end of file From 292a5bc1555b2fafa1b1e9f24b24c84867ab24d3 Mon Sep 17 00:00:00 2001 From: ruiren_microsoft Date: Tue, 24 Mar 2026 11:53:04 -0700 Subject: [PATCH 18/22] Add live audio transcription support to JS SDK --- sdk/js/script/install.cjs | 4 +- sdk/js/src/imodel.ts | 6 + sdk/js/src/index.ts | 2 + sdk/js/src/model.ts | 9 + sdk/js/src/modelVariant.ts | 9 + .../openai/liveAudioTranscriptionClient.ts | 369 ++++++++++++++++++ .../src/openai/liveAudioTranscriptionTypes.ts | 49 +++ 7 files changed, 446 insertions(+), 2 deletions(-) create mode 100644 sdk/js/src/openai/liveAudioTranscriptionClient.ts create mode 100644 sdk/js/src/openai/liveAudioTranscriptionTypes.ts diff --git a/sdk/js/script/install.cjs b/sdk/js/script/install.cjs index 3db771b8..a058c5f3 100644 --- a/sdk/js/script/install.cjs +++ b/sdk/js/script/install.cjs @@ -54,14 +54,14 @@ 
const CORE_FEED = useNightly ? ORT_NIGHTLY_FEED : NUGET_FEED; const FOUNDRY_LOCAL_CORE_ARTIFACT = { name: 'Microsoft.AI.Foundry.Local.Core', - version: '0.9.0.8-rc3', + version: '0.9.0', feed: ORT_NIGHTLY_FEED, nightly: useNightly } const FOUNDRY_LOCAL_CORE_WINML_ARTIFACT = { name: 'Microsoft.AI.Foundry.Local.Core.WinML', - version: '0.9.0.8-rc3', + version: '0.9.0', feed: ORT_NIGHTLY_FEED, nightly: useNightly } diff --git a/sdk/js/src/imodel.ts b/sdk/js/src/imodel.ts index be0913d6..eff742f0 100644 --- a/sdk/js/src/imodel.ts +++ b/sdk/js/src/imodel.ts @@ -1,5 +1,6 @@ import { ChatClient } from './openai/chatClient.js'; import { AudioClient } from './openai/audioClient.js'; +import { LiveAudioTranscriptionClient } from './openai/liveAudioTranscriptionClient.js'; import { ResponsesClient } from './openai/responsesClient.js'; export interface IModel { @@ -16,6 +17,11 @@ export interface IModel { createChatClient(): ChatClient; createAudioClient(): AudioClient; + /** + * Creates a LiveAudioTranscriptionClient for real-time audio streaming ASR. + * @returns A LiveAudioTranscriptionClient instance. + */ + createLiveTranscriptionClient(): LiveAudioTranscriptionClient; /** * Creates a ResponsesClient for interacting with the model via the Responses API. 
* Unlike createChatClient/createAudioClient (which use FFI), the Responses API diff --git a/sdk/js/src/index.ts b/sdk/js/src/index.ts index 7d7ee17a..63f971fd 100644 --- a/sdk/js/src/index.ts +++ b/sdk/js/src/index.ts @@ -6,6 +6,8 @@ export { ModelVariant } from './modelVariant.js'; export type { IModel } from './imodel.js'; export { ChatClient, ChatClientSettings } from './openai/chatClient.js'; export { AudioClient, AudioClientSettings } from './openai/audioClient.js'; +export { LiveAudioTranscriptionClient, LiveAudioTranscriptionSettings } from './openai/liveAudioTranscriptionClient.js'; +export type { LiveAudioTranscriptionResult, CoreErrorResponse } from './openai/liveAudioTranscriptionTypes.js'; export { ResponsesClient, ResponsesClientSettings, getOutputText } from './openai/responsesClient.js'; export { ModelLoadManager } from './detail/modelLoadManager.js'; /** @internal */ diff --git a/sdk/js/src/model.ts b/sdk/js/src/model.ts index e2b37119..2ea1da01 100644 --- a/sdk/js/src/model.ts +++ b/sdk/js/src/model.ts @@ -1,6 +1,7 @@ import { ModelVariant } from './modelVariant.js'; import { ChatClient } from './openai/chatClient.js'; import { AudioClient } from './openai/audioClient.js'; +import { LiveAudioTranscriptionClient } from './openai/liveAudioTranscriptionClient.js'; import { ResponsesClient } from './openai/responsesClient.js'; import { IModel } from './imodel.js'; @@ -159,6 +160,14 @@ export class Model implements IModel { return this.selectedVariant.createAudioClient(); } + /** + * Creates a LiveAudioTranscriptionClient for real-time audio streaming ASR. + * @returns A LiveAudioTranscriptionClient instance. + */ + public createLiveTranscriptionClient(): LiveAudioTranscriptionClient { + return this.selectedVariant.createLiveTranscriptionClient(); + } + /** * Creates a ResponsesClient for interacting with the model via the Responses API. * @param baseUrl - The base URL of the Foundry Local web service. 
diff --git a/sdk/js/src/modelVariant.ts b/sdk/js/src/modelVariant.ts index 4d3e2bee..c5bbf24e 100644 --- a/sdk/js/src/modelVariant.ts +++ b/sdk/js/src/modelVariant.ts @@ -3,6 +3,7 @@ import { ModelLoadManager } from './detail/modelLoadManager.js'; import { ModelInfo } from './types.js'; import { ChatClient } from './openai/chatClient.js'; import { AudioClient } from './openai/audioClient.js'; +import { LiveAudioTranscriptionClient } from './openai/liveAudioTranscriptionClient.js'; import { ResponsesClient } from './openai/responsesClient.js'; import { IModel } from './imodel.js'; @@ -129,6 +130,14 @@ export class ModelVariant implements IModel { return new AudioClient(this._modelInfo.id, this.coreInterop); } + /** + * Creates a LiveAudioTranscriptionClient for real-time audio streaming ASR. + * @returns A LiveAudioTranscriptionClient instance. + */ + public createLiveTranscriptionClient(): LiveAudioTranscriptionClient { + return new LiveAudioTranscriptionClient(this._modelInfo.id, this.coreInterop); + } + /** * Creates a ResponsesClient for interacting with the model via the Responses API. * @param baseUrl - The base URL of the Foundry Local web service. diff --git a/sdk/js/src/openai/liveAudioTranscriptionClient.ts b/sdk/js/src/openai/liveAudioTranscriptionClient.ts new file mode 100644 index 00000000..0857f840 --- /dev/null +++ b/sdk/js/src/openai/liveAudioTranscriptionClient.ts @@ -0,0 +1,369 @@ +import { CoreInterop } from '../detail/coreInterop.js'; +import { LiveAudioTranscriptionResult, tryParseCoreError } from './liveAudioTranscriptionTypes.js'; + +/** + * Audio format settings for a streaming session. + * Must be configured before calling start(). + * Settings are frozen once the session starts. + */ +export class LiveAudioTranscriptionSettings { + /** PCM sample rate in Hz. Default: 16000. */ + sampleRate: number = 16000; + /** Number of audio channels. Default: 1 (mono). */ + channels: number = 1; + /** Bits per sample. Default: 16. 
*/ + bitsPerSample: number = 16; + /** Optional BCP-47 language hint (e.g., "en", "zh"). */ + language?: string; + /** Maximum number of audio chunks buffered in the internal push queue. Default: 100. */ + pushQueueCapacity: number = 100; + + /** @internal Create a frozen copy of these settings. */ + snapshot(): LiveAudioTranscriptionSettings { + const copy = new LiveAudioTranscriptionSettings(); + copy.sampleRate = this.sampleRate; + copy.channels = this.channels; + copy.bitsPerSample = this.bitsPerSample; + copy.language = this.language; + copy.pushQueueCapacity = this.pushQueueCapacity; + return Object.freeze(copy) as LiveAudioTranscriptionSettings; + } +} + +/** + * Internal async queue that acts like C#'s Channel. + * Supports a single consumer reading via async iteration and multiple producers writing. + * @internal + */ +class AsyncQueue { + private queue: T[] = []; + private waitingResolve: ((value: IteratorResult) => void) | null = null; + private completed = false; + private completionError: Error | null = null; + private maxCapacity: number; + private backpressureResolve: (() => void) | null = null; + + constructor(maxCapacity: number = Infinity) { + this.maxCapacity = maxCapacity; + } + + /** Push an item. If at capacity, waits until space is available. */ + async write(item: T): Promise { + if (this.completed) { + throw new Error('Cannot write to a completed queue.'); + } + + if (this.waitingResolve) { + const resolve = this.waitingResolve; + this.waitingResolve = null; + resolve({ value: item, done: false }); + return; + } + + if (this.queue.length >= this.maxCapacity) { + await new Promise((resolve) => { + this.backpressureResolve = resolve; + }); + } + + this.queue.push(item); + } + + /** Push an item synchronously (no backpressure wait). 
*/ + tryWrite(item: T): boolean { + if (this.completed) return false; + + if (this.waitingResolve) { + const resolve = this.waitingResolve; + this.waitingResolve = null; + resolve({ value: item, done: false }); + return true; + } + + this.queue.push(item); + return true; + } + + /** Signal that no more items will be written. */ + complete(error?: Error): void { + if (this.completed) return; + this.completed = true; + this.completionError = error ?? null; + + if (this.backpressureResolve) { + this.backpressureResolve(); + this.backpressureResolve = null; + } + + if (this.waitingResolve) { + const resolve = this.waitingResolve; + this.waitingResolve = null; + resolve({ value: undefined as any, done: true }); + } + } + + get error(): Error | null { + return this.completionError; + } + + /** Async iterator for consuming items. */ + async *[Symbol.asyncIterator](): AsyncGenerator { + while (true) { + if (this.backpressureResolve && this.queue.length < this.maxCapacity) { + const resolve = this.backpressureResolve; + this.backpressureResolve = null; + resolve(); + } + + if (this.queue.length > 0) { + yield this.queue.shift()!; + continue; + } + + if (this.completed) { + if (this.completionError) { + throw this.completionError; + } + return; + } + + const result = await new Promise>((resolve) => { + this.waitingResolve = resolve; + }); + + if (result.done) { + if (this.completionError) { + throw this.completionError; + } + return; + } + + yield result.value; + } + } +} + +/** + * Client for real-time audio streaming ASR (Automatic Speech Recognition). + * Audio data from a microphone (or other source) is pushed in as PCM chunks, + * and transcription results are returned as an async iterable. + * + * Mirrors the C# LiveAudioTranscriptionSession. 
+ */ +export class LiveAudioTranscriptionClient { + private modelId: string; + private coreInterop: CoreInterop; + + private sessionHandle: string | null = null; + private started = false; + private stopped = false; + + private outputQueue: AsyncQueue | null = null; + private pushQueue: AsyncQueue | null = null; + private pushLoopPromise: Promise | null = null; + private activeSettings: LiveAudioTranscriptionSettings | null = null; + private sessionAbortController: AbortController | null = null; + + /** + * Configuration settings for the streaming session. + * Must be configured before calling start(). Settings are frozen after start(). + */ + public settings = new LiveAudioTranscriptionSettings(); + + /** + * @internal + * Users should create clients via Model.createLiveTranscriptionClient(). + */ + constructor(modelId: string, coreInterop: CoreInterop) { + this.modelId = modelId; + this.coreInterop = coreInterop; + } + + /** + * Start a real-time audio streaming session. + * Must be called before pushAudioData() or getTranscriptionStream(). + * Settings are frozen after this call. + */ + public async start(): Promise { + if (this.started) { + throw new Error('Streaming session already started. 
Call stop() first.'); + } + + this.activeSettings = this.settings.snapshot(); + this.outputQueue = new AsyncQueue(); + this.pushQueue = new AsyncQueue(this.activeSettings.pushQueueCapacity); + + const params: Record = { + Model: this.modelId, + SampleRate: this.activeSettings.sampleRate.toString(), + Channels: this.activeSettings.channels.toString(), + BitsPerSample: this.activeSettings.bitsPerSample.toString(), + }; + + if (this.activeSettings.language) { + params['Language'] = this.activeSettings.language; + } + + try { + const response = this.coreInterop.executeCommand("audio_stream_start", { + Params: params + }); + + this.sessionHandle = response; + if (!this.sessionHandle) { + throw new Error('Native core did not return a session handle.'); + } + } catch (error) { + this.outputQueue.complete(); + throw new Error( + `Error starting audio stream session: ${error instanceof Error ? error.message : String(error)}`, + { cause: error } + ); + } + + this.started = true; + this.stopped = false; + + this.sessionAbortController = new AbortController(); + this.pushLoopPromise = this.pushLoop(); + } + + /** + * Push a chunk of raw PCM audio data to the streaming session. + * Can be called from any context. Chunks are internally queued + * and serialized to native core one at a time. + * + * @param pcmData - Raw PCM audio bytes matching the configured format. + */ + public async pushAudioData(pcmData: Uint8Array): Promise { + if (!this.started || this.stopped) { + throw new Error('No active streaming session. Call start() first.'); + } + + const copy = new Uint8Array(pcmData.length); + copy.set(pcmData); + + await this.pushQueue!.write(copy); + } + + /** + * Internal loop that drains the push queue and sends chunks to native core one at a time. + * Terminates the session on any native error. + * @internal + */ + private async pushLoop(): Promise { + try { + for await (const audioData of this.pushQueue!) 
{ + if (this.sessionAbortController?.signal.aborted) { + break; + } + + try { + this.coreInterop.executeCommand("audio_stream_push", { + Params: { + SessionHandle: this.sessionHandle!, + AudioDataLength: audioData.length.toString() + } + }); + } catch (error) { + const errorMsg = error instanceof Error ? error.message : String(error); + const errorInfo = tryParseCoreError(errorMsg); + + const fatalError = new Error( + `Push failed (code=${errorInfo?.code ?? 'UNKNOWN'}): ${errorMsg}`, + { cause: error } + ); + console.error('Terminating push loop due to push failure:', errorMsg); + this.outputQueue?.complete(fatalError); + return; + } + } + } catch (error) { + if (this.sessionAbortController?.signal.aborted) { + return; + } + const err = error instanceof Error ? error : new Error(String(error)); + console.error('Push loop terminated with unexpected error:', err.message); + this.outputQueue?.complete(new Error('Push loop terminated unexpectedly.', { cause: err })); + } + } + + /** + * Get the async iterable of transcription results. + * Results arrive as the native ASR engine processes audio data. + * + * Usage: + * ```ts + * for await (const result of client.getTranscriptionStream()) { + * console.log(result.text); + * } + * ``` + */ + public async *getTranscriptionStream(): AsyncGenerator { + if (!this.outputQueue) { + throw new Error('No active streaming session. Call start() first.'); + } + + for await (const item of this.outputQueue) { + yield item; + } + } + + /** + * Signal end-of-audio and stop the streaming session. + * Any remaining buffered audio in the push queue will be drained to native core first. + * Final results are delivered through getTranscriptionStream() before it completes. 
+ */ + public async stop(): Promise { + if (!this.started || this.stopped) { + return; + } + + this.stopped = true; + + this.pushQueue?.complete(); + + if (this.pushLoopPromise) { + await this.pushLoopPromise; + } + + this.sessionAbortController?.abort(); + + let stopError: Error | null = null; + try { + this.coreInterop.executeCommand("audio_stream_stop", { + Params: { SessionHandle: this.sessionHandle! } + }); + } catch (error) { + stopError = error instanceof Error ? error : new Error(String(error)); + console.error('Error stopping audio stream session:', stopError.message); + } + + this.sessionHandle = null; + this.started = false; + this.sessionAbortController = null; + + this.outputQueue?.complete(); + + if (stopError) { + throw new Error( + `Error stopping audio stream session: ${stopError.message}`, + { cause: stopError } + ); + } + } + + /** + * Dispose the client and stop any active session. + * Safe to call multiple times. + */ + public async dispose(): Promise { + try { + if (this.started && !this.stopped) { + await this.stop(); + } + } catch (error) { + console.warn('Error during dispose cleanup:', error instanceof Error ? error.message : String(error)); + } + } +} diff --git a/sdk/js/src/openai/liveAudioTranscriptionTypes.ts b/sdk/js/src/openai/liveAudioTranscriptionTypes.ts new file mode 100644 index 00000000..eb521cbd --- /dev/null +++ b/sdk/js/src/openai/liveAudioTranscriptionTypes.ts @@ -0,0 +1,49 @@ +/** + * Types for real-time audio streaming transcription results and structured errors. + * Mirrors the C# LiveAudioTranscriptionResponse and CoreErrorResponse. + */ + +/** + * A transcription result from a real-time audio streaming session. + * Mirrors the C# LiveAudioTranscriptionResponse which extends AudioCreateTranscriptionResponse. + */ +export interface LiveAudioTranscriptionResult { + /** Whether this is a partial (interim) or final result for this segment. */ + is_final: boolean; + /** The transcribed text. 
*/ + text: string; + /** Start time offset of this segment in the audio stream (seconds). */ + start_time?: number | null; + /** End time offset of this segment in the audio stream (seconds). */ + end_time?: number | null; +} + +/** + * Structured error response from native core audio streaming commands. + * @internal + */ +export interface CoreErrorResponse { + /** Machine-readable error code. */ + code: string; + /** Human-readable error message. */ + message: string; + /** Whether this error is transient and may succeed on retry. */ + isTransient: boolean; +} + +/** + * Attempt to parse a native error string as a structured CoreErrorResponse. + * Returns null if the error is not valid JSON or doesn't match the schema. + * @internal + */ +export function tryParseCoreError(errorString: string): CoreErrorResponse | null { + try { + const parsed = JSON.parse(errorString); + if (typeof parsed.code === 'string' && typeof parsed.isTransient === 'boolean') { + return parsed as CoreErrorResponse; + } + return null; + } catch { + return null; + } +} From 5287519772b5a95814ebf4017527cdab1ce19c29 Mon Sep 17 00:00:00 2001 From: ruiren_microsoft Date: Tue, 24 Mar 2026 12:16:51 -0700 Subject: [PATCH 19/22] Remove leftover sdk_v2/ directory --- .../cs/src/Microsoft.AI.Foundry.Local.csproj | 128 ------------------ sdk_v2/js/src/index.ts | 17 --- 2 files changed, 145 deletions(-) delete mode 100644 sdk_v2/cs/src/Microsoft.AI.Foundry.Local.csproj delete mode 100644 sdk_v2/js/src/index.ts diff --git a/sdk_v2/cs/src/Microsoft.AI.Foundry.Local.csproj b/sdk_v2/cs/src/Microsoft.AI.Foundry.Local.csproj deleted file mode 100644 index ffc83a94..00000000 --- a/sdk_v2/cs/src/Microsoft.AI.Foundry.Local.csproj +++ /dev/null @@ -1,128 +0,0 @@ - - - Microsoft AI Foundry Local - Microsoft Foundry Local SDK - Microsoft - Microsoft Corporation - © Microsoft Corporation. All rights reserved. 
- LICENSE.txt - https://github.com/microsoft/Foundry-Local - Microsoft AI Foundry Local SDK for .NET - Microsoft AI Foundry SDK - README.md - https://github.com/microsoft/Foundry-Local - git - - net8.0 - win-x64;win-arm64;linux-x64;linux-arm64;osx-arm64 - - true - False - enable - True - True - enable - - - true - snupkg - true - - - false - win-x64;win-arm64 - - - - - $([System.DateTime]::Now.ToString("yyyyMMddHHmmss")) - 0.5.0-dev.local.$(BuildTimestamp) - - - - true - true - true - - - $(DefineConstants);IS_WINDOWS - $(DefineConstants);IS_OSX - $(DefineConstants);IS_LINUX - - - - - - - - - - - - - - - - - - - - - - - - - - - Microsoft AI Foundry Local for WinML - Microsoft Foundry Local SDK for WinML - Microsoft.AI.Foundry.Local.WinML - Microsoft.AI.Foundry.Local.WinML - net8.0-windows10.0.26100.0 - win-x64;win-arm64 - - 10.0.17763.0 - - - $(NoWarn);CsWinRT1028 - - - - - $(FoundryLocalCoreVersion) - 0.9.0-dev-20260227T230631-2a3af92 - 0.9.0-dev-20260227T222239-2a3af92 - - - True - - - True - - - - - $(NoWarn);NU1604 - - - - - - - - - - - \ No newline at end of file diff --git a/sdk_v2/js/src/index.ts b/sdk_v2/js/src/index.ts deleted file mode 100644 index 63f971fd..00000000 --- a/sdk_v2/js/src/index.ts +++ /dev/null @@ -1,17 +0,0 @@ -export { FoundryLocalManager } from './foundryLocalManager.js'; -export type { FoundryLocalConfig } from './configuration.js'; -export { Catalog } from './catalog.js'; -export { Model } from './model.js'; -export { ModelVariant } from './modelVariant.js'; -export type { IModel } from './imodel.js'; -export { ChatClient, ChatClientSettings } from './openai/chatClient.js'; -export { AudioClient, AudioClientSettings } from './openai/audioClient.js'; -export { LiveAudioTranscriptionClient, LiveAudioTranscriptionSettings } from './openai/liveAudioTranscriptionClient.js'; -export type { LiveAudioTranscriptionResult, CoreErrorResponse } from './openai/liveAudioTranscriptionTypes.js'; -export { ResponsesClient, ResponsesClientSettings, 
getOutputText } from './openai/responsesClient.js'; -export { ModelLoadManager } from './detail/modelLoadManager.js'; -/** @internal */ -export { CoreInterop } from './detail/coreInterop.js'; -/** @internal */ -export { Configuration } from './configuration.js'; -export * from './types.js'; From 57ce4608abe5d2adf1d7dc9f119d5ce523dfbfb8 Mon Sep 17 00:00:00 2001 From: ruiren_microsoft Date: Tue, 24 Mar 2026 12:31:18 -0700 Subject: [PATCH 20/22] Update Core version to 0.9.0 in JS install script --- sdk/js/script/install.cjs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/js/script/install.cjs b/sdk/js/script/install.cjs index 3db771b8..a058c5f3 100644 --- a/sdk/js/script/install.cjs +++ b/sdk/js/script/install.cjs @@ -54,14 +54,14 @@ const CORE_FEED = useNightly ? ORT_NIGHTLY_FEED : NUGET_FEED; const FOUNDRY_LOCAL_CORE_ARTIFACT = { name: 'Microsoft.AI.Foundry.Local.Core', - version: '0.9.0.8-rc3', + version: '0.9.0', feed: ORT_NIGHTLY_FEED, nightly: useNightly } const FOUNDRY_LOCAL_CORE_WINML_ARTIFACT = { name: 'Microsoft.AI.Foundry.Local.Core.WinML', - version: '0.9.0.8-rc3', + version: '0.9.0', feed: ORT_NIGHTLY_FEED, nightly: useNightly } From 18389cb489a1a90bf329f64c3267242033cac5c4 Mon Sep 17 00:00:00 2001 From: ruiren_microsoft Date: Tue, 24 Mar 2026 14:39:56 -0700 Subject: [PATCH 21/22] Update Core version to 0.9.0 in JS install script --- sdk/js/script/install.cjs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/js/script/install.cjs b/sdk/js/script/install.cjs index a058c5f3..a08dccb4 100644 --- a/sdk/js/script/install.cjs +++ b/sdk/js/script/install.cjs @@ -55,14 +55,14 @@ const CORE_FEED = useNightly ? 
ORT_NIGHTLY_FEED : NUGET_FEED; const FOUNDRY_LOCAL_CORE_ARTIFACT = { name: 'Microsoft.AI.Foundry.Local.Core', version: '0.9.0', - feed: ORT_NIGHTLY_FEED, + feed: CORE_FEED, nightly: useNightly } const FOUNDRY_LOCAL_CORE_WINML_ARTIFACT = { name: 'Microsoft.AI.Foundry.Local.Core.WinML', version: '0.9.0', - feed: ORT_NIGHTLY_FEED, + feed: CORE_FEED, nightly: useNightly } From 10bbcb8a88cbae1b8960a08de5f3cd3051163fca Mon Sep 17 00:00:00 2001 From: ruiren_microsoft Date: Tue, 24 Mar 2026 19:54:45 -0700 Subject: [PATCH 22/22] update the npkg --- sdk/cs/src/Microsoft.AI.Foundry.Local.csproj | 4 ++-- sdk/js/script/install.cjs | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sdk/cs/src/Microsoft.AI.Foundry.Local.csproj b/sdk/cs/src/Microsoft.AI.Foundry.Local.csproj index dc600e28..9f203a9b 100644 --- a/sdk/cs/src/Microsoft.AI.Foundry.Local.csproj +++ b/sdk/cs/src/Microsoft.AI.Foundry.Local.csproj @@ -99,8 +99,8 @@ $(FoundryLocalCoreVersion) - 0.9.0 - 0.9.0 + 0.9.0-dev + 0.9.0-dev True diff --git a/sdk/js/script/install.cjs b/sdk/js/script/install.cjs index a058c5f3..600741ae 100644 --- a/sdk/js/script/install.cjs +++ b/sdk/js/script/install.cjs @@ -49,19 +49,19 @@ const ORT_FEED = 'https://pkgs.dev.azure.com/aiinfra/PublicPackages/_packaging/O const ORT_NIGHTLY_FEED = 'https://pkgs.dev.azure.com/aiinfra/PublicPackages/_packaging/ORT-Nightly/nuget/v3/index.json'; // If nightly is requested, pull Core/GenAI from the ORT-Nightly feed where nightly builds are published. -// Otherwise use the standard NuGet.org feed. +// Otherwise use the ORT stable feed where release Core packages are published. const CORE_FEED = useNightly ? 
ORT_NIGHTLY_FEED : NUGET_FEED; const FOUNDRY_LOCAL_CORE_ARTIFACT = { name: 'Microsoft.AI.Foundry.Local.Core', - version: '0.9.0', + version: '0.9.0-dev', feed: ORT_NIGHTLY_FEED, nightly: useNightly } const FOUNDRY_LOCAL_CORE_WINML_ARTIFACT = { name: 'Microsoft.AI.Foundry.Local.Core.WinML', - version: '0.9.0', + version: '0.9.0-dev', feed: ORT_NIGHTLY_FEED, nightly: useNightly }