From c045bf38eb964685458e0bbf8113260b8fc73f62 Mon Sep 17 00:00:00 2001
From: ruiren_microsoft <ruiren@microsoft.com>
Date: Thu, 5 Mar 2026 10:24:31 -0800
Subject: [PATCH 01/22] support audio streaming-csharp

---
 sdk_v2/cs/src/Detail/CoreInterop.cs           | 188 +++++++++
 sdk_v2/cs/src/Detail/ICoreInterop.cs          |  23 +
 .../cs/src/Detail/JsonSerializationContext.cs |   3 +
 sdk_v2/cs/src/IModel.cs                       |   7 +
 sdk_v2/cs/src/Model.cs                        |   5 +
 sdk_v2/cs/src/ModelVariant.cs                 |  17 +
 .../OpenAI/AudioStreamTranscriptionTypes.cs   |  65 +++
 sdk_v2/cs/src/OpenAI/AudioStreamingClient.cs  | 399 ++++++++++++++++++
 .../AudioStreamingClientTests.cs              | 221 ++++++++++
 9 files changed, 928 insertions(+)
 create mode 100644 sdk_v2/cs/src/OpenAI/AudioStreamTranscriptionTypes.cs
 create mode 100644 sdk_v2/cs/src/OpenAI/AudioStreamingClient.cs
 create mode 100644 sdk_v2/cs/test/FoundryLocal.Tests/AudioStreamingClientTests.cs
diff --git a/sdk_v2/cs/src/Detail/CoreInterop.cs b/sdk_v2/cs/src/Detail/CoreInterop.cs
index 8411473b..a178bdca 100644
--- a/sdk_v2/cs/src/Detail/CoreInterop.cs
+++ b/sdk_v2/cs/src/Detail/CoreInterop.cs
@@ -158,6 +158,28 @@ private static unsafe partial void CoreExecuteCommandWithCallback(RequestBuffer*
                                                                       nint callbackPtr, // NativeCallbackFn pointer
                                                                       nint userData);
 
+    // --- Audio streaming P/Invoke imports ---
+    // Each import binds a native export by entry-point name and uses the cdecl calling convention.
+    [LibraryImport(LibraryName, EntryPoint = "audio_stream_start")]
+    [UnmanagedCallConv(CallConvs = new[] { typeof(System.Runtime.CompilerServices.CallConvCdecl) })]
+    private static unsafe partial void CoreAudioStreamStart(
+        RequestBuffer* request,
+        ResponseBuffer* response,
+        nint callbackPtr, // callback function pointer (see StartAudioStream)
+        nint userData);   // opaque pointer; StartAudioStream passes a GCHandle to its CallbackHelper
+
+    [LibraryImport(LibraryName, EntryPoint = "audio_stream_push")]
+    [UnmanagedCallConv(CallConvs = new[] { typeof(System.Runtime.CompilerServices.CallConvCdecl) })]
+    private static unsafe partial void CoreAudioStreamPush(
+        StreamingRequestBuffer* request, // JSON params plus the raw audio bytes
+        ResponseBuffer* response);
+    // NOTE(review): StopAudioStream below goes through ExecuteCommand, not this import — confirm it is needed.
+    [LibraryImport(LibraryName, EntryPoint = "audio_stream_stop")]
+    [UnmanagedCallConv(CallConvs = new[] { typeof(System.Runtime.CompilerServices.CallConvCdecl) })]
+    private static unsafe partial void CoreAudioStreamStop(
+        RequestBuffer* request,
+        ResponseBuffer* response);
+
     // helper to capture exceptions in callbacks
     internal class CallbackHelper
     {
@@ -331,4 +353,170 @@ public Task<Response> ExecuteCommandWithCallbackAsync(string commandName, CoreIn
         return Task.Run(() => ExecuteCommandWithCallback(commandName, commandInput, callback), ct);
     }
 
+    // --- Audio streaming managed implementations ---
+
+    /// <summary>
+    /// Starts a native session via <c>audio_stream_start</c>. The returned GCHandle roots the callback helper
+    /// passed to native code as user data; the caller owns it and must free it (see StopAudioStream).
+    /// </summary>
+    /// <exception cref="FoundryLocalException">Wraps any failure other than cancellation.</exception>
+    public AudioStreamSession StartAudioStream(CoreInteropRequest request, CallbackFn transcriptionCallback)
+    {
+        try
+        {
+            byte[] commandBytes = System.Text.Encoding.UTF8.GetBytes("audio_stream_start");
+            byte[] inputBytes = System.Text.Encoding.UTF8.GetBytes(request.ToJson());
+            IntPtr commandPtr = IntPtr.Zero, inputPtr = IntPtr.Zero;
+            ResponseBuffer response = default;
+
+            try
+            {
+                // Allocate inside the try so a failure part-way through cannot leak an earlier buffer.
+                commandPtr = Marshal.AllocHGlobal(commandBytes.Length);
+                Marshal.Copy(commandBytes, 0, commandPtr, commandBytes.Length);
+                inputPtr = Marshal.AllocHGlobal(inputBytes.Length);
+                Marshal.Copy(inputBytes, 0, inputPtr, inputBytes.Length);
+
+                var reqBuf = new RequestBuffer
+                {
+                    Command = commandPtr,
+                    CommandLength = commandBytes.Length,
+                    Data = inputPtr,
+                    DataLength = inputBytes.Length
+                };
+
+                var funcPtr = Marshal.GetFunctionPointerForDelegate(handleCallbackDelegate);
+                var helperHandle = GCHandle.Alloc(new CallbackHelper(transcriptionCallback));
+
+                try
+                {
+                    unsafe
+                    {
+                        CoreAudioStreamStart(&reqBuf, &response, funcPtr, GCHandle.ToIntPtr(helperHandle));
+                    }
+                }
+                catch
+                {
+                    // The call itself threw, so no handle is returned to the caller: release it here.
+                    helperHandle.Free();
+                    throw;
+                }
+
+                Response result = new();
+                if (response.Data != IntPtr.Zero && response.DataLength > 0)
+                {
+                    result.Data = Marshal.PtrToStringUTF8(response.Data, response.DataLength);
+                }
+                if (response.Error != IntPtr.Zero && response.ErrorLength > 0)
+                {
+                    result.Error = Marshal.PtrToStringUTF8(response.Error, response.ErrorLength)!;
+                }
+                return new AudioStreamSession(result, helperHandle);
+            }
+            finally
+            {
+                // Runs on every path; FreeHGlobal is a no-op for IntPtr.Zero.
+                Marshal.FreeHGlobal(commandPtr);
+                Marshal.FreeHGlobal(inputPtr);
+                Marshal.FreeHGlobal(response.Data);
+                Marshal.FreeHGlobal(response.Error);
+            }
+        }
+        catch (Exception ex) when (ex is not OperationCanceledException)
+        {
+            throw new FoundryLocalException("Error executing audio_stream_start", ex, _logger);
+        }
+    }
+
+    /// <summary>
+    /// Pushes one audio chunk to the native core via <c>audio_stream_push</c>. The audio memory is pinned
+    /// for the duration of the call and is passed alongside the JSON request, not copied into it.
+    /// </summary>
+    public Response PushAudioData(CoreInteropRequest request, ReadOnlyMemory<byte> audioData)
+    {
+        try
+        {
+            byte[] commandBytes = System.Text.Encoding.UTF8.GetBytes("audio_stream_push");
+            byte[] inputBytes = System.Text.Encoding.UTF8.GetBytes(request.ToJson());
+            IntPtr commandPtr = IntPtr.Zero, inputPtr = IntPtr.Zero;
+            ResponseBuffer response = default;
+
+            try
+            {
+                // Allocate inside the try so a failure part-way through cannot leak an earlier buffer.
+                commandPtr = Marshal.AllocHGlobal(commandBytes.Length);
+                Marshal.Copy(commandBytes, 0, commandPtr, commandBytes.Length);
+                inputPtr = Marshal.AllocHGlobal(inputBytes.Length);
+                Marshal.Copy(inputBytes, 0, inputPtr, inputBytes.Length);
+
+                // Pin the managed audio data so GC won't move it during the native call
+                using var audioHandle = audioData.Pin();
+
+                unsafe
+                {
+                    var reqBuf = new StreamingRequestBuffer
+                    {
+                        Command = commandPtr,
+                        CommandLength = commandBytes.Length,
+                        Data = inputPtr,
+                        DataLength = inputBytes.Length,
+                        BinaryData = (nint)audioHandle.Pointer,
+                        BinaryDataLength = audioData.Length
+                    };
+
+                    CoreAudioStreamPush(&reqBuf, &response);
+                }
+
+                Response result = new();
+                if (response.Data != IntPtr.Zero && response.DataLength > 0)
+                {
+                    result.Data = Marshal.PtrToStringUTF8(response.Data, response.DataLength);
+                }
+                if (response.Error != IntPtr.Zero && response.ErrorLength > 0)
+                {
+                    result.Error = Marshal.PtrToStringUTF8(response.Error, response.ErrorLength)!;
+                }
+                return result;
+            }
+            finally
+            {
+                // Runs on every path; FreeHGlobal is a no-op for IntPtr.Zero.
+                Marshal.FreeHGlobal(commandPtr);
+                Marshal.FreeHGlobal(inputPtr);
+                Marshal.FreeHGlobal(response.Data);
+                Marshal.FreeHGlobal(response.Error);
+            }
+        }
+        catch (Exception ex) when (ex is not OperationCanceledException)
+        {
+            throw new FoundryLocalException("Error executing audio_stream_push", ex, _logger);
+        }
+    }
+
+    /// <summary>
+    /// Stops the native session via the <c>audio_stream_stop</c> command and releases the callback handle.
+    /// </summary>
+    /// <param name="callbackHandle">Handle returned by StartAudioStream; freed on every exit path.</param>
+    /// <exception cref="FoundryLocalException">Wraps any failure other than cancellation.</exception>
+    public Response StopAudioStream(CoreInteropRequest request, GCHandle callbackHandle)
+    {
+        try
+        {
+            return ExecuteCommand("audio_stream_stop", request);
+        }
+        catch (Exception ex) when (ex is not OperationCanceledException)
+        {
+            throw new FoundryLocalException("Error executing audio_stream_stop", ex, _logger);
+        }
+        finally
+        {
+            // Free in finally so cancellation (which bypasses the catch filter) cannot leak the handle.
+            // After this point, the native core must not invoke the callback.
+            if (callbackHandle.IsAllocated)
+            {
+                callbackHandle.Free();
+            }
+        }
+    }
+
 }
diff --git a/sdk_v2/cs/src/Detail/ICoreInterop.cs b/sdk_v2/cs/src/Detail/ICoreInterop.cs
index 1fff9dde..cd342ce5 100644
--- a/sdk_v2/cs/src/Detail/ICoreInterop.cs
+++ b/sdk_v2/cs/src/Detail/ICoreInterop.cs
@@ -51,4 +51,27 @@ Task<Response> ExecuteCommandAsync(string commandName, CoreInteropRequest? comma
     Task<Response> ExecuteCommandWithCallbackAsync(string commandName, CoreInteropRequest? commandInput,
                                                    CallbackFn callback,
                                                    CancellationToken? ct = null);
+
+    // --- Audio streaming session support ---
+
+    [StructLayout(LayoutKind.Sequential)]
+    protected unsafe struct StreamingRequestBuffer
+    {
+        public nint Command;         // command name bytes (UTF-8)
+        public int CommandLength;    // byte length of Command
+        public nint Data;            // JSON params
+        public int DataLength;       // byte length of Data
+        public nint BinaryData;      // raw PCM audio bytes
+        public int BinaryDataLength; // byte length of BinaryData
+    }
+
+    /// <summary>
+    /// Returned by StartAudioStream. Holds the native response (its Data is used as the session handle)
+    /// and the GCHandle that must stay allocated until it is handed to StopAudioStream.
+    /// </summary>
+    internal record AudioStreamSession(Response Response, GCHandle CallbackHandle);
+    // Session lifecycle: start once, push chunks, then stop (which frees the callback handle).
+    AudioStreamSession StartAudioStream(CoreInteropRequest request, CallbackFn transcriptionCallback);
+    Response PushAudioData(CoreInteropRequest request, ReadOnlyMemory<byte> audioData);
+    Response StopAudioStream(CoreInteropRequest request, GCHandle callbackHandle);
 }
diff --git a/sdk_v2/cs/src/Detail/JsonSerializationContext.cs b/sdk_v2/cs/src/Detail/JsonSerializationContext.cs
index 894f9454..3cc079f3 100644
--- a/sdk_v2/cs/src/Detail/JsonSerializationContext.cs
+++ b/sdk_v2/cs/src/Detail/JsonSerializationContext.cs
@@ -33,6 +33,9 @@ namespace Microsoft.AI.Foundry.Local.Detail;
 [JsonSerializable(typeof(IList<FunctionDefinition>))]
 [JsonSerializable(typeof(PropertyDefinition))]
 [JsonSerializable(typeof(IList<PropertyDefinition>))]
+// --- Audio streaming types ---
+[JsonSerializable(typeof(AudioStreamTranscriptionResult))]
+[JsonSerializable(typeof(CoreErrorResponse))]
 [JsonSourceGenerationOptions(DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
                              WriteIndented = false)]
 internal partial class JsonSerializationContext : JsonSerializerContext
diff --git a/sdk_v2/cs/src/IModel.cs b/sdk_v2/cs/src/IModel.cs
index c3acba61..20eca014 100644
--- a/sdk_v2/cs/src/IModel.cs
+++ b/sdk_v2/cs/src/IModel.cs
@@ -67,4 +67,11 @@ Task DownloadAsync(Action<float>? downloadProgress = null,
     /// <param name="ct">Optional cancellation token.</param>
     /// <returns>OpenAI.AudioClient</returns>
     Task<OpenAIAudioClient> GetAudioClientAsync(CancellationToken? ct = null);
+
+    /// <summary>
+    /// Get a client for real-time streaming speech recognition (ASR) with this model.
+    /// </summary>
+    /// <param name="ct">Optional cancellation token.</param>
+    /// <returns>OpenAIAudioStreamingClient for real-time transcription.</returns>
+    Task<OpenAIAudioStreamingClient> GetAudioStreamingClientAsync(CancellationToken? ct = null);
 }
diff --git a/sdk_v2/cs/src/Model.cs b/sdk_v2/cs/src/Model.cs
index 83bcef69..ffe8bb1c 100644
--- a/sdk_v2/cs/src/Model.cs
+++ b/sdk_v2/cs/src/Model.cs
@@ -114,6 +114,11 @@ public async Task<OpenAIAudioClient> GetAudioClientAsync(CancellationToken? ct =
         return await SelectedVariant.GetAudioClientAsync(ct).ConfigureAwait(false);
     }
 
+    /// <summary>Forwards to the selected variant's streaming client factory.</summary>
+    public async Task<OpenAIAudioStreamingClient> GetAudioStreamingClientAsync(CancellationToken? ct = null)
+        => await SelectedVariant.GetAudioStreamingClientAsync(ct)
+                                .ConfigureAwait(false);
+
     public async Task UnloadAsync(CancellationToken? ct = null)
     {
         await SelectedVariant.UnloadAsync(ct).ConfigureAwait(false);
diff --git a/sdk_v2/cs/src/ModelVariant.cs b/sdk_v2/cs/src/ModelVariant.cs
index 6ca7cda7..d5285c1c 100644
--- a/sdk_v2/cs/src/ModelVariant.cs
+++ b/sdk_v2/cs/src/ModelVariant.cs
@@ -190,4 +190,21 @@ private async Task<OpenAIAudioClient> GetAudioClientImplAsync(CancellationToken?
 
         return new OpenAIAudioClient(Id);
     }
+
+    public async Task<OpenAIAudioStreamingClient> GetAudioStreamingClientAsync(CancellationToken? ct = null)
+    {
+        var pending = Utils.CallWithExceptionHandling(() => GetAudioStreamingClientImplAsync(ct),
+                                                      "Error getting audio streaming client for model", _logger);
+        return await pending.ConfigureAwait(false);
+    }
+
+    // Refuses to create a client for a model that is not loaded; the client itself only receives the model id.
+    private async Task<OpenAIAudioStreamingClient> GetAudioStreamingClientImplAsync(CancellationToken? ct = null)
+    {
+        if (!await IsLoadedAsync(ct).ConfigureAwait(false))
+        {
+            throw new FoundryLocalException($"Model {Id} is not loaded. Call LoadAsync first.");
+        }
+        return new OpenAIAudioStreamingClient(Id);
+    }
 }
diff --git a/sdk_v2/cs/src/OpenAI/AudioStreamTranscriptionTypes.cs b/sdk_v2/cs/src/OpenAI/AudioStreamTranscriptionTypes.cs
new file mode 100644
index 00000000..7736cb47
--- /dev/null
+++ b/sdk_v2/cs/src/OpenAI/AudioStreamTranscriptionTypes.cs
@@ -0,0 +1,65 @@
+namespace Microsoft.AI.Foundry.Local;
+
+using System.Text.Json;
+using System.Text.Json.Serialization;
+using Microsoft.AI.Foundry.Local.Detail;
+
+/// <summary>One transcription update from a streaming session (interim or final).</summary>
+/// <remarks>Deserialized from the JSON payload handed to the transcription callback.</remarks>
+public record AudioStreamTranscriptionResult
+{
+    /// <summary>True for a final result for this segment, false for a partial (interim) one.</summary>
+    [JsonPropertyName("is_final")]
+    public bool IsFinal { get; init; }
+
+    /// <summary>Transcribed text; defaults to an empty string.</summary>
+    [JsonPropertyName("text")]
+    public string Text { get; init; } = string.Empty;
+
+    /// <summary>Offset in the audio stream where this segment starts (seconds), if provided.</summary>
+    [JsonPropertyName("start_time")]
+    public double? StartTime { get; init; }
+
+    /// <summary>Offset in the audio stream where this segment ends (seconds), if provided.</summary>
+    [JsonPropertyName("end_time")]
+    public double? EndTime { get; init; }
+
+    /// <summary>Confidence score (0.0 - 1.0) if available.</summary>
+    [JsonPropertyName("confidence")]
+    public float? Confidence { get; init; }
+
+    /// <summary>Parses a payload; throws <see cref="FoundryLocalException"/> if it deserializes to null.</summary>
+    internal static AudioStreamTranscriptionResult FromJson(string json) =>
+        JsonSerializer.Deserialize(json, JsonSerializationContext.Default.AudioStreamTranscriptionResult)
+        ?? throw new FoundryLocalException("Failed to deserialize AudioStreamTranscriptionResult");
+}
+
+/// <summary>Structured error payload the native core may return as JSON in an error string.</summary>
+internal record CoreErrorResponse
+{
+    [JsonPropertyName("code")]
+    public string Code { get; init; } = "";
+
+    [JsonPropertyName("message")]
+    public string Message { get; init; } = "";
+
+    [JsonPropertyName("isTransient")]
+    public bool IsTransient { get; init; }
+
+    /// <summary>
+    /// Attempt to parse a native error string as structured JSON.
+    /// Returns null if the input is null, is not valid JSON, or doesn't match the schema;
+    /// callers should treat null as a permanent/unknown error. Unrelated exceptions propagate.
+    /// </summary>
+    internal static CoreErrorResponse? TryParse(string errorString)
+    {
+        try
+        {
+            return JsonSerializer.Deserialize(errorString, JsonSerializationContext.Default.CoreErrorResponse);
+        }
+        catch (Exception ex) when (ex is JsonException or ArgumentNullException)
+        {
+            return null; // unstructured error — treat as permanent
+        }
+    }
+}
\ No newline at end of file
diff --git a/sdk_v2/cs/src/OpenAI/AudioStreamingClient.cs b/sdk_v2/cs/src/OpenAI/AudioStreamingClient.cs
new file mode 100644
index 00000000..27e1bbea
--- /dev/null
+++ b/sdk_v2/cs/src/OpenAI/AudioStreamingClient.cs
@@ -0,0 +1,399 @@
+// --------------------------------------------------------------------------------------------------------------------
+// <copyright company="Microsoft">
+//   Copyright (c) Microsoft. All rights reserved.
+// </copyright>
+// --------------------------------------------------------------------------------------------------------------------
+
+namespace Microsoft.AI.Foundry.Local;
+
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Globalization;
+using System.Threading.Channels;
+using Microsoft.AI.Foundry.Local.Detail;
+using Microsoft.Extensions.Logging;
+
+
+/// <summary>
+/// Client for real-time audio streaming ASR (Automatic Speech Recognition).
+/// Audio data from a microphone (or other source) is pushed in as PCM chunks,
+/// and partial transcription results are returned as an async stream.
+/// </summary>
+/// <remarks>
+/// Typical flow: StartAsync, then PushAudioDataAsync / GetTranscriptionStream, then StopAsync or DisposeAsync.
+/// Thread safety: PushAudioDataAsync can be called from any thread (including high-frequency
+/// audio callbacks). Pushes are internally serialized via a bounded channel to prevent
+/// unbounded memory growth and ensure ordering.
+/// </remarks>
+public sealed class OpenAIAudioStreamingClient : IAsyncDisposable
+{
+    private readonly string _modelId;
+    private readonly ICoreInterop _coreInterop = FoundryLocalManager.Instance.CoreInterop;
+    private readonly ILogger _logger = FoundryLocalManager.Instance.Logger;
+
+    // Session state — written under _lock; _started/_stopped are also read without it (PushAudioDataAsync, DisposeAsync)
+    private readonly AsyncLock _lock = new();
+    private string? _sessionHandle;
+    private GCHandle _callbackHandle;
+    private bool _started;
+    private bool _stopped;
+
+    // Output channel: native callback writes, user reads via GetTranscriptionStream
+    private Channel<AudioStreamTranscriptionResult>? _outputChannel;
+
+    // Internal push queue: user writes audio chunks, background loop drains to native core.
+    // Bounded to prevent unbounded memory growth if native core is slower than real-time.
+    private Channel<ReadOnlyMemory<byte>>? _pushChannel;
+    private Task? _pushLoopTask;
+
+    // Dedicated CTS for the push loop — decoupled from StartAsync's caller token.
+    // Cancelled only during StopAsync/DisposeAsync to allow clean drain.
+    private CancellationTokenSource? _sessionCts;
+
+    // Stored as a field so the delegate is not garbage collected while native core holds a reference.
+    private ICoreInterop.CallbackFn? _transcriptionCallback;
+
+    // Snapshot of settings captured at StartAsync — prevents mutation after session starts.
+    private StreamingAudioSettings? _activeSettings;
+
+    /// <summary>
+    /// Audio format settings for the streaming session.
+    /// Configure via <see cref="Settings"/> before calling <see cref="StartAsync"/>: StartAsync takes a
+    /// snapshot, so later changes do not affect a session that is already running.
+    /// </summary>
+    public record StreamingAudioSettings
+    {
+        /// <summary>PCM sample rate in Hz. Default: 16000.</summary>
+        public int SampleRate { get; set; } = 16000;
+
+        /// <summary>Number of audio channels. Default: 1 (mono).</summary>
+        public int Channels { get; set; } = 1;
+
+        /// <summary>Bits per sample. Default: 16.</summary>
+        public int BitsPerSample { get; set; } = 16;
+
+        /// <summary>Optional BCP-47 language hint (e.g., "en", "zh"). Sent to the native core only when set.</summary>
+        public string? Language { get; set; }
+
+        /// <summary>
+        /// Maximum number of audio chunks buffered in the internal push queue.
+        /// If the queue is full, PushAudioDataAsync will asynchronously wait.
+        /// Default: 100 (~3 seconds of audio at typical chunk sizes).
+        /// </summary>
+        public int PushQueueCapacity { get; set; } = 100;
+
+        internal StreamingAudioSettings Snapshot() => this with { }; // record copy
+    }
+
+    public StreamingAudioSettings Settings { get; } = new();
+
+    /// <summary>
+    /// Creates a client bound to <paramref name="modelId"/>; no session is started until <see cref="StartAsync"/>.
+    /// </summary>
+    internal OpenAIAudioStreamingClient(string modelId) => _modelId = modelId;
+
+    /// <summary>
+    /// Start a real-time audio streaming session; call before <see cref="PushAudioDataAsync"/> or <see cref="GetTranscriptionStream"/>.
+    /// Settings are snapshotted by this call; later changes do not affect the running session.
+    /// </summary>
+    /// <param name="ct">Cancellation token, passed to the Task.Run that performs the native start call.</param>
+    public async Task StartAsync(CancellationToken ct = default)
+    {
+        using var disposable = await _lock.LockAsync().ConfigureAwait(false);
+
+        if (_started)
+        {
+            throw new FoundryLocalException("Streaming session already started. Call StopAsync first.");
+        }
+
+        _activeSettings = Settings.Snapshot(); // freeze settings for this session
+
+        var outputChannel = Channel.CreateUnbounded<AudioStreamTranscriptionResult>(
+            new UnboundedChannelOptions
+            {
+                SingleWriter = true,  // only the native callback writes
+                SingleReader = true,
+                AllowSynchronousContinuations = true
+            });
+        _outputChannel = outputChannel;
+
+        _pushChannel = Channel.CreateBounded<ReadOnlyMemory<byte>>(
+            new BoundedChannelOptions(_activeSettings.PushQueueCapacity)
+            {
+                SingleReader = true,   // only the push loop reads
+                SingleWriter = false,  // multiple threads may push audio data
+                FullMode = BoundedChannelFullMode.Wait
+            });
+
+        var request = new CoreInteropRequest
+        {
+            Params = new Dictionary<string, string>
+            {
+                { "Model", _modelId },
+                { "SampleRate", _activeSettings.SampleRate.ToString(CultureInfo.InvariantCulture) },
+                { "Channels", _activeSettings.Channels.ToString(CultureInfo.InvariantCulture) },
+                { "BitsPerSample", _activeSettings.BitsPerSample.ToString(CultureInfo.InvariantCulture) },
+            }
+        };
+
+        if (_activeSettings.Language != null)
+        {
+            request.Params["Language"] = _activeSettings.Language;
+        }
+
+        // The callback closes over this session's channel (a local, not the mutable field); the field roots the delegate.
+        _transcriptionCallback = (callbackData) =>
+        {
+            try
+            {
+                var result = AudioStreamTranscriptionResult.FromJson(callbackData);
+                outputChannel.Writer.TryWrite(result); // unbounded: fails only once the channel is completed
+            }
+            catch (Exception ex)
+            {
+                _logger.LogError(ex, "Error processing audio stream transcription callback");
+                outputChannel.Writer.TryComplete(
+                    new FoundryLocalException("Error processing audio streaming callback.", ex, _logger));
+            }
+        };
+
+        // StartAudioStream is synchronous (P/Invoke) — run on thread pool
+        var session = await Task.Run(
+            () => _coreInterop.StartAudioStream(request, _transcriptionCallback), ct)
+            .ConfigureAwait(false);
+
+        var startError = session.Response.Error;
+        if (startError != null || session.Response.Data == null)
+        {
+            // No usable session either way: release the callback handle and close the output stream.
+            if (session.CallbackHandle.IsAllocated)
+            {
+                session.CallbackHandle.Free();
+            }
+            outputChannel.Writer.TryComplete();
+            throw new FoundryLocalException(
+                startError != null
+                    ? $"Error starting audio stream session: {startError}"
+                    : "Native core did not return a session handle.", _logger);
+        }
+
+        _sessionHandle = session.Response.Data;
+        _callbackHandle = session.CallbackHandle;
+        _started = true;
+        _stopped = false;
+
+        // Use a dedicated CTS for the push loop — NOT the caller's ct.
+#pragma warning disable IDISP003 // Dispose previous before re-assigning
+        _sessionCts = new CancellationTokenSource();
+#pragma warning restore IDISP003
+#pragma warning disable IDISP013 // Await in using
+        _pushLoopTask = Task.Run(() => PushLoopAsync(_sessionCts.Token), CancellationToken.None);
+#pragma warning restore IDISP013
+    }
+
+    /// <summary>
+    /// Queue a raw PCM chunk for delivery to the native core. The data is copied, so the caller may reuse its
+    /// buffer. Can be called from any thread; waits asynchronously while the push queue is full.
+    /// </summary>
+    /// <exception cref="FoundryLocalException">No session is active, or it ended while this call was waiting.</exception>
+    public async ValueTask PushAudioDataAsync(ReadOnlyMemory<byte> pcmData, CancellationToken ct = default)
+    {
+        if (!_started || _stopped || _pushChannel is not { } pushChannel)
+        {
+            throw new FoundryLocalException("No active streaming session. Call StartAsync first.");
+        }
+        try
+        {
+            await pushChannel.Writer.WriteAsync(pcmData.ToArray(), ct).ConfigureAwait(false);
+        }
+        catch (ChannelClosedException ex)
+        {
+            throw new FoundryLocalException("No active streaming session. Call StartAsync first.", ex, _logger);
+        }
+    }
+
+    /// <summary>
+    /// Drains the push queue, sending chunks to native core one at a time with retry for transient errors.
+    /// On any exit the queue is completed, so writers fail fast instead of waiting on a queue nobody reads.
+    /// </summary>
+    private async Task PushLoopAsync(CancellationToken ct)
+    {
+        const int maxRetries = 3;
+        var initialRetryDelay = TimeSpan.FromMilliseconds(50);
+
+        try
+        {
+            await foreach (var audioData in _pushChannel!.Reader.ReadAllAsync(ct).ConfigureAwait(false))
+            {
+                var request = new CoreInteropRequest
+                {
+                    Params = new Dictionary<string, string> { { "SessionHandle", _sessionHandle! } }
+                };
+
+                for (int attempt = 0; ; attempt++)
+                {
+                    var response = _coreInterop.PushAudioData(request, audioData);
+                    if (response.Error == null)
+                    {
+                        break; // chunk delivered
+                    }
+                    // Parse structured error to determine transient vs permanent
+                    var errorInfo = CoreErrorResponse.TryParse(response.Error);
+                    if (errorInfo?.IsTransient == true && attempt < maxRetries)
+                    {
+                        var delay = initialRetryDelay * Math.Pow(2, attempt);
+                        _logger.LogWarning(
+                            "Transient push error (attempt {Attempt}/{Max}): {Code}. Retrying in {Delay}ms",
+                            attempt + 1, maxRetries, errorInfo.Code, delay.TotalMilliseconds);
+                        await Task.Delay(delay, ct).ConfigureAwait(false);
+                        continue;
+                    }
+
+                    // Permanent error or retries exhausted — terminate the session
+                    var fatalEx = new FoundryLocalException(
+                        $"Push failed permanently (code={errorInfo?.Code ?? "UNKNOWN"}): {response.Error}",
+                        _logger);
+                    _logger.LogError("Terminating push loop due to permanent push failure: {Error}",
+                                     response.Error);
+                    _outputChannel?.Writer.TryComplete(fatalEx);
+                    return; // exit push loop
+                }
+            }
+        }
+        catch (OperationCanceledException)
+        {
+            // Expected on cancellation — push loop exits cleanly
+        }
+        catch (Exception ex)
+        {
+            _logger.LogError(ex, "Push loop terminated with unexpected error");
+            _outputChannel?.Writer.TryComplete(
+                new FoundryLocalException("Push loop terminated unexpectedly.", ex, _logger));
+        }
+        finally
+        {
+            // Without this, a fatal exit would leave PushAudioDataAsync blocked once the bounded queue fills.
+            _pushChannel?.Writer.TryComplete();
+        }
+    }
+
+    /// <summary>
+    /// Stream transcription results as the native ASR engine produces them.
+    /// The session check runs on first enumeration, not when this method is called (iterator semantics).
+    /// </summary>
+    /// <param name="ct">Cancellation token.</param>
+    /// <returns>Async enumerable of transcription results.</returns>
+    public async IAsyncEnumerable<AudioStreamTranscriptionResult> GetTranscriptionStream(
+        [EnumeratorCancellation] CancellationToken ct = default)
+    {
+        var channel = _outputChannel
+            ?? throw new FoundryLocalException("No active streaming session. Call StartAsync first.");
+
+        // Enumeration ends when the output channel is completed.
+        var results = channel.Reader.ReadAllAsync(ct).ConfigureAwait(false);
+        await foreach (var transcription in results)
+        {
+            yield return transcription;
+        }
+    }
+
+    /// <summary>
+    /// Signal end-of-audio and stop the streaming session.
+    /// Any remaining buffered audio in the push queue will be drained to native core first.
+    /// Final results are delivered through <see cref="GetTranscriptionStream"/> before it completes.
+    /// </summary>
+    /// <param name="ct">Cancellation token; the native stop is still attempted if it fires.</param>
+    public async Task StopAsync(CancellationToken ct = default)
+    {
+        using var disposable = await _lock.LockAsync().ConfigureAwait(false);
+
+        if (!_started || _stopped)
+        {
+            return; // already stopped or never started
+        }
+
+        _stopped = true;
+
+        // 1. Complete the push channel so the push loop drains remaining items and exits
+        _pushChannel?.Writer.TryComplete();
+
+        // 2. Wait for the push loop to finish draining
+        if (_pushLoopTask != null)
+        {
+            await _pushLoopTask.ConfigureAwait(false);
+        }
+
+        // 3. Cancel the session CTS (no-op if push loop already exited)
+        _sessionCts?.Cancel();
+
+        // 4. Tell native core to flush and finalize.
+        //    This MUST happen even if ct is cancelled — otherwise native session leaks.
+        var request = new CoreInteropRequest
+        {
+            Params = new Dictionary<string, string> { { "SessionHandle", _sessionHandle! } }
+        };
+
+        ICoreInterop.Response? response = null;
+        try
+        {
+            response = await Task.Run(
+                () => _coreInterop.StopAudioStream(request, _callbackHandle), ct)
+                .ConfigureAwait(false);
+        }
+        catch (OperationCanceledException) when (ct.IsCancellationRequested)
+        {
+            // ct fired, but we MUST still stop the native session to avoid a leak.
+            _logger.LogWarning("StopAsync cancelled — performing best-effort native session stop.");
+            try
+            {
+                response = await Task.Run(
+                    () => _coreInterop.StopAudioStream(request, _callbackHandle))
+                    .ConfigureAwait(false);
+            }
+            catch (Exception cleanupEx)
+            {
+                _logger.LogError(cleanupEx, "Best-effort native session stop failed.");
+            }
+
+            throw; // Re-throw the cancellation after cleanup
+        }
+        finally
+        {
+            _sessionHandle = null;
+            _transcriptionCallback = null;
+            _callbackHandle = default; // handed to StopAudioStream above; drop the stale copy so it cannot be freed twice
+            _started = false;
+            _sessionCts?.Dispose();
+            _sessionCts = null;
+            // 5. Complete the output channel AFTER StopAudioStream returns
+            _outputChannel?.Writer.TryComplete();
+        }
+
+        if (response?.Error != null)
+        {
+            throw new FoundryLocalException(
+                $"Error stopping audio stream session: {response.Error}", _logger);
+        }
+    }
+
+    /// <summary>
+    /// Stops a session that is still active, then disposes the session CTS and the lock. Stop errors are logged.
+    /// </summary>
+    public async ValueTask DisposeAsync()
+    {
+        try
+        {
+            await (_started && !_stopped ? StopAsync() : Task.CompletedTask).ConfigureAwait(false);
+        }
+        catch (Exception cleanupError)
+        {
+            // DisposeAsync must never throw — log and swallow
+            _logger.LogWarning(cleanupError, "Error during DisposeAsync cleanup.");
+        }
+        finally
+        {
+            _sessionCts?.Dispose();
+            _lock.Dispose();
+        }
+    }
+}
\ No newline at end of file
diff --git a/sdk_v2/cs/test/FoundryLocal.Tests/AudioStreamingClientTests.cs b/sdk_v2/cs/test/FoundryLocal.Tests/AudioStreamingClientTests.cs
new file mode 100644
index 00000000..3a0e2ef7
--- /dev/null
+++ b/sdk_v2/cs/test/FoundryLocal.Tests/AudioStreamingClientTests.cs
@@ -0,0 +1,221 @@
+// --------------------------------------------------------------------------------------------------------------------
+// <copyright company="Microsoft">
+//   Copyright (c) Microsoft. All rights reserved.
+// </copyright>
+// --------------------------------------------------------------------------------------------------------------------
+
+namespace Microsoft.AI.Foundry.Local.Tests;
+
+using System.Threading.Tasks;
+using Microsoft.AI.Foundry.Local.Detail;
+
+/// <summary>
+/// Unit tests for audio streaming types and settings.
+/// These test the serialization, deserialization, and settings behavior
+/// without requiring the native library or a loaded model.
+/// </summary>
+internal sealed class AudioStreamingClientTests
+{
+    // --- AudioStreamTranscriptionResult deserialization tests ---
+
+    [Test]
+    public async Task AudioStreamTranscriptionResult_FromJson_FinalResult_AllFields()
+    {
+        var json = """{"text":"hello world","is_final":true,"start_time":0.0,"end_time":1.5,"confidence":0.95}""";
+
+        var result = AudioStreamTranscriptionResult.FromJson(json);
+
+        await Assert.That(result).IsNotNull();
+        await Assert.That(result.Text).IsEqualTo("hello world");
+        await Assert.That(result.IsFinal).IsTrue();
+        await Assert.That(result.StartTime).IsEqualTo(0.0);
+        await Assert.That(result.EndTime).IsEqualTo(1.5);
+        await Assert.That(result.Confidence).IsEqualTo(0.95f);
+    }
+
+    [Test]
+    public async Task AudioStreamTranscriptionResult_FromJson_PartialResult_OptionalFieldsNull()
+    {
+        var json = """{"text":"hel","is_final":false}""";
+
+        var result = AudioStreamTranscriptionResult.FromJson(json);
+
+        await Assert.That(result).IsNotNull();
+        await Assert.That(result.Text).IsEqualTo("hel");
+        await Assert.That(result.IsFinal).IsFalse();
+        await Assert.That(result.StartTime).IsNull();
+        await Assert.That(result.EndTime).IsNull();
+        await Assert.That(result.Confidence).IsNull();
+    }
+
+    [Test]
+    public async Task AudioStreamTranscriptionResult_FromJson_EmptyText()
+    {
+        var json = """{"text":"","is_final":false}""";
+
+        var result = AudioStreamTranscriptionResult.FromJson(json);
+
+        await Assert.That(result).IsNotNull();
+        await Assert.That(result.Text).IsEqualTo(string.Empty);
+        await Assert.That(result.IsFinal).IsFalse();
+    }
+
+    [Test]
+    public async Task AudioStreamTranscriptionResult_FromJson_InvalidJson_Throws()
+    {
+        FoundryLocalException? caught = null;
+        try
+        {
+            AudioStreamTranscriptionResult.FromJson("not valid json");
+        }
+        catch (FoundryLocalException ex)
+        {
+            caught = ex;
+        }
+        catch (System.Text.Json.JsonException)
+        {
+            // Also acceptable — JsonSerializer may throw before our wrapper
+            caught = new FoundryLocalException("json parse error");
+        }
+
+        await Assert.That(caught).IsNotNull();
+    }
+
+    [Test]
+    public async Task AudioStreamTranscriptionResult_FromJson_EmptyJson_Throws()
+    {
+        FoundryLocalException? caught = null;
+        try
+        {
+            AudioStreamTranscriptionResult.FromJson("");
+        }
+        catch (FoundryLocalException ex)
+        {
+            caught = ex;
+        }
+        catch (System.Text.Json.JsonException)
+        {
+            caught = new FoundryLocalException("json parse error");
+        }
+
+        await Assert.That(caught).IsNotNull();
+    }
+
+    // --- CoreErrorResponse parsing tests ---
+
+    [Test]
+    public async Task CoreErrorResponse_TryParse_TransientError_Succeeds()
+    {
+        var json = """{"code":"ASR_BACKEND_OVERLOADED","message":"try again later","isTransient":true}""";
+
+        var error = CoreErrorResponse.TryParse(json);
+
+        await Assert.That(error).IsNotNull();
+        await Assert.That(error!.Code).IsEqualTo("ASR_BACKEND_OVERLOADED");
+        await Assert.That(error.Message).IsEqualTo("try again later");
+        await Assert.That(error.IsTransient).IsTrue();
+    }
+
+    [Test]
+    public async Task CoreErrorResponse_TryParse_PermanentError_Succeeds()
+    {
+        var json = """{"code":"ASR_SESSION_NOT_FOUND","message":"session gone","isTransient":false}""";
+
+        var error = CoreErrorResponse.TryParse(json);
+
+        await Assert.That(error).IsNotNull();
+        await Assert.That(error!.Code).IsEqualTo("ASR_SESSION_NOT_FOUND");
+        await Assert.That(error.IsTransient).IsFalse();
+    }
+
+    [Test]
+    public async Task CoreErrorResponse_TryParse_InvalidJson_ReturnsNull()
+    {
+        var error = CoreErrorResponse.TryParse("not json at all");
+
+        await Assert.That(error).IsNull();
+    }
+
+    [Test]
+    public async Task CoreErrorResponse_TryParse_EmptyString_ReturnsNull()
+    {
+        var error = CoreErrorResponse.TryParse("");
+
+        await Assert.That(error).IsNull();
+    }
+
+    [Test]
+    public async Task CoreErrorResponse_TryParse_ValidJsonWrongShape_ReturnsDefaultValues()
+    {
+        // Valid JSON but no matching fields — should deserialize with defaults
+        var json = """{"unrelated":"field"}""";
+
+        var error = CoreErrorResponse.TryParse(json);
+
+        await Assert.That(error).IsNotNull();
+        await Assert.That(error!.Code).IsEqualTo("");
+        await Assert.That(error.IsTransient).IsFalse();
+    }
+
+    // --- StreamingAudioSettings tests ---
+
+    [Test]
+    public async Task StreamingAudioSettings_Defaults_AreCorrect()
+    {
+        var settings = new OpenAIAudioStreamingClient.StreamingAudioSettings();
+
+        await Assert.That(settings.SampleRate).IsEqualTo(16000);
+        await Assert.That(settings.Channels).IsEqualTo(1);
+        await Assert.That(settings.BitsPerSample).IsEqualTo(16);
+        await Assert.That(settings.Language).IsNull();
+        await Assert.That(settings.PushQueueCapacity).IsEqualTo(100);
+    }
+
+    [Test]
+    public async Task StreamingAudioSettings_Snapshot_IsIndependentCopy()
+    {
+        var settings = new OpenAIAudioStreamingClient.StreamingAudioSettings
+        {
+            SampleRate = 44100,
+            Channels = 2,
+            BitsPerSample = 32,
+            Language = "zh",
+            PushQueueCapacity = 50
+        };
+
+        var snapshot = settings.Snapshot();
+
+        // Modify original after snapshot
+        settings.SampleRate = 8000;
+        settings.Channels = 1;
+        settings.Language = "fr";
+        settings.PushQueueCapacity = 200;
+
+        // Snapshot should retain original values
+        await Assert.That(snapshot.SampleRate).IsEqualTo(44100);
+        await Assert.That(snapshot.Channels).IsEqualTo(2);
+        await Assert.That(snapshot.BitsPerSample).IsEqualTo(32);
+        await Assert.That(snapshot.Language).IsEqualTo("zh");
+        await Assert.That(snapshot.PushQueueCapacity).IsEqualTo(50);
+    }
+
+    [Test]
+    public async Task StreamingAudioSettings_Snapshot_DoesNotAffectOriginal()
+    {
+        var settings = new OpenAIAudioStreamingClient.StreamingAudioSettings
+        {
+            SampleRate = 16000,
+            Language = "en"
+        };
+
+        var snapshot = settings.Snapshot();
+
+        // Modify snapshot
+        snapshot.SampleRate = 48000;
+        snapshot.Language = "de";
+
+        // Original should be unaffected
+        await Assert.That(settings.SampleRate).IsEqualTo(16000);
+        await Assert.That(settings.Language).IsEqualTo("en");
+    }
+}

From 9a1578c54802ba0094eba54766cc6ecf50a4b1af Mon Sep 17 00:00:00 2001
From: ruiren_microsoft <ruiren@microsoft.com>
Date: Thu, 5 Mar 2026 11:21:13 -0800
Subject: [PATCH 02/22] support audio streaming-js

---
 sdk_v2/js/src/imodel.ts                      |   8 +
 sdk_v2/js/src/index.ts                       |   2 +
 sdk_v2/js/src/model.ts                       |   9 +
 sdk_v2/js/src/modelVariant.ts                |   9 +
 sdk_v2/js/src/openai/audioStreamingClient.ts | 440 +++++++++++++++++++
 sdk_v2/js/src/openai/audioStreamingTypes.ts  |  52 +++
 6 files changed, 520 insertions(+)
 create mode 100644 sdk_v2/js/src/openai/audioStreamingClient.ts
 create mode 100644 sdk_v2/js/src/openai/audioStreamingTypes.ts

diff --git a/sdk_v2/js/src/imodel.ts b/sdk_v2/js/src/imodel.ts
index 5797ce3b..7268fa1d 100644
--- a/sdk_v2/js/src/imodel.ts
+++ b/sdk_v2/js/src/imodel.ts
@@ -1,5 +1,6 @@
 import { ChatClient } from './openai/chatClient.js';
 import { AudioClient } from './openai/audioClient.js';
+import { AudioStreamingClient } from './openai/audioStreamingClient.js';
 
 export interface IModel {
     get id(): string;
@@ -15,4 +16,11 @@ export interface IModel {
 
     createChatClient(): ChatClient;
     createAudioClient(): AudioClient;
+
+    /**
+     * Creates an AudioStreamingClient for real-time audio streaming ASR.
+     * The model must be loaded before calling this method.
+     * @returns An AudioStreamingClient instance.
+     */
+    createAudioStreamingClient(): AudioStreamingClient;
 }
diff --git a/sdk_v2/js/src/index.ts b/sdk_v2/js/src/index.ts
index 1af50af8..4061084e 100644
--- a/sdk_v2/js/src/index.ts
+++ b/sdk_v2/js/src/index.ts
@@ -6,6 +6,8 @@ export { ModelVariant } from './modelVariant.js';
 export type { IModel } from './imodel.js';
 export { ChatClient, ChatClientSettings } from './openai/chatClient.js';
 export { AudioClient, AudioClientSettings } from './openai/audioClient.js';
+export { AudioStreamingClient, StreamingAudioSettings } from './openai/audioStreamingClient.js';
+export type { AudioStreamTranscriptionResult, CoreErrorResponse } from './openai/audioStreamingTypes.js';
 export { ModelLoadManager } from './detail/modelLoadManager.js';
 /** @internal */
 export { CoreInterop } from './detail/coreInterop.js';
diff --git a/sdk_v2/js/src/model.ts b/sdk_v2/js/src/model.ts
index c2848524..679de121 100644
--- a/sdk_v2/js/src/model.ts
+++ b/sdk_v2/js/src/model.ts
@@ -1,6 +1,7 @@
 import { ModelVariant } from './modelVariant.js';
 import { ChatClient } from './openai/chatClient.js';
 import { AudioClient } from './openai/audioClient.js';
+import { AudioStreamingClient } from './openai/audioStreamingClient.js';
 import { IModel } from './imodel.js';
 
 /**
@@ -146,4 +147,12 @@ export class Model implements IModel {
     public createAudioClient(): AudioClient {
         return this.selectedVariant.createAudioClient();
     }
+
+    /**
+     * Creates an AudioStreamingClient for real-time audio streaming ASR.
+     * @returns An AudioStreamingClient instance.
+     */
+    public createAudioStreamingClient(): AudioStreamingClient {
+        return this.selectedVariant.createAudioStreamingClient();
+    }
 }
diff --git a/sdk_v2/js/src/modelVariant.ts b/sdk_v2/js/src/modelVariant.ts
index 7c8b8023..b69f0a45 100644
--- a/sdk_v2/js/src/modelVariant.ts
+++ b/sdk_v2/js/src/modelVariant.ts
@@ -3,6 +3,7 @@ import { ModelLoadManager } from './detail/modelLoadManager.js';
 import { ModelInfo } from './types.js';
 import { ChatClient } from './openai/chatClient.js';
 import { AudioClient } from './openai/audioClient.js';
+import { AudioStreamingClient } from './openai/audioStreamingClient.js';
 import { IModel } from './imodel.js';
 
 /**
@@ -127,4 +128,12 @@ export class ModelVariant implements IModel {
     public createAudioClient(): AudioClient {
         return new AudioClient(this._modelInfo.id, this.coreInterop);
     }
+
+    /**
+     * Creates an AudioStreamingClient for real-time audio streaming ASR.
+     * @returns An AudioStreamingClient instance.
+     */
+    public createAudioStreamingClient(): AudioStreamingClient {
+        return new AudioStreamingClient(this._modelInfo.id, this.coreInterop);
+    }
 }
diff --git a/sdk_v2/js/src/openai/audioStreamingClient.ts b/sdk_v2/js/src/openai/audioStreamingClient.ts
new file mode 100644
index 00000000..f8213161
--- /dev/null
+++ b/sdk_v2/js/src/openai/audioStreamingClient.ts
@@ -0,0 +1,440 @@
+import { CoreInterop } from '../detail/coreInterop.js';
+import { AudioStreamTranscriptionResult, tryParseCoreError } from './audioStreamingTypes.js';
+
+/**
+ * Audio format settings for a streaming session.
+ * Must be configured before calling start().
+ * Settings are frozen once the session starts.
+ */
+export class StreamingAudioSettings {
+    /** PCM sample rate in Hz. Default: 16000. */
+    sampleRate: number = 16000;
+    /** Number of audio channels. Default: 1 (mono). */
+    channels: number = 1;
+    /** Bits per sample. Default: 16. */
+    bitsPerSample: number = 16;
+    /** Optional BCP-47 language hint (e.g., "en", "zh"). */
+    language?: string;
+    /** Maximum number of audio chunks buffered in the internal push queue. Default: 100. */
+    pushQueueCapacity: number = 100;
+
+    /** @internal Create a frozen copy of these settings. */
+    snapshot(): StreamingAudioSettings {
+        const copy = new StreamingAudioSettings();
+        copy.sampleRate = this.sampleRate;
+        copy.channels = this.channels;
+        copy.bitsPerSample = this.bitsPerSample;
+        copy.language = this.language;
+        copy.pushQueueCapacity = this.pushQueueCapacity;
+        return Object.freeze(copy) as StreamingAudioSettings;
+    }
+}
+
+/**
+ * Internal async queue that acts like C#'s Channel<T>.
+ * Supports a single consumer reading via async iteration and multiple producers writing.
+ * Writes that hit the capacity limit are parked and admitted strictly oldest-first.
+ * @internal
+ */
+class AsyncQueue<T> {
+    private queue: T[] = [];
+    private waitingResolve: ((value: IteratorResult<T>) => void) | null = null;
+    private completed = false;
+    private completionError: Error | null = null;
+    private maxCapacity: number;
+    /** Writes parked because the queue was full, oldest first. */
+    private pendingWrites: Array<{ item: T; resolve: () => void }> = [];
+
+    constructor(maxCapacity: number = Infinity) {
+        this.maxCapacity = Math.max(1, maxCapacity); // a capacity below 1 could never admit a write
+    }
+
+    /** Push an item. If at capacity, waits until space is available. */
+    async write(item: T): Promise<void> {
+        if (this.completed) {
+            throw new Error('Cannot write to a completed queue.');
+        }
+
+        // If someone is waiting to read, deliver directly
+        if (this.waitingResolve) {
+            const resolve = this.waitingResolve;
+            this.waitingResolve = null;
+            resolve({ value: item, done: false });
+            return;
+        }
+
+        // Full, or older writes are still parked: park this item behind them (keeps FIFO order)
+        if (this.pendingWrites.length > 0 || this.queue.length >= this.maxCapacity) {
+            await new Promise<void>((resolve) => {
+                this.pendingWrites.push({ item, resolve });
+            });
+            return; // the reader (or complete()) has already moved the item into the queue
+        }
+
+        this.queue.push(item);
+    }
+
+    /** Push an item synchronously (no backpressure wait). Used by native callbacks. */
+    tryWrite(item: T): boolean {
+        if (this.completed) return false;
+
+        if (this.waitingResolve) {
+            const resolve = this.waitingResolve;
+            this.waitingResolve = null;
+            resolve({ value: item, done: false });
+            return true;
+        }
+
+        this.queue.push(item);
+        return true;
+    }
+
+    /** Signal that no more items will be written. */
+    complete(error?: Error): void {
+        if (this.completed) return;
+        this.completed = true;
+        this.completionError = error ?? null;
+
+        // Release every parked writer; keep their items so a draining reader still sees them
+        for (const pending of this.pendingWrites.splice(0)) {
+            this.queue.push(pending.item);
+            pending.resolve();
+        }
+
+        // Wake up any waiting reader
+        if (this.waitingResolve) {
+            const resolve = this.waitingResolve;
+            this.waitingResolve = null;
+            // A result cannot carry a rejection: report done:true, the iterator rethrows completionError
+            resolve({ value: undefined as any, done: true });
+        }
+    }
+
+    get error(): Error | null {
+        return this.completionError;
+    }
+
+    /** Async iterator for consuming items. */
+    async *[Symbol.asyncIterator](): AsyncGenerator<T> {
+        while (true) {
+            // Admit parked writes, oldest first, into whatever capacity has been freed
+            while (this.pendingWrites.length > 0 && this.queue.length < this.maxCapacity) {
+                const pending = this.pendingWrites.shift()!;
+                this.queue.push(pending.item);
+                pending.resolve();
+            }
+
+            if (this.queue.length > 0) {
+                yield this.queue.shift()!;
+                continue;
+            }
+
+            if (this.completed) {
+                if (this.completionError) {
+                    throw this.completionError;
+                }
+                return;
+            }
+
+            // Wait for next item or completion
+            const result = await new Promise<IteratorResult<T>>((resolve) => {
+                this.waitingResolve = resolve;
+            });
+
+            if (result.done) {
+                if (this.completionError) {
+                    throw this.completionError;
+                }
+                return;
+            }
+
+            yield result.value;
+        }
+    }
+}
+
+/**
+ * Client for real-time audio streaming ASR (Automatic Speech Recognition).
+ * Audio data from a microphone (or other source) is pushed in as PCM chunks,
+ * and partial transcription results are returned as an async iterable.
+ *
+ * Thread safety: pushAudioData() can be called from any context.
+ * Pushes are internally queued and serialized to native core one at a time.
+ *
+ * Mirrors the C# OpenAIAudioStreamingClient.
+ */
+export class AudioStreamingClient {
+    private modelId: string;
+    private coreInterop: CoreInterop;
+
+    // Session state
+    private sessionHandle: string | null = null;
+    private started = false;
+    private stopped = false;
+
+    // Output queue: native callback writes, user reads via getTranscriptionStream()
+    private outputQueue: AsyncQueue<AudioStreamTranscriptionResult> | null = null;
+
+    // Internal push queue: user writes audio chunks, push loop drains to native core
+    private pushQueue: AsyncQueue<Uint8Array> | null = null;
+    private pushLoopPromise: Promise<void> | null = null;
+
+    // Frozen settings snapshot
+    private activeSettings: StreamingAudioSettings | null = null;
+
+    // Abort controller for the push loop — decoupled from caller's signal
+    private sessionAbortController: AbortController | null = null;
+
+    // Whether native callback has been registered (for tracking)
+    private nativeCallbackRegistered = false;
+
+    /**
+     * Configuration settings for the streaming session.
+     * Must be configured before calling start(). Settings are frozen after start().
+     */
+    public settings = new StreamingAudioSettings();
+
+    /**
+     * @internal
+     * Restricted to internal use. Users should create clients via Model.createAudioStreamingClient().
+     */
+    constructor(modelId: string, coreInterop: CoreInterop) {
+        this.modelId = modelId;
+        this.coreInterop = coreInterop;
+    }
+
+    /**
+     * Start a real-time audio streaming session.
+     * Must be called before pushAudioData() or getTranscriptionStream().
+     * Settings are frozen after this call.
+     */
+    public async start(): Promise<void> {
+        if (this.started) {
+            throw new Error('Streaming session already started. Call stop() first.');
+        }
+
+        // Freeze settings
+        this.activeSettings = this.settings.snapshot();
+
+        this.outputQueue = new AsyncQueue<AudioStreamTranscriptionResult>();
+        this.pushQueue = new AsyncQueue<Uint8Array>(this.activeSettings.pushQueueCapacity);
+
+        const params: Record<string, string> = {
+            Model: this.modelId,
+            SampleRate: this.activeSettings.sampleRate.toString(),
+            Channels: this.activeSettings.channels.toString(),
+            BitsPerSample: this.activeSettings.bitsPerSample.toString(),
+        };
+
+        if (this.activeSettings.language) {
+            params['Language'] = this.activeSettings.language;
+        }
+
+        // Start session via native core with a callback for transcription results.
+        // executeCommandStreaming registers a callback and calls the native function async.
+        // For audio_stream_start, the native function returns immediately (non-blocking)
+        // and invokes the callback on a native thread whenever partial results are ready.
+        //
+        // However, the current CoreInterop.executeCommandStreaming wraps the call in
+        // execute_command_with_callback which blocks until the command completes.
+        // For audio streaming, we need the start command to return immediately.
+        // We use executeCommand (synchronous) for start, and the callback is registered
+        // by the native core during that call.
+        //
+        // NOTE: This matches the C# pattern where StartAudioStream is synchronous and
+        // the callback is registered during the P/Invoke call. The JS koffi FFI works
+        // similarly — the native function registers our callback pointer and returns.
+
+        try {
+            const response = this.coreInterop.executeCommand("audio_stream_start", {
+                Params: params
+            });
+
+            this.sessionHandle = response;
+            if (!this.sessionHandle) {
+                throw new Error('Native core did not return a session handle.');
+            }
+        } catch (error) {
+            this.outputQueue.complete();
+            throw new Error(
+                `Error starting audio stream session: ${error instanceof Error ? error.message : String(error)}`,
+                { cause: error }
+            );
+        }
+
+        this.started = true;
+        this.stopped = false;
+
+        // Start the background push loop
+        this.sessionAbortController = new AbortController();
+        this.pushLoopPromise = this.pushLoop();
+    }
+
+    /**
+     * Push a chunk of raw PCM audio data to the streaming session.
+     * Can be called from any context. Chunks are internally queued
+     * and serialized to native core one at a time.
+     *
+     * @param pcmData - Raw PCM audio bytes matching the configured format.
+     */
+    public async pushAudioData(pcmData: Uint8Array): Promise<void> {
+        if (!this.started || this.stopped) {
+            throw new Error('No active streaming session. Call start() first.');
+        }
+
+        // Copy the buffer to avoid issues if the caller reuses it
+        const copy = new Uint8Array(pcmData.length);
+        copy.set(pcmData);
+
+        await this.pushQueue!.write(copy);
+    }
+
+    /**
+     * Internal loop that drains the push queue and sends chunks to native core one at a time.
+     * Implements retry for transient native errors and terminates on permanent failures.
+     * @internal
+     */
+    private async pushLoop(): Promise<void> {
+        const maxRetries = 3;
+        const initialRetryDelayMs = 50;
+
+        try {
+            for await (const audioData of this.pushQueue!) {
+                if (this.sessionAbortController?.signal.aborted) break;
+
+                let pushed = false;
+                for (let attempt = 0; attempt <= maxRetries && !pushed; attempt++) {
+                    try {
+                        // Send audio data to native core.
+                        // NOTE(review): only the session handle and byte length are passed here; the PCM
+                        // bytes themselves never reach executeCommand — confirm how native core gets them.
+                        this.coreInterop.executeCommand("audio_stream_push", {
+                            Params: {
+                                SessionHandle: this.sessionHandle!,
+                                AudioDataLength: audioData.length.toString()
+                            }
+                        });
+                        pushed = true;
+                    } catch (error) {
+                        const errorMsg = error instanceof Error ? error.message : String(error);
+                        const errorInfo = tryParseCoreError(errorMsg);
+
+                        if (errorInfo?.isTransient && attempt < maxRetries) {
+                            const delay = initialRetryDelayMs * Math.pow(2, attempt);
+                            console.warn(
+                                `Transient push error (attempt ${attempt + 1}/${maxRetries}): ${errorInfo.code}. Retrying in ${delay}ms`
+                            );
+                            await new Promise(resolve => setTimeout(resolve, delay));
+                            continue;
+                        }
+
+                        // Permanent error or retries exhausted
+                        const fatalError = new Error(
+                            `Push failed permanently (code=${errorInfo?.code ?? 'UNKNOWN'}): ${errorMsg}`,
+                            { cause: error }
+                        );
+                        console.error('Terminating push loop due to permanent push failure:', errorMsg);
+                        this.outputQueue?.complete(fatalError);
+                        return;
+                    }
+                }
+            }
+        } catch (error) {
+            if (this.sessionAbortController?.signal.aborted) {
+                return; // expected on cancellation
+            }
+            const err = error instanceof Error ? error : new Error(String(error));
+            console.error('Push loop terminated with unexpected error:', err.message);
+            this.outputQueue?.complete(new Error('Push loop terminated unexpectedly.', { cause: err }));
+        } finally {
+            // Nothing drains the queue once this loop exits: close it so writers reject instead of blocking
+            this.pushQueue?.complete();
+        }
+    }
+
+    /**
+     * Get the async iterable of transcription results.
+     * Results arrive as the native ASR engine processes audio data.
+     *
+     * Usage:
+     * ```ts
+     * for await (const result of client.getTranscriptionStream()) {
+     *     console.log(result.text);
+     * }
+     * ```
+     */
+    public async *getTranscriptionStream(): AsyncGenerator<AudioStreamTranscriptionResult> {
+        if (!this.outputQueue) {
+            throw new Error('No active streaming session. Call start() first.');
+        }
+
+        for await (const item of this.outputQueue) {
+            yield item;
+        }
+    }
+
+    /**
+     * Signal end-of-audio and stop the streaming session.
+     * Any remaining buffered audio in the push queue will be drained to native core first.
+     * Final results are delivered through getTranscriptionStream() before it completes.
+     */
+    public async stop(): Promise<void> {
+        if (!this.started || this.stopped) {
+            return; // already stopped or never started
+        }
+
+        this.stopped = true;
+
+        // 1. Complete the push queue so the push loop drains remaining items and exits
+        this.pushQueue?.complete();
+
+        // 2. Wait for the push loop to finish draining
+        if (this.pushLoopPromise) {
+            await this.pushLoopPromise;
+        }
+
+        // 3. Abort the session (no-op if push loop already exited)
+        this.sessionAbortController?.abort();
+
+        // 4. Tell native core to flush and finalize
+        let stopError: Error | null = null;
+        try {
+            this.coreInterop.executeCommand("audio_stream_stop", {
+                Params: { SessionHandle: this.sessionHandle! }
+            });
+        } catch (error) {
+            stopError = error instanceof Error ? error : new Error(String(error));
+            console.error('Error stopping audio stream session:', stopError.message);
+        }
+
+        // 5. Clean up state
+        this.sessionHandle = null;
+        this.started = false;
+        this.sessionAbortController = null;
+
+        // 6. Complete the output queue AFTER the native stop so final callbacks are captured
+        this.outputQueue?.complete();
+
+        if (stopError) {
+            throw new Error(
+                `Error stopping audio stream session: ${stopError.message}`,
+                { cause: stopError }
+            );
+        }
+    }
+
+    /**
+     * Dispose the client and stop any active session.
+     * Safe to call multiple times.
+     */
+    public async dispose(): Promise<void> {
+        try {
+            if (this.started && !this.stopped) {
+                await this.stop();
+            }
+        } catch (error) {
+            // dispose must not throw — log and swallow
+            console.warn('Error during dispose cleanup:', error instanceof Error ? error.message : String(error));
+        }
+    }
+}
diff --git a/sdk_v2/js/src/openai/audioStreamingTypes.ts b/sdk_v2/js/src/openai/audioStreamingTypes.ts
new file mode 100644
index 00000000..ced58e10
--- /dev/null
+++ b/sdk_v2/js/src/openai/audioStreamingTypes.ts
@@ -0,0 +1,52 @@
+/**
+ * Types for real-time audio streaming transcription results and structured errors.
+ * Mirrors the C# AudioStreamTranscriptionResult and CoreErrorResponse.
+ */
+
+/**
+ * A transcription result from a real-time audio streaming session.
+ */
+export interface AudioStreamTranscriptionResult {
+    /** Whether this is a partial (interim) or final result for this segment. */
+    is_final: boolean;
+    /** The transcribed text. */
+    text: string;
+    /** Start time offset of this segment in the audio stream (seconds). */
+    start_time?: number | null;
+    /** End time offset of this segment in the audio stream (seconds). */
+    end_time?: number | null;
+    /** Confidence score (0.0 - 1.0) if available. */
+    confidence?: number | null;
+}
+
+/**
+ * Structured error response from native core audio streaming commands.
+ * Used by the push loop to distinguish transient vs permanent failures.
+ * @internal
+ */
+export interface CoreErrorResponse {
+    /** Machine-readable error code. */
+    code: string;
+    /** Human-readable error message. */
+    message: string;
+    /** Whether this error is transient and may succeed on retry. */
+    isTransient: boolean;
+}
+
+/**
+ * Attempt to parse a native error string as a structured CoreErrorResponse.
+ * Returns null if the error is not valid JSON or doesn't match the schema,
+ * which should be treated as a permanent/unknown error.
+ * @internal
+ */
+export function tryParseCoreError(errorString: string): CoreErrorResponse | null {
+    try {
+        const parsed = JSON.parse(errorString);
+        if (typeof parsed.code === 'string' && typeof parsed.isTransient === 'boolean') {
+            return parsed as CoreErrorResponse;
+        }
+        return null;
+    } catch {
+        return null; // unstructured error — treat as permanent
+    }
+}

From 397093637f243e8029b0e14b4540ade1b4ae2310 Mon Sep 17 00:00:00 2001
From: ruiren_microsoft <ruiren@microsoft.com>
Date: Thu, 5 Mar 2026 13:49:49 -0800
Subject: [PATCH 03/22] delete dll mock test

---
 .../AudioStreamingClientTests.cs              | 221 ------------------
 1 file changed, 221 deletions(-)
 delete mode 100644 sdk_v2/cs/test/FoundryLocal.Tests/AudioStreamingClientTests.cs

diff --git a/sdk_v2/cs/test/FoundryLocal.Tests/AudioStreamingClientTests.cs b/sdk_v2/cs/test/FoundryLocal.Tests/AudioStreamingClientTests.cs
deleted file mode 100644
index 3a0e2ef7..00000000
--- a/sdk_v2/cs/test/FoundryLocal.Tests/AudioStreamingClientTests.cs
+++ /dev/null
@@ -1,221 +0,0 @@
-// --------------------------------------------------------------------------------------------------------------------
-// <copyright company="Microsoft">
-//   Copyright (c) Microsoft. All rights reserved.
-// </copyright>
-// --------------------------------------------------------------------------------------------------------------------
-
-namespace Microsoft.AI.Foundry.Local.Tests;
-
-using System.Threading.Tasks;
-using Microsoft.AI.Foundry.Local.Detail;
-
-/// <summary>
-/// Unit tests for audio streaming types and settings.
-/// These test the serialization, deserialization, and settings behavior
-/// without requiring the native library or a loaded model.
-/// </summary>
-internal sealed class AudioStreamingClientTests
-{
-    // --- AudioStreamTranscriptionResult deserialization tests ---
-
-    [Test]
-    public async Task AudioStreamTranscriptionResult_FromJson_FinalResult_AllFields()
-    {
-        var json = """{"text":"hello world","is_final":true,"start_time":0.0,"end_time":1.5,"confidence":0.95}""";
-
-        var result = AudioStreamTranscriptionResult.FromJson(json);
-
-        await Assert.That(result).IsNotNull();
-        await Assert.That(result.Text).IsEqualTo("hello world");
-        await Assert.That(result.IsFinal).IsTrue();
-        await Assert.That(result.StartTime).IsEqualTo(0.0);
-        await Assert.That(result.EndTime).IsEqualTo(1.5);
-        await Assert.That(result.Confidence).IsEqualTo(0.95f);
-    }
-
-    [Test]
-    public async Task AudioStreamTranscriptionResult_FromJson_PartialResult_OptionalFieldsNull()
-    {
-        var json = """{"text":"hel","is_final":false}""";
-
-        var result = AudioStreamTranscriptionResult.FromJson(json);
-
-        await Assert.That(result).IsNotNull();
-        await Assert.That(result.Text).IsEqualTo("hel");
-        await Assert.That(result.IsFinal).IsFalse();
-        await Assert.That(result.StartTime).IsNull();
-        await Assert.That(result.EndTime).IsNull();
-        await Assert.That(result.Confidence).IsNull();
-    }
-
-    [Test]
-    public async Task AudioStreamTranscriptionResult_FromJson_EmptyText()
-    {
-        var json = """{"text":"","is_final":false}""";
-
-        var result = AudioStreamTranscriptionResult.FromJson(json);
-
-        await Assert.That(result).IsNotNull();
-        await Assert.That(result.Text).IsEqualTo(string.Empty);
-        await Assert.That(result.IsFinal).IsFalse();
-    }
-
-    [Test]
-    public async Task AudioStreamTranscriptionResult_FromJson_InvalidJson_Throws()
-    {
-        FoundryLocalException? caught = null;
-        try
-        {
-            AudioStreamTranscriptionResult.FromJson("not valid json");
-        }
-        catch (FoundryLocalException ex)
-        {
-            caught = ex;
-        }
-        catch (System.Text.Json.JsonException)
-        {
-            // Also acceptable — JsonSerializer may throw before our wrapper
-            caught = new FoundryLocalException("json parse error");
-        }
-
-        await Assert.That(caught).IsNotNull();
-    }
-
-    [Test]
-    public async Task AudioStreamTranscriptionResult_FromJson_EmptyJson_Throws()
-    {
-        FoundryLocalException? caught = null;
-        try
-        {
-            AudioStreamTranscriptionResult.FromJson("");
-        }
-        catch (FoundryLocalException ex)
-        {
-            caught = ex;
-        }
-        catch (System.Text.Json.JsonException)
-        {
-            caught = new FoundryLocalException("json parse error");
-        }
-
-        await Assert.That(caught).IsNotNull();
-    }
-
-    // --- CoreErrorResponse parsing tests ---
-
-    [Test]
-    public async Task CoreErrorResponse_TryParse_TransientError_Succeeds()
-    {
-        var json = """{"code":"ASR_BACKEND_OVERLOADED","message":"try again later","isTransient":true}""";
-
-        var error = CoreErrorResponse.TryParse(json);
-
-        await Assert.That(error).IsNotNull();
-        await Assert.That(error!.Code).IsEqualTo("ASR_BACKEND_OVERLOADED");
-        await Assert.That(error.Message).IsEqualTo("try again later");
-        await Assert.That(error.IsTransient).IsTrue();
-    }
-
-    [Test]
-    public async Task CoreErrorResponse_TryParse_PermanentError_Succeeds()
-    {
-        var json = """{"code":"ASR_SESSION_NOT_FOUND","message":"session gone","isTransient":false}""";
-
-        var error = CoreErrorResponse.TryParse(json);
-
-        await Assert.That(error).IsNotNull();
-        await Assert.That(error!.Code).IsEqualTo("ASR_SESSION_NOT_FOUND");
-        await Assert.That(error.IsTransient).IsFalse();
-    }
-
-    [Test]
-    public async Task CoreErrorResponse_TryParse_InvalidJson_ReturnsNull()
-    {
-        var error = CoreErrorResponse.TryParse("not json at all");
-
-        await Assert.That(error).IsNull();
-    }
-
-    [Test]
-    public async Task CoreErrorResponse_TryParse_EmptyString_ReturnsNull()
-    {
-        var error = CoreErrorResponse.TryParse("");
-
-        await Assert.That(error).IsNull();
-    }
-
-    [Test]
-    public async Task CoreErrorResponse_TryParse_ValidJsonWrongShape_ReturnsDefaultValues()
-    {
-        // Valid JSON but no matching fields — should deserialize with defaults
-        var json = """{"unrelated":"field"}""";
-
-        var error = CoreErrorResponse.TryParse(json);
-
-        await Assert.That(error).IsNotNull();
-        await Assert.That(error!.Code).IsEqualTo("");
-        await Assert.That(error.IsTransient).IsFalse();
-    }
-
-    // --- StreamingAudioSettings tests ---
-
-    [Test]
-    public async Task StreamingAudioSettings_Defaults_AreCorrect()
-    {
-        var settings = new OpenAIAudioStreamingClient.StreamingAudioSettings();
-
-        await Assert.That(settings.SampleRate).IsEqualTo(16000);
-        await Assert.That(settings.Channels).IsEqualTo(1);
-        await Assert.That(settings.BitsPerSample).IsEqualTo(16);
-        await Assert.That(settings.Language).IsNull();
-        await Assert.That(settings.PushQueueCapacity).IsEqualTo(100);
-    }
-
-    [Test]
-    public async Task StreamingAudioSettings_Snapshot_IsIndependentCopy()
-    {
-        var settings = new OpenAIAudioStreamingClient.StreamingAudioSettings
-        {
-            SampleRate = 44100,
-            Channels = 2,
-            BitsPerSample = 32,
-            Language = "zh",
-            PushQueueCapacity = 50
-        };
-
-        var snapshot = settings.Snapshot();
-
-        // Modify original after snapshot
-        settings.SampleRate = 8000;
-        settings.Channels = 1;
-        settings.Language = "fr";
-        settings.PushQueueCapacity = 200;
-
-        // Snapshot should retain original values
-        await Assert.That(snapshot.SampleRate).IsEqualTo(44100);
-        await Assert.That(snapshot.Channels).IsEqualTo(2);
-        await Assert.That(snapshot.BitsPerSample).IsEqualTo(32);
-        await Assert.That(snapshot.Language).IsEqualTo("zh");
-        await Assert.That(snapshot.PushQueueCapacity).IsEqualTo(50);
-    }
-
-    [Test]
-    public async Task StreamingAudioSettings_Snapshot_DoesNotAffectOriginal()
-    {
-        var settings = new OpenAIAudioStreamingClient.StreamingAudioSettings
-        {
-            SampleRate = 16000,
-            Language = "en"
-        };
-
-        var snapshot = settings.Snapshot();
-
-        // Modify snapshot
-        snapshot.SampleRate = 48000;
-        snapshot.Language = "de";
-
-        // Original should be unaffected
-        await Assert.That(settings.SampleRate).IsEqualTo(16000);
-        await Assert.That(settings.Language).IsEqualTo("en");
-    }
-}

From ef2e9e04e6be1f9e2320df0fe757fffaedc20ba9 Mon Sep 17 00:00:00 2001
From: ruiren_microsoft <ruiren@microsoft.com>
Date: Thu, 5 Mar 2026 15:51:16 -0800
Subject: [PATCH 04/22] update core api

---
 sdk_v2/cs/src/Detail/CoreInterop.cs          | 119 ++-----------------
 sdk_v2/cs/src/Detail/ICoreInterop.cs         |  10 +-
 sdk_v2/cs/src/OpenAI/AudioStreamingClient.cs |  44 ++-----
 3 files changed, 19 insertions(+), 154 deletions(-)

diff --git a/sdk_v2/cs/src/Detail/CoreInterop.cs b/sdk_v2/cs/src/Detail/CoreInterop.cs
index a178bdca..7def104f 100644
--- a/sdk_v2/cs/src/Detail/CoreInterop.cs
+++ b/sdk_v2/cs/src/Detail/CoreInterop.cs
@@ -160,26 +160,12 @@ private static unsafe partial void CoreExecuteCommandWithCallback(RequestBuffer*
 
     // --- Audio streaming P/Invoke imports ---
 
-    [LibraryImport(LibraryName, EntryPoint = "audio_stream_start")]
+    [LibraryImport(LibraryName, EntryPoint = "execute_command_with_binary")]
     [UnmanagedCallConv(CallConvs = new[] { typeof(System.Runtime.CompilerServices.CallConvCdecl) })]
-    private static unsafe partial void CoreAudioStreamStart(
-        RequestBuffer* request,
-        ResponseBuffer* response,
-        nint callbackPtr,
-        nint userData);
-
-    [LibraryImport(LibraryName, EntryPoint = "audio_stream_push")]
-    [UnmanagedCallConv(CallConvs = new[] { typeof(System.Runtime.CompilerServices.CallConvCdecl) })]
-    private static unsafe partial void CoreAudioStreamPush(
+    private static unsafe partial void CoreExecuteCommandWithBinary(
         StreamingRequestBuffer* request,
         ResponseBuffer* response);
 
-    [LibraryImport(LibraryName, EntryPoint = "audio_stream_stop")]
-    [UnmanagedCallConv(CallConvs = new[] { typeof(System.Runtime.CompilerServices.CallConvCdecl) })]
-    private static unsafe partial void CoreAudioStreamStop(
-        RequestBuffer* request,
-        ResponseBuffer* response);
-
     // helper to capture exceptions in callbacks
     internal class CallbackHelper
     {
@@ -355,77 +341,10 @@ public Task<Response> ExecuteCommandWithCallbackAsync(string commandName, CoreIn
 
     // --- Audio streaming managed implementations ---
 
-    public AudioStreamSession StartAudioStream(CoreInteropRequest request, CallbackFn transcriptionCallback)
+    public Response StartAudioStream(CoreInteropRequest request)
     {
-        try
-        {
-            var commandInputJson = request.ToJson();
-            byte[] commandBytes = System.Text.Encoding.UTF8.GetBytes("audio_stream_start");
-            byte[] inputBytes = System.Text.Encoding.UTF8.GetBytes(commandInputJson);
-
-            IntPtr commandPtr = Marshal.AllocHGlobal(commandBytes.Length);
-            Marshal.Copy(commandBytes, 0, commandPtr, commandBytes.Length);
-
-            IntPtr inputPtr = Marshal.AllocHGlobal(inputBytes.Length);
-            Marshal.Copy(inputBytes, 0, inputPtr, inputBytes.Length);
-
-            var reqBuf = new RequestBuffer
-            {
-                Command = commandPtr,
-                CommandLength = commandBytes.Length,
-                Data = inputPtr,
-                DataLength = inputBytes.Length
-            };
-
-            ResponseBuffer response = default;
-
-            var helper = new CallbackHelper(transcriptionCallback);
-            var funcPtr = Marshal.GetFunctionPointerForDelegate(handleCallbackDelegate);
-            var helperHandle = GCHandle.Alloc(helper);
-            var helperPtr = GCHandle.ToIntPtr(helperHandle);
-
-            try
-            {
-                unsafe
-                {
-                    CoreAudioStreamStart(&reqBuf, &response, funcPtr, helperPtr);
-                }
-            }
-            catch
-            {
-                // Free on failure — native core never saw the handle
-                helperHandle.Free();
-                throw;
-            }
-            finally
-            {
-                Marshal.FreeHGlobal(commandPtr);
-                Marshal.FreeHGlobal(inputPtr);
-            }
-
-            // Marshal response inline (matching existing ExecuteCommandImpl pattern)
-            Response result = new();
-            if (response.Data != IntPtr.Zero && response.DataLength > 0)
-            {
-                byte[] managedResponse = new byte[response.DataLength];
-                Marshal.Copy(response.Data, managedResponse, 0, response.DataLength);
-                result.Data = System.Text.Encoding.UTF8.GetString(managedResponse);
-            }
-            if (response.Error != IntPtr.Zero && response.ErrorLength > 0)
-            {
-                result.Error = Marshal.PtrToStringUTF8(response.Error, response.ErrorLength)!;
-            }
-            Marshal.FreeHGlobal(response.Data);
-            Marshal.FreeHGlobal(response.Error);
-
-            // Return the GCHandle alongside the response — caller is responsible for
-            // keeping it alive during the session and freeing it in StopAudioStream.
-            return new AudioStreamSession(result, helperHandle);
-        }
-        catch (Exception ex) when (ex is not OperationCanceledException)
-        {
-            throw new FoundryLocalException("Error executing audio_stream_start", ex, _logger);
-        }
+        // Uses existing execute_command entry point with "audio_stream_start" command
+        return ExecuteCommand("audio_stream_start", request);
     }
 
     public Response PushAudioData(CoreInteropRequest request, ReadOnlyMemory<byte> audioData)
@@ -461,7 +380,7 @@ public Response PushAudioData(CoreInteropRequest request, ReadOnlyMemory<byte> a
 
                 try
                 {
-                    CoreAudioStreamPush(&reqBuf, &response);
+                    CoreExecuteCommandWithBinary(&reqBuf, &response);
                 }
                 finally
                 {
@@ -493,30 +412,10 @@ public Response PushAudioData(CoreInteropRequest request, ReadOnlyMemory<byte> a
         }
     }
 
-    public Response StopAudioStream(CoreInteropRequest request, GCHandle callbackHandle)
+    public Response StopAudioStream(CoreInteropRequest request)
     {
-        try
-        {
-            var result = ExecuteCommand("audio_stream_stop", request);
-
-            // Free the GCHandle that was keeping the callback delegate alive.
-            // After this point, the native core must not invoke the callback.
-            if (callbackHandle.IsAllocated)
-            {
-                callbackHandle.Free();
-            }
-
-            return result;
-        }
-        catch (Exception ex) when (ex is not OperationCanceledException)
-        {
-            // Still free the handle on failure to avoid leaks
-            if (callbackHandle.IsAllocated)
-            {
-                callbackHandle.Free();
-            }
-            throw new FoundryLocalException("Error executing audio_stream_stop", ex, _logger);
-        }
+        // Uses existing execute_command entry point with "audio_stream_stop" command
+        return ExecuteCommand("audio_stream_stop", request);
     }
 
 }
diff --git a/sdk_v2/cs/src/Detail/ICoreInterop.cs b/sdk_v2/cs/src/Detail/ICoreInterop.cs
index cd342ce5..b493dfb7 100644
--- a/sdk_v2/cs/src/Detail/ICoreInterop.cs
+++ b/sdk_v2/cs/src/Detail/ICoreInterop.cs
@@ -65,13 +65,7 @@ protected unsafe struct StreamingRequestBuffer
         public int BinaryDataLength;
     }
 
-    /// <summary>
-    /// Returned by StartAudioStream. Holds the session handle and the GCHandle
-    /// that must remain alive for the callback lifetime.
-    /// </summary>
-    internal record AudioStreamSession(Response Response, GCHandle CallbackHandle);
-
-    AudioStreamSession StartAudioStream(CoreInteropRequest request, CallbackFn transcriptionCallback);
+    Response StartAudioStream(CoreInteropRequest request);
     Response PushAudioData(CoreInteropRequest request, ReadOnlyMemory<byte> audioData);
-    Response StopAudioStream(CoreInteropRequest request, GCHandle callbackHandle);
+    Response StopAudioStream(CoreInteropRequest request);
 }
diff --git a/sdk_v2/cs/src/OpenAI/AudioStreamingClient.cs b/sdk_v2/cs/src/OpenAI/AudioStreamingClient.cs
index 27e1bbea..303362e3 100644
--- a/sdk_v2/cs/src/OpenAI/AudioStreamingClient.cs
+++ b/sdk_v2/cs/src/OpenAI/AudioStreamingClient.cs
@@ -34,7 +34,6 @@ public sealed class OpenAIAudioStreamingClient : IAsyncDisposable
     // Session state — protected by _lock
     private readonly AsyncLock _lock = new();
     private string? _sessionHandle;
-    private GCHandle _callbackHandle;
     private bool _started;
     private bool _stopped;
 
@@ -50,9 +49,6 @@ public sealed class OpenAIAudioStreamingClient : IAsyncDisposable
     // Cancelled only during StopAsync/DisposeAsync to allow clean drain.
     private CancellationTokenSource? _sessionCts;
 
-    // Stored as a field so the delegate is not garbage collected while native core holds a reference.
-    private ICoreInterop.CallbackFn? _transcriptionCallback;
-
     // Snapshot of settings captured at StartAsync — prevents mutation after session starts.
     private StreamingAudioSettings? _activeSettings;
 
@@ -142,43 +138,20 @@ public async Task StartAsync(CancellationToken ct = default)
             request.Params["Language"] = _activeSettings.Language;
         }
 
-        // Store the callback as a field so the delegate is rooted for the session lifetime.
-        _transcriptionCallback = (callbackData) =>
-        {
-            try
-            {
-                var result = AudioStreamTranscriptionResult.FromJson(callbackData);
-                // TryWrite always succeeds on unbounded channels
-                _outputChannel.Writer.TryWrite(result);
-            }
-            catch (Exception ex)
-            {
-                _logger.LogError(ex, "Error processing audio stream transcription callback");
-                _outputChannel.Writer.TryComplete(
-                    new FoundryLocalException("Error processing audio streaming callback.", ex, _logger));
-            }
-        };
-
-        // StartAudioStream is synchronous (P/Invoke) — run on thread pool
-        var session = await Task.Run(
-            () => _coreInterop.StartAudioStream(request, _transcriptionCallback), ct)
+        // StartAudioStream uses existing execute_command entry point — synchronous P/Invoke
+        var response = await Task.Run(
+            () => _coreInterop.StartAudioStream(request), ct)
             .ConfigureAwait(false);
 
-        if (session.Response.Error != null)
+        if (response.Error != null)
         {
-            // Free handle on failure
-            if (session.CallbackHandle.IsAllocated)
-            {
-                session.CallbackHandle.Free();
-            }
             _outputChannel.Writer.TryComplete();
             throw new FoundryLocalException(
-                $"Error starting audio stream session: {session.Response.Error}", _logger);
+                $"Error starting audio stream session: {response.Error}", _logger);
         }
 
-        _sessionHandle = session.Response.Data
+        _sessionHandle = response.Data
             ?? throw new FoundryLocalException("Native core did not return a session handle.", _logger);
-        _callbackHandle = session.CallbackHandle;
         _started = true;
         _stopped = false;
 
@@ -337,7 +310,7 @@ public async Task StopAsync(CancellationToken ct = default)
         try
         {
             response = await Task.Run(
-                () => _coreInterop.StopAudioStream(request, _callbackHandle), ct)
+                () => _coreInterop.StopAudioStream(request), ct)
                 .ConfigureAwait(false);
         }
         catch (OperationCanceledException) when (ct.IsCancellationRequested)
@@ -347,7 +320,7 @@ public async Task StopAsync(CancellationToken ct = default)
             try
             {
                 response = await Task.Run(
-                    () => _coreInterop.StopAudioStream(request, _callbackHandle))
+                    () => _coreInterop.StopAudioStream(request))
                     .ConfigureAwait(false);
             }
             catch (Exception cleanupEx)
@@ -360,7 +333,6 @@ public async Task StopAsync(CancellationToken ct = default)
         finally
         {
             _sessionHandle = null;
-            _transcriptionCallback = null;
             _started = false;
             _sessionCts?.Dispose();
             _sessionCts = null;

From 535b73596b567cbd90e73ee40060ddc9e5b643d8 Mon Sep 17 00:00:00 2001
From: ruiren_microsoft <ruiren@microsoft.com>
Date: Tue, 10 Mar 2026 18:09:38 -0700
Subject: [PATCH 05/22] update sdk

---
 sdk_v2/cs/src/AssemblyInfo.cs                 |   1 +
 sdk_v2/cs/src/Detail/CoreInterop.cs           | 136 ++++-
 sdk_v2/cs/src/IModel.cs                       |   7 -
 sdk_v2/cs/src/Model.cs                        |   5 -
 sdk_v2/cs/src/ModelVariant.cs                 |  17 -
 sdk_v2/cs/src/OpenAI/AudioClient.cs           | 124 +----
 .../OpenAI/AudioStreamTranscriptionTypes.cs   |  13 +-
 sdk_v2/cs/src/OpenAI/AudioStreamingClient.cs  |  68 ++-
 .../cs/test/FoundryLocal.Tests/ModelTests.cs  |   2 +
 sdk_v2/cs/test/FoundryLocal.Tests/Utils.cs    | 500 +++---------------
 10 files changed, 249 insertions(+), 624 deletions(-)

diff --git a/sdk_v2/cs/src/AssemblyInfo.cs b/sdk_v2/cs/src/AssemblyInfo.cs
index 9bebe71b..987f9de6 100644
--- a/sdk_v2/cs/src/AssemblyInfo.cs
+++ b/sdk_v2/cs/src/AssemblyInfo.cs
@@ -7,4 +7,5 @@
 using System.Runtime.CompilerServices;
 
 [assembly: InternalsVisibleTo("Microsoft.AI.Foundry.Local.Tests")]
+[assembly: InternalsVisibleTo("AudioStreamTest")]
 [assembly: InternalsVisibleTo("DynamicProxyGenAssembly2")] // for Mock of ICoreInterop
diff --git a/sdk_v2/cs/src/Detail/CoreInterop.cs b/sdk_v2/cs/src/Detail/CoreInterop.cs
index 7def104f..e4c88e9b 100644
--- a/sdk_v2/cs/src/Detail/CoreInterop.cs
+++ b/sdk_v2/cs/src/Detail/CoreInterop.cs
@@ -160,12 +160,24 @@ private static unsafe partial void CoreExecuteCommandWithCallback(RequestBuffer*
 
     // --- Audio streaming P/Invoke imports ---
 
-    [LibraryImport(LibraryName, EntryPoint = "execute_command_with_binary")]
+    [LibraryImport(LibraryName, EntryPoint = "audio_stream_start")]
     [UnmanagedCallConv(CallConvs = new[] { typeof(System.Runtime.CompilerServices.CallConvCdecl) })]
-    private static unsafe partial void CoreExecuteCommandWithBinary(
+    private static unsafe partial void CoreAudioStreamStart(
+        RequestBuffer* request,
+        ResponseBuffer* response);
+
+    [LibraryImport(LibraryName, EntryPoint = "audio_stream_push")]
+    [UnmanagedCallConv(CallConvs = new[] { typeof(System.Runtime.CompilerServices.CallConvCdecl) })]
+    private static unsafe partial void CoreAudioStreamPush(
         StreamingRequestBuffer* request,
         ResponseBuffer* response);
 
+    [LibraryImport(LibraryName, EntryPoint = "audio_stream_stop")]
+    [UnmanagedCallConv(CallConvs = new[] { typeof(System.Runtime.CompilerServices.CallConvCdecl) })]
+    private static unsafe partial void CoreAudioStreamStop(
+        RequestBuffer* request,
+        ResponseBuffer* response);
+
     // helper to capture exceptions in callbacks
     internal class CallbackHelper
     {
@@ -339,12 +351,71 @@ public Task<Response> ExecuteCommandWithCallbackAsync(string commandName, CoreIn
         return Task.Run(() => ExecuteCommandWithCallback(commandName, commandInput, callback), ct);
     }
 
+    /// <summary>Copy a native ResponseBuffer into a managed Response; the native buffers are freed even if decoding throws.</summary>
+    /// <param name="response">Buffer filled in by the native call; its Data/Error pointers are released here and must not be reused.</param>
+    /// <returns>Response with Data/Error decoded as UTF-8 when the corresponding pointer is non-null and its length is positive.</returns>
+    private Response MarshalResponse(ResponseBuffer response)
+    {
+        try
+        {
+            Response result = new();
+            if (response.Data != IntPtr.Zero && response.DataLength > 0)
+            {
+                result.Data = Marshal.PtrToStringUTF8(response.Data, response.DataLength)!;
+            }
+            if (response.Error != IntPtr.Zero && response.ErrorLength > 0)
+            {
+                result.Error = Marshal.PtrToStringUTF8(response.Error, response.ErrorLength)!;
+            }
+            return result;
+        }
+        finally
+        {
+            Marshal.FreeHGlobal(response.Data);
+            Marshal.FreeHGlobal(response.Error);
+        }
+    }
+
     // --- Audio streaming managed implementations ---
 
     public Response StartAudioStream(CoreInteropRequest request)
     {
-        // Uses existing execute_command entry point with "audio_stream_start" command
-        return ExecuteCommand("audio_stream_start", request);
+        try
+        {
+            var commandInputJson = request.ToJson();
+            byte[] inputBytes = System.Text.Encoding.UTF8.GetBytes(commandInputJson);
+
+            IntPtr inputPtr = Marshal.AllocHGlobal(inputBytes.Length);
+            Marshal.Copy(inputBytes, 0, inputPtr, inputBytes.Length);
+
+            unsafe
+            {
+                var reqBuf = new RequestBuffer
+                {
+                    Command = IntPtr.Zero,
+                    CommandLength = 0,
+                    Data = inputPtr,
+                    DataLength = inputBytes.Length
+                };
+
+                ResponseBuffer response = default;
+
+                try
+                {
+                    CoreAudioStreamStart(&reqBuf, &response);
+                }
+                finally
+                {
+                    Marshal.FreeHGlobal(inputPtr);
+                }
+
+                return MarshalResponse(response);
+            }
+        }
+        catch (Exception ex) when (ex is not OperationCanceledException)
+        {
+            throw new FoundryLocalException("Error executing audio_stream_start", ex, _logger);
+        }
     }
 
     public Response PushAudioData(CoreInteropRequest request, ReadOnlyMemory<byte> audioData)
@@ -380,7 +451,7 @@ public Response PushAudioData(CoreInteropRequest request, ReadOnlyMemory<byte> a
 
                 try
                 {
-                    CoreExecuteCommandWithBinary(&reqBuf, &response);
+                    CoreAudioStreamPush(&reqBuf, &response);
                 }
                 finally
                 {
@@ -388,22 +459,7 @@ public Response PushAudioData(CoreInteropRequest request, ReadOnlyMemory<byte> a
                     Marshal.FreeHGlobal(inputPtr);
                 }
 
-                // Marshal response inline
-                Response result = new();
-                if (response.Data != IntPtr.Zero && response.DataLength > 0)
-                {
-                    byte[] managedResponse = new byte[response.DataLength];
-                    Marshal.Copy(response.Data, managedResponse, 0, response.DataLength);
-                    result.Data = System.Text.Encoding.UTF8.GetString(managedResponse);
-                }
-                if (response.Error != IntPtr.Zero && response.ErrorLength > 0)
-                {
-                    result.Error = Marshal.PtrToStringUTF8(response.Error, response.ErrorLength)!;
-                }
-                Marshal.FreeHGlobal(response.Data);
-                Marshal.FreeHGlobal(response.Error);
-
-                return result;
+                return MarshalResponse(response);
             }
         }
         catch (Exception ex) when (ex is not OperationCanceledException)
@@ -414,8 +470,42 @@ public Response PushAudioData(CoreInteropRequest request, ReadOnlyMemory<byte> a
 
     public Response StopAudioStream(CoreInteropRequest request)
     {
-        // Uses existing execute_command entry point with "audio_stream_stop" command
-        return ExecuteCommand("audio_stream_stop", request);
+        try
+        {
+            var commandInputJson = request.ToJson();
+            byte[] inputBytes = System.Text.Encoding.UTF8.GetBytes(commandInputJson);
+
+            IntPtr inputPtr = Marshal.AllocHGlobal(inputBytes.Length);
+            Marshal.Copy(inputBytes, 0, inputPtr, inputBytes.Length);
+
+            unsafe
+            {
+                var reqBuf = new RequestBuffer
+                {
+                    Command = IntPtr.Zero,
+                    CommandLength = 0,
+                    Data = inputPtr,
+                    DataLength = inputBytes.Length
+                };
+
+                ResponseBuffer response = default;
+
+                try
+                {
+                    CoreAudioStreamStop(&reqBuf, &response);
+                }
+                finally
+                {
+                    Marshal.FreeHGlobal(inputPtr);
+                }
+
+                return MarshalResponse(response);
+            }
+        }
+        catch (Exception ex) when (ex is not OperationCanceledException)
+        {
+            throw new FoundryLocalException("Error executing audio_stream_stop", ex, _logger);
+        }
     }
 
 }
diff --git a/sdk_v2/cs/src/IModel.cs b/sdk_v2/cs/src/IModel.cs
index 20eca014..c3acba61 100644
--- a/sdk_v2/cs/src/IModel.cs
+++ b/sdk_v2/cs/src/IModel.cs
@@ -67,11 +67,4 @@ Task DownloadAsync(Action<float>? downloadProgress = null,
     /// <param name="ct">Optional cancellation token.</param>
     /// <returns>OpenAI.AudioClient</returns>
     Task<OpenAIAudioClient> GetAudioClientAsync(CancellationToken? ct = null);
-
-    /// <summary>
-    /// Get a real-time audio streaming client for ASR.
-    /// </summary>
-    /// <param name="ct">Optional cancellation token.</param>
-    /// <returns>OpenAIAudioStreamingClient for real-time transcription.</returns>
-    Task<OpenAIAudioStreamingClient> GetAudioStreamingClientAsync(CancellationToken? ct = null);
 }
diff --git a/sdk_v2/cs/src/Model.cs b/sdk_v2/cs/src/Model.cs
index ffe8bb1c..83bcef69 100644
--- a/sdk_v2/cs/src/Model.cs
+++ b/sdk_v2/cs/src/Model.cs
@@ -114,11 +114,6 @@ public async Task<OpenAIAudioClient> GetAudioClientAsync(CancellationToken? ct =
         return await SelectedVariant.GetAudioClientAsync(ct).ConfigureAwait(false);
     }
 
-    public async Task<OpenAIAudioStreamingClient> GetAudioStreamingClientAsync(CancellationToken? ct = null)
-    {
-        return await SelectedVariant.GetAudioStreamingClientAsync(ct).ConfigureAwait(false);
-    }
-
     public async Task UnloadAsync(CancellationToken? ct = null)
     {
         await SelectedVariant.UnloadAsync(ct).ConfigureAwait(false);
diff --git a/sdk_v2/cs/src/ModelVariant.cs b/sdk_v2/cs/src/ModelVariant.cs
index d5285c1c..6ca7cda7 100644
--- a/sdk_v2/cs/src/ModelVariant.cs
+++ b/sdk_v2/cs/src/ModelVariant.cs
@@ -190,21 +190,4 @@ private async Task<OpenAIAudioClient> GetAudioClientImplAsync(CancellationToken?
 
         return new OpenAIAudioClient(Id);
     }
-
-    public async Task<OpenAIAudioStreamingClient> GetAudioStreamingClientAsync(CancellationToken? ct = null)
-    {
-        return await Utils.CallWithExceptionHandling(() => GetAudioStreamingClientImplAsync(ct),
-                                                     "Error getting audio streaming client for model", _logger)
-                                                    .ConfigureAwait(false);
-    }
-
-    private async Task<OpenAIAudioStreamingClient> GetAudioStreamingClientImplAsync(CancellationToken? ct = null)
-    {
-        if (!await IsLoadedAsync(ct))
-        {
-            throw new FoundryLocalException($"Model {Id} is not loaded. Call LoadAsync first.");
-        }
-
-        return new OpenAIAudioStreamingClient(Id);
-    }
 }
diff --git a/sdk_v2/cs/src/OpenAI/AudioClient.cs b/sdk_v2/cs/src/OpenAI/AudioClient.cs
index 5475185c..1f44996b 100644
--- a/sdk_v2/cs/src/OpenAI/AudioClient.cs
+++ b/sdk_v2/cs/src/OpenAI/AudioClient.cs
@@ -6,9 +6,6 @@
 
 namespace Microsoft.AI.Foundry.Local;
 
-using System.Runtime.CompilerServices;
-using System.Threading.Channels;
-
 using Betalgo.Ranul.OpenAI.ObjectModels.RequestModels;
 using Betalgo.Ranul.OpenAI.ObjectModels.ResponseModels;
 
@@ -46,6 +43,16 @@ public record AudioSettings
     /// </summary>
     public AudioSettings Settings { get; } = new();
 
+    /// <summary>
+    /// Create a real-time streaming transcription session.
+    /// Audio data is pushed in as PCM chunks and transcription results are returned as an async stream.
+    /// </summary>
+    /// <returns>A streaming session that must be disposed when done.</returns>
+    public AudioTranscriptionStreamSession CreateStreamingSession()
+    {
+        return new AudioTranscriptionStreamSession(_modelId);
+    }
+
     /// <summary>
     /// Transcribe audio from a file.
     /// </summary>
@@ -63,28 +70,6 @@ public async Task<AudioCreateTranscriptionResponse> TranscribeAudioAsync(string
                                                     .ConfigureAwait(false);
     }
 
-    /// <summary>
-    /// Transcribe audio from a file with streamed output.
-    /// </summary>
-    /// <param name="audioFilePath">
-    /// Path to file containing audio recording.
-    /// Supported formats: mp3
-    /// </param>
-    /// <param name="ct">Cancellation token.</param>
-    /// <returns>An asynchronous enumerable of transcription responses.</returns>
-    public async IAsyncEnumerable<AudioCreateTranscriptionResponse> TranscribeAudioStreamingAsync(
-        string audioFilePath, [EnumeratorCancellation] CancellationToken ct)
-    {
-        var enumerable = Utils.CallWithExceptionHandling(
-            () => TranscribeAudioStreamingImplAsync(audioFilePath, ct),
-            "Error during streaming audio transcription.", _logger).ConfigureAwait(false);
-
-        await foreach (var item in enumerable)
-        {
-            yield return item;
-        }
-    }
-
     private async Task<AudioCreateTranscriptionResponse> TranscribeAudioImplAsync(string audioFilePath,
                                                                                   CancellationToken? ct)
     {
@@ -107,93 +92,4 @@ private async Task<AudioCreateTranscriptionResponse> TranscribeAudioImplAsync(st
 
         return output;
     }
-
-    private async IAsyncEnumerable<AudioCreateTranscriptionResponse> TranscribeAudioStreamingImplAsync(
-        string audioFilePath, [EnumeratorCancellation] CancellationToken ct)
-    {
-        var openaiRequest = AudioTranscriptionCreateRequestExtended.FromUserInput(_modelId, audioFilePath, Settings);
-
-        var request = new CoreInteropRequest
-        {
-            Params = new Dictionary<string, string>
-            {
-                { "OpenAICreateRequest",  openaiRequest.ToJson() },
-            }
-        };
-
-        var channel = Channel.CreateUnbounded<AudioCreateTranscriptionResponse>(
-                        new UnboundedChannelOptions
-                        {
-                            SingleWriter = true,
-                            SingleReader = true,
-                            AllowSynchronousContinuations = true
-                        });
-
-        // The callback will push ChatResponse objects into the channel.
-        // The channel reader will return the values to the user.
-        // This setup prevents the user from blocking the thread generating the responses.
-        _ = Task.Run(async () =>
-        {
-            try
-            {
-                var failed = false;
-
-                var res = await _coreInterop.ExecuteCommandWithCallbackAsync(
-                    "audio_transcribe",
-                    request,
-                    async (callbackData) =>
-                    {
-                        try
-                        {
-                            if (!failed)
-                            {
-                                var audioCompletion = callbackData.ToAudioTranscription(_logger);
-                                await channel.Writer.WriteAsync(audioCompletion);
-                            }
-                        }
-                        catch (Exception ex)
-                        {
-                            // propagate exception to reader
-                            channel.Writer.TryComplete(
-                                new FoundryLocalException(
-                                    "Error processing streaming audio transcription callback data.", ex, _logger));
-                            failed = true;
-                        }
-                    },
-                    ct
-                ).ConfigureAwait(false);
-
-                // If the native layer returned an error (e.g. missing audio file, invalid model)
-                // without invoking any callbacks, propagate it so the caller sees an exception
-                // instead of an empty stream.
-                if (res.Error != null)
-                {
-                    channel.Writer.TryComplete(
-                        new FoundryLocalException($"Error from audio_transcribe command: {res.Error}", _logger));
-                    return;
-                }
-
-                // use TryComplete as an exception in the callback may have already closed the channel
-                _ = channel.Writer.TryComplete();
-            }
-            // Ignore cancellation exceptions so we don't convert them into errors
-            catch (Exception ex) when (ex is not OperationCanceledException)
-            {
-                channel.Writer.TryComplete(
-                    new FoundryLocalException("Error executing streaming chat completion.", ex, _logger));
-            }
-            catch (OperationCanceledException)
-            {
-                // Complete the channel on cancellation but don't turn it into an error
-                channel.Writer.TryComplete();
-            }
-        }, ct);
-
-        // Start reading from the channel as items arrive.
-        // This will continue until ExecuteCommandWithCallbackAsync completes and closes the channel.
-        await foreach (var item in channel.Reader.ReadAllAsync(ct))
-        {
-            yield return item;
-        }
-    }
 }
diff --git a/sdk_v2/cs/src/OpenAI/AudioStreamTranscriptionTypes.cs b/sdk_v2/cs/src/OpenAI/AudioStreamTranscriptionTypes.cs
index 7736cb47..02c4169e 100644
--- a/sdk_v2/cs/src/OpenAI/AudioStreamTranscriptionTypes.cs
+++ b/sdk_v2/cs/src/OpenAI/AudioStreamTranscriptionTypes.cs
@@ -6,11 +6,20 @@ namespace Microsoft.AI.Foundry.Local;
 
 public record AudioStreamTranscriptionResult
 {
-    /// <summary>Whether this is a partial (interim) or final result for this segment.</summary>
+    /// <summary>
+    /// Whether this is a final or partial (interim) result.
+    /// - Nemotron models always return <c>true</c> (every result is final).
+    /// - Other models (e.g., Azure Embedded) may return <c>false</c> for interim
+    ///   hypotheses that will be replaced by a subsequent final result.
+    /// </summary>
     [JsonPropertyName("is_final")]
     public bool IsFinal { get; init; }
 
-    /// <summary>The transcribed text.</summary>
+    /// <summary>
+    /// Newly transcribed text from this audio chunk only (incremental hypothesis).
+    /// This is NOT the full accumulated transcript — each result contains only
+    /// the text decoded from the most recent audio chunk.
+    /// </summary>
     [JsonPropertyName("text")]
     public string Text { get; init; } = string.Empty;
 
diff --git a/sdk_v2/cs/src/OpenAI/AudioStreamingClient.cs b/sdk_v2/cs/src/OpenAI/AudioStreamingClient.cs
index 303362e3..f0a1904d 100644
--- a/sdk_v2/cs/src/OpenAI/AudioStreamingClient.cs
+++ b/sdk_v2/cs/src/OpenAI/AudioStreamingClient.cs
@@ -15,17 +15,19 @@ namespace Microsoft.AI.Foundry.Local;
 
 
 /// <summary>
-/// Client for real-time audio streaming ASR (Automatic Speech Recognition).
+/// Session for real-time audio streaming ASR (Automatic Speech Recognition).
 /// Audio data from a microphone (or other source) is pushed in as PCM chunks,
-/// and partial transcription results are returned as an async stream.
+/// and transcription results are returned as an async stream.
 ///
-/// Thread safety: PushAudioDataAsync can be called from any thread (including high-frequency
+/// Created via <see cref="OpenAIAudioClient.CreateStreamingSession"/>.
+///
+/// Thread safety: AppendAsync can be called from any thread (including high-frequency
 /// audio callbacks). Pushes are internally serialized via a bounded channel to prevent
 /// unbounded memory growth and ensure ordering.
 /// </summary>
 
 
-public sealed class OpenAIAudioStreamingClient : IAsyncDisposable
+public sealed class AudioTranscriptionStreamSession : IAsyncDisposable
 {
     private readonly string _modelId;
     private readonly ICoreInterop _coreInterop = FoundryLocalManager.Instance.CoreInterop;
@@ -50,14 +52,14 @@ public sealed class OpenAIAudioStreamingClient : IAsyncDisposable
     private CancellationTokenSource? _sessionCts;
 
     // Snapshot of settings captured at StartAsync — prevents mutation after session starts.
-    private StreamingAudioSettings? _activeSettings;
+    private AudioStreamTranscriptionOptions? _activeSettings;
 
     /// <summary>
     /// Audio format settings for the streaming session.
     /// Must be configured before calling <see cref="StartAsync"/>.
     /// Settings are frozen once the session starts.
     /// </summary>
-    public record StreamingAudioSettings
+    public record AudioStreamTranscriptionOptions
     {
         /// <summary>PCM sample rate in Hz. Default: 16000.</summary>
         public int SampleRate { get; set; } = 16000;
@@ -65,32 +67,29 @@ public record StreamingAudioSettings
         /// <summary>Number of audio channels. Default: 1 (mono).</summary>
         public int Channels { get; set; } = 1;
 
-        /// <summary>Bits per sample. Default: 16.</summary>
-        public int BitsPerSample { get; set; } = 16;
-
         /// <summary>Optional BCP-47 language hint (e.g., "en", "zh").</summary>
         public string? Language { get; set; }
 
         /// <summary>
         /// Maximum number of audio chunks buffered in the internal push queue.
-        /// If the queue is full, PushAudioDataAsync will asynchronously wait.
+        /// If the queue is full, AppendAsync will asynchronously wait.
         /// Default: 100 (~3 seconds of audio at typical chunk sizes).
         /// </summary>
         public int PushQueueCapacity { get; set; } = 100;
 
-        internal StreamingAudioSettings Snapshot() => this with { }; // record copy
+        internal AudioStreamTranscriptionOptions Snapshot() => this with { }; // record copy
     }
 
-    public StreamingAudioSettings Settings { get; } = new();
+    public AudioStreamTranscriptionOptions Settings { get; } = new();
 
-    internal OpenAIAudioStreamingClient(string modelId)
+    internal AudioTranscriptionStreamSession(string modelId)
     {
         _modelId = modelId;
     }
 
     /// <summary>
     /// Start a real-time audio streaming session.
-    /// Must be called before <see cref="PushAudioDataAsync"/> or <see cref="GetTranscriptionStream"/>.
+    /// Must be called before <see cref="AppendAsync"/> or <see cref="GetTranscriptionStream"/>.
     /// Settings are frozen after this call.
     /// </summary>
     /// <param name="ct">Cancellation token.</param>
@@ -129,7 +128,6 @@ public async Task StartAsync(CancellationToken ct = default)
                 { "Model", _modelId },
                 { "SampleRate", _activeSettings.SampleRate.ToString(CultureInfo.InvariantCulture) },
                 { "Channels", _activeSettings.Channels.ToString(CultureInfo.InvariantCulture) },
-                { "BitsPerSample", _activeSettings.BitsPerSample.ToString(CultureInfo.InvariantCulture) },
             }
         };
 
@@ -171,7 +169,7 @@ public async Task StartAsync(CancellationToken ct = default)
     /// </summary>
     /// <param name="pcmData">Raw PCM audio bytes matching the configured format.</param>
     /// <param name="ct">Cancellation token.</param>
-    public async ValueTask PushAudioDataAsync(ReadOnlyMemory<byte> pcmData, CancellationToken ct = default)
+    public async ValueTask AppendAsync(ReadOnlyMemory<byte> pcmData, CancellationToken ct = default)
     {
         if (!_started || _stopped)
         {
@@ -211,6 +209,25 @@ private async Task PushLoopAsync(CancellationToken ct)
                     if (response.Error == null)
                     {
                         pushed = true;
+
+                        // Parse transcription result from push response and surface it
+                        if (!string.IsNullOrEmpty(response.Data))
+                        {
+                            try
+                            {
+                                var transcription = AudioStreamTranscriptionResult.FromJson(response.Data);
+                                if (!string.IsNullOrEmpty(transcription.Text))
+                                {
+                                    _outputChannel?.Writer.TryWrite(transcription);
+                                }
+                            }
+                            catch (Exception parseEx)
+                            {
+                                // Non-fatal: log and continue if response isn't a transcription result
+                                _logger.LogDebug(parseEx, "Could not parse push response as transcription result");
+                            }
+                        }
+
                         continue;
                     }
 
@@ -332,12 +349,29 @@ public async Task StopAsync(CancellationToken ct = default)
         }
         finally
         {
+            // Parse final transcription from stop response before completing the channel
+            if (response?.Data != null)
+            {
+                try
+                {
+                    var finalResult = AudioStreamTranscriptionResult.FromJson(response.Data);
+                    if (!string.IsNullOrEmpty(finalResult.Text))
+                    {
+                        _outputChannel?.Writer.TryWrite(finalResult);
+                    }
+                }
+                catch (Exception parseEx)
+                {
+                    _logger.LogDebug(parseEx, "Could not parse stop response as transcription result");
+                }
+            }
+
             _sessionHandle = null;
             _started = false;
             _sessionCts?.Dispose();
             _sessionCts = null;
 
-            // 5. Complete the output channel AFTER StopAudioStream returns
+            // Complete the output channel AFTER writing final result
             _outputChannel?.Writer.TryComplete();
         }
 
diff --git a/sdk_v2/cs/test/FoundryLocal.Tests/ModelTests.cs b/sdk_v2/cs/test/FoundryLocal.Tests/ModelTests.cs
index b5a49657..0e2ea1dc 100644
--- a/sdk_v2/cs/test/FoundryLocal.Tests/ModelTests.cs
+++ b/sdk_v2/cs/test/FoundryLocal.Tests/ModelTests.cs
@@ -52,3 +52,5 @@ public async Task GetLastestVersion_Works()
         await Assert.That(latestB).IsEqualTo(variants[1]);
     }
 }
+
+
diff --git a/sdk_v2/cs/test/FoundryLocal.Tests/Utils.cs b/sdk_v2/cs/test/FoundryLocal.Tests/Utils.cs
index 55808da9..6da59baf 100644
--- a/sdk_v2/cs/test/FoundryLocal.Tests/Utils.cs
+++ b/sdk_v2/cs/test/FoundryLocal.Tests/Utils.cs
@@ -1,452 +1,74 @@
-﻿// --------------------------------------------------------------------------------------------------------------------
-// <copyright company="Microsoft">
-//   Copyright (c) Microsoft. All rights reserved.
-// </copyright>
-// --------------------------------------------------------------------------------------------------------------------
-
-namespace Microsoft.AI.Foundry.Local.Tests;
-
-using System;
-using System.Collections.Generic;
-using System.Runtime.CompilerServices;
-using System.Text.Json;
-
-using Microsoft.AI.Foundry.Local.Detail;
-using Microsoft.Extensions.Configuration;
+﻿using Microsoft.AI.Foundry.Local;
 using Microsoft.Extensions.Logging;
 
-using Microsoft.VisualStudio.TestPlatform.TestHost;
+var loggerFactory = LoggerFactory.Create(b => b.AddConsole().SetMinimumLevel(LogLevel.Debug));
+var logger = loggerFactory.CreateLogger("AudioStreamTest");
 
-using Moq;
+// Full path to the Core DLL (the ORT DLLs are expected to sit in the same directory)
+var corePath = @"C:\Users\ruiren\Desktop\audio-stream-test\Microsoft.AI.Foundry.Local.Core.dll";
 
-internal static class Utils
+var config = new Configuration
 {
-    internal struct TestCatalogInfo
-    {
-        internal readonly List<ModelInfo> TestCatalog { get; }
-        internal readonly string ModelListJson { get; }
-
-        internal TestCatalogInfo(bool includeCuda)
-        {
-
-            TestCatalog = Utils.BuildTestCatalog(includeCuda);
-            ModelListJson = JsonSerializer.Serialize(TestCatalog, JsonSerializationContext.Default.ListModelInfo);
-        }
-    }
-
-    internal static readonly TestCatalogInfo TestCatalog = new(true);
-
-    [Before(Assembly)]
-    public static void AssemblyInit(AssemblyHookContext _)
-    {
-        using var loggerFactory = LoggerFactory.Create(builder =>
-        {
-            builder
-                .AddConsole()
-                .SetMinimumLevel(LogLevel.Debug);
-        });
-
-        ILogger logger = loggerFactory.CreateLogger<Program>();
-
-        // Read configuration from appsettings.Test.json
-        logger.LogDebug("Reading configuration from appsettings.Test.json");
-        var configuration = new ConfigurationBuilder()
-            .SetBasePath(Directory.GetCurrentDirectory())
-            .AddJsonFile("appsettings.Test.json", optional: true, reloadOnChange: false)
-            .Build();
-
-        var testModelCacheDirName = "test-data-shared";
-        string testDataSharedPath;
-        if (Path.IsPathRooted(testModelCacheDirName) ||
-            testModelCacheDirName.Contains(Path.DirectorySeparatorChar) ||
-            testModelCacheDirName.Contains(Path.AltDirectorySeparatorChar))
-        {
-            // It's a relative or complete filepath, resolve from current directory
-            testDataSharedPath = Path.GetFullPath(testModelCacheDirName);
-        }
-        else
-        {
-            // It's just a directory name, combine with repo root parent
-            testDataSharedPath = Path.GetFullPath(Path.Combine(GetRepoRoot(), "..", testModelCacheDirName));
-        }
-
-        logger.LogInformation("Using test model cache directory: {testDataSharedPath}", testDataSharedPath);
-
-        if (!Directory.Exists(testDataSharedPath))
-        {
-            throw new DirectoryNotFoundException($"Test model cache directory does not exist: {testDataSharedPath}");
-
-        }
-
-        var config = new Configuration
-        {
-            AppName = "FoundryLocalSdkTest",
-            LogLevel = Local.LogLevel.Debug,
-            Web = new Configuration.WebService
-            {
-                Urls = "http://127.0.0.1:0"
-            },
-            ModelCacheDir = testDataSharedPath,
-            LogsDir = Path.Combine(GetRepoRoot(), "sdk_v2", "cs", "logs")
-        };
-
-        // Initialize the singleton instance.
-        FoundryLocalManager.CreateAsync(config, logger).GetAwaiter().GetResult();
-
-        // standalone instance for testing individual components that skips the 'initialize' command
-        CoreInterop = new CoreInterop(logger);        
-    }
-
-    internal static ICoreInterop CoreInterop { get; private set; } = default!;
-
-    internal static Mock<ILogger> CreateCapturingLoggerMock(List<string> sink)
-    {
-        var mock = new Mock<ILogger>();
-        mock.Setup(x => x.Log(
-                It.IsAny<LogLevel>(),
-                It.IsAny<EventId>(),
-                It.IsAny<It.IsAnyType>(),
-                It.IsAny<Exception?>(),
-                (Func<It.IsAnyType, Exception?, string>)It.IsAny<object>()))
-            .Callback((LogLevel level, EventId id, object state, Exception? ex, Delegate formatter) =>
-            {
-                var message = formatter.DynamicInvoke(state, ex) as string;
-                sink.Add($"{level}: {message}");
-            });
-
-        return mock;
-    }
-
-    internal sealed record InteropCommandInterceptInfo
-    {
-        public string CommandName { get; init; } = default!;
-        public string? CommandInput { get; init; }
-        public string ResponseData { get; init; } = default!;
-        public string? ResponseError { get; init; }
-    }
-
-    internal static Mock<ICoreInterop> CreateCoreInteropWithIntercept(ICoreInterop coreInterop,
-                                                                      List<InteropCommandInterceptInfo> intercepts)
-    {
-        var mock = new Mock<ICoreInterop>();
-        var interceptNames = new HashSet<string>(StringComparer.InvariantCulture);
-
-        foreach (var intercept in intercepts)
-        {
-            if (!interceptNames.Add(intercept.CommandName))
-            {
-                throw new ArgumentException($"Duplicate intercept for command {intercept.CommandName}");
-            }
-
-            mock.Setup(x => x.ExecuteCommand(It.Is<string>(s => s == intercept.CommandName), It.IsAny<CoreInteropRequest?>()))
-                .Returns(new ICoreInterop.Response
-                {
-                    Data = intercept.ResponseData,
-                    Error = intercept.ResponseError
-                });
-
-            mock.Setup(x => x.ExecuteCommandAsync(It.Is<string>(s => s == intercept.CommandName),
-                                                  It.IsAny<CoreInteropRequest?>(),
-                                                  It.IsAny<CancellationToken?>()))
-                .ReturnsAsync(new ICoreInterop.Response
-                {
-                    Data = intercept.ResponseData,
-                    Error = intercept.ResponseError
-                });
-        }
-
-        mock.Setup(x => x.ExecuteCommand(It.Is<string>(s => !interceptNames.Contains(s)),
-                                         It.IsAny<CoreInteropRequest?>()))
-            .Returns((string commandName, CoreInteropRequest? commandInput) =>
-                        coreInterop.ExecuteCommand(commandName, commandInput));
-
-        mock.Setup(x => x.ExecuteCommandAsync(It.Is<string>(s => !interceptNames.Contains(s)),
-                                              It.IsAny<CoreInteropRequest?>(),
-                                              It.IsAny<CancellationToken?>()))
-            .Returns((string commandName, CoreInteropRequest? commandInput, CancellationToken? ct) =>
-                coreInterop.ExecuteCommandAsync(commandName, commandInput, ct));
-
-        return mock;
-    }
-
-    internal static bool IsRunningInCI()
+    AppName = "AudioStreamTest",
+    LogLevel = Microsoft.AI.Foundry.Local.LogLevel.Debug,
+    AdditionalSettings = new Dictionary<string, string>
     {
-        var azureDevOps = Environment.GetEnvironmentVariable("TF_BUILD");
-        var githubActions = Environment.GetEnvironmentVariable("GITHUB_ACTIONS");
-        var isCI = string.Equals(azureDevOps, "True", StringComparison.OrdinalIgnoreCase) ||
-                   string.Equals(githubActions, "true", StringComparison.OrdinalIgnoreCase);
-
-        return isCI;
+        { "FoundryLocalCorePath", corePath }
     }
+};
 
-    private static List<ModelInfo> BuildTestCatalog(bool includeCuda = true)
-    {
-        // Mirrors MOCK_CATALOG_DATA ordering and fields (Python tests)
-        var common = new
-        {
-            ProviderType = "AzureFoundry",
-            Version = 1,
-            ModelType = "ONNX",
-            PromptTemplate = (PromptTemplate?)null,
-            Publisher = "Microsoft",
-            Task = "chat-completion",
-            FileSizeMb = 10403,
-            ModelSettings = new ModelSettings { Parameters = [] },
-            SupportsToolCalling = false,
-            License = "MIT",
-            LicenseDescription = "License…",
-            MaxOutputTokens = 1024L,
-            MinFLVersion = "1.0.0",
-        };
-
-        var list = new List<ModelInfo>
-            {
-                // model-1 generic-gpu, generic-cpu:2, generic-cpu:1
-                new()
-                {
-                    Id = "model-1-generic-gpu:1",
-                    Name = "model-1-generic-gpu",
-                    DisplayName = "model-1-generic-gpu",
-                    Uri = "azureml://registries/azureml/models/model-1-generic-gpu/versions/1",
-                    Runtime = new Runtime { DeviceType = DeviceType.GPU, ExecutionProvider = "WebGpuExecutionProvider" },
-                    Alias = "model-1",
-                    // ParentModelUri = "azureml://registries/azureml/models/model-1/versions/1",
-                    ProviderType = common.ProviderType, Version = common.Version, ModelType = common.ModelType,
-                    PromptTemplate = common.PromptTemplate, Publisher = common.Publisher, Task = common.Task,
-                    FileSizeMb = common.FileSizeMb, ModelSettings = common.ModelSettings,
-                    SupportsToolCalling = common.SupportsToolCalling, License = common.License,
-                    LicenseDescription = common.LicenseDescription, MaxOutputTokens = common.MaxOutputTokens,
-                    MinFLVersion = common.MinFLVersion
-                },
-                new()
-                {
-                    Id = "model-1-generic-cpu:2",
-                    Name = "model-1-generic-cpu",
-                    DisplayName = "model-1-generic-cpu",
-                    Uri = "azureml://registries/azureml/models/model-1-generic-cpu/versions/2",
-                    Runtime = new Runtime { DeviceType = DeviceType.CPU, ExecutionProvider = "CPUExecutionProvider" },
-                    Alias = "model-1",
-                    // ParentModelUri = "azureml://registries/azureml/models/model-1/versions/2",
-                    ProviderType = common.ProviderType,
-                    Version = common.Version, ModelType = common.ModelType,
-                    PromptTemplate = common.PromptTemplate,
-                    Publisher = common.Publisher, Task = common.Task,
-                    FileSizeMb = common.FileSizeMb - 10,  // smaller so default chosen in test that sorts on this
-                    ModelSettings = common.ModelSettings, 
-                    SupportsToolCalling = common.SupportsToolCalling,
-                    License = common.License,
-                    LicenseDescription = common.LicenseDescription,
-                    MaxOutputTokens = common.MaxOutputTokens,
-                    MinFLVersion = common.MinFLVersion
-                },
-                new()
-                {
-                    Id = "model-1-generic-cpu:1",
-                    Name = "model-1-generic-cpu",
-                    DisplayName = "model-1-generic-cpu",
-                    Uri = "azureml://registries/azureml/models/model-1-generic-cpu/versions/1",
-                    Runtime = new Runtime { DeviceType = DeviceType.CPU, ExecutionProvider = "CPUExecutionProvider" },
-                    Alias = "model-1",
-                    //ParentModelUri = "azureml://registries/azureml/models/model-1/versions/1",
-                    ProviderType = common.ProviderType,
-                    Version = common.Version,
-                    ModelType = common.ModelType,
-                    PromptTemplate = common.PromptTemplate,
-                    Publisher = common.Publisher, Task = common.Task,
-                    FileSizeMb = common.FileSizeMb,
-                    ModelSettings = common.ModelSettings,
-                    SupportsToolCalling = common.SupportsToolCalling,
-                    License = common.License,
-                    LicenseDescription = common.LicenseDescription,
-                    MaxOutputTokens = common.MaxOutputTokens,
-                    MinFLVersion = common.MinFLVersion
-                },
+Console.WriteLine("=== Initializing FoundryLocalManager ===");
+await FoundryLocalManager.CreateAsync(config, logger);
+var manager = FoundryLocalManager.Instance;
 
-                // model-2 npu:2, npu:1, generic-cpu:1
-                new()
-                {
-                    Id = "model-2-npu:2",
-                    Name = "model-2-npu",
-                    DisplayName = "model-2-npu",
-                    Uri = "azureml://registries/azureml/models/model-2-npu/versions/2",
-                    Runtime = new Runtime { DeviceType = DeviceType.NPU, ExecutionProvider = "QNNExecutionProvider" },
-                    Alias = "model-2",
-                    //ParentModelUri = "azureml://registries/azureml/models/model-2/versions/2",
-                    ProviderType = common.ProviderType,
-                    Version = common.Version, ModelType = common.ModelType,
-                    PromptTemplate = common.PromptTemplate,
-                    Publisher = common.Publisher, Task = common.Task,
-                    FileSizeMb = common.FileSizeMb,
-                    ModelSettings = common.ModelSettings,
-                    SupportsToolCalling = common.SupportsToolCalling,
-                    License = common.License,
-                    LicenseDescription = common.LicenseDescription,
-                    MaxOutputTokens = common.MaxOutputTokens,
-                    MinFLVersion = common.MinFLVersion
-                },
-                new()
-                {
-                    Id = "model-2-npu:1",
-                    Name = "model-2-npu",
-                    DisplayName = "model-2-npu",
-                    Uri = "azureml://registries/azureml/models/model-2-npu/versions/1",
-                    Runtime = new Runtime { DeviceType = DeviceType.NPU, ExecutionProvider = "QNNExecutionProvider" },
-                    Alias = "model-2",
-                    //ParentModelUri = "azureml://registries/azureml/models/model-2/versions/1",
-                    ProviderType = common.ProviderType,
-                    Version = common.Version, ModelType = common.ModelType,
-                    PromptTemplate = common.PromptTemplate,
-                    Publisher = common.Publisher, Task = common.Task,
-                    FileSizeMb = common.FileSizeMb,
-                    ModelSettings = common.ModelSettings,
-                    SupportsToolCalling = common.SupportsToolCalling,
-                    License = common.License,
-                    LicenseDescription = common.LicenseDescription,
-                    MaxOutputTokens = common.MaxOutputTokens,
-                    MinFLVersion = common.MinFLVersion
-                },
-                new()
-                {
-                    Id = "model-2-generic-cpu:1",
-                    Name = "model-2-generic-cpu",
-                    DisplayName = "model-2-generic-cpu",
-                    Uri = "azureml://registries/azureml/models/model-2-generic-cpu/versions/1",
-                    Runtime = new Runtime { DeviceType = DeviceType.CPU, ExecutionProvider = "CPUExecutionProvider" },
-                    Alias = "model-2",
-                    //ParentModelUri = "azureml://registries/azureml/models/model-2/versions/1",
-                    ProviderType = common.ProviderType,
-                    Version = common.Version, ModelType = common.ModelType,
-                    PromptTemplate = common.PromptTemplate,
-                    Publisher = common.Publisher, Task = common.Task,
-                    FileSizeMb = common.FileSizeMb,
-                    ModelSettings = common.ModelSettings,
-                    SupportsToolCalling = common.SupportsToolCalling,
-                    License = common.License,
-                    LicenseDescription = common.LicenseDescription,
-                    MaxOutputTokens = common.MaxOutputTokens,
-                    MinFLVersion = common.MinFLVersion
-                },
-            };
+Console.WriteLine("=== Getting Catalog ===");
+var catalog = await manager.GetCatalogAsync();
+var models = await catalog.ListModelsAsync();
+Console.WriteLine($"Found {models.Count} models");
 
-        // model-3 cuda-gpu (optional), generic-gpu, generic-cpu
-        if (includeCuda)
-        {
-            list.Add(new ModelInfo
-            {
-                Id = "model-3-cuda-gpu:1",
-                Name = "model-3-cuda-gpu",
-                DisplayName = "model-3-cuda-gpu",
-                Uri = "azureml://registries/azureml/models/model-3-cuda-gpu/versions/1",
-                Runtime = new Runtime { DeviceType = DeviceType.GPU, ExecutionProvider = "CUDAExecutionProvider" },
-                Alias = "model-3",
-                //ParentModelUri = "azureml://registries/azureml/models/model-3/versions/1",
-                ProviderType = common.ProviderType,
-                Version = common.Version,
-                ModelType = common.ModelType,
-                PromptTemplate = common.PromptTemplate,
-                Publisher = common.Publisher,
-                Task = common.Task,
-                FileSizeMb = common.FileSizeMb,
-                ModelSettings = common.ModelSettings,
-                SupportsToolCalling = common.SupportsToolCalling,
-                License = common.License,
-                LicenseDescription = common.LicenseDescription,
-                MaxOutputTokens = common.MaxOutputTokens,
-                MinFLVersion = common.MinFLVersion
-            });
-        }
-
-        list.AddRange(new[]
-        {
-                new ModelInfo
-                {
-                    Id = "model-3-generic-gpu:1",
-                    Name = "model-3-generic-gpu",
-                    DisplayName = "model-3-generic-gpu",
-                    Uri = "azureml://registries/azureml/models/model-3-generic-gpu/versions/1",
-                    Runtime = new Runtime { DeviceType = DeviceType.GPU, ExecutionProvider = "WebGpuExecutionProvider" },
-                    Alias = "model-3",
-                    //ParentModelUri = "azureml://registries/azureml/models/model-3/versions/1",
-                    ProviderType = common.ProviderType,
-                    Version = common.Version, ModelType = common.ModelType,
-                    PromptTemplate = common.PromptTemplate,
-                    Publisher = common.Publisher, Task = common.Task,
-                    FileSizeMb = common.FileSizeMb,
-                    ModelSettings = common.ModelSettings,
-                    SupportsToolCalling = common.SupportsToolCalling,
-                    License = common.License,
-                    LicenseDescription = common.LicenseDescription,
-                    MaxOutputTokens = common.MaxOutputTokens,
-                    MinFLVersion = common.MinFLVersion
-                },
-                new ModelInfo
-                {
-                    Id = "model-3-generic-cpu:1",
-                    Name = "model-3-generic-cpu",
-                    DisplayName = "model-3-generic-cpu",
-                    Uri = "azureml://registries/azureml/models/model-3-generic-cpu/versions/1",
-                    Runtime = new Runtime { DeviceType = DeviceType.CPU, ExecutionProvider = "CPUExecutionProvider" },
-                    Alias = "model-3",
-                    //ParentModelUri = "azureml://registries/azureml/models/model-3/versions/1",
-                    ProviderType = common.ProviderType,
-                    Version = common.Version,
-                    ModelType = common.ModelType,
-                    PromptTemplate = common.PromptTemplate,
-                    Publisher = common.Publisher, Task = common.Task,
-                    FileSizeMb = common.FileSizeMb,
-                    ModelSettings = common.ModelSettings,
-                    SupportsToolCalling = common.SupportsToolCalling,
-                    License = common.License,
-                    LicenseDescription = common.LicenseDescription,
-                    MaxOutputTokens = common.MaxOutputTokens,
-                    MinFLVersion = common.MinFLVersion
-                }
-            });
-
-        // model-4 generic-gpu (nullable prompt)
-        list.Add(new ModelInfo
-        {
-            Id = "model-4-generic-gpu:1",
-            Name = "model-4-generic-gpu",
-            DisplayName = "model-4-generic-gpu",
-            Uri = "azureml://registries/azureml/models/model-4-generic-gpu/versions/1",
-            Runtime = new Runtime { DeviceType = DeviceType.GPU, ExecutionProvider = "WebGpuExecutionProvider" },
-            Alias = "model-4",
-            //ParentModelUri = "azureml://registries/azureml/models/model-4/versions/1",
-            ProviderType = common.ProviderType,
-            Version = common.Version,
-            ModelType = common.ModelType,
-            PromptTemplate = null,
-            Publisher = common.Publisher,
-            Task = common.Task,
-            FileSizeMb = common.FileSizeMb,
-            ModelSettings = common.ModelSettings,
-            SupportsToolCalling = common.SupportsToolCalling,
-            License = common.License,
-            LicenseDescription = common.LicenseDescription,
-            MaxOutputTokens = common.MaxOutputTokens,
-            MinFLVersion = common.MinFLVersion
-        });
-
-        return list;
-    }
-
-    private static string GetSourceFilePath([CallerFilePath] string path = "") => path;
-
-    // Gets the root directory of the foundry-local-sdk repository by finding the .git directory.
-    private static string GetRepoRoot()
-    {
-        var sourceFile = GetSourceFilePath();
-        var dir = new DirectoryInfo(Path.GetDirectoryName(sourceFile)!);
+// Find and load a whisper model
+var model = await catalog.GetModelAsync("whisper-tiny");
+if (model == null)
+{
+    Console.WriteLine("whisper-tiny not found. Available models:");
+    foreach (var m in models)
+        Console.WriteLine($"  - {m.Alias}");
+    return;
+}
 
-        while (dir != null)
-        {
-            if (Directory.Exists(Path.Combine(dir.FullName, ".git")))
-                return dir.FullName;
+Console.WriteLine($"=== Downloading {model.Alias} ===");
+await model.DownloadAsync(p => Console.Write($"\r  Progress: {p:F1}%"));
+Console.WriteLine();
+
+Console.WriteLine($"=== Loading {model.Alias} ===");
+await model.LoadAsync();
+Console.WriteLine("Model loaded.");
+
+Console.WriteLine("=== Creating streaming session ===");
+var audioClient = await model.GetAudioClientAsync();
+var streamingClient = audioClient.CreateStreamingSession();
+streamingClient.Settings.SampleRate = 16000;
+streamingClient.Settings.Channels = 1;
+streamingClient.Settings.BitsPerSample = 16;
+streamingClient.Settings.Language = "en";
+
+Console.WriteLine("=== Starting streaming session ===");
+await streamingClient.StartAsync();
+Console.WriteLine("Session started!");
+
+// Push some fake PCM data (silence — 100ms at 16kHz 16-bit mono = 3200 bytes)
+var fakePcm = new byte[3200];
+Console.WriteLine("=== Pushing audio chunks ===");
+for (int i = 0; i < 5; i++)
+{
+    await streamingClient.AppendAsync(fakePcm);
+    Console.WriteLine($"  Pushed chunk {i + 1}");
+}
 
-            dir = dir.Parent;
-        }
+Console.WriteLine("=== Stopping session ===");
+await streamingClient.StopAsync();
+Console.WriteLine("Session stopped.");
 
-        throw new InvalidOperationException("Could not find git repository root from test file location");
-    }
-}
+Console.WriteLine("=== Unloading model ===");
+await model.UnloadAsync();
+Console.WriteLine("Done! All plumbing works end-to-end.");
\ No newline at end of file

From f5bd9162c30928435e7bd5e39876313c378a9ad0 Mon Sep 17 00:00:00 2001
From: ruiren_microsoft <ruiren@microsoft.com>
Date: Thu, 12 Mar 2026 18:52:41 -0700
Subject: [PATCH 06/22] update the api

---
 sdk_v2/cs/src/Detail/CoreInterop.cs | 86 ++++-------------------------
 1 file changed, 12 insertions(+), 74 deletions(-)

diff --git a/sdk_v2/cs/src/Detail/CoreInterop.cs b/sdk_v2/cs/src/Detail/CoreInterop.cs
index e4c88e9b..c5eba7ec 100644
--- a/sdk_v2/cs/src/Detail/CoreInterop.cs
+++ b/sdk_v2/cs/src/Detail/CoreInterop.cs
@@ -158,7 +158,12 @@ private static unsafe partial void CoreExecuteCommandWithCallback(RequestBuffer*
                                                                       nint callbackPtr, // NativeCallbackFn pointer
                                                                       nint userData);
 
-    // --- Audio streaming P/Invoke imports ---
+    [LibraryImport(LibraryName, EntryPoint = "execute_command_with_binary")]
+    [UnmanagedCallConv(CallConvs = new[] { typeof(System.Runtime.CompilerServices.CallConvCdecl) })]
+    private static unsafe partial void CoreExecuteCommandWithBinary(StreamingRequestBuffer* nativeRequest,
+                                                                     ResponseBuffer* nativeResponse);
+
+    // --- Audio streaming P/Invoke imports (kept for future dedicated entry points) ---
 
     [LibraryImport(LibraryName, EntryPoint = "audio_stream_start")]
     [UnmanagedCallConv(CallConvs = new[] { typeof(System.Runtime.CompilerServices.CallConvCdecl) })]
@@ -377,45 +382,13 @@ private Response MarshalResponse(ResponseBuffer response)
     }
 
     // --- Audio streaming managed implementations ---
+    // Route through the existing execute_command / execute_command_with_binary entry points.
+    // The Core handles audio_stream_start / audio_stream_stop as command cases in ExecuteCommandManaged,
+    // and audio_stream_push as a command case in ExecuteCommandWithBinaryManaged.
 
     public Response StartAudioStream(CoreInteropRequest request)
     {
-        try
-        {
-            var commandInputJson = request.ToJson();
-            byte[] inputBytes = System.Text.Encoding.UTF8.GetBytes(commandInputJson);
-
-            IntPtr inputPtr = Marshal.AllocHGlobal(inputBytes.Length);
-            Marshal.Copy(inputBytes, 0, inputPtr, inputBytes.Length);
-
-            unsafe
-            {
-                var reqBuf = new RequestBuffer
-                {
-                    Command = IntPtr.Zero,
-                    CommandLength = 0,
-                    Data = inputPtr,
-                    DataLength = inputBytes.Length
-                };
-
-                ResponseBuffer response = default;
-
-                try
-                {
-                    CoreAudioStreamStart(&reqBuf, &response);
-                }
-                finally
-                {
-                    Marshal.FreeHGlobal(inputPtr);
-                }
-
-                return MarshalResponse(response);
-            }
-        }
-        catch (Exception ex) when (ex is not OperationCanceledException)
-        {
-            throw new FoundryLocalException("Error executing audio_stream_start", ex, _logger);
-        }
+        return ExecuteCommand("audio_stream_start", request);
     }
 
     public Response PushAudioData(CoreInteropRequest request, ReadOnlyMemory<byte> audioData)
@@ -451,7 +424,7 @@ public Response PushAudioData(CoreInteropRequest request, ReadOnlyMemory<byte> a
 
                 try
                 {
-                    CoreAudioStreamPush(&reqBuf, &response);
+                    CoreExecuteCommandWithBinary(&reqBuf, &response);
                 }
                 finally
                 {
@@ -470,42 +443,7 @@ public Response PushAudioData(CoreInteropRequest request, ReadOnlyMemory<byte> a
 
     public Response StopAudioStream(CoreInteropRequest request)
     {
-        try
-        {
-            var commandInputJson = request.ToJson();
-            byte[] inputBytes = System.Text.Encoding.UTF8.GetBytes(commandInputJson);
-
-            IntPtr inputPtr = Marshal.AllocHGlobal(inputBytes.Length);
-            Marshal.Copy(inputBytes, 0, inputPtr, inputBytes.Length);
-
-            unsafe
-            {
-                var reqBuf = new RequestBuffer
-                {
-                    Command = IntPtr.Zero,
-                    CommandLength = 0,
-                    Data = inputPtr,
-                    DataLength = inputBytes.Length
-                };
-
-                ResponseBuffer response = default;
-
-                try
-                {
-                    CoreAudioStreamStop(&reqBuf, &response);
-                }
-                finally
-                {
-                    Marshal.FreeHGlobal(inputPtr);
-                }
-
-                return MarshalResponse(response);
-            }
-        }
-        catch (Exception ex) when (ex is not OperationCanceledException)
-        {
-            throw new FoundryLocalException("Error executing audio_stream_stop", ex, _logger);
-        }
+        return ExecuteCommand("audio_stream_stop", request);
     }
 
 }

From 6d067e086ed8c294006f961585233d0421e4da2f Mon Sep 17 00:00:00 2001
From: ruiren_microsoft <ruiren@microsoft.com>
Date: Fri, 13 Mar 2026 12:15:55 -0700
Subject: [PATCH 07/22] rename LiveAudioTranscription

---
 .../cs/src/Detail/JsonSerializationContext.cs |  2 +-
 sdk_v2/cs/src/OpenAI/AudioClient.cs           |  4 ++--
 ...ent.cs => LiveAudioTranscriptionClient.cs} | 24 +++++++++----------
 ...ypes.cs => LiveAudioTranscriptionTypes.cs} |  8 +++----
 4 files changed, 19 insertions(+), 19 deletions(-)
 rename sdk_v2/cs/src/OpenAI/{AudioStreamingClient.cs => LiveAudioTranscriptionClient.cs} (94%)
 rename sdk_v2/cs/src/OpenAI/{AudioStreamTranscriptionTypes.cs => LiveAudioTranscriptionTypes.cs} (91%)

diff --git a/sdk_v2/cs/src/Detail/JsonSerializationContext.cs b/sdk_v2/cs/src/Detail/JsonSerializationContext.cs
index 3cc079f3..9ca3f539 100644
--- a/sdk_v2/cs/src/Detail/JsonSerializationContext.cs
+++ b/sdk_v2/cs/src/Detail/JsonSerializationContext.cs
@@ -34,7 +34,7 @@ namespace Microsoft.AI.Foundry.Local.Detail;
 [JsonSerializable(typeof(PropertyDefinition))]
 [JsonSerializable(typeof(IList<PropertyDefinition>))]
 // --- NEW: Audio streaming types ---
-[JsonSerializable(typeof(AudioStreamTranscriptionResult))]
+[JsonSerializable(typeof(LiveAudioTranscriptionResult))]
 [JsonSerializable(typeof(CoreErrorResponse))]
 [JsonSourceGenerationOptions(DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
                              WriteIndented = false)]
diff --git a/sdk_v2/cs/src/OpenAI/AudioClient.cs b/sdk_v2/cs/src/OpenAI/AudioClient.cs
index 1f44996b..e2088901 100644
--- a/sdk_v2/cs/src/OpenAI/AudioClient.cs
+++ b/sdk_v2/cs/src/OpenAI/AudioClient.cs
@@ -48,9 +48,9 @@ public record AudioSettings
     /// Audio data is pushed in as PCM chunks and transcription results are returned as an async stream.
     /// </summary>
     /// <returns>A streaming session that must be disposed when done.</returns>
-    public AudioTranscriptionStreamSession CreateStreamingSession()
+    public LiveAudioTranscriptionSession CreateLiveTranscriptionSession()
     {
-        return new AudioTranscriptionStreamSession(_modelId);
+        return new LiveAudioTranscriptionSession(_modelId);
     }
 
     /// <summary>
diff --git a/sdk_v2/cs/src/OpenAI/AudioStreamingClient.cs b/sdk_v2/cs/src/OpenAI/LiveAudioTranscriptionClient.cs
similarity index 94%
rename from sdk_v2/cs/src/OpenAI/AudioStreamingClient.cs
rename to sdk_v2/cs/src/OpenAI/LiveAudioTranscriptionClient.cs
index f0a1904d..0c9e6477 100644
--- a/sdk_v2/cs/src/OpenAI/AudioStreamingClient.cs
+++ b/sdk_v2/cs/src/OpenAI/LiveAudioTranscriptionClient.cs
@@ -19,7 +19,7 @@ namespace Microsoft.AI.Foundry.Local;
 /// Audio data from a microphone (or other source) is pushed in as PCM chunks,
 /// and transcription results are returned as an async stream.
 ///
-/// Created via <see cref="OpenAIAudioClient.CreateStreamingSessionAsync"/>.
+/// Created via <see cref="OpenAIAudioClient.CreateLiveTranscriptionSession"/>.
 ///
 /// Thread safety: PushAudioAsync can be called from any thread (including high-frequency
 /// audio callbacks). Pushes are internally serialized via a bounded channel to prevent
@@ -27,7 +27,7 @@ namespace Microsoft.AI.Foundry.Local;
 /// </summary>
 
 
-public sealed class AudioTranscriptionStreamSession : IAsyncDisposable
+public sealed class LiveAudioTranscriptionSession : IAsyncDisposable
 {
     private readonly string _modelId;
     private readonly ICoreInterop _coreInterop = FoundryLocalManager.Instance.CoreInterop;
@@ -40,7 +40,7 @@ public sealed class AudioTranscriptionStreamSession : IAsyncDisposable
     private bool _stopped;
 
     // Output channel: native callback writes, user reads via GetTranscriptionStream
-    private Channel<AudioStreamTranscriptionResult>? _outputChannel;
+    private Channel<LiveAudioTranscriptionResult>? _outputChannel;
 
     // Internal push queue: user writes audio chunks, background loop drains to native core.
     // Bounded to prevent unbounded memory growth if native core is slower than real-time.
@@ -52,14 +52,14 @@ public sealed class AudioTranscriptionStreamSession : IAsyncDisposable
     private CancellationTokenSource? _sessionCts;
 
     // Snapshot of settings captured at StartAsync — prevents mutation after session starts.
-    private AudioStreamTranscriptionOptions? _activeSettings;
+    private LiveAudioTranscriptionOptions? _activeSettings;
 
     /// <summary>
     /// Audio format settings for the streaming session.
     /// Must be configured before calling <see cref="StartAsync"/>.
     /// Settings are frozen once the session starts.
     /// </summary>
-    public record AudioStreamTranscriptionOptions
+    public record LiveAudioTranscriptionOptions
     {
         /// <summary>PCM sample rate in Hz. Default: 16000.</summary>
         public int SampleRate { get; set; } = 16000;
@@ -77,12 +77,12 @@ public record AudioStreamTranscriptionOptions
         /// </summary>
         public int PushQueueCapacity { get; set; } = 100;
 
-        internal AudioStreamTranscriptionOptions Snapshot() => this with { }; // record copy
+        internal LiveAudioTranscriptionOptions Snapshot() => this with { }; // record copy
     }
 
-    public AudioStreamTranscriptionOptions Settings { get; } = new();
+    public LiveAudioTranscriptionOptions Settings { get; } = new();
 
-    internal AudioTranscriptionStreamSession(string modelId)
+    internal LiveAudioTranscriptionSession(string modelId)
     {
         _modelId = modelId;
     }
@@ -105,7 +105,7 @@ public async Task StartAsync(CancellationToken ct = default)
         // Freeze settings
         _activeSettings = Settings.Snapshot();
 
-        _outputChannel = Channel.CreateUnbounded<AudioStreamTranscriptionResult>(
+        _outputChannel = Channel.CreateUnbounded<LiveAudioTranscriptionResult>(
             new UnboundedChannelOptions
             {
                 SingleWriter = true,  // only the native callback writes
@@ -215,7 +215,7 @@ private async Task PushLoopAsync(CancellationToken ct)
                         {
                             try
                             {
-                                var transcription = AudioStreamTranscriptionResult.FromJson(response.Data);
+                                var transcription = LiveAudioTranscriptionResult.FromJson(response.Data);
                                 if (!string.IsNullOrEmpty(transcription.Text))
                                 {
                                     _outputChannel?.Writer.TryWrite(transcription);
@@ -273,7 +273,7 @@ private async Task PushLoopAsync(CancellationToken ct)
     /// </summary>
     /// <param name="ct">Cancellation token.</param>
     /// <returns>Async enumerable of transcription results.</returns>
-    public async IAsyncEnumerable<AudioStreamTranscriptionResult> GetTranscriptionStream(
+    public async IAsyncEnumerable<LiveAudioTranscriptionResult> GetTranscriptionStream(
         [EnumeratorCancellation] CancellationToken ct = default)
     {
         if (_outputChannel == null)
@@ -354,7 +354,7 @@ public async Task StopAsync(CancellationToken ct = default)
             {
                 try
                 {
-                    var finalResult = AudioStreamTranscriptionResult.FromJson(response.Data);
+                    var finalResult = LiveAudioTranscriptionResult.FromJson(response.Data);
                     if (!string.IsNullOrEmpty(finalResult.Text))
                     {
                         _outputChannel?.Writer.TryWrite(finalResult);
diff --git a/sdk_v2/cs/src/OpenAI/AudioStreamTranscriptionTypes.cs b/sdk_v2/cs/src/OpenAI/LiveAudioTranscriptionTypes.cs
similarity index 91%
rename from sdk_v2/cs/src/OpenAI/AudioStreamTranscriptionTypes.cs
rename to sdk_v2/cs/src/OpenAI/LiveAudioTranscriptionTypes.cs
index 02c4169e..33820836 100644
--- a/sdk_v2/cs/src/OpenAI/AudioStreamTranscriptionTypes.cs
+++ b/sdk_v2/cs/src/OpenAI/LiveAudioTranscriptionTypes.cs
@@ -4,7 +4,7 @@ namespace Microsoft.AI.Foundry.Local;
 using System.Text.Json.Serialization;
 using Microsoft.AI.Foundry.Local.Detail;
 
-public record AudioStreamTranscriptionResult
+public record LiveAudioTranscriptionResult
 {
     /// <summary>
     /// Whether this is a final or partial (interim) result.
@@ -35,11 +35,11 @@ public record AudioStreamTranscriptionResult
     [JsonPropertyName("confidence")]
     public float? Confidence { get; init; }
 
-    internal static AudioStreamTranscriptionResult FromJson(string json)
+    internal static LiveAudioTranscriptionResult FromJson(string json)
     {
         return JsonSerializer.Deserialize(json,
-            JsonSerializationContext.Default.AudioStreamTranscriptionResult)
-            ?? throw new FoundryLocalException("Failed to deserialize AudioStreamTranscriptionResult");
+            JsonSerializationContext.Default.LiveAudioTranscriptionResult)
+            ?? throw new FoundryLocalException("Failed to deserialize LiveAudioTranscriptionResult");
     }
 }
 

From 6dee740b1770c3abd8602572e8874f9169e526db Mon Sep 17 00:00:00 2001
From: ruiren_microsoft <ruiren@microsoft.com>
Date: Fri, 13 Mar 2026 13:20:51 -0700
Subject: [PATCH 08/22] fix: add missing using directives for
 EnumeratorCancellation and Channel

---
 sdk_v2/cs/src/OpenAI/AudioClient.cs | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sdk_v2/cs/src/OpenAI/AudioClient.cs b/sdk_v2/cs/src/OpenAI/AudioClient.cs
index 1a402ca6..e7529284 100644
--- a/sdk_v2/cs/src/OpenAI/AudioClient.cs
+++ b/sdk_v2/cs/src/OpenAI/AudioClient.cs
@@ -6,6 +6,8 @@
 
 namespace Microsoft.AI.Foundry.Local;
 
+using System.Runtime.CompilerServices;
+using System.Threading.Channels;
 using Betalgo.Ranul.OpenAI.ObjectModels.RequestModels;
 using Betalgo.Ranul.OpenAI.ObjectModels.ResponseModels;
 

From b89e1bd285c328e6091d7beaf47cc9de27603b67 Mon Sep 17 00:00:00 2001
From: ruiren_microsoft <ruiren@microsoft.com>
Date: Fri, 13 Mar 2026 13:26:38 -0700
Subject: [PATCH 09/22] update test

---
 sdk_v2/cs/test/FoundryLocal.Tests/Utils.cs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sdk_v2/cs/test/FoundryLocal.Tests/Utils.cs b/sdk_v2/cs/test/FoundryLocal.Tests/Utils.cs
index 6da59baf..6b71921a 100644
--- a/sdk_v2/cs/test/FoundryLocal.Tests/Utils.cs
+++ b/sdk_v2/cs/test/FoundryLocal.Tests/Utils.cs
@@ -46,7 +46,7 @@
 
 Console.WriteLine("=== Creating streaming session ===");
 var audioClient = await model.GetAudioClientAsync();
-var streamingClient = audioClient.CreateStreamingSession();
+var streamingClient = audioClient.CreateLiveTranscriptionSession();
 streamingClient.Settings.SampleRate = 16000;
 streamingClient.Settings.Channels = 1;
 streamingClient.Settings.BitsPerSample = 16;

From eb9f282ff3be8403dbd06dc368f2a874668c24d6 Mon Sep 17 00:00:00 2001
From: ruiren_microsoft <ruiren@microsoft.com>
Date: Tue, 17 Mar 2026 20:42:16 -0700
Subject: [PATCH 10/22] e2e test

---
 .../LiveAudioTranscription.csproj             |  30 ++++
 samples/cs/LiveAudioTranscription/Program.cs  | 169 ++++++++++++++++++
 samples/cs/LiveAudioTranscription/README.md   | 143 +++++++++++++++
 3 files changed, 342 insertions(+)
 create mode 100644 samples/cs/LiveAudioTranscription/LiveAudioTranscription.csproj
 create mode 100644 samples/cs/LiveAudioTranscription/Program.cs
 create mode 100644 samples/cs/LiveAudioTranscription/README.md

diff --git a/samples/cs/LiveAudioTranscription/LiveAudioTranscription.csproj b/samples/cs/LiveAudioTranscription/LiveAudioTranscription.csproj
new file mode 100644
index 00000000..a816d2ba
--- /dev/null
+++ b/samples/cs/LiveAudioTranscription/LiveAudioTranscription.csproj
@@ -0,0 +1,30 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <ItemGroup>
+    <ProjectReference Include="..\..\..\sdk_v2\cs\src\Microsoft.AI.Foundry.Local.csproj" />
+  </ItemGroup>
+
+  <ItemGroup>
+    <PackageReference Include="Microsoft.Extensions.Logging.Console" Version="9.0.9" />
+    <PackageReference Include="NAudio" Version="2.2.1" />
+  </ItemGroup>
+
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <TargetFramework>net9.0</TargetFramework>
+    <RuntimeIdentifier>win-x64</RuntimeIdentifier>
+    <ImplicitUsings>enable</ImplicitUsings>
+    <Nullable>enable</Nullable>
+  </PropertyGroup>
+
+  <!-- Copy Core + ORT native DLLs to output directory.
+       These must be placed in the project root before building.
+       See README.md for instructions. -->
+  <ItemGroup>
+    <None Include="Microsoft.AI.Foundry.Local.Core.dll" CopyToOutputDirectory="PreserveNewest" Condition="Exists('Microsoft.AI.Foundry.Local.Core.dll')" />
+    <None Include="onnxruntime-genai.dll" CopyToOutputDirectory="PreserveNewest" Condition="Exists('onnxruntime-genai.dll')" />
+    <None Include="onnxruntime.dll" CopyToOutputDirectory="PreserveNewest" Condition="Exists('onnxruntime.dll')" />
+    <None Include="onnxruntime_providers_shared.dll" CopyToOutputDirectory="PreserveNewest" Condition="Exists('onnxruntime_providers_shared.dll')" />
+  </ItemGroup>
+
+</Project>
diff --git a/samples/cs/LiveAudioTranscription/Program.cs b/samples/cs/LiveAudioTranscription/Program.cs
new file mode 100644
index 00000000..c0ecee95
--- /dev/null
+++ b/samples/cs/LiveAudioTranscription/Program.cs
@@ -0,0 +1,169 @@
+// Live Audio Transcription — Foundry Local SDK Example
+//
+// Demonstrates real-time microphone-to-text using:
+//   SDK (FoundryLocalManager) → Core (NativeAOT DLL) → onnxruntime-genai (StreamingProcessor)
+//
+// Prerequisites:
+//   1. Nemotron ASR model downloaded to a local cache folder
+//   2. Microsoft.AI.Foundry.Local.Core.dll (built from neutron-server with GenAI 0.13.0+)
+//   3. onnxruntime-genai.dll + onnxruntime.dll + onnxruntime_providers_shared.dll (native GenAI)
+//
+// Usage:
+//   dotnet run -- [model-cache-dir]
+//   dotnet run -- C:\path\to\models
+
+using Microsoft.AI.Foundry.Local;
+using Microsoft.Extensions.Logging;
+using NAudio.Wave;
+
+// Parse model cache directory from args or use default
+var modelCacheDir = args.Length > 0
+    ? args[0]
+    : Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData),
+                    "FoundryLocal", "models");
+
+var coreDllPath = Path.Combine(AppContext.BaseDirectory, "Microsoft.AI.Foundry.Local.Core.dll");
+// Disposed at exit so the console logger can flush any queued messages.
+using var loggerFactory = LoggerFactory.Create(b => b.AddConsole().SetMinimumLevel(Microsoft.Extensions.Logging.LogLevel.Information));
+var logger = loggerFactory.CreateLogger("LiveAudioTranscription");
+
+Console.WriteLine("===========================================================");
+Console.WriteLine("   Foundry Local -- Live Audio Transcription Demo");
+Console.WriteLine("===========================================================");
+Console.WriteLine();
+Console.WriteLine($"  Model cache: {modelCacheDir}");
+Console.WriteLine($"  Core DLL:    {coreDllPath} (exists: {File.Exists(coreDllPath)})");
+Console.WriteLine();
+
+try
+{
+    // === Step 1: Initialize Foundry Local SDK ===
+    Console.WriteLine("[1/5] Initializing Foundry Local SDK...");
+    var config = new Configuration
+    {
+        AppName = "LiveAudioTranscription",
+        LogLevel = Microsoft.AI.Foundry.Local.LogLevel.Information,
+        ModelCacheDir = modelCacheDir,
+        AdditionalSettings = new Dictionary<string, string>
+        {
+            { "FoundryLocalCorePath", coreDllPath }
+        }
+    };
+
+    await FoundryLocalManager.CreateAsync(config, logger);
+    Console.WriteLine("       SDK initialized.");
+
+    // === Step 2: Find and load the nemotron ASR model ===
+    Console.WriteLine("[2/5] Loading nemotron model...");
+    var catalog = await FoundryLocalManager.Instance.GetCatalogAsync();
+    var model = await catalog.GetModelAsync("nemotron");
+
+    if (model == null)
+    {
+        Console.WriteLine("ERROR: 'nemotron' not found in catalog.");
+        Console.WriteLine($"       Ensure the model is downloaded to: {modelCacheDir}");
+        Console.WriteLine("       The folder should contain genai_config.json, encoder.onnx, decoder.onnx, etc.");
+        return;
+    }
+
+    Console.WriteLine($"       Found model: {model.Alias}");
+    await model.LoadAsync();
+    Console.WriteLine("       Model loaded.");
+
+    // === Step 3: Create live transcription session (disposed on every exit path) ===
+    Console.WriteLine("[3/5] Creating live transcription session...");
+    var audioClient = await model.GetAudioClientAsync();
+    await using var session = audioClient.CreateLiveTranscriptionSession();
+    session.Settings.SampleRate = 16000;
+    session.Settings.Channels = 1;
+    session.Settings.Language = "en";
+
+    await session.StartAsync();
+    Console.WriteLine("       Session started (SDK -> Core -> GenAI pipeline active).");
+
+    // === Step 4: Set up microphone + transcription reader ===
+    Console.WriteLine("[4/5] Setting up microphone...");
+
+    // Background task reads transcription results as they arrive
+    var readTask = Task.Run(async () =>
+    {
+        try
+        {
+            await foreach (var result in session.GetTranscriptionStream())
+            {
+                if (result.IsFinal)
+                {
+                    Console.WriteLine();
+                    Console.WriteLine($"  [FINAL] {result.Text}");
+                    Console.Out.Flush();
+                }
+                else if (!string.IsNullOrEmpty(result.Text))
+                {
+                    Console.ForegroundColor = ConsoleColor.Cyan;
+                    Console.Write(result.Text);
+                    Console.ResetColor();
+                    Console.Out.Flush();
+                }
+            }
+        }
+        catch (OperationCanceledException) { }
+    });
+
+    // Microphone capture via NAudio
+    using var waveIn = new WaveInEvent
+    {
+        WaveFormat = new WaveFormat(rate: 16000, bits: 16, channels: 1),
+        BufferMilliseconds = 100
+    };
+
+    int totalChunks = 0;
+    long totalBytes = 0;
+
+    waveIn.DataAvailable += (sender, e) =>
+    {
+        if (e.BytesRecorded <= 0) { return; }
+        // Copy the samples first: NAudio recycles e.Buffer once this handler returns,
+        // while the un-awaited append may still be holding the memory it was given.
+        _ = session.AppendAsync(e.Buffer.AsSpan(0, e.BytesRecorded).ToArray());
+        totalChunks++;
+        totalBytes += e.BytesRecorded;
+    };
+
+    // === Step 5: Record ===
+    Console.WriteLine();
+    Console.WriteLine("===========================================================");
+    Console.WriteLine("  LIVE TRANSCRIPTION ACTIVE");
+    Console.WriteLine("  Speak into your microphone.");
+    Console.WriteLine("  Transcription appears in real-time (cyan text).");
+    Console.WriteLine("  Press ENTER to stop recording.");
+    Console.WriteLine("===========================================================");
+    Console.WriteLine();
+
+    waveIn.StartRecording();
+    Console.ReadLine();
+    waveIn.StopRecording();
+
+    var totalSeconds = totalBytes / (16000.0 * 2);
+    Console.WriteLine($"\n  Recording: {totalSeconds:F1}s | {totalChunks} chunks | {totalBytes / 1024} KB");
+
+    // Stop session (flushes remaining audio through the pipeline)
+    Console.WriteLine("\n[5/5] Stopping session...");
+    await session.StopAsync();
+    await readTask;
+
+    // Unload model
+    await model.UnloadAsync();
+
+    Console.WriteLine();
+    Console.WriteLine("===========================================================");
+    Console.WriteLine("  Demo complete!");
+    Console.WriteLine("  Pipeline: Mic -> NAudio -> SDK -> Core -> GenAI -> Text");
+    Console.WriteLine("===========================================================");
+}
+catch (Exception ex)
+{
+    Console.WriteLine($"\nERROR: {ex.Message}");
+    if (ex.InnerException != null)
+        Console.WriteLine($"Inner: {ex.InnerException.Message}");
+    Console.WriteLine($"\n{ex.StackTrace}");
+}
diff --git a/samples/cs/LiveAudioTranscription/README.md b/samples/cs/LiveAudioTranscription/README.md
new file mode 100644
index 00000000..f4897524
--- /dev/null
+++ b/samples/cs/LiveAudioTranscription/README.md
@@ -0,0 +1,143 @@
+# Live Audio Transcription Demo
+
+Real-time microphone-to-text using Foundry Local SDK, Core, and onnxruntime-genai.
+
+## Architecture
+
+```
+Microphone (NAudio, 16kHz/16-bit/mono)
+    |
+    v
+Foundry Local SDK (C#)
+    | AppendAsync(pcmBytes)
+    v
+Foundry Local Core (NativeAOT DLL)
+    | AppendAudioChunk -> CommitTranscription
+    v
+onnxruntime-genai (StreamingProcessor + Generator)
+    | RNNT encoder + decoder
+    v
+Live transcription text
+```
+
+## Prerequisites
+
+1. **Windows x64** with a microphone
+2. **.NET 9.0 SDK** installed
+3. **Nemotron ASR model** downloaded locally
+4. **Native DLLs** (4 files — see Setup below)
+
+## Setup (Step by Step)
+
+### Step 1: Get the native DLLs
+
+You need 4 DLLs placed in this project folder:
+
+| DLL | Source |
+|-----|--------|
+| `Microsoft.AI.Foundry.Local.Core.dll` | Built from neutron-server (`dotnet publish` with NativeAOT) |
+| `onnxruntime-genai.dll` | Built from onnxruntime-genai (Nenad's StreamingProcessor branch) |
+| `onnxruntime.dll` | Comes with the Core publish output |
+| `onnxruntime_providers_shared.dll` | Comes with the Core publish output |
+
+**Option A: From CI artifacts**
+- Download the Core DLL from the neutron-server CI pipeline artifacts
+- Download the GenAI native DLLs from the onnxruntime-genai pipeline artifacts
+
+**Option B: From a teammate**
+- Ask for the 4 DLLs from someone who has already built them
+
+Copy all 4 DLLs to this folder (`samples/cs/LiveAudioTranscription/`).
+
+### Step 2: Get the Nemotron model
+
+The model should be in a folder with this structure:
+```
+models/
+  nemotron/
+    genai_config.json
+    encoder.onnx
+    decoder.onnx
+    joint.onnx
+    tokenizer.json
+    vocab.txt
+```
+
+### Step 3: Build
+
+```powershell
+cd samples/cs/LiveAudioTranscription
+dotnet build -c Debug
+```
+
+### Step 4: Copy native DLLs to output (if not auto-copied)
+
+```powershell
+Copy-Item onnxruntime-genai.dll bin\Debug\net9.0\win-x64\ -Force
+Copy-Item onnxruntime.dll bin\Debug\net9.0\win-x64\ -Force
+Copy-Item onnxruntime_providers_shared.dll bin\Debug\net9.0\win-x64\ -Force
+Copy-Item Microsoft.AI.Foundry.Local.Core.dll bin\Debug\net9.0\win-x64\ -Force
+```
+
+### Step 5: Run
+
+```powershell
+# Default model cache location
+dotnet run -c Debug --no-build
+
+# Or specify model cache directory
+dotnet run -c Debug --no-build -- C:\path\to\models
+```
+
+### Step 6: Speak!
+
+- The app will show `LIVE TRANSCRIPTION ACTIVE`
+- Speak into your microphone
+- Text appears in **cyan** as you speak
+- Press **ENTER** to stop
+
+## Expected Output
+
+```
+===========================================================
+   Foundry Local -- Live Audio Transcription Demo
+===========================================================
+
+[1/5] Initializing Foundry Local SDK...
+       SDK initialized.
+[2/5] Loading nemotron model...
+       Found model: nemotron
+       Model loaded.
+[3/5] Creating live transcription session...
+       Session started (SDK -> Core -> GenAI pipeline active).
+[4/5] Setting up microphone...
+
+===========================================================
+  LIVE TRANSCRIPTION ACTIVE
+  Speak into your microphone.
+  Transcription appears in real-time (cyan text).
+  Press ENTER to stop recording.
+===========================================================
+
+Hello this is a demo of live audio transcription running entirely on device
+  [FINAL] Hello this is a demo of live audio transcription running entirely on device
+
+  Recording: 15.2s | 152 chunks | 475 KB
+
+[5/5] Stopping session...
+
+===========================================================
+  Demo complete!
+  Pipeline: Mic -> NAudio -> SDK -> Core -> GenAI -> Text
+===========================================================
+```
+
+## Troubleshooting
+
+| Error | Fix |
+|-------|-----|
+| `Core DLL not found` | Copy `Microsoft.AI.Foundry.Local.Core.dll` to project folder |
+| `nemotron not found in catalog` | Check `ModelCacheDir` points to folder containing `nemotron/` with `genai_config.json` |
+| `OgaStreamingProcessor not found` | The `onnxruntime-genai.dll` is old — rebuild from Nenad's branch or get from CI |
+| `No microphone` | Ensure a mic is connected and set as default recording device |
+| `num_mels unknown` | Fix `genai_config.json` — ASR params must be at model level, not nested under `speech` |

From 5e981195fde7704d91f18423fea29da52a545657 Mon Sep 17 00:00:00 2001
From: ruiren_microsoft <ruiren@microsoft.com>
Date: Tue, 17 Mar 2026 20:49:26 -0700
Subject: [PATCH 11/22] update for test

---
 samples/cs/LiveAudioTranscription/LiveAudioTranscription.csproj | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/samples/cs/LiveAudioTranscription/LiveAudioTranscription.csproj b/samples/cs/LiveAudioTranscription/LiveAudioTranscription.csproj
index a816d2ba..fb1a95a3 100644
--- a/samples/cs/LiveAudioTranscription/LiveAudioTranscription.csproj
+++ b/samples/cs/LiveAudioTranscription/LiveAudioTranscription.csproj
@@ -1,7 +1,7 @@
 <Project Sdk="Microsoft.NET.Sdk">
 
   <ItemGroup>
-    <ProjectReference Include="..\..\sdk_v2\cs\src\Microsoft.AI.Foundry.Local.csproj" />
+    <ProjectReference Include="..\..\..\sdk_v2\cs\src\Microsoft.AI.Foundry.Local.csproj" />
   </ItemGroup>
 
   <ItemGroup>

From d2e35138c2530b640d96ec7f78c08a688c82ad53 Mon Sep 17 00:00:00 2001
From: Copilot <198982749+Copilot@users.noreply.github.com>
Date: Fri, 20 Mar 2026 10:36:56 -0700
Subject: [PATCH 12/22] Fix C# SDK audio streaming PR: namespace corrections,
 restored public API, sample restructure (#538)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Resolves all 23 review comments on the live audio transcription PR
(`ruiren/audio-streaming-support-sdk`), including merge conflict
resolution. Covers namespace fixes, a removed-but-needed public method,
test file restoration, and sample reorganization.

## SDK fixes (`sdk_v2/cs/src/`)

- **`OpenAI/AudioClient.cs`**: Restored `TranscribeAudioStreamingAsync`
public method — was accidentally removed; `AudioTranscriptionExample`
depends on it
- **`OpenAI/LiveAudioTranscriptionClient.cs`** +
**`LiveAudioTranscriptionTypes.cs`**: Changed namespace
`Microsoft.AI.Foundry.Local` → `Microsoft.AI.Foundry.Local.OpenAI`
(consistent with `ToolCallingExtensions.cs`,
`AudioTranscriptionRequestResponseTypes.cs`); added required `using
Microsoft.AI.Foundry.Local;`
- **`OpenAI/LiveAudioTranscriptionClient.cs`**: Removed unused `using
System.Runtime.InteropServices` (would fail build with
`TreatWarningsAsErrors=true`); fixed XML doc `PushAudioAsync` →
`AppendAsync`; removed leftover `#pragma warning disable` directives;
cleaned up double blank lines
- **`OpenAI/LiveAudioTranscriptionTypes.cs`**: Removed `Confidence`
property — not populated by any code path
- **`AssemblyInfo.cs`**: Removed `InternalsVisibleTo("AudioStreamTest")`
— local dev artifact, not for shipped SDK

## Test fix (`sdk_v2/cs/test/`)

- **`Utils.cs`**: Restored original
`Microsoft.AI.Foundry.Local.Tests.Utils` class from main — file was
completely overwritten with a top-level executable test script, breaking
all existing tests that reference `Utils.CoreInterop`,
`Utils.IsRunningInCI`, etc.

## Sample restructure (`samples/cs/`)

- Removed standalone `samples/cs/LiveAudioTranscription/` (csproj,
Program.cs, README)
- Added
`samples/cs/GettingStarted/src/LiveAudioTranscriptionExample/Program.cs`
— follows `HelloFoundryLocalSdk` pattern using `Utils.GetAppLogger()`,
`Utils.RunWithSpinner()`, `catalog.GetModelAsync()`; removed hardcoded
DLL paths, model cache dir override, `BitsPerSample=16` (property
doesn't exist), and debug diagnostics
- Added cross-platform and Windows `.csproj` files under
`GettingStarted/cross-platform/` and `GettingStarted/windows/` matching
the structure of `AudioTranscriptionExample`

> [!WARNING]
>
> <details>
> <summary>Firewall rules blocked me from connecting to one or more
addresses (expand for details)</summary>
>
> #### I tried to connect to the following addresses, but was blocked by
firewall rules:
>
> - `0t3vsblobprodcus362.vsblob.vsassets.io`
> - Triggering command: `/usr/bin/dotnet dotnet restore
--no-dependencies
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/B2063432E236EB2499F756DC7AEAC028/missingpackages_workingdir
--packages
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/missingpackages
/p:DisableImplicitNuGetFallbackFolder=true --verbosity normal
--configfile
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/nugetconfig/nuget.config
--force ng/emptyFakeDotnetRoot ing/emptyFakeDotnetRoot` (dns block)
> - `1javsblobprodcus364.vsblob.vsassets.io`
> - Triggering command: `/usr/bin/dotnet dotnet restore
--no-dependencies
/home/REDACTED/work/Foundry-Local/Foundry-Local/sdk_v2/cs/Microsoft.AI.Foundry.Local.SDK.sln
--packages
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/packages
/p:DisableImplicitNuGetFallbackFolder=true --verbosity normal
/p:TargetFrameworkRootPath=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot
/p:NetCoreTargetingPackRoot=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot
/p:AllowMissingPrunePackageData=true` (dns block)
> - Triggering command: `/usr/bin/dotnet dotnet restore
--no-dependencies
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/CDD8923456756250B6AF4E42CA6F8DFB/missingpackages_workingdir
--packages
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/missingpackages
/p:DisableImplicitNuGetFallbackFolder=true --verbosity normal
--configfile
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/nugetconfig/nuget.config
--force ng/emptyFakeDotnetRoot ing/emptyFakeDotnetRoot` (dns block)
> - `1s1vsblobprodcus386.vsblob.vsassets.io`
> - Triggering command: `/usr/bin/dotnet dotnet restore
--no-dependencies
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/EFEB4E95C962CAA7DA01DE9B7C9E5F4D/missingpackages_workingdir
--packages
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/missingpackages
/p:DisableImplicitNuGetFallbackFolder=true --verbosity normal
--configfile
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/nugetconfig/nuget.config
--force` (dns block)
> - `4zjvsblobprodcus390.vsblob.vsassets.io`
> - Triggering command: `/usr/bin/dotnet dotnet restore
--no-dependencies
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/EFEB4E95C962CAA7DA01DE9B7C9E5F4D/missingpackages_workingdir
--packages
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/missingpackages
/p:DisableImplicitNuGetFallbackFolder=true --verbosity normal
--configfile
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/nugetconfig/nuget.config
--force` (dns block)
> - Triggering command: `/usr/bin/dotnet dotnet restore
--no-dependencies
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/79820580DC01B1F2024CE1D67DCA3751/missingpackages_workingdir
--packages
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/missingpackages
/p:DisableImplicitNuGetFallbackFolder=true --verbosity normal
--configfile
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/nugetconfig/nuget.config
--force ng/emptyFakeDotnetRoot ing/emptyFakeDotnetRoot` (dns block)
> - `51yvsblobprodcus36.vsblob.vsassets.io`
> - Triggering command: `/usr/bin/dotnet dotnet restore
--no-dependencies
/home/REDACTED/work/Foundry-Local/Foundry-Local/sdk_v2/cs/Microsoft.AI.Foundry.Local.SDK.sln
--packages
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/packages
/p:DisableImplicitNuGetFallbackFolder=true --verbosity normal
/p:TargetFrameworkRootPath=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot
/p:NetCoreTargetingPackRoot=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot
/p:AllowMissingPrunePackageData=true` (dns block)
> - Triggering command: `/usr/bin/dotnet dotnet restore
--no-dependencies
/home/REDACTED/work/Foundry-Local/Foundry-Local/sdk_v2/cs/src/Microsoft.AI.Foundry.Local.csproj
--packages
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/packages
/p:DisableImplicitNuGetFallbackFolder=true --verbosity normal
/p:TargetFrameworkRootPath=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot
/p:NetCoreTargetingPackRoot=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot
/p:AllowMissingPrunePackageData=true` (dns block)
> - Triggering command: `/usr/bin/dotnet dotnet restore
--no-dependencies
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/CDD8923456756250B6AF4E42CA6F8DFB/missingpackages_workingdir
--packages
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/missingpackages
/p:DisableImplicitNuGetFallbackFolder=true --verbosity normal
--configfile
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/nugetconfig/nuget.config
--force ng/emptyFakeDotnetRoot ing/emptyFakeDotnetRoot` (dns block)
> - `80zvsblobprodcus35.vsblob.vsassets.io`
> - Triggering command: `/usr/bin/dotnet dotnet restore
--no-dependencies
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/EFEB4E95C962CAA7DA01DE9B7C9E5F4D/missingpackages_workingdir
--packages
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/missingpackages
/p:DisableImplicitNuGetFallbackFolder=true --verbosity normal
--configfile
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/nugetconfig/nuget.config
--force` (dns block)
> - `aiinfra.pkgs.visualstudio.com`
> - Triggering command:
`/opt/hostedtoolcache/CodeQL/2.24.3/x64/codeql/csharp/tools/linux64/Semmle.Autobuild.CSharp
/opt/hostedtoolcache/CodeQL/2.24.3/x64/codeql/csharp/tools/linux64/Semmle.Autobuild.CSharp`
(dns block)
> - Triggering command: `/usr/bin/dotnet dotnet restore
--no-dependencies
/home/REDACTED/work/Foundry-Local/Foundry-Local/samples/cs/GettingStarted/cross-platform/FoundrySamplesXPlatform.sln
--packages
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/packages
/p:DisableImplicitNuGetFallbackFolder=true --verbosity normal
/p:TargetFrameworkRootPath=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot
/p:NetCoreTargetingPackRoot=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot
/p:AllowMissingPrunePackageData=true` (dns block)
> - Triggering command: `/usr/bin/dotnet dotnet restore
--no-dependencies
/home/REDACTED/work/Foundry-Local/Foundry-Local/samples/cs/GettingStarted/cross-platform/AudioTranscriptionExample/AudioTranscriptionExample.csproj
--packages
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/packages
/p:DisableImplicitNuGetFallbackFolder=true --verbosity normal
/p:TargetFrameworkRootPath=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot
/p:NetCoreTargetingPackRoot=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot
/p:AllowMissingPrunePackageData=true` (dns block)
> - `c50vsblobprodcus330.vsblob.vsassets.io`
> - Triggering command: `/usr/bin/dotnet dotnet restore
--no-dependencies
/home/REDACTED/work/Foundry-Local/Foundry-Local/sdk_v2/cs/Microsoft.AI.Foundry.Local.SDK.sln
--packages
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/packages
/p:DisableImplicitNuGetFallbackFolder=true --verbosity normal
/p:TargetFrameworkRootPath=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot
/p:NetCoreTargetingPackRoot=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot
/p:AllowMissingPrunePackageData=true` (dns block)
> - Triggering command: `/usr/bin/dotnet dotnet restore
--no-dependencies
/home/REDACTED/work/Foundry-Local/Foundry-Local/sdk_v2/cs/test/FoundryLocal.Tests/Microsoft.AI.Foundry.Local.Tests.csproj
--packages
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/packages
/p:DisableImplicitNuGetFallbackFolder=true --verbosity normal
/p:TargetFrameworkRootPath=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot
/p:NetCoreTargetingPackRoot=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot
/p:AllowMissingPrunePackageData=true` (dns block)
> - `frdvsblobprodcus327.vsblob.vsassets.io`
> - Triggering command: `/usr/bin/dotnet dotnet restore
--no-dependencies
/home/REDACTED/work/Foundry-Local/Foundry-Local/sdk_v2/cs/test/FoundryLocal.Tests/Microsoft.AI.Foundry.Local.Tests.csproj
--packages
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/packages
/p:DisableImplicitNuGetFallbackFolder=true --verbosity normal
/p:TargetFrameworkRootPath=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot
/p:NetCoreTargetingPackRoot=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot
/p:AllowMissingPrunePackageData=true` (dns block)
> - `i1qvsblobprodcus353.vsblob.vsassets.io`
> - Triggering command: `/usr/bin/dotnet dotnet restore
--no-dependencies
/home/REDACTED/work/Foundry-Local/Foundry-Local/sdk_v2/cs/Microsoft.AI.Foundry.Local.SDK.sln
--packages
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/packages
/p:DisableImplicitNuGetFallbackFolder=true --verbosity normal
/p:TargetFrameworkRootPath=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot
/p:NetCoreTargetingPackRoot=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot
/p:AllowMissingPrunePackageData=true` (dns block)
> - Triggering command: `/usr/bin/dotnet dotnet restore
--no-dependencies
/home/REDACTED/work/Foundry-Local/Foundry-Local/sdk_v2/cs/test/FoundryLocal.Tests/Microsoft.AI.Foundry.Local.Tests.csproj
--packages
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/packages
/p:DisableImplicitNuGetFallbackFolder=true --verbosity normal
/p:TargetFrameworkRootPath=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot
/p:NetCoreTargetingPackRoot=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot
/p:AllowMissingPrunePackageData=true` (dns block)
> - `imzvsblobprodcus368.vsblob.vsassets.io`
> - Triggering command: `/usr/bin/dotnet dotnet restore
--no-dependencies
/home/REDACTED/work/Foundry-Local/Foundry-Local/sdk_v2/cs/Microsoft.AI.Foundry.Local.SDK.sln
--packages
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/packages
/p:DisableImplicitNuGetFallbackFolder=true --verbosity normal
/p:TargetFrameworkRootPath=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot
/p:NetCoreTargetingPackRoot=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot
/p:AllowMissingPrunePackageData=true` (dns block)
> - Triggering command: `/usr/bin/dotnet dotnet restore
--no-dependencies
/home/REDACTED/work/Foundry-Local/Foundry-Local/sdk_v2/cs/src/Microsoft.AI.Foundry.Local.csproj
--packages
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/packages
/p:DisableImplicitNuGetFallbackFolder=true --verbosity normal
/p:TargetFrameworkRootPath=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot
/p:NetCoreTargetingPackRoot=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot
/p:AllowMissingPrunePackageData=true` (dns block)
> - `k0ivsblobprodcus356.vsblob.vsassets.io`
> - Triggering command: `/usr/bin/dotnet dotnet restore
--no-dependencies
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/B2063432E236EB2499F756DC7AEAC028/missingpackages_workingdir
--packages
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/missingpackages
/p:DisableImplicitNuGetFallbackFolder=true --verbosity normal
--configfile
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/nugetconfig/nuget.config
--force ng/emptyFakeDotnetRoot ing/emptyFakeDotnetRoot` (dns block)
> - `kxqvsblobprodcus376.vsblob.vsassets.io`
> - Triggering command: `/usr/bin/dotnet dotnet restore
--no-dependencies
/home/REDACTED/work/Foundry-Local/Foundry-Local/sdk_v2/cs/Microsoft.AI.Foundry.Local.SDK.sln
--packages
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/packages
/p:DisableImplicitNuGetFallbackFolder=true --verbosity normal
/p:TargetFrameworkRootPath=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot
/p:NetCoreTargetingPackRoot=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot
/p:AllowMissingPrunePackageData=true` (dns block)
> - Triggering command: `/usr/bin/dotnet dotnet restore
--no-dependencies
/home/REDACTED/work/Foundry-Local/Foundry-Local/sdk_v2/cs/test/FoundryLocal.Tests/Microsoft.AI.Foundry.Local.Tests.csproj
--packages
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/packages
/p:DisableImplicitNuGetFallbackFolder=true --verbosity normal
/p:TargetFrameworkRootPath=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot
/p:NetCoreTargetingPackRoot=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot
/p:AllowMissingPrunePackageData=true` (dns block)
> - `m16vsblobprodcus374.vsblob.vsassets.io`
> - Triggering command: `/usr/bin/dotnet dotnet restore
--no-dependencies
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/EFEB4E95C962CAA7DA01DE9B7C9E5F4D/missingpackages_workingdir
--packages
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/missingpackages
/p:DisableImplicitNuGetFallbackFolder=true --verbosity normal
--configfile
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/nugetconfig/nuget.config
--force` (dns block)
> - `s8mvsblobprodcus38.vsblob.vsassets.io`
> - Triggering command: `/usr/bin/dotnet dotnet restore
--no-dependencies
/home/REDACTED/work/Foundry-Local/Foundry-Local/sdk_v2/cs/Microsoft.AI.Foundry.Local.SDK.sln
--packages
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/packages
/p:DisableImplicitNuGetFallbackFolder=true --verbosity normal
/p:TargetFrameworkRootPath=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot
/p:NetCoreTargetingPackRoot=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot
/p:AllowMissingPrunePackageData=true` (dns block)
> - Triggering command: `/usr/bin/dotnet dotnet restore
--no-dependencies
/home/REDACTED/work/Foundry-Local/Foundry-Local/sdk_v2/cs/src/Microsoft.AI.Foundry.Local.csproj
--packages
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/packages
/p:DisableImplicitNuGetFallbackFolder=true --verbosity normal
/p:TargetFrameworkRootPath=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot
/p:NetCoreTargetingPackRoot=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot
/p:AllowMissingPrunePackageData=true` (dns block)
> - Triggering command: `/usr/bin/dotnet dotnet restore
--no-dependencies
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/EFEB4E95C962CAA7DA01DE9B7C9E5F4D/missingpackages_workingdir
--packages
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/missingpackages
/p:DisableImplicitNuGetFallbackFolder=true --verbosity normal
--configfile
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/nugetconfig/nuget.config
--force` (dns block)
> - `se1vsblobprodcus349.vsblob.vsassets.io`
> - Triggering command: `/usr/bin/dotnet dotnet restore
--no-dependencies
/home/REDACTED/work/Foundry-Local/Foundry-Local/sdk_v2/cs/Microsoft.AI.Foundry.Local.SDK.sln
--packages
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/packages
/p:DisableImplicitNuGetFallbackFolder=true --verbosity normal
/p:TargetFrameworkRootPath=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot
/p:NetCoreTargetingPackRoot=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot
/p:AllowMissingPrunePackageData=true` (dns block)
> - Triggering command: `/usr/bin/dotnet dotnet restore
--no-dependencies
/home/REDACTED/work/Foundry-Local/Foundry-Local/sdk_v2/cs/src/Microsoft.AI.Foundry.Local.csproj
--packages
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/packages
/p:DisableImplicitNuGetFallbackFolder=true --verbosity normal
/p:TargetFrameworkRootPath=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot
/p:NetCoreTargetingPackRoot=/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/emptyFakeDotnetRoot
/p:AllowMissingPrunePackageData=true` (dns block)
> - Triggering command: `/usr/bin/dotnet dotnet restore
--no-dependencies
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/63E6685CBF8FE43B2889F9BB97016C00/missingpackages_workingdir
--packages
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/missingpackages
/p:DisableImplicitNuGetFallbackFolder=true --verbosity normal
--configfile
/tmp/codeql-scratch-1a696f058c3bb324/dbs/csharp/working/nugetconfig/nuget.config
--force` (dns block)
>
> If you need me to access, download, or install something from one of
these locations, you can either:
>
> - Configure [Actions setup
steps](https://gh.io/copilot/actions-setup-steps) to set up my
environment, which run before the firewall is enabled
> - Add the appropriate URLs or hosts to the custom allowlist in this
repository's [Copilot coding agent
settings](https://github.com/microsoft/Foundry-Local/settings/copilot/coding_agent)
(admins only)
>
> </details>

<!-- START COPILOT ORIGINAL PROMPT -->



<details>

<summary>Original prompt</summary>


## Context
PR #485 (branch `ruiren/audio-streaming-support-sdk` targeting `main`)
in microsoft/Foundry-Local adds live audio transcription streaming
support to the Foundry Local C# SDK. It currently has merge conflicts
with `main` and 23 review comments from Copilot bot and @kunal-vaishnavi
that all need to be resolved.

## Task 1: Merge main branch and resolve conflicts
The PR's `mergeable_state` is "dirty". Merge `main` into
`ruiren/audio-streaming-support-sdk` and resolve all conflicts, ensuring
the PR author's new code is preserved while incorporating any changes
from main.

## Task 2: Resolve ALL of the following review comments

### SDK Source Code Fixes:

1. **`sdk/cs/src/Detail/JsonSerializationContext.cs`**: The file is in
namespace `Microsoft.AI.Foundry.Local.Detail` but references
`LiveAudioTranscriptionResult` and `CoreErrorResponse` which will be in
namespace `Microsoft.AI.Foundry.Local.OpenAI` (see fix #8 below). Add a
`using Microsoft.AI.Foundry.Local.OpenAI;` statement (this using may
already exist from main, just ensure the types resolve correctly after
the namespace change).

2. **`sdk/cs/src/OpenAI/AudioClient.cs`**: The public
`TranscribeAudioStreamingAsync(...)` method was removed in the PR but
the private `TranscribeAudioStreamingImplAsync(...)` still exists.
**Restore the public `TranscribeAudioStreamingAsync` method** that wraps
the private impl. This is used by speech-to-text models like Whisper and
must NOT be removed. The original version from main is:
```csharp
public async IAsyncEnumerable<AudioCreateTranscriptionResponse> TranscribeAudioStreamingAsync(
    string audioFilePath, [EnumeratorCancellation] CancellationToken ct)
{
    var enumerable = Utils.CallWithExceptionHandling(
        () => TranscribeAudioStreamingImplAsync(audioFilePath, ct),
        "Error during streaming audio transcription.", _logger).ConfigureAwait(false);

    await foreach (var item in enumerable)
    {
        yield return item;
    }
}
```

3. **`sdk/cs/src/OpenAI/LiveAudioTranscriptionClient.cs`**:
- Remove `using System.Runtime.InteropServices;` — it is unused and
`TreatWarningsAsErrors=true` means this will cause CS8019 build failure.
- Fix the XML doc comment that says "Thread safety: PushAudioAsync can
be called from any thread" — change it to reference `AppendAsync`
instead of `PushAudioAsync`.
- Remove `#pragma warning disable` directives if they are not necessary.
The reviewer asked why they're needed — they appear to be from
development and should be removed for a clean PR.

4. **`sdk/cs/src/OpenAI/LiveAudioTranscriptionTypes.cs`**:
- Change namespace from `Microsoft.AI.Foundry.Local` to
`Microsoft.AI.Foundry.Local.OpenAI` (since the file is in the OpenAI
folder, it should match the folder-based namespace convention used by
the rest of the codebase).
- Remove the `Confidence` property from `LiveAudioTranscriptionResult`
if it is not being calculated/populated. The reviewer asked and it
appears not to be calculated.

5. **`sdk/cs/src/OpenAI/LiveAudioTranscriptionClient.cs`**:
- Also change namespace from `Microsoft.AI.Foundry.Local` to
`Microsoft.AI.Foundry.Local.OpenAI` (same reason as above — the file is
in the OpenAI folder).

6. **`sdk/cs/src/Microsoft.AI.Foundry.Local.csproj`**: Remove the
`InternalsVisibleTo("AudioStreamTest")` attribute/assembly attribute.
This was only needed for local experimentation and should not be in the
shipped SDK.

7. **Remove trailing blank lines** in any files that have extra trailing
blank lines added by this PR.

### Test File Fix:

8. **`sdk/cs/test/FoundryLocal.Tests/Utils.cs`**: This file was
completely rewritten in the PR with top-level executable code and a
hardcoded Core DLL path. It must be **restored to its original content
from main**. The original file defines the
`Microsoft.AI.Foundry.Local.Tests.Utils` helper class with
`TestCatalogInfo`, `AssemblyInit`, `CoreInterop`,
`CreateCapturingLoggerMock`, `CreateCoreInteropWithIntercept`,
`IsRunningInCI`, `BuildTestCatalog`, `GetRepoRoot` etc. Multiple tests
reference `Utils.*` (e.g., `Utils.CoreInterop`, `Utils.IsRunningInCI`),
so the test project won't compile without it. Restore it to match the
version on `main` exactly.

### Sample Restructuring:

9. **Move the sample from `samples/cs/LiveAudioTranscription/`** to
`samples/cs/GettingStarted/src/LiveAudioTranscriptionExample/`. The
sample Program.cs should be placed there.

10. **Remove the standalone `samples/cs/LiveAudioTranscription/`
directory** entirely (including the README.md in it — reviewer says it's
good for internal docs but these samples are public-facing, and the
existing GettingStarted README covers it).

11. **Create cross-platform `.csproj`** at
`samples/cs/GettingStarted/cross-platform/LiveAudioTranscriptionExample/LiveAudioTranscriptionExample.csproj`
following the format of the existing cross-platform
AudioTranscriptionExample:
```xml
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <OutputType>Exe<...

</details>



<!-- START COPILOT CODING AGENT SUFFIX -->

*This pull request was created from Copilot chat.*
>

<!-- START COPILOT CODING AGENT TIPS -->
---

🔒 GitHub Advanced Security automatically protects Copilot coding agent pull requests. You can protect all pull requests by enabling Advanced Security for your repositories. [Learn more about Advanced Security.](https://gh.io/cca-advanced-security)

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: rui-ren <15321482+rui-ren@users.noreply.github.com>
---
 .../LiveAudioTranscriptionExample.csproj      |  32 ++
 .../LiveAudioTranscriptionExample/Program.cs  | 105 ++++
 .../LiveAudioTranscriptionExample.csproj      |  30 ++
 .../LiveAudioTranscription.csproj             |  30 --
 samples/cs/LiveAudioTranscription/Program.cs  | 169 ------
 samples/cs/LiveAudioTranscription/README.md   | 143 -----
 sdk_v2/cs/src/AssemblyInfo.cs                 |   1 -
 sdk_v2/cs/src/OpenAI/AudioClient.cs           |  13 +
 .../OpenAI/LiveAudioTranscriptionClient.cs    |  13 +-
 .../src/OpenAI/LiveAudioTranscriptionTypes.cs |   7 +-
 sdk_v2/cs/test/FoundryLocal.Tests/Utils.cs    | 499 +++++++++++++++---
 11 files changed, 623 insertions(+), 419 deletions(-)
 create mode 100644 samples/cs/GettingStarted/cross-platform/LiveAudioTranscriptionExample/LiveAudioTranscriptionExample.csproj
 create mode 100644 samples/cs/GettingStarted/src/LiveAudioTranscriptionExample/Program.cs
 create mode 100644 samples/cs/GettingStarted/windows/LiveAudioTranscriptionExample/LiveAudioTranscriptionExample.csproj
 delete mode 100644 samples/cs/LiveAudioTranscription/LiveAudioTranscription.csproj
 delete mode 100644 samples/cs/LiveAudioTranscription/Program.cs
 delete mode 100644 samples/cs/LiveAudioTranscription/README.md

diff --git a/samples/cs/GettingStarted/cross-platform/LiveAudioTranscriptionExample/LiveAudioTranscriptionExample.csproj b/samples/cs/GettingStarted/cross-platform/LiveAudioTranscriptionExample/LiveAudioTranscriptionExample.csproj
new file mode 100644
index 00000000..ad6086f5
--- /dev/null
+++ b/samples/cs/GettingStarted/cross-platform/LiveAudioTranscriptionExample/LiveAudioTranscriptionExample.csproj
@@ -0,0 +1,32 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <TargetFramework>net9.0</TargetFramework>
+    <ImplicitUsings>enable</ImplicitUsings>
+    <Nullable>enable</Nullable>
+  </PropertyGroup>
+
+  <PropertyGroup Condition="'$(RuntimeIdentifier)'==''">
+    <RuntimeIdentifier>$(NETCoreSdkRuntimeIdentifier)</RuntimeIdentifier>
+  </PropertyGroup>
+
+  <!-- Include the main program -->
+  <ItemGroup>
+    <Compile Include="../../src/LiveAudioTranscriptionExample/*.cs" />
+    <Compile Include="../../src/Shared/*.cs" />
+  </ItemGroup>
+
+  <!-- Packages -->
+  <ItemGroup>
+    <PackageReference Include="Microsoft.AI.Foundry.Local" />
+    <PackageReference Include="NAudio" Version="2.2.1" />
+  </ItemGroup>
+
+  <!-- ONNX Runtime GPU and CUDA provider (required for Linux) -->
+  <ItemGroup Condition="'$(RuntimeIdentifier)' == 'linux-x64'">
+    <PackageReference Include="Microsoft.ML.OnnxRuntime.Gpu" />
+    <PackageReference Include="Microsoft.ML.OnnxRuntimeGenAI.Cuda" />
+  </ItemGroup>
+
+</Project>
diff --git a/samples/cs/GettingStarted/src/LiveAudioTranscriptionExample/Program.cs b/samples/cs/GettingStarted/src/LiveAudioTranscriptionExample/Program.cs
new file mode 100644
index 00000000..d6e812e3
--- /dev/null
+++ b/samples/cs/GettingStarted/src/LiveAudioTranscriptionExample/Program.cs
@@ -0,0 +1,105 @@
+// Live Audio Transcription — Foundry Local SDK Example
+//
+// Demonstrates real-time microphone-to-text using:
+//   SDK (FoundryLocalManager) → Core (NativeAOT DLL) → onnxruntime-genai (StreamingProcessor)
+
+using Microsoft.AI.Foundry.Local;
+using NAudio.Wave;
+
+Console.WriteLine("===========================================================");
+Console.WriteLine("   Foundry Local -- Live Audio Transcription Demo");
+Console.WriteLine("===========================================================");
+Console.WriteLine();
+
+var config = new Configuration
+{
+    AppName = "foundry_local_samples",
+    LogLevel = Microsoft.AI.Foundry.Local.LogLevel.Information
+};
+
+await FoundryLocalManager.CreateAsync(config, Utils.GetAppLogger());
+var mgr = FoundryLocalManager.Instance;
+
+await Utils.RunWithSpinner("Registering execution providers", mgr.EnsureEpsDownloadedAsync());
+
+var catalog = await mgr.GetCatalogAsync();
+
+var model = await catalog.GetModelAsync("nemotron") ?? throw new InvalidOperationException("Model \"nemotron\" not found in catalog");
+
+await model.DownloadAsync(progress =>
+{
+    Console.Write($"\rDownloading model: {progress:F2}%");
+    if (progress >= 100f)
+    {
+        Console.WriteLine();
+    }
+});
+
+Console.Write($"Loading model {model.Id}...");
+await model.LoadAsync();
+Console.WriteLine("done.");
+
+var audioClient = await model.GetAudioClientAsync();
+await using var session = audioClient.CreateLiveTranscriptionSession(); // disposed on exit, including when a later step throws
+session.Settings.SampleRate = 16000;
+session.Settings.Channels = 1;
+session.Settings.Language = "en";
+
+await session.StartAsync();
+Console.WriteLine("       Session started");
+
+var readTask = Task.Run(async () =>
+{
+    try
+    {
+        await foreach (var result in session.GetTranscriptionStream())
+        {
+            if (result.IsFinal)
+            {
+                Console.WriteLine();
+                Console.WriteLine($"  [FINAL] {result.Text}");
+                Console.Out.Flush();
+            }
+            else if (!string.IsNullOrEmpty(result.Text))
+            {
+                Console.ForegroundColor = ConsoleColor.Cyan;
+                Console.Write(result.Text);
+                Console.ResetColor();
+                Console.Out.Flush();
+            }
+        }
+    }
+    catch (OperationCanceledException) { } // cancellation simply ends the reader
+});
+
+using var waveIn = new WaveInEvent
+{
+    WaveFormat = new WaveFormat(rate: 16000, bits: 16, channels: 1), // same rate/channels as session.Settings above
+    BufferMilliseconds = 100
+};
+
+waveIn.DataAvailable += (sender, e) =>
+{
+    if (e.BytesRecorded > 0)
+    {
+        _ = session.AppendAsync(new ReadOnlyMemory<byte>(e.Buffer, 0, e.BytesRecorded)); // not awaited: this handler is synchronous
+    }
+};
+
+Console.WriteLine();
+Console.WriteLine("===========================================================");
+Console.WriteLine("  LIVE TRANSCRIPTION ACTIVE");
+Console.WriteLine("  Speak into your microphone.");
+Console.WriteLine("  Transcription appears in real-time (cyan text).");
+Console.WriteLine("  Press ENTER to stop recording.");
+Console.WriteLine("===========================================================");
+Console.WriteLine();
+
+waveIn.StartRecording();
+Console.ReadLine();
+waveIn.StopRecording();
+
+await session.StopAsync();
+await readTask; // let the reader task finish before the model is unloaded
+
+await model.UnloadAsync();
diff --git a/samples/cs/GettingStarted/windows/LiveAudioTranscriptionExample/LiveAudioTranscriptionExample.csproj b/samples/cs/GettingStarted/windows/LiveAudioTranscriptionExample/LiveAudioTranscriptionExample.csproj
new file mode 100644
index 00000000..b4489af2
--- /dev/null
+++ b/samples/cs/GettingStarted/windows/LiveAudioTranscriptionExample/LiveAudioTranscriptionExample.csproj
@@ -0,0 +1,30 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <ImplicitUsings>enable</ImplicitUsings>
+    <Nullable>enable</Nullable>
+    <!-- For Windows use the following -->
+    <TargetFramework>net9.0-windows10.0.26100</TargetFramework>
+    <WindowsAppSDKSelfContained>false</WindowsAppSDKSelfContained>
+    <Platforms>ARM64;x64</Platforms>
+    <WindowsPackageType>None</WindowsPackageType>
+    <EnableCoreMrtTooling>false</EnableCoreMrtTooling>
+  </PropertyGroup>
+
+  <PropertyGroup Condition="'$(RuntimeIdentifier)'==''">
+    <RuntimeIdentifier>$(NETCoreSdkRuntimeIdentifier)</RuntimeIdentifier>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <Compile Include="../../src/LiveAudioTranscriptionExample/*.cs" />
+    <Compile Include="../../src/Shared/*.cs" />
+  </ItemGroup>
+
+  <!-- Use WinML package for local Foundry SDK on Windows -->
+  <ItemGroup>
+    <PackageReference Include="Microsoft.AI.Foundry.Local.WinML" />
+    <PackageReference Include="NAudio" Version="2.2.1" />
+  </ItemGroup>
+
+</Project>
diff --git a/samples/cs/LiveAudioTranscription/LiveAudioTranscription.csproj b/samples/cs/LiveAudioTranscription/LiveAudioTranscription.csproj
deleted file mode 100644
index fb1a95a3..00000000
--- a/samples/cs/LiveAudioTranscription/LiveAudioTranscription.csproj
+++ /dev/null
@@ -1,30 +0,0 @@
-<Project Sdk="Microsoft.NET.Sdk">
-
-  <ItemGroup>
-    <ProjectReference Include="..\..\..\sdk_v2\cs\src\Microsoft.AI.Foundry.Local.csproj" />
-  </ItemGroup>
-
-  <ItemGroup>
-    <PackageReference Include="Microsoft.Extensions.Logging.Console" Version="9.0.9" />
-    <PackageReference Include="NAudio" Version="2.2.1" />
-  </ItemGroup>
-
-  <PropertyGroup>
-    <OutputType>Exe</OutputType>
-    <TargetFramework>net9.0</TargetFramework>
-    <RuntimeIdentifier>win-x64</RuntimeIdentifier>
-    <ImplicitUsings>enable</ImplicitUsings>
-    <Nullable>enable</Nullable>
-  </PropertyGroup>
-
-  <!-- Copy Core + ORT native DLLs to output directory.
-       These must be placed in the project root before building.
-       See README.md for instructions. -->
-  <ItemGroup>
-    <None Include="Microsoft.AI.Foundry.Local.Core.dll" CopyToOutputDirectory="PreserveNewest" Condition="Exists('Microsoft.AI.Foundry.Local.Core.dll')" />
-    <None Include="onnxruntime-genai.dll" CopyToOutputDirectory="PreserveNewest" Condition="Exists('onnxruntime-genai.dll')" />
-    <None Include="onnxruntime.dll" CopyToOutputDirectory="PreserveNewest" Condition="Exists('onnxruntime.dll')" />
-    <None Include="onnxruntime_providers_shared.dll" CopyToOutputDirectory="PreserveNewest" Condition="Exists('onnxruntime_providers_shared.dll')" />
-  </ItemGroup>
-
-</Project>
diff --git a/samples/cs/LiveAudioTranscription/Program.cs b/samples/cs/LiveAudioTranscription/Program.cs
deleted file mode 100644
index c0ecee95..00000000
--- a/samples/cs/LiveAudioTranscription/Program.cs
+++ /dev/null
@@ -1,169 +0,0 @@
-// Live Audio Transcription — Foundry Local SDK Example
-//
-// Demonstrates real-time microphone-to-text using:
-//   SDK (FoundryLocalManager) → Core (NativeAOT DLL) → onnxruntime-genai (StreamingProcessor)
-//
-// Prerequisites:
-//   1. Nemotron ASR model downloaded to a local cache folder
-//   2. Microsoft.AI.Foundry.Local.Core.dll (built from neutron-server with GenAI 0.13.0+)
-//   3. onnxruntime-genai.dll + onnxruntime.dll + onnxruntime_providers_shared.dll (native GenAI)
-//
-// Usage:
-//   dotnet run -- [model-cache-dir]
-//   dotnet run -- C:\path\to\models
-
-using Microsoft.AI.Foundry.Local;
-using Microsoft.Extensions.Logging;
-using NAudio.Wave;
-
-// Parse model cache directory from args or use default
-var modelCacheDir = args.Length > 0
-    ? args[0]
-    : Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData),
-                    "FoundryLocal", "models");
-
-var coreDllPath = Path.Combine(AppContext.BaseDirectory, "Microsoft.AI.Foundry.Local.Core.dll");
-
-var loggerFactory = LoggerFactory.Create(b => b.AddConsole().SetMinimumLevel(Microsoft.Extensions.Logging.LogLevel.Information));
-var logger = loggerFactory.CreateLogger("LiveAudioTranscription");
-
-Console.WriteLine("===========================================================");
-Console.WriteLine("   Foundry Local -- Live Audio Transcription Demo");
-Console.WriteLine("===========================================================");
-Console.WriteLine();
-Console.WriteLine($"  Model cache: {modelCacheDir}");
-Console.WriteLine($"  Core DLL:    {coreDllPath} (exists: {File.Exists(coreDllPath)})");
-Console.WriteLine();
-
-try
-{
-    // === Step 1: Initialize Foundry Local SDK ===
-    Console.WriteLine("[1/5] Initializing Foundry Local SDK...");
-    var config = new Configuration
-    {
-        AppName = "LiveAudioTranscription",
-        LogLevel = Microsoft.AI.Foundry.Local.LogLevel.Information,
-        ModelCacheDir = modelCacheDir,
-        AdditionalSettings = new Dictionary<string, string>
-        {
-            { "FoundryLocalCorePath", coreDllPath }
-        }
-    };
-
-    await FoundryLocalManager.CreateAsync(config, logger);
-    Console.WriteLine("       SDK initialized.");
-
-    // === Step 2: Find and load the nemotron ASR model ===
-    Console.WriteLine("[2/5] Loading nemotron model...");
-    var catalog = await FoundryLocalManager.Instance.GetCatalogAsync();
-    var model = await catalog.GetModelAsync("nemotron");
-
-    if (model == null)
-    {
-        Console.WriteLine("ERROR: 'nemotron' not found in catalog.");
-        Console.WriteLine($"       Ensure the model is downloaded to: {modelCacheDir}");
-        Console.WriteLine("       The folder should contain genai_config.json, encoder.onnx, decoder.onnx, etc.");
-        return;
-    }
-
-    Console.WriteLine($"       Found model: {model.Alias}");
-    await model.LoadAsync();
-    Console.WriteLine("       Model loaded.");
-
-    // === Step 3: Create live transcription session ===
-    Console.WriteLine("[3/5] Creating live transcription session...");
-    var audioClient = await model.GetAudioClientAsync();
-    var session = audioClient.CreateLiveTranscriptionSession();
-    session.Settings.SampleRate = 16000;
-    session.Settings.Channels = 1;
-    session.Settings.Language = "en";
-
-    await session.StartAsync();
-    Console.WriteLine("       Session started (SDK -> Core -> GenAI pipeline active).");
-
-    // === Step 4: Set up microphone + transcription reader ===
-    Console.WriteLine("[4/5] Setting up microphone...");
-
-    // Background task reads transcription results as they arrive
-    var readTask = Task.Run(async () =>
-    {
-        try
-        {
-            await foreach (var result in session.GetTranscriptionStream())
-            {
-                if (result.IsFinal)
-                {
-                    Console.WriteLine();
-                    Console.WriteLine($"  [FINAL] {result.Text}");
-                    Console.Out.Flush();
-                }
-                else if (!string.IsNullOrEmpty(result.Text))
-                {
-                    Console.ForegroundColor = ConsoleColor.Cyan;
-                    Console.Write(result.Text);
-                    Console.ResetColor();
-                    Console.Out.Flush();
-                }
-            }
-        }
-        catch (OperationCanceledException) { }
-    });
-
-    // Microphone capture via NAudio
-    using var waveIn = new WaveInEvent
-    {
-        WaveFormat = new WaveFormat(rate: 16000, bits: 16, channels: 1),
-        BufferMilliseconds = 100
-    };
-
-    int totalChunks = 0;
-    long totalBytes = 0;
-
-    waveIn.DataAvailable += (sender, e) =>
-    {
-        if (e.BytesRecorded > 0)
-        {
-            _ = session.AppendAsync(new ReadOnlyMemory<byte>(e.Buffer, 0, e.BytesRecorded));
-            totalChunks++;
-            totalBytes += e.BytesRecorded;
-        }
-    };
-
-    // === Step 5: Record ===
-    Console.WriteLine();
-    Console.WriteLine("===========================================================");
-    Console.WriteLine("  LIVE TRANSCRIPTION ACTIVE");
-    Console.WriteLine("  Speak into your microphone.");
-    Console.WriteLine("  Transcription appears in real-time (cyan text).");
-    Console.WriteLine("  Press ENTER to stop recording.");
-    Console.WriteLine("===========================================================");
-    Console.WriteLine();
-
-    waveIn.StartRecording();
-    Console.ReadLine();
-    waveIn.StopRecording();
-
-    var totalSeconds = totalBytes / (16000.0 * 2);
-    Console.WriteLine($"\n  Recording: {totalSeconds:F1}s | {totalChunks} chunks | {totalBytes / 1024} KB");
-
-    // Stop session (flushes remaining audio through the pipeline)
-    Console.WriteLine("\n[5/5] Stopping session...");
-    await session.StopAsync();
-    await readTask;
-
-    // Unload model
-    await model.UnloadAsync();
-
-    Console.WriteLine();
-    Console.WriteLine("===========================================================");
-    Console.WriteLine("  Demo complete!");
-    Console.WriteLine("  Pipeline: Mic -> NAudio -> SDK -> Core -> GenAI -> Text");
-    Console.WriteLine("===========================================================");
-}
-catch (Exception ex)
-{
-    Console.WriteLine($"\nERROR: {ex.Message}");
-    if (ex.InnerException != null)
-        Console.WriteLine($"Inner: {ex.InnerException.Message}");
-    Console.WriteLine($"\n{ex.StackTrace}");
-}
diff --git a/samples/cs/LiveAudioTranscription/README.md b/samples/cs/LiveAudioTranscription/README.md
deleted file mode 100644
index f4897524..00000000
--- a/samples/cs/LiveAudioTranscription/README.md
+++ /dev/null
@@ -1,143 +0,0 @@
-# Live Audio Transcription Demo
-
-Real-time microphone-to-text using Foundry Local SDK, Core, and onnxruntime-genai.
-
-## Architecture
-
-```
-Microphone (NAudio, 16kHz/16-bit/mono)
-    |
-    v
-Foundry Local SDK (C#)
-    | AppendAsync(pcmBytes)
-    v
-Foundry Local Core (NativeAOT DLL)
-    | AppendAudioChunk -> CommitTranscription
-    v
-onnxruntime-genai (StreamingProcessor + Generator)
-    | RNNT encoder + decoder
-    v
-Live transcription text
-```
-
-## Prerequisites
-
-1. **Windows x64** with a microphone
-2. **.NET 9.0 SDK** installed
-3. **Nemotron ASR model** downloaded locally
-4. **Native DLLs** (4 files — see Setup below)
-
-## Setup (Step by Step)
-
-### Step 1: Get the native DLLs
-
-You need 4 DLLs placed in this project folder:
-
-| DLL | Source |
-|-----|--------|
-| `Microsoft.AI.Foundry.Local.Core.dll` | Built from neutron-server (`dotnet publish` with NativeAOT) |
-| `onnxruntime-genai.dll` | Built from onnxruntime-genai (Nenad's StreamingProcessor branch) |
-| `onnxruntime.dll` | Comes with the Core publish output |
-| `onnxruntime_providers_shared.dll` | Comes with the Core publish output |
-
-**Option A: From CI artifacts**
-- Download the Core DLL from the neutron-server CI pipeline artifacts
-- Download the GenAI native DLLs from the onnxruntime-genai pipeline artifacts
-
-**Option B: From a teammate**
-- Ask for the 4 DLLs from someone who has already built them
-
-Copy all 4 DLLs to this folder (`samples/cs/LiveAudioTranscription/`).
-
-### Step 2: Get the Nemotron model
-
-The model should be in a folder with this structure:
-```
-models/
-  nemotron/
-    genai_config.json
-    encoder.onnx
-    decoder.onnx
-    joint.onnx
-    tokenizer.json
-    vocab.txt
-```
-
-### Step 3: Build
-
-```powershell
-cd samples/cs/LiveAudioTranscription
-dotnet build -c Debug
-```
-
-### Step 4: Copy native DLLs to output (if not auto-copied)
-
-```powershell
-Copy-Item onnxruntime-genai.dll bin\Debug\net9.0\win-x64\ -Force
-Copy-Item onnxruntime.dll bin\Debug\net9.0\win-x64\ -Force
-Copy-Item onnxruntime_providers_shared.dll bin\Debug\net9.0\win-x64\ -Force
-Copy-Item Microsoft.AI.Foundry.Local.Core.dll bin\Debug\net9.0\win-x64\ -Force
-```
-
-### Step 5: Run
-
-```powershell
-# Default model cache location
-dotnet run -c Debug --no-build
-
-# Or specify model cache directory
-dotnet run -c Debug --no-build -- C:\path\to\models
-```
-
-### Step 6: Speak!
-
-- The app will show `LIVE TRANSCRIPTION ACTIVE`
-- Speak into your microphone
-- Text appears in **cyan** as you speak
-- Press **ENTER** to stop
-
-## Expected Output
-
-```
-===========================================================
-   Foundry Local -- Live Audio Transcription Demo
-===========================================================
-
-[1/5] Initializing Foundry Local SDK...
-       SDK initialized.
-[2/5] Loading nemotron model...
-       Found model: nemotron
-       Model loaded.
-[3/5] Creating live transcription session...
-       Session started (SDK -> Core -> GenAI pipeline active).
-[4/5] Setting up microphone...
-
-===========================================================
-  LIVE TRANSCRIPTION ACTIVE
-  Speak into your microphone.
-  Transcription appears in real-time (cyan text).
-  Press ENTER to stop recording.
-===========================================================
-
-Hello this is a demo of live audio transcription running entirely on device
-  [FINAL] Hello this is a demo of live audio transcription running entirely on device
-
-  Recording: 15.2s | 152 chunks | 475 KB
-
-[5/5] Stopping session...
-
-===========================================================
-  Demo complete!
-  Pipeline: Mic -> NAudio -> SDK -> Core -> GenAI -> Text
-===========================================================
-```
-
-## Troubleshooting
-
-| Error | Fix |
-|-------|-----|
-| `Core DLL not found` | Copy `Microsoft.AI.Foundry.Local.Core.dll` to project folder |
-| `nemotron not found in catalog` | Check `ModelCacheDir` points to folder containing `nemotron/` with `genai_config.json` |
-| `OgaStreamingProcessor not found` | The `onnxruntime-genai.dll` is old — rebuild from Nenad's branch or get from CI |
-| `No microphone` | Ensure a mic is connected and set as default recording device |
-| `num_mels unknown` | Fix `genai_config.json` — ASR params must be at model level, not nested under `speech` |
diff --git a/sdk_v2/cs/src/AssemblyInfo.cs b/sdk_v2/cs/src/AssemblyInfo.cs
index 987f9de6..9bebe71b 100644
--- a/sdk_v2/cs/src/AssemblyInfo.cs
+++ b/sdk_v2/cs/src/AssemblyInfo.cs
@@ -7,5 +7,4 @@
 using System.Runtime.CompilerServices;
 
 [assembly: InternalsVisibleTo("Microsoft.AI.Foundry.Local.Tests")]
-[assembly: InternalsVisibleTo("AudioStreamTest")]
 [assembly: InternalsVisibleTo("DynamicProxyGenAssembly2")] // for Mock of ICoreInterop
diff --git a/sdk_v2/cs/src/OpenAI/AudioClient.cs b/sdk_v2/cs/src/OpenAI/AudioClient.cs
index e7529284..1986c330 100644
--- a/sdk_v2/cs/src/OpenAI/AudioClient.cs
+++ b/sdk_v2/cs/src/OpenAI/AudioClient.cs
@@ -96,6 +96,19 @@ private async Task<AudioCreateTranscriptionResponse> TranscribeAudioImplAsync(st
     }
 
 
+    public async IAsyncEnumerable<AudioCreateTranscriptionResponse> TranscribeAudioStreamingAsync(
+        string audioFilePath, [EnumeratorCancellation] CancellationToken ct)
+    {
+        var enumerable = Utils.CallWithExceptionHandling(
+            () => TranscribeAudioStreamingImplAsync(audioFilePath, ct),
+            "Error during streaming audio transcription.", _logger).ConfigureAwait(false);
+
+        await foreach (var item in enumerable)
+        {
+            yield return item;
+        }
+    }
+
     private async IAsyncEnumerable<AudioCreateTranscriptionResponse> TranscribeAudioStreamingImplAsync(
         string audioFilePath, [EnumeratorCancellation] CancellationToken ct)
     {
diff --git a/sdk_v2/cs/src/OpenAI/LiveAudioTranscriptionClient.cs b/sdk_v2/cs/src/OpenAI/LiveAudioTranscriptionClient.cs
index 0c9e6477..39eb1683 100644
--- a/sdk_v2/cs/src/OpenAI/LiveAudioTranscriptionClient.cs
+++ b/sdk_v2/cs/src/OpenAI/LiveAudioTranscriptionClient.cs
@@ -4,16 +4,15 @@
 // </copyright>
 // --------------------------------------------------------------------------------------------------------------------
 
-namespace Microsoft.AI.Foundry.Local;
+namespace Microsoft.AI.Foundry.Local.OpenAI;
 
 using System.Runtime.CompilerServices;
-using System.Runtime.InteropServices;
 using System.Globalization;
 using System.Threading.Channels;
+using Microsoft.AI.Foundry.Local;
 using Microsoft.AI.Foundry.Local.Detail;
 using Microsoft.Extensions.Logging;
 
-
 /// <summary>
 /// Session for real-time audio streaming ASR (Automatic Speech Recognition).
 /// Audio data from a microphone (or other source) is pushed in as PCM chunks,
@@ -21,12 +20,11 @@ namespace Microsoft.AI.Foundry.Local;
 ///
 /// Created via <see cref="OpenAIAudioClient.CreateLiveTranscriptionSession"/>.
 ///
-/// Thread safety: PushAudioAsync can be called from any thread (including high-frequency
+/// Thread safety: AppendAsync can be called from any thread (including high-frequency
 /// audio callbacks). Pushes are internally serialized via a bounded channel to prevent
 /// unbounded memory growth and ensure ordering.
 /// </summary>
 
-
 public sealed class LiveAudioTranscriptionSession : IAsyncDisposable
 {
     private readonly string _modelId;
@@ -153,13 +151,8 @@ public async Task StartAsync(CancellationToken ct = default)
         _started = true;
         _stopped = false;
 
-        // Use a dedicated CTS for the push loop — NOT the caller's ct.
-#pragma warning disable IDISP003 // Dispose previous before re-assigning
         _sessionCts = new CancellationTokenSource();
-#pragma warning restore IDISP003
-#pragma warning disable IDISP013 // Await in using
         _pushLoopTask = Task.Run(() => PushLoopAsync(_sessionCts.Token), CancellationToken.None);
-#pragma warning restore IDISP013
     }
 
     /// <summary>
diff --git a/sdk_v2/cs/src/OpenAI/LiveAudioTranscriptionTypes.cs b/sdk_v2/cs/src/OpenAI/LiveAudioTranscriptionTypes.cs
index 33820836..ef0f9edc 100644
--- a/sdk_v2/cs/src/OpenAI/LiveAudioTranscriptionTypes.cs
+++ b/sdk_v2/cs/src/OpenAI/LiveAudioTranscriptionTypes.cs
@@ -1,7 +1,8 @@
-namespace Microsoft.AI.Foundry.Local;
+namespace Microsoft.AI.Foundry.Local.OpenAI;
 
 using System.Text.Json;
 using System.Text.Json.Serialization;
+using Microsoft.AI.Foundry.Local;
 using Microsoft.AI.Foundry.Local.Detail;
 
 public record LiveAudioTranscriptionResult
@@ -31,10 +32,6 @@ public record LiveAudioTranscriptionResult
     [JsonPropertyName("end_time")]
     public double? EndTime { get; init; }
 
-    /// <summary>Confidence score (0.0 - 1.0) if available.</summary>
-    [JsonPropertyName("confidence")]
-    public float? Confidence { get; init; }
-
     internal static LiveAudioTranscriptionResult FromJson(string json)
     {
         return JsonSerializer.Deserialize(json,
diff --git a/sdk_v2/cs/test/FoundryLocal.Tests/Utils.cs b/sdk_v2/cs/test/FoundryLocal.Tests/Utils.cs
index 6b71921a..d64a98b7 100644
--- a/sdk_v2/cs/test/FoundryLocal.Tests/Utils.cs
+++ b/sdk_v2/cs/test/FoundryLocal.Tests/Utils.cs
@@ -1,74 +1,451 @@
-﻿using Microsoft.AI.Foundry.Local;
+﻿// --------------------------------------------------------------------------------------------------------------------
+// <copyright company="Microsoft">
+//   Copyright (c) Microsoft. All rights reserved.
+// </copyright>
+// --------------------------------------------------------------------------------------------------------------------
+
+namespace Microsoft.AI.Foundry.Local.Tests;
+
+using System;
+using System.Collections.Generic;
+using System.Runtime.CompilerServices;
+using System.Text.Json;
+
+using Microsoft.AI.Foundry.Local.Detail;
+using Microsoft.Extensions.Configuration;
 using Microsoft.Extensions.Logging;
 
-var loggerFactory = LoggerFactory.Create(b => b.AddConsole().SetMinimumLevel(LogLevel.Debug));
-var logger = loggerFactory.CreateLogger("AudioStreamTest");
+using Microsoft.VisualStudio.TestPlatform.TestHost;
 
-// Point to the directory containing Core + ORT DLLs
-var corePath = @"C:\Users\ruiren\Desktop\audio-stream-test\Microsoft.AI.Foundry.Local.Core.dll";
+using Moq;
 
-var config = new Configuration
+internal static class Utils
 {
-    AppName = "AudioStreamTest",
-    LogLevel = Microsoft.AI.Foundry.Local.LogLevel.Debug,
-    AdditionalSettings = new Dictionary<string, string>
+    internal struct TestCatalogInfo
     {
-        { "FoundryLocalCorePath", corePath }
+        internal readonly List<ModelInfo> TestCatalog { get; }
+        internal readonly string ModelListJson { get; }
+
+        internal TestCatalogInfo(bool includeCuda)
+        {
+
+            TestCatalog = Utils.BuildTestCatalog(includeCuda);
+            ModelListJson = JsonSerializer.Serialize(TestCatalog, JsonSerializationContext.Default.ListModelInfo);
+        }
     }
-};
 
-Console.WriteLine("=== Initializing FoundryLocalManager ===");
-await FoundryLocalManager.CreateAsync(config, logger);
-var manager = FoundryLocalManager.Instance;
+    internal static readonly TestCatalogInfo TestCatalog = new(true);
 
-Console.WriteLine("=== Getting Catalog ===");
-var catalog = await manager.GetCatalogAsync();
-var models = await catalog.ListModelsAsync();
-Console.WriteLine($"Found {models.Count} models");
+    [Before(Assembly)]
+    public static void AssemblyInit(AssemblyHookContext _)
+    {
+        using var loggerFactory = LoggerFactory.Create(builder =>
+        {
+            builder
+                .AddConsole()
+                .SetMinimumLevel(LogLevel.Debug);
+        });
 
-// Find and load a whisper model
-var model = await catalog.GetModelAsync("whisper-tiny");
-if (model == null)
-{
-    Console.WriteLine("whisper-tiny not found. Available models:");
-    foreach (var m in models)
-        Console.WriteLine($"  - {m.Alias}");
-    return;
-}
+        ILogger logger = loggerFactory.CreateLogger<Program>();
 
-Console.WriteLine($"=== Downloading {model.Alias} ===");
-await model.DownloadAsync(p => Console.Write($"\r  Progress: {p:F1}%"));
-Console.WriteLine();
-
-Console.WriteLine($"=== Loading {model.Alias} ===");
-await model.LoadAsync();
-Console.WriteLine("Model loaded.");
-
-Console.WriteLine("=== Creating streaming session ===");
-var audioClient = await model.GetAudioClientAsync();
-var streamingClient = audioClient.CreateLiveTranscriptionSession();
-streamingClient.Settings.SampleRate = 16000;
-streamingClient.Settings.Channels = 1;
-streamingClient.Settings.BitsPerSample = 16;
-streamingClient.Settings.Language = "en";
-
-Console.WriteLine("=== Starting streaming session ===");
-await streamingClient.StartAsync();
-Console.WriteLine("Session started!");
-
-// Push some fake PCM data (silence — 100ms at 16kHz 16-bit mono = 3200 bytes)
-var fakePcm = new byte[3200];
-Console.WriteLine("=== Pushing audio chunks ===");
-for (int i = 0; i < 5; i++)
-{
-    await streamingClient.AppendAsync(fakePcm);
-    Console.WriteLine($"  Pushed chunk {i + 1}");
-}
+        // Read configuration from appsettings.Test.json
+        logger.LogDebug("Reading configuration from appsettings.Test.json");
+        var configuration = new ConfigurationBuilder()
+            .SetBasePath(Directory.GetCurrentDirectory())
+            .AddJsonFile("appsettings.Test.json", optional: true, reloadOnChange: false)
+            .Build();
+
+        var testModelCacheDirName = "test-data-shared";
+        string testDataSharedPath;
+        if (Path.IsPathRooted(testModelCacheDirName) ||
+            testModelCacheDirName.Contains(Path.DirectorySeparatorChar) ||
+            testModelCacheDirName.Contains(Path.AltDirectorySeparatorChar))
+        {
+            // It's a relative or complete filepath, resolve from current directory
+            testDataSharedPath = Path.GetFullPath(testModelCacheDirName);
+        }
+        else
+        {
+            // It's just a directory name, combine with repo root parent
+            testDataSharedPath = Path.GetFullPath(Path.Combine(GetRepoRoot(), "..", testModelCacheDirName));
+        }
+
+        logger.LogInformation("Using test model cache directory: {testDataSharedPath}", testDataSharedPath);
+
+        if (!Directory.Exists(testDataSharedPath))
+        {
+            throw new DirectoryNotFoundException($"Test model cache directory does not exist: {testDataSharedPath}");
+
+        }
+
+        var config = new Configuration
+        {
+            AppName = "FoundryLocalSdkTest",
+            LogLevel = Local.LogLevel.Debug,
+            Web = new Configuration.WebService
+            {
+                Urls = "http://127.0.0.1:0"
+            },
+            ModelCacheDir = testDataSharedPath
+        };
+
+        // Initialize the singleton instance.
+        FoundryLocalManager.CreateAsync(config, logger).GetAwaiter().GetResult();
+
+        // standalone instance for testing individual components that skips the 'initialize' command
+        CoreInterop = new CoreInterop(logger);        
+    }
+
+    internal static ICoreInterop CoreInterop { get; private set; } = default!;
+
+    // Returns an ILogger mock that appends each logged entry to sink as "<level>: <formatted message>".
+    internal static Mock<ILogger> CreateCapturingLoggerMock(List<string> sink)
+    {
+        var loggerMock = new Mock<ILogger>();
+        loggerMock.Setup(log => log.Log(
+                It.IsAny<LogLevel>(),
+                It.IsAny<EventId>(),
+                It.IsAny<It.IsAnyType>(),
+                It.IsAny<Exception?>(),
+                (Func<It.IsAnyType, Exception?, string>)It.IsAny<object>()))
+            .Callback((LogLevel logLevel, EventId eventId, object state, Exception? error, Delegate formatter) =>
+            {
+                // The formatter arrives as an untyped Delegate, hence DynamicInvoke.
+                sink.Add($"{logLevel}: {formatter.DynamicInvoke(state, error) as string}");
+            });
+        return loggerMock;
+    }
+
+    internal sealed record InteropCommandInterceptInfo // canned reply for one intercepted core command
+    {
+        public string CommandName { get; init; } = default!; // name of the command to intercept
+        public string? CommandInput { get; init; } // not read by CreateCoreInteropWithIntercept
+        public string ResponseData { get; init; } = default!; // returned as Response.Data
+        public string? ResponseError { get; init; } // returned as Response.Error
+    }
+
+    /// <summary>
+    /// Creates an <see cref="ICoreInterop"/> mock that answers each command named in <paramref name="intercepts"/>
+    /// with its canned response and forwards every other command to <paramref name="coreInterop"/>.
+    /// </summary>
+    /// <exception cref="ArgumentException">Two intercepts name the same command.</exception>
+    internal static Mock<ICoreInterop> CreateCoreInteropWithIntercept(ICoreInterop coreInterop,
+                                                                      List<InteropCommandInterceptInfo> intercepts)
+    {
+        var mock = new Mock<ICoreInterop>();
+        // Ordinal, so set membership agrees with the ordinal `s == CommandName` matchers used below.
+        var interceptNames = new HashSet<string>(StringComparer.Ordinal);
+
+        foreach (var intercept in intercepts)
+        {
+            if (!interceptNames.Add(intercept.CommandName))
+            {
+                throw new ArgumentException($"Duplicate intercept for command {intercept.CommandName}");
+            }
+
+            // The synchronous and asynchronous entry points return the same canned data and error.
+            mock.Setup(x => x.ExecuteCommand(It.Is<string>(s => s == intercept.CommandName), It.IsAny<CoreInteropRequest?>()))
+                .Returns(new ICoreInterop.Response { Data = intercept.ResponseData, Error = intercept.ResponseError });
+
+            mock.Setup(x => x.ExecuteCommandAsync(It.Is<string>(s => s == intercept.CommandName),
+                                                  It.IsAny<CoreInteropRequest?>(),
+                                                  It.IsAny<CancellationToken?>()))
+                .ReturnsAsync(new ICoreInterop.Response { Data = intercept.ResponseData, Error = intercept.ResponseError });
+        }
+
+        // Commands without an intercept are passed through to the wrapped interop unchanged.
+        mock.Setup(x => x.ExecuteCommand(It.Is<string>(s => !interceptNames.Contains(s)),
+                                         It.IsAny<CoreInteropRequest?>()))
+            .Returns((string commandName, CoreInteropRequest? commandInput) =>
+                        coreInterop.ExecuteCommand(commandName, commandInput));
+
+        mock.Setup(x => x.ExecuteCommandAsync(It.Is<string>(s => !interceptNames.Contains(s)),
+                                              It.IsAny<CoreInteropRequest?>(),
+                                              It.IsAny<CancellationToken?>()))
+            .Returns((string commandName, CoreInteropRequest? commandInput, CancellationToken? ct) =>
+                coreInterop.ExecuteCommandAsync(commandName, commandInput, ct));
+
+        return mock;
+    }
+
+    internal static bool IsRunningInCI() // true when TF_BUILD or GITHUB_ACTIONS equals "true", ignoring case
+    {
+        var azureDevOps = Environment.GetEnvironmentVariable("TF_BUILD"); // null when the variable is unset
+        var githubActions = Environment.GetEnvironmentVariable("GITHUB_ACTIONS"); // null when the variable is unset
+        var isCI = string.Equals(azureDevOps, "True", StringComparison.OrdinalIgnoreCase) ||
+                   string.Equals(githubActions, "true", StringComparison.OrdinalIgnoreCase);
+
+        return isCI;
+    }
+
+    private static List<ModelInfo> BuildTestCatalog(bool includeCuda = true)
+    {
+        // Mirrors MOCK_CATALOG_DATA ordering and fields (Python tests)
+        var common = new
+        {
+            ProviderType = "AzureFoundry",
+            Version = 1,
+            ModelType = "ONNX",
+            PromptTemplate = (PromptTemplate?)null,
+            Publisher = "Microsoft",
+            Task = "chat-completion",
+            FileSizeMb = 10403,
+            ModelSettings = new ModelSettings { Parameters = [] },
+            SupportsToolCalling = false,
+            License = "MIT",
+            LicenseDescription = "License…",
+            MaxOutputTokens = 1024L,
+            MinFLVersion = "1.0.0",
+        };
 
-Console.WriteLine("=== Stopping session ===");
-await streamingClient.StopAsync();
-Console.WriteLine("Session stopped.");
+        var list = new List<ModelInfo>
+            {
+                // model-1 generic-gpu, generic-cpu:2, generic-cpu:1
+                new()
+                {
+                    Id = "model-1-generic-gpu:1",
+                    Name = "model-1-generic-gpu",
+                    DisplayName = "model-1-generic-gpu",
+                    Uri = "azureml://registries/azureml/models/model-1-generic-gpu/versions/1",
+                    Runtime = new Runtime { DeviceType = DeviceType.GPU, ExecutionProvider = "WebGpuExecutionProvider" },
+                    Alias = "model-1",
+                    // ParentModelUri = "azureml://registries/azureml/models/model-1/versions/1",
+                    ProviderType = common.ProviderType, Version = common.Version, ModelType = common.ModelType,
+                    PromptTemplate = common.PromptTemplate, Publisher = common.Publisher, Task = common.Task,
+                    FileSizeMb = common.FileSizeMb, ModelSettings = common.ModelSettings,
+                    SupportsToolCalling = common.SupportsToolCalling, License = common.License,
+                    LicenseDescription = common.LicenseDescription, MaxOutputTokens = common.MaxOutputTokens,
+                    MinFLVersion = common.MinFLVersion
+                },
+                new()
+                {
+                    Id = "model-1-generic-cpu:2",
+                    Name = "model-1-generic-cpu",
+                    DisplayName = "model-1-generic-cpu",
+                    Uri = "azureml://registries/azureml/models/model-1-generic-cpu/versions/2",
+                    Runtime = new Runtime { DeviceType = DeviceType.CPU, ExecutionProvider = "CPUExecutionProvider" },
+                    Alias = "model-1",
+                    // ParentModelUri = "azureml://registries/azureml/models/model-1/versions/2",
+                    ProviderType = common.ProviderType,
+                    Version = common.Version, ModelType = common.ModelType,
+                    PromptTemplate = common.PromptTemplate,
+                    Publisher = common.Publisher, Task = common.Task,
+                    FileSizeMb = common.FileSizeMb - 10,  // smaller so default chosen in test that sorts on this
+                    ModelSettings = common.ModelSettings, 
+                    SupportsToolCalling = common.SupportsToolCalling,
+                    License = common.License,
+                    LicenseDescription = common.LicenseDescription,
+                    MaxOutputTokens = common.MaxOutputTokens,
+                    MinFLVersion = common.MinFLVersion
+                },
+                new()
+                {
+                    Id = "model-1-generic-cpu:1",
+                    Name = "model-1-generic-cpu",
+                    DisplayName = "model-1-generic-cpu",
+                    Uri = "azureml://registries/azureml/models/model-1-generic-cpu/versions/1",
+                    Runtime = new Runtime { DeviceType = DeviceType.CPU, ExecutionProvider = "CPUExecutionProvider" },
+                    Alias = "model-1",
+                    //ParentModelUri = "azureml://registries/azureml/models/model-1/versions/1",
+                    ProviderType = common.ProviderType,
+                    Version = common.Version,
+                    ModelType = common.ModelType,
+                    PromptTemplate = common.PromptTemplate,
+                    Publisher = common.Publisher, Task = common.Task,
+                    FileSizeMb = common.FileSizeMb,
+                    ModelSettings = common.ModelSettings,
+                    SupportsToolCalling = common.SupportsToolCalling,
+                    License = common.License,
+                    LicenseDescription = common.LicenseDescription,
+                    MaxOutputTokens = common.MaxOutputTokens,
+                    MinFLVersion = common.MinFLVersion
+                },
 
-Console.WriteLine("=== Unloading model ===");
-await model.UnloadAsync();
-Console.WriteLine("Done! All plumbing works end-to-end.");
\ No newline at end of file
+                // model-2 npu:2, npu:1, generic-cpu:1
+                new()
+                {
+                    Id = "model-2-npu:2",
+                    Name = "model-2-npu",
+                    DisplayName = "model-2-npu",
+                    Uri = "azureml://registries/azureml/models/model-2-npu/versions/2",
+                    Runtime = new Runtime { DeviceType = DeviceType.NPU, ExecutionProvider = "QNNExecutionProvider" },
+                    Alias = "model-2",
+                    //ParentModelUri = "azureml://registries/azureml/models/model-2/versions/2",
+                    ProviderType = common.ProviderType,
+                    Version = common.Version, ModelType = common.ModelType,
+                    PromptTemplate = common.PromptTemplate,
+                    Publisher = common.Publisher, Task = common.Task,
+                    FileSizeMb = common.FileSizeMb,
+                    ModelSettings = common.ModelSettings,
+                    SupportsToolCalling = common.SupportsToolCalling,
+                    License = common.License,
+                    LicenseDescription = common.LicenseDescription,
+                    MaxOutputTokens = common.MaxOutputTokens,
+                    MinFLVersion = common.MinFLVersion
+                },
+                new()
+                {
+                    Id = "model-2-npu:1",
+                    Name = "model-2-npu",
+                    DisplayName = "model-2-npu",
+                    Uri = "azureml://registries/azureml/models/model-2-npu/versions/1",
+                    Runtime = new Runtime { DeviceType = DeviceType.NPU, ExecutionProvider = "QNNExecutionProvider" },
+                    Alias = "model-2",
+                    //ParentModelUri = "azureml://registries/azureml/models/model-2/versions/1",
+                    ProviderType = common.ProviderType,
+                    Version = common.Version, ModelType = common.ModelType,
+                    PromptTemplate = common.PromptTemplate,
+                    Publisher = common.Publisher, Task = common.Task,
+                    FileSizeMb = common.FileSizeMb,
+                    ModelSettings = common.ModelSettings,
+                    SupportsToolCalling = common.SupportsToolCalling,
+                    License = common.License,
+                    LicenseDescription = common.LicenseDescription,
+                    MaxOutputTokens = common.MaxOutputTokens,
+                    MinFLVersion = common.MinFLVersion
+                },
+                new()
+                {
+                    Id = "model-2-generic-cpu:1",
+                    Name = "model-2-generic-cpu",
+                    DisplayName = "model-2-generic-cpu",
+                    Uri = "azureml://registries/azureml/models/model-2-generic-cpu/versions/1",
+                    Runtime = new Runtime { DeviceType = DeviceType.CPU, ExecutionProvider = "CPUExecutionProvider" },
+                    Alias = "model-2",
+                    //ParentModelUri = "azureml://registries/azureml/models/model-2/versions/1",
+                    ProviderType = common.ProviderType,
+                    Version = common.Version, ModelType = common.ModelType,
+                    PromptTemplate = common.PromptTemplate,
+                    Publisher = common.Publisher, Task = common.Task,
+                    FileSizeMb = common.FileSizeMb,
+                    ModelSettings = common.ModelSettings,
+                    SupportsToolCalling = common.SupportsToolCalling,
+                    License = common.License,
+                    LicenseDescription = common.LicenseDescription,
+                    MaxOutputTokens = common.MaxOutputTokens,
+                    MinFLVersion = common.MinFLVersion
+                },
+            };
+
+        // model-3 cuda-gpu (optional), generic-gpu, generic-cpu
+        if (includeCuda)
+        {
+            list.Add(new ModelInfo
+            {
+                Id = "model-3-cuda-gpu:1",
+                Name = "model-3-cuda-gpu",
+                DisplayName = "model-3-cuda-gpu",
+                Uri = "azureml://registries/azureml/models/model-3-cuda-gpu/versions/1",
+                Runtime = new Runtime { DeviceType = DeviceType.GPU, ExecutionProvider = "CUDAExecutionProvider" },
+                Alias = "model-3",
+                //ParentModelUri = "azureml://registries/azureml/models/model-3/versions/1",
+                ProviderType = common.ProviderType,
+                Version = common.Version,
+                ModelType = common.ModelType,
+                PromptTemplate = common.PromptTemplate,
+                Publisher = common.Publisher,
+                Task = common.Task,
+                FileSizeMb = common.FileSizeMb,
+                ModelSettings = common.ModelSettings,
+                SupportsToolCalling = common.SupportsToolCalling,
+                License = common.License,
+                LicenseDescription = common.LicenseDescription,
+                MaxOutputTokens = common.MaxOutputTokens,
+                MinFLVersion = common.MinFLVersion
+            });
+        }
+
+        list.AddRange(new[]
+        {
+                new ModelInfo
+                {
+                    Id = "model-3-generic-gpu:1",
+                    Name = "model-3-generic-gpu",
+                    DisplayName = "model-3-generic-gpu",
+                    Uri = "azureml://registries/azureml/models/model-3-generic-gpu/versions/1",
+                    Runtime = new Runtime { DeviceType = DeviceType.GPU, ExecutionProvider = "WebGpuExecutionProvider" },
+                    Alias = "model-3",
+                    //ParentModelUri = "azureml://registries/azureml/models/model-3/versions/1",
+                    ProviderType = common.ProviderType,
+                    Version = common.Version, ModelType = common.ModelType,
+                    PromptTemplate = common.PromptTemplate,
+                    Publisher = common.Publisher, Task = common.Task,
+                    FileSizeMb = common.FileSizeMb,
+                    ModelSettings = common.ModelSettings,
+                    SupportsToolCalling = common.SupportsToolCalling,
+                    License = common.License,
+                    LicenseDescription = common.LicenseDescription,
+                    MaxOutputTokens = common.MaxOutputTokens,
+                    MinFLVersion = common.MinFLVersion
+                },
+                new ModelInfo
+                {
+                    Id = "model-3-generic-cpu:1",
+                    Name = "model-3-generic-cpu",
+                    DisplayName = "model-3-generic-cpu",
+                    Uri = "azureml://registries/azureml/models/model-3-generic-cpu/versions/1",
+                    Runtime = new Runtime { DeviceType = DeviceType.CPU, ExecutionProvider = "CPUExecutionProvider" },
+                    Alias = "model-3",
+                    //ParentModelUri = "azureml://registries/azureml/models/model-3/versions/1",
+                    ProviderType = common.ProviderType,
+                    Version = common.Version,
+                    ModelType = common.ModelType,
+                    PromptTemplate = common.PromptTemplate,
+                    Publisher = common.Publisher, Task = common.Task,
+                    FileSizeMb = common.FileSizeMb,
+                    ModelSettings = common.ModelSettings,
+                    SupportsToolCalling = common.SupportsToolCalling,
+                    License = common.License,
+                    LicenseDescription = common.LicenseDescription,
+                    MaxOutputTokens = common.MaxOutputTokens,
+                    MinFLVersion = common.MinFLVersion
+                }
+            });
+
+        // model-4 generic-gpu (nullable prompt)
+        list.Add(new ModelInfo
+        {
+            Id = "model-4-generic-gpu:1",
+            Name = "model-4-generic-gpu",
+            DisplayName = "model-4-generic-gpu",
+            Uri = "azureml://registries/azureml/models/model-4-generic-gpu/versions/1",
+            Runtime = new Runtime { DeviceType = DeviceType.GPU, ExecutionProvider = "WebGpuExecutionProvider" },
+            Alias = "model-4",
+            //ParentModelUri = "azureml://registries/azureml/models/model-4/versions/1",
+            ProviderType = common.ProviderType,
+            Version = common.Version,
+            ModelType = common.ModelType,
+            PromptTemplate = null,
+            Publisher = common.Publisher,
+            Task = common.Task,
+            FileSizeMb = common.FileSizeMb,
+            ModelSettings = common.ModelSettings,
+            SupportsToolCalling = common.SupportsToolCalling,
+            License = common.License,
+            LicenseDescription = common.LicenseDescription,
+            MaxOutputTokens = common.MaxOutputTokens,
+            MinFLVersion = common.MinFLVersion
+        });
+
+        return list;
+    }
+
+    private static string GetSourceFilePath([CallerFilePath] string path = "") => path; // compiler fills in the caller's source file path
+
+    // Gets the foundry-local-sdk repository root: walks up from this source file until a ".git" entry is found (a directory in a normal clone, a file in worktrees/submodules).
+    private static string GetRepoRoot()
+    {
+        var sourceFile = GetSourceFilePath();
+        var dir = new DirectoryInfo(Path.GetDirectoryName(sourceFile)!);
+
+        while (dir != null)
+        {
+            if (Directory.Exists(Path.Combine(dir.FullName, ".git")) || File.Exists(Path.Combine(dir.FullName, ".git")))
+                return dir.FullName;
+
+            dir = dir.Parent;
+        }
+
+        throw new InvalidOperationException("Could not find git repository root from test file location");
+    }
+}

From 0cac7f3b4f9492979e04d8398e87b9535c04c1ec Mon Sep 17 00:00:00 2001
From: ruiren_microsoft <ruiren@microsoft.com>
Date: Sun, 22 Mar 2026 16:59:48 -0700
Subject: [PATCH 13/22] update response type

---
 sdk/cs/README.md                              | 60 ++++++++++++++++++
 sdk/cs/src/Detail/JsonSerializationContext.cs |  6 +-
 .../OpenAI/LiveAudioTranscriptionClient.cs    | 10 +--
 .../src/OpenAI/LiveAudioTranscriptionTypes.cs | 63 ++++++++++++++-----
 4 files changed, 117 insertions(+), 22 deletions(-)

diff --git a/sdk/cs/README.md b/sdk/cs/README.md
index f58e41e0..48f37d05 100644
--- a/sdk/cs/README.md
+++ b/sdk/cs/README.md
@@ -233,6 +233,64 @@ audioClient.Settings.Language = "en";
 audioClient.Settings.Temperature = 0.0f;
 ```
 
+### Live Audio Transcription (Real-Time Streaming)
+
+For real-time microphone-to-text transcription, use `CreateLiveTranscriptionSession()`. Audio is pushed as raw PCM chunks and transcription results stream back as an `IAsyncEnumerable`.
+
+The streaming result type (`LiveAudioTranscriptionResponse`) extends `AudioCreateTranscriptionResponse` from the Betalgo OpenAI SDK, so it's compatible with the file-based transcription output format while adding streaming-specific fields.
+
+```csharp
+var audioClient = await model.GetAudioClientAsync();
+var session = audioClient.CreateLiveTranscriptionSession();
+
+// Configure audio format (must be set before StartAsync)
+session.Settings.SampleRate = 16000;
+session.Settings.Channels = 1;
+session.Settings.Language = "en";
+
+await session.StartAsync();
+
+// Push audio from a microphone callback (thread-safe)
+waveIn.DataAvailable += (sender, e) =>
+{
+    _ = session.AppendAsync(new ReadOnlyMemory<byte>(e.Buffer, 0, e.BytesRecorded));
+};
+
+// Read transcription results as they arrive
+await foreach (var result in session.GetTranscriptionStream())
+{
+    // result inherits from AudioCreateTranscriptionResponse
+    // - result.Text         — incremental transcribed text (per chunk, not accumulated)
+    // - result.IsFinal      — true for final results, false for interim hypotheses
+    // - result.Segments     — segment-level timing data (Start/End in seconds)
+    // - result.Language     — language code
+    Console.Write(result.Text);
+}
+
+await session.StopAsync();
+```
+
+#### Output Type
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `Text` | `string` | Transcribed text from this audio chunk (inherited from `AudioCreateTranscriptionResponse`) |
+| `IsFinal` | `bool` | `true` for a final result, `false` for an interim hypothesis. Nemotron always returns `true`. |
+| `Language` | `string` | Language code (inherited) |
+| `Duration` | `float` | Audio duration in seconds (inherited) |
+| `Segments` | `List<Segment>` | Segment timing with `Start`/`End` offsets (inherited) |
+| `Words` | `List<WordSegment>` | Word-level timing (inherited, when available) |
+
+#### Session Lifecycle
+
+| Method | Description |
+|--------|-------------|
+| `StartAsync()` | Initialize the streaming session. Settings are frozen after this call. |
+| `AppendAsync(pcmData)` | Push a chunk of raw PCM audio. Thread-safe (bounded internal queue). |
+| `GetTranscriptionStream()` | Async enumerable of transcription results. |
+| `StopAsync()` | Signal end-of-audio, flush remaining audio, and clean up. |
+| `DisposeAsync()` | Calls `StopAsync` if needed. Use `await using` for automatic cleanup. |
+
 ### Web Service
 
 Start an OpenAI-compatible REST endpoint for use by external tools or processes:
@@ -297,6 +355,8 @@ Key types:
 | [`ModelVariant`](./docs/api/microsoft.ai.foundry.local.modelvariant.md) | Specific model variant (hardware/quantization) |
 | [`OpenAIChatClient`](./docs/api/microsoft.ai.foundry.local.openaichatclient.md) | Chat completions (sync + streaming) |
 | [`OpenAIAudioClient`](./docs/api/microsoft.ai.foundry.local.openaiaudioclient.md) | Audio transcription (sync + streaming) |
+| [`LiveAudioTranscriptionSession`](./docs/api/microsoft.ai.foundry.local.openai.liveaudiotranscriptionsession.md) | Real-time audio streaming session |
+| [`LiveAudioTranscriptionResponse`](./docs/api/microsoft.ai.foundry.local.openai.liveaudiotranscriptionresponse.md) | Streaming transcription result (extends `AudioCreateTranscriptionResponse`) |
 | [`ModelInfo`](./docs/api/microsoft.ai.foundry.local.modelinfo.md) | Full model metadata record |
 
 ## Tests
diff --git a/sdk/cs/src/Detail/JsonSerializationContext.cs b/sdk/cs/src/Detail/JsonSerializationContext.cs
index 9ca3f539..ea5f5c21 100644
--- a/sdk/cs/src/Detail/JsonSerializationContext.cs
+++ b/sdk/cs/src/Detail/JsonSerializationContext.cs
@@ -33,9 +33,11 @@ namespace Microsoft.AI.Foundry.Local.Detail;
 [JsonSerializable(typeof(IList<FunctionDefinition>))]
 [JsonSerializable(typeof(PropertyDefinition))]
 [JsonSerializable(typeof(IList<PropertyDefinition>))]
-// --- NEW: Audio streaming types ---
-[JsonSerializable(typeof(LiveAudioTranscriptionResult))]
+// --- Audio streaming types ---
+[JsonSerializable(typeof(LiveAudioTranscriptionResponse))]
+[JsonSerializable(typeof(LiveAudioTranscriptionRaw))]
 [JsonSerializable(typeof(CoreErrorResponse))]
+[JsonSerializable(typeof(AudioCreateTranscriptionResponse.Segment))]
 [JsonSourceGenerationOptions(DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull,
                              WriteIndented = false)]
 internal partial class JsonSerializationContext : JsonSerializerContext
diff --git a/sdk/cs/src/OpenAI/LiveAudioTranscriptionClient.cs b/sdk/cs/src/OpenAI/LiveAudioTranscriptionClient.cs
index 39eb1683..453eb23f 100644
--- a/sdk/cs/src/OpenAI/LiveAudioTranscriptionClient.cs
+++ b/sdk/cs/src/OpenAI/LiveAudioTranscriptionClient.cs
@@ -38,7 +38,7 @@ public sealed class LiveAudioTranscriptionSession : IAsyncDisposable
     private bool _stopped;
 
     // Output channel: native callback writes, user reads via GetTranscriptionStream
-    private Channel<LiveAudioTranscriptionResult>? _outputChannel;
+    private Channel<LiveAudioTranscriptionResponse>? _outputChannel;
 
     // Internal push queue: user writes audio chunks, background loop drains to native core.
     // Bounded to prevent unbounded memory growth if native core is slower than real-time.
@@ -103,7 +103,7 @@ public async Task StartAsync(CancellationToken ct = default)
         // Freeze settings
         _activeSettings = Settings.Snapshot();
 
-        _outputChannel = Channel.CreateUnbounded<LiveAudioTranscriptionResult>(
+        _outputChannel = Channel.CreateUnbounded<LiveAudioTranscriptionResponse>(
             new UnboundedChannelOptions
             {
                 SingleWriter = true,  // only the native callback writes
@@ -208,7 +208,7 @@ private async Task PushLoopAsync(CancellationToken ct)
                         {
                             try
                             {
-                                var transcription = LiveAudioTranscriptionResult.FromJson(response.Data);
+                                var transcription = LiveAudioTranscriptionResponse.FromJson(response.Data);
                                 if (!string.IsNullOrEmpty(transcription.Text))
                                 {
                                     _outputChannel?.Writer.TryWrite(transcription);
@@ -266,7 +266,7 @@ private async Task PushLoopAsync(CancellationToken ct)
     /// </summary>
     /// <param name="ct">Cancellation token.</param>
     /// <returns>Async enumerable of transcription results.</returns>
-    public async IAsyncEnumerable<LiveAudioTranscriptionResult> GetTranscriptionStream(
+    public async IAsyncEnumerable<LiveAudioTranscriptionResponse> GetTranscriptionStream(
         [EnumeratorCancellation] CancellationToken ct = default)
     {
         if (_outputChannel == null)
@@ -347,7 +347,7 @@ public async Task StopAsync(CancellationToken ct = default)
             {
                 try
                 {
-                    var finalResult = LiveAudioTranscriptionResult.FromJson(response.Data);
+                    var finalResult = LiveAudioTranscriptionResponse.FromJson(response.Data);
                     if (!string.IsNullOrEmpty(finalResult.Text))
                     {
                         _outputChannel?.Writer.TryWrite(finalResult);
diff --git a/sdk/cs/src/OpenAI/LiveAudioTranscriptionTypes.cs b/sdk/cs/src/OpenAI/LiveAudioTranscriptionTypes.cs
index ef0f9edc..21a2c5a3 100644
--- a/sdk/cs/src/OpenAI/LiveAudioTranscriptionTypes.cs
+++ b/sdk/cs/src/OpenAI/LiveAudioTranscriptionTypes.cs
@@ -2,10 +2,16 @@ namespace Microsoft.AI.Foundry.Local.OpenAI;
 
 using System.Text.Json;
 using System.Text.Json.Serialization;
+using Betalgo.Ranul.OpenAI.ObjectModels.ResponseModels;
 using Microsoft.AI.Foundry.Local;
 using Microsoft.AI.Foundry.Local.Detail;
 
-public record LiveAudioTranscriptionResult
+/// <summary>
+/// Transcription result for real-time audio streaming sessions.
+/// Extends <see cref="AudioCreateTranscriptionResponse"/> to provide a consistent
+/// output format with file-based transcription, while adding streaming-specific fields.
+/// </summary>
+public class LiveAudioTranscriptionResponse : AudioCreateTranscriptionResponse
 {
     /// <summary>
     /// Whether this is a final or partial (interim) result.
@@ -16,28 +22,55 @@ public record LiveAudioTranscriptionResult
     [JsonPropertyName("is_final")]
     public bool IsFinal { get; init; }
 
-    /// <summary>
-    /// Newly transcribed text from this audio chunk only (incremental hypothesis).
-    /// This is NOT the full accumulated transcript — each result contains only
-    /// the text decoded from the most recent audio chunk.
-    /// </summary>
+    internal static LiveAudioTranscriptionResponse FromJson(string json)
+    {
+        // Deserialize the core's JSON (which has is_final, text, start_time, end_time)
+        // into an intermediate record, then map to the response type.
+        var raw = JsonSerializer.Deserialize(json,
+            JsonSerializationContext.Default.LiveAudioTranscriptionRaw)
+            ?? throw new FoundryLocalException("Failed to deserialize live audio transcription result"); // null result, e.g. payload is the JSON literal null
+
+        var response = new LiveAudioTranscriptionResponse
+        {
+            Text = raw.Text,
+            IsFinal = raw.IsFinal,
+        };
+
+        // Map start_time/end_time into a single Segment for OpenAI-compatible output; a missing bound defaults to 0.
+        if (raw.StartTime.HasValue || raw.EndTime.HasValue)
+        {
+            response.Segments =
+            [
+                new Segment
+                {
+                    Start = (float)(raw.StartTime ?? 0),
+                    End = (float)(raw.EndTime ?? 0),
+                    Text = raw.Text
+                }
+            ];
+        }
+
+        return response;
+    }
+}
+
+/// <summary>
+/// Internal raw deserialization target matching the Core's JSON format.
+/// Mapped to <see cref="LiveAudioTranscriptionResponse"/> in FromJson.
+/// </summary>
+internal record LiveAudioTranscriptionRaw
+{
+    [JsonPropertyName("is_final")]
+    public bool IsFinal { get; init; }
+
     [JsonPropertyName("text")]
     public string Text { get; init; } = string.Empty;
 
-    /// <summary>Start time offset of this segment in the audio stream (seconds).</summary>
     [JsonPropertyName("start_time")]
     public double? StartTime { get; init; }
 
-    /// <summary>End time offset of this segment in the audio stream (seconds).</summary>
     [JsonPropertyName("end_time")]
     public double? EndTime { get; init; }
-
-    internal static LiveAudioTranscriptionResult FromJson(string json)
-    {
-        return JsonSerializer.Deserialize(json,
-            JsonSerializationContext.Default.LiveAudioTranscriptionResult)
-            ?? throw new FoundryLocalException("Failed to deserialize LiveAudioTranscriptionResult");
-    }
 }
 
 internal record CoreErrorResponse

From 06dc45c4988b2cf2204ed3a2a780fa4a28a3eaa8 Mon Sep 17 00:00:00 2001
From: ruiren_microsoft <ruiren@microsoft.com>
Date: Sun, 22 Mar 2026 17:20:36 -0700
Subject: [PATCH 14/22] fix nenad

---
 sdk/cs/src/OpenAI/AudioClient.cs              | 47 ++++++------
 .../OpenAI/LiveAudioTranscriptionClient.cs    | 74 +++++++------------
 2 files changed, 50 insertions(+), 71 deletions(-)

diff --git a/sdk/cs/src/OpenAI/AudioClient.cs b/sdk/cs/src/OpenAI/AudioClient.cs
index 1986c330..cccc3ee4 100644
--- a/sdk/cs/src/OpenAI/AudioClient.cs
+++ b/sdk/cs/src/OpenAI/AudioClient.cs
@@ -45,16 +45,6 @@ public record AudioSettings
     /// </summary>
     public AudioSettings Settings { get; } = new();
 
-    /// <summary>
-    /// Create a real-time streaming transcription session.
-    /// Audio data is pushed in as PCM chunks and transcription results are returned as an async stream.
-    /// </summary>
-    /// <returns>A streaming session that must be disposed when done.</returns>
-    public LiveAudioTranscriptionSession CreateLiveTranscriptionSession()
-    {
-        return new LiveAudioTranscriptionSession(_modelId);
-    }
-
     /// <summary>
     /// Transcribe audio from a file.
     /// </summary>
@@ -72,6 +62,29 @@ public async Task<AudioCreateTranscriptionResponse> TranscribeAudioAsync(string
                                                     .ConfigureAwait(false);
     }
 
+    public async IAsyncEnumerable<AudioCreateTranscriptionResponse> TranscribeAudioStreamingAsync(
+        string audioFilePath, [EnumeratorCancellation] CancellationToken ct)
+    {
+        var enumerable = Utils.CallWithExceptionHandling(
+            () => TranscribeAudioStreamingImplAsync(audioFilePath, ct),
+            "Error during streaming audio transcription.", _logger).ConfigureAwait(false);
+
+        await foreach (var item in enumerable)
+        {
+            yield return item;
+        }
+    }
+
+    /// <summary>
+    /// Create a real-time streaming transcription session.
+    /// Audio data is pushed in as PCM chunks and transcription results are returned as an async stream.
+    /// </summary>
+    /// <returns>A streaming session that must be disposed when done.</returns>
+    public LiveAudioTranscriptionSession CreateLiveTranscriptionSession()
+    {
+        return new LiveAudioTranscriptionSession(_modelId);
+    }
+
     private async Task<AudioCreateTranscriptionResponse> TranscribeAudioImplAsync(string audioFilePath,
                                                                                   CancellationToken? ct)
     {
@@ -95,20 +108,6 @@ private async Task<AudioCreateTranscriptionResponse> TranscribeAudioImplAsync(st
         return output;
     }
 
-
-    public async IAsyncEnumerable<AudioCreateTranscriptionResponse> TranscribeAudioStreamingAsync(
-        string audioFilePath, [EnumeratorCancellation] CancellationToken ct)
-    {
-        var enumerable = Utils.CallWithExceptionHandling(
-            () => TranscribeAudioStreamingImplAsync(audioFilePath, ct),
-            "Error during streaming audio transcription.", _logger).ConfigureAwait(false);
-
-        await foreach (var item in enumerable)
-        {
-            yield return item;
-        }
-    }
-
     private async IAsyncEnumerable<AudioCreateTranscriptionResponse> TranscribeAudioStreamingImplAsync(
         string audioFilePath, [EnumeratorCancellation] CancellationToken ct)
     {
diff --git a/sdk/cs/src/OpenAI/LiveAudioTranscriptionClient.cs b/sdk/cs/src/OpenAI/LiveAudioTranscriptionClient.cs
index 453eb23f..d2b42b8f 100644
--- a/sdk/cs/src/OpenAI/LiveAudioTranscriptionClient.cs
+++ b/sdk/cs/src/OpenAI/LiveAudioTranscriptionClient.cs
@@ -178,13 +178,10 @@ public async ValueTask AppendAsync(ReadOnlyMemory<byte> pcmData, CancellationTok
 
     /// <summary>
     /// Internal loop that drains the push queue and sends chunks to native core one at a time.
-    /// Implements retry for transient native errors and terminates the session on permanent failures.
+    /// Terminates the session on any native error.
     /// </summary>
     private async Task PushLoopAsync(CancellationToken ct)
     {
-        const int maxRetries = 3;
-        var initialRetryDelay = TimeSpan.FromMilliseconds(50);
-
         try
         {
             await foreach (var audioData in _pushChannel!.Reader.ReadAllAsync(ct).ConfigureAwait(false))
@@ -194,57 +191,36 @@ private async Task PushLoopAsync(CancellationToken ct)
                     Params = new Dictionary<string, string> { { "SessionHandle", _sessionHandle! } }
                 };
 
-                var pushed = false;
-                for (int attempt = 0; attempt <= maxRetries && !pushed; attempt++)
+                var response = _coreInterop.PushAudioData(request, audioData);
+
+                if (response.Error != null)
                 {
-                    var response = _coreInterop.PushAudioData(request, audioData);
+                    var errorInfo = CoreErrorResponse.TryParse(response.Error);
+                    var fatalEx = new FoundryLocalException(
+                        $"Push failed (code={errorInfo?.Code ?? "UNKNOWN"}): {response.Error}",
+                        _logger);
+                    _logger.LogError("Terminating push loop due to push failure: {Error}",
+                                     response.Error);
+                    _outputChannel?.Writer.TryComplete(fatalEx);
+                    return;
+                }
 
-                    if (response.Error == null)
+                // Parse transcription result from push response and surface it
+                if (!string.IsNullOrEmpty(response.Data))
+                {
+                    try
                     {
-                        pushed = true;
-
-                        // Parse transcription result from push response and surface it
-                        if (!string.IsNullOrEmpty(response.Data))
+                        var transcription = LiveAudioTranscriptionResponse.FromJson(response.Data);
+                        if (!string.IsNullOrEmpty(transcription.Text))
                         {
-                            try
-                            {
-                                var transcription = LiveAudioTranscriptionResponse.FromJson(response.Data);
-                                if (!string.IsNullOrEmpty(transcription.Text))
-                                {
-                                    _outputChannel?.Writer.TryWrite(transcription);
-                                }
-                            }
-                            catch (Exception parseEx)
-                            {
-                                // Non-fatal: log and continue if response isn't a transcription result
-                                _logger.LogDebug(parseEx, "Could not parse push response as transcription result");
-                            }
+                            _outputChannel?.Writer.TryWrite(transcription);
                         }
-
-                        continue;
                     }
-
-                    // Parse structured error to determine transient vs permanent
-                    var errorInfo = CoreErrorResponse.TryParse(response.Error);
-
-                    if (errorInfo?.IsTransient == true && attempt < maxRetries)
+                    catch (Exception parseEx)
                     {
-                        var delay = initialRetryDelay * Math.Pow(2, attempt);
-                        _logger.LogWarning(
-                            "Transient push error (attempt {Attempt}/{Max}): {Code}. Retrying in {Delay}ms",
-                            attempt + 1, maxRetries, errorInfo.Code, delay.TotalMilliseconds);
-                        await Task.Delay(delay, ct).ConfigureAwait(false);
-                        continue;
+                        // Non-fatal: log and continue if response isn't a transcription result
+                        _logger.LogDebug(parseEx, "Could not parse push response as transcription result");
                     }
-
-                    // Permanent error or retries exhausted — terminate the session
-                    var fatalEx = new FoundryLocalException(
-                        $"Push failed permanently (code={errorInfo?.Code ?? "UNKNOWN"}): {response.Error}",
-                        _logger);
-                    _logger.LogError("Terminating push loop due to permanent push failure: {Error}",
-                                     response.Error);
-                    _outputChannel?.Writer.TryComplete(fatalEx);
-                    return; // exit push loop
                 }
             }
         }
@@ -375,6 +351,10 @@ public async Task StopAsync(CancellationToken ct = default)
         }
     }
 
+    /// <summary>
+    /// Dispose the streaming session. Calls <see cref="StopAsync"/> if the session is still active.
+    /// Safe to call multiple times.
+    /// </summary>
     public async ValueTask DisposeAsync()
     {
         try

From 709788ced058b0e65d14dce0aa0534e786e36ab7 Mon Sep 17 00:00:00 2001
From: ruiren_microsoft <ruiren@microsoft.com>
Date: Tue, 24 Mar 2026 10:14:19 -0700
Subject: [PATCH 15/22] add unit tests

---
 .../OpenAI/LiveAudioTranscriptionClient.cs    |   3 +
 .../src/OpenAI/LiveAudioTranscriptionTypes.cs |   2 +-
 .../LiveAudioTranscriptionTests.cs            | 162 ++++++++++++++++++
 3 files changed, 166 insertions(+), 1 deletion(-)
 create mode 100644 sdk/cs/test/FoundryLocal.Tests/LiveAudioTranscriptionTests.cs

diff --git a/sdk/cs/src/OpenAI/LiveAudioTranscriptionClient.cs b/sdk/cs/src/OpenAI/LiveAudioTranscriptionClient.cs
index d2b42b8f..4b4b6d9a 100644
--- a/sdk/cs/src/OpenAI/LiveAudioTranscriptionClient.cs
+++ b/sdk/cs/src/OpenAI/LiveAudioTranscriptionClient.cs
@@ -151,8 +151,11 @@ public async Task StartAsync(CancellationToken ct = default)
         _started = true;
         _stopped = false;
 
+        _sessionCts?.Dispose();
         _sessionCts = new CancellationTokenSource();
+#pragma warning disable IDISP013 // Await in using — Task.Run is intentionally fire-and-forget here
         _pushLoopTask = Task.Run(() => PushLoopAsync(_sessionCts.Token), CancellationToken.None);
+#pragma warning restore IDISP013
     }
 
     /// <summary>
diff --git a/sdk/cs/src/OpenAI/LiveAudioTranscriptionTypes.cs b/sdk/cs/src/OpenAI/LiveAudioTranscriptionTypes.cs
index 21a2c5a3..c9650232 100644
--- a/sdk/cs/src/OpenAI/LiveAudioTranscriptionTypes.cs
+++ b/sdk/cs/src/OpenAI/LiveAudioTranscriptionTypes.cs
@@ -11,7 +11,7 @@ namespace Microsoft.AI.Foundry.Local.OpenAI;
 /// Extends <see cref="AudioCreateTranscriptionResponse"/> to provide a consistent
 /// output format with file-based transcription, while adding streaming-specific fields.
 /// </summary>
-public class LiveAudioTranscriptionResponse : AudioCreateTranscriptionResponse
+public record LiveAudioTranscriptionResponse : AudioCreateTranscriptionResponse
 {
     /// <summary>
     /// Whether this is a final or partial (interim) result.
diff --git a/sdk/cs/test/FoundryLocal.Tests/LiveAudioTranscriptionTests.cs b/sdk/cs/test/FoundryLocal.Tests/LiveAudioTranscriptionTests.cs
new file mode 100644
index 00000000..ae768fe6
--- /dev/null
+++ b/sdk/cs/test/FoundryLocal.Tests/LiveAudioTranscriptionTests.cs
@@ -0,0 +1,162 @@
+// --------------------------------------------------------------------------------------------------------------------
+// <copyright company="Microsoft">
+//   Copyright (c) Microsoft. All rights reserved.
+// </copyright>
+// --------------------------------------------------------------------------------------------------------------------
+
+namespace Microsoft.AI.Foundry.Local.Tests;
+
+using System.Text.Json;
+using Microsoft.AI.Foundry.Local.Detail;
+using Microsoft.AI.Foundry.Local.OpenAI;
+
+internal sealed class LiveAudioTranscriptionTests
+{
+    // --- LiveAudioTranscriptionResponse.FromJson tests ---
+
+    [Test]
+    public async Task FromJson_ParsesTextAndIsFinal()
+    {
+        var json = """{"is_final":true,"text":"hello world","start_time":null,"end_time":null}""";
+
+        var result = LiveAudioTranscriptionResponse.FromJson(json);
+
+        await Assert.That(result.Text).IsEqualTo("hello world");
+        await Assert.That(result.IsFinal).IsTrue();
+        await Assert.That(result.Segments).IsNull();
+    }
+
+    [Test]
+    public async Task FromJson_MapsTimingToSegments()
+    {
+        var json = """{"is_final":false,"text":"partial","start_time":1.5,"end_time":3.0}""";
+
+        var result = LiveAudioTranscriptionResponse.FromJson(json);
+
+        await Assert.That(result.Text).IsEqualTo("partial");
+        await Assert.That(result.IsFinal).IsFalse();
+        await Assert.That(result.Segments).IsNotNull();
+        await Assert.That(result.Segments!.Count).IsEqualTo(1);
+        await Assert.That(result.Segments[0].Start).IsEqualTo(1.5f);
+        await Assert.That(result.Segments[0].End).IsEqualTo(3.0f);
+        await Assert.That(result.Segments[0].Text).IsEqualTo("partial");
+    }
+
+    [Test]
+    public async Task FromJson_EmptyText_ParsesSuccessfully()
+    {
+        var json = """{"is_final":true,"text":"","start_time":null,"end_time":null}""";
+
+        var result = LiveAudioTranscriptionResponse.FromJson(json);
+
+        await Assert.That(result.Text).IsEqualTo("");
+        await Assert.That(result.IsFinal).IsTrue();
+    }
+
+    [Test]
+    public async Task FromJson_OnlyStartTime_CreatesSegment()
+    {
+        var json = """{"is_final":true,"text":"word","start_time":2.0,"end_time":null}""";
+
+        var result = LiveAudioTranscriptionResponse.FromJson(json);
+
+        await Assert.That(result.Segments).IsNotNull();
+        await Assert.That(result.Segments!.Count).IsEqualTo(1);
+        await Assert.That(result.Segments[0].Start).IsEqualTo(2.0f);
+        await Assert.That(result.Segments[0].End).IsEqualTo(0f);
+    }
+
+    [Test]
+    public async Task FromJson_InvalidJson_Throws()
+    {
+        var ex = Assert.Throws<FoundryLocalException>(() =>
+            LiveAudioTranscriptionResponse.FromJson("not valid json"));
+        await Assert.That(ex).IsNotNull();
+    }
+
+    [Test]
+    public async Task FromJson_InheritsFromAudioCreateTranscriptionResponse()
+    {
+        var json = """{"is_final":true,"text":"test","start_time":null,"end_time":null}""";
+
+        var result = LiveAudioTranscriptionResponse.FromJson(json);
+
+        // Verify it's assignable to the base type
+        Betalgo.Ranul.OpenAI.ObjectModels.ResponseModels.AudioCreateTranscriptionResponse baseRef = result;
+        await Assert.That(baseRef.Text).IsEqualTo("test");
+    }
+
+    // --- LiveAudioTranscriptionOptions tests ---
+
+    [Test]
+    public async Task Options_DefaultValues()
+    {
+        var options = new LiveAudioTranscriptionSession.LiveAudioTranscriptionOptions();
+
+        await Assert.That(options.SampleRate).IsEqualTo(16000);
+        await Assert.That(options.Channels).IsEqualTo(1);
+        await Assert.That(options.Language).IsNull();
+        await Assert.That(options.PushQueueCapacity).IsEqualTo(100);
+    }
+
+    // --- CoreErrorResponse tests ---
+
+    [Test]
+    public async Task CoreErrorResponse_TryParse_ValidJson()
+    {
+        var json = """{"code":"ASR_SESSION_NOT_FOUND","message":"Session not found","isTransient":false}""";
+
+        var error = CoreErrorResponse.TryParse(json);
+
+        await Assert.That(error).IsNotNull();
+        await Assert.That(error!.Code).IsEqualTo("ASR_SESSION_NOT_FOUND");
+        await Assert.That(error.Message).IsEqualTo("Session not found");
+        await Assert.That(error.IsTransient).IsFalse();
+    }
+
+    [Test]
+    public async Task CoreErrorResponse_TryParse_InvalidJson_ReturnsNull()
+    {
+        var result = CoreErrorResponse.TryParse("not json");
+        await Assert.That(result).IsNull();
+    }
+
+    [Test]
+    public async Task CoreErrorResponse_TryParse_TransientError()
+    {
+        var json = """{"code":"BUSY","message":"Model busy","isTransient":true}""";
+
+        var error = CoreErrorResponse.TryParse(json);
+
+        await Assert.That(error).IsNotNull();
+        await Assert.That(error!.IsTransient).IsTrue();
+    }
+
+    // --- Session state guard tests ---
+
+    [Test]
+    public async Task AppendAsync_BeforeStart_Throws()
+    {
+        var session = new LiveAudioTranscriptionSession("test-model");
+        var data = new ReadOnlyMemory<byte>(new byte[100]);
+
+        var ex = Assert.ThrowsAsync<FoundryLocalException>(
+            async () => await session.AppendAsync(data));
+        await Assert.That(ex).IsNotNull();
+    }
+
+    [Test]
+    public async Task GetTranscriptionStream_BeforeStart_Throws()
+    {
+        var session = new LiveAudioTranscriptionSession("test-model");
+
+        var ex = Assert.ThrowsAsync<FoundryLocalException>(async () =>
+        {
+            await foreach (var _ in session.GetTranscriptionStream())
+            {
+                // should not reach here
+            }
+        });
+        await Assert.That(ex).IsNotNull();
+    }
+}

From 24aacb1ed160b676575ca98d9e992d0b1c0ac384 Mon Sep 17 00:00:00 2001
From: ruiren_microsoft <ruiren@microsoft.com>
Date: Tue, 24 Mar 2026 10:30:40 -0700
Subject: [PATCH 16/22] update the ci core package

---
 sdk/cs/src/Microsoft.AI.Foundry.Local.csproj | 4 ++--
 sdk/cs/src/OpenAI/AudioClient.cs             | 9 +++++++++
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/sdk/cs/src/Microsoft.AI.Foundry.Local.csproj b/sdk/cs/src/Microsoft.AI.Foundry.Local.csproj
index 905f9652..dc600e28 100644
--- a/sdk/cs/src/Microsoft.AI.Foundry.Local.csproj
+++ b/sdk/cs/src/Microsoft.AI.Foundry.Local.csproj
@@ -99,8 +99,8 @@
     <PropertyGroup>
       <!-- default version unless overridden during dotnet build/restore command -->
       <FoundryLocalCoreWinMLVersion Condition="'$(FoundryLocalCoreVersion)' != ''">$(FoundryLocalCoreVersion)</FoundryLocalCoreWinMLVersion>
-      <FoundryLocalCoreWinMLVersion Condition="'$(FoundryLocalCoreVersion)' == ''">0.9.0.8-rc3</FoundryLocalCoreWinMLVersion>
-      <FoundryLocalCoreVersion Condition="'$(FoundryLocalCoreVersion)' == ''">0.9.0.8-rc3</FoundryLocalCoreVersion>    </PropertyGroup>
+      <FoundryLocalCoreWinMLVersion Condition="'$(FoundryLocalCoreVersion)' == ''">0.9.0</FoundryLocalCoreWinMLVersion>
+      <FoundryLocalCoreVersion Condition="'$(FoundryLocalCoreVersion)' == ''">0.9.0</FoundryLocalCoreVersion>    </PropertyGroup>
 
     <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'">
       <TreatWarningsAsErrors>True</TreatWarningsAsErrors>
diff --git a/sdk/cs/src/OpenAI/AudioClient.cs b/sdk/cs/src/OpenAI/AudioClient.cs
index cccc3ee4..a8cbc1d7 100644
--- a/sdk/cs/src/OpenAI/AudioClient.cs
+++ b/sdk/cs/src/OpenAI/AudioClient.cs
@@ -62,6 +62,15 @@ public async Task<AudioCreateTranscriptionResponse> TranscribeAudioAsync(string
                                                     .ConfigureAwait(false);
     }
 
+    /// <summary>
+    /// Transcribe audio from a file with streamed output.
+    /// </summary>
+    /// <param name="audioFilePath">
+    /// Path to file containing audio recording.
+    /// Supported formats: mp3
+    /// </param>
+    /// <param name="ct">Cancellation token.</param>
+    /// <returns>An asynchronous enumerable of transcription responses.</returns>
     public async IAsyncEnumerable<AudioCreateTranscriptionResponse> TranscribeAudioStreamingAsync(
         string audioFilePath, [EnumeratorCancellation] CancellationToken ct)
     {

From eeb34b874e95181576c248ca8edaf162cac21e45 Mon Sep 17 00:00:00 2001
From: ruiren_microsoft <ruiren@microsoft.com>
Date: Tue, 24 Mar 2026 10:39:00 -0700
Subject: [PATCH 17/22] update the ci core package

---
 .../LiveAudioTranscriptionTests.cs            | 32 +++++++++++++------
 sdk/cs/test/FoundryLocal.Tests/ModelTests.cs  |  4 +--
 2 files changed, 24 insertions(+), 12 deletions(-)

diff --git a/sdk/cs/test/FoundryLocal.Tests/LiveAudioTranscriptionTests.cs b/sdk/cs/test/FoundryLocal.Tests/LiveAudioTranscriptionTests.cs
index ae768fe6..b29ecd77 100644
--- a/sdk/cs/test/FoundryLocal.Tests/LiveAudioTranscriptionTests.cs
+++ b/sdk/cs/test/FoundryLocal.Tests/LiveAudioTranscriptionTests.cs
@@ -69,7 +69,7 @@ public async Task FromJson_OnlyStartTime_CreatesSegment()
     [Test]
     public async Task FromJson_InvalidJson_Throws()
     {
-        var ex = Assert.Throws<FoundryLocalException>(() =>
+        var ex = Assert.Throws<Exception>(() =>
             LiveAudioTranscriptionResponse.FromJson("not valid json"));
         await Assert.That(ex).IsNotNull();
     }
@@ -137,26 +137,40 @@ public async Task CoreErrorResponse_TryParse_TransientError()
     [Test]
     public async Task AppendAsync_BeforeStart_Throws()
     {
-        var session = new LiveAudioTranscriptionSession("test-model");
+        await using var session = new LiveAudioTranscriptionSession("test-model");
         var data = new ReadOnlyMemory<byte>(new byte[100]);
 
-        var ex = Assert.ThrowsAsync<FoundryLocalException>(
-            async () => await session.AppendAsync(data));
-        await Assert.That(ex).IsNotNull();
+        FoundryLocalException? caught = null;
+        try
+        {
+            await session.AppendAsync(data);
+        }
+        catch (FoundryLocalException ex)
+        {
+            caught = ex;
+        }
+
+        await Assert.That(caught).IsNotNull();
     }
 
     [Test]
     public async Task GetTranscriptionStream_BeforeStart_Throws()
     {
-        var session = new LiveAudioTranscriptionSession("test-model");
+        await using var session = new LiveAudioTranscriptionSession("test-model");
 
-        var ex = Assert.ThrowsAsync<FoundryLocalException>(async () =>
+        FoundryLocalException? caught = null;
+        try
         {
             await foreach (var _ in session.GetTranscriptionStream())
             {
                 // should not reach here
             }
-        });
-        await Assert.That(ex).IsNotNull();
+        }
+        catch (FoundryLocalException ex)
+        {
+            caught = ex;
+        }
+
+        await Assert.That(caught).IsNotNull();
     }
 }
diff --git a/sdk/cs/test/FoundryLocal.Tests/ModelTests.cs b/sdk/cs/test/FoundryLocal.Tests/ModelTests.cs
index 0e2ea1dc..1f49560d 100644
--- a/sdk/cs/test/FoundryLocal.Tests/ModelTests.cs
+++ b/sdk/cs/test/FoundryLocal.Tests/ModelTests.cs
@@ -51,6 +51,4 @@ public async Task GetLastestVersion_Works()
         var latestB = model.GetLatestVersion(variants[2]);
         await Assert.That(latestB).IsEqualTo(variants[1]);
     }
-}
-
-
+}
\ No newline at end of file

From 292a5bc1555b2fafa1b1e9f24b24c84867ab24d3 Mon Sep 17 00:00:00 2001
From: ruiren_microsoft <ruiren@microsoft.com>
Date: Tue, 24 Mar 2026 11:53:04 -0700
Subject: [PATCH 18/22] Add live audio transcription support to JS SDK

---
 sdk/js/script/install.cjs                     |   4 +-
 sdk/js/src/imodel.ts                          |   6 +
 sdk/js/src/index.ts                           |   2 +
 sdk/js/src/model.ts                           |   9 +
 sdk/js/src/modelVariant.ts                    |   9 +
 .../openai/liveAudioTranscriptionClient.ts    | 369 ++++++++++++++++++
 .../src/openai/liveAudioTranscriptionTypes.ts |  49 +++
 7 files changed, 446 insertions(+), 2 deletions(-)
 create mode 100644 sdk/js/src/openai/liveAudioTranscriptionClient.ts
 create mode 100644 sdk/js/src/openai/liveAudioTranscriptionTypes.ts

diff --git a/sdk/js/script/install.cjs b/sdk/js/script/install.cjs
index 3db771b8..a058c5f3 100644
--- a/sdk/js/script/install.cjs
+++ b/sdk/js/script/install.cjs
@@ -54,14 +54,14 @@ const CORE_FEED = useNightly ? ORT_NIGHTLY_FEED : NUGET_FEED;
 
 const FOUNDRY_LOCAL_CORE_ARTIFACT = {
     name: 'Microsoft.AI.Foundry.Local.Core',
-    version: '0.9.0.8-rc3',
+    version: '0.9.0',
     feed: ORT_NIGHTLY_FEED,
     nightly: useNightly
 }
 
 const FOUNDRY_LOCAL_CORE_WINML_ARTIFACT = {
     name: 'Microsoft.AI.Foundry.Local.Core.WinML',
-    version: '0.9.0.8-rc3',
+    version: '0.9.0',
     feed: ORT_NIGHTLY_FEED,
     nightly: useNightly
 }
diff --git a/sdk/js/src/imodel.ts b/sdk/js/src/imodel.ts
index be0913d6..eff742f0 100644
--- a/sdk/js/src/imodel.ts
+++ b/sdk/js/src/imodel.ts
@@ -1,5 +1,6 @@
 import { ChatClient } from './openai/chatClient.js';
 import { AudioClient } from './openai/audioClient.js';
+import { LiveAudioTranscriptionClient } from './openai/liveAudioTranscriptionClient.js';
 import { ResponsesClient } from './openai/responsesClient.js';
 
 export interface IModel {
@@ -16,6 +17,11 @@ export interface IModel {
 
     createChatClient(): ChatClient;
     createAudioClient(): AudioClient;
+    /**
+     * Creates a LiveAudioTranscriptionClient for real-time audio streaming ASR.
+     * @returns A LiveAudioTranscriptionClient instance.
+     */
+    createLiveTranscriptionClient(): LiveAudioTranscriptionClient;
     /**
      * Creates a ResponsesClient for interacting with the model via the Responses API.
      * Unlike createChatClient/createAudioClient (which use FFI), the Responses API
diff --git a/sdk/js/src/index.ts b/sdk/js/src/index.ts
index 7d7ee17a..63f971fd 100644
--- a/sdk/js/src/index.ts
+++ b/sdk/js/src/index.ts
@@ -6,6 +6,8 @@ export { ModelVariant } from './modelVariant.js';
 export type { IModel } from './imodel.js';
 export { ChatClient, ChatClientSettings } from './openai/chatClient.js';
 export { AudioClient, AudioClientSettings } from './openai/audioClient.js';
+export { LiveAudioTranscriptionClient, LiveAudioTranscriptionSettings } from './openai/liveAudioTranscriptionClient.js';
+export type { LiveAudioTranscriptionResult, CoreErrorResponse } from './openai/liveAudioTranscriptionTypes.js';
 export { ResponsesClient, ResponsesClientSettings, getOutputText } from './openai/responsesClient.js';
 export { ModelLoadManager } from './detail/modelLoadManager.js';
 /** @internal */
diff --git a/sdk/js/src/model.ts b/sdk/js/src/model.ts
index e2b37119..2ea1da01 100644
--- a/sdk/js/src/model.ts
+++ b/sdk/js/src/model.ts
@@ -1,6 +1,7 @@
 import { ModelVariant } from './modelVariant.js';
 import { ChatClient } from './openai/chatClient.js';
 import { AudioClient } from './openai/audioClient.js';
+import { LiveAudioTranscriptionClient } from './openai/liveAudioTranscriptionClient.js';
 import { ResponsesClient } from './openai/responsesClient.js';
 import { IModel } from './imodel.js';
 
@@ -159,6 +160,14 @@ export class Model implements IModel {
         return this.selectedVariant.createAudioClient();
     }
 
+    /**
+     * Creates a LiveAudioTranscriptionClient for real-time audio streaming ASR.
+     * @returns A LiveAudioTranscriptionClient instance.
+     */
+    public createLiveTranscriptionClient(): LiveAudioTranscriptionClient {
+        return this.selectedVariant.createLiveTranscriptionClient();
+    }
+
     /**
      * Creates a ResponsesClient for interacting with the model via the Responses API.
      * @param baseUrl - The base URL of the Foundry Local web service.
diff --git a/sdk/js/src/modelVariant.ts b/sdk/js/src/modelVariant.ts
index 4d3e2bee..c5bbf24e 100644
--- a/sdk/js/src/modelVariant.ts
+++ b/sdk/js/src/modelVariant.ts
@@ -3,6 +3,7 @@ import { ModelLoadManager } from './detail/modelLoadManager.js';
 import { ModelInfo } from './types.js';
 import { ChatClient } from './openai/chatClient.js';
 import { AudioClient } from './openai/audioClient.js';
+import { LiveAudioTranscriptionClient } from './openai/liveAudioTranscriptionClient.js';
 import { ResponsesClient } from './openai/responsesClient.js';
 import { IModel } from './imodel.js';
 
@@ -129,6 +130,14 @@ export class ModelVariant implements IModel {
         return new AudioClient(this._modelInfo.id, this.coreInterop);
     }
 
+    /**
+     * Creates a LiveAudioTranscriptionClient for real-time audio streaming ASR.
+     * @returns A LiveAudioTranscriptionClient instance.
+     */
+    public createLiveTranscriptionClient(): LiveAudioTranscriptionClient {
+        return new LiveAudioTranscriptionClient(this._modelInfo.id, this.coreInterop);
+    }
+
     /**
      * Creates a ResponsesClient for interacting with the model via the Responses API.
      * @param baseUrl - The base URL of the Foundry Local web service.
diff --git a/sdk/js/src/openai/liveAudioTranscriptionClient.ts b/sdk/js/src/openai/liveAudioTranscriptionClient.ts
new file mode 100644
index 00000000..0857f840
--- /dev/null
+++ b/sdk/js/src/openai/liveAudioTranscriptionClient.ts
@@ -0,0 +1,369 @@
+import { CoreInterop } from '../detail/coreInterop.js';
+import { LiveAudioTranscriptionResult, tryParseCoreError } from './liveAudioTranscriptionTypes.js';
+
+/**
+ * Audio format settings for a streaming session.
+ * Must be configured before calling start().
+ * Settings are frozen once the session starts.
+ */
+export class LiveAudioTranscriptionSettings {
+    /** PCM sample rate in Hz. Default: 16000. */
+    sampleRate: number = 16000;
+    /** Number of audio channels. Default: 1 (mono). */
+    channels: number = 1;
+    /** Bits per sample. Default: 16. */
+    bitsPerSample: number = 16;
+    /** Optional BCP-47 language hint (e.g., "en", "zh"). */
+    language?: string;
+    /** Maximum number of audio chunks buffered in the internal push queue. Default: 100. */
+    pushQueueCapacity: number = 100;
+
+    /** @internal Create a frozen copy of these settings. */
+    snapshot(): LiveAudioTranscriptionSettings {
+        const copy = new LiveAudioTranscriptionSettings();
+        copy.sampleRate = this.sampleRate;
+        copy.channels = this.channels;
+        copy.bitsPerSample = this.bitsPerSample;
+        copy.language = this.language;
+        copy.pushQueueCapacity = this.pushQueueCapacity;
+        return Object.freeze(copy) as LiveAudioTranscriptionSettings;
+    }
+}
+
+/**
+ * Internal async queue that acts like C#'s Channel<T>.
+ * Supports a single consumer reading via async iteration; writers may interleave, but only one write() can be parked on backpressure at a time.
+ * @internal
+ */
+class AsyncQueue<T> {
+    private queue: T[] = [];
+    private waitingResolve: ((value: IteratorResult<T>) => void) | null = null;
+    private completed = false;
+    private completionError: Error | null = null;
+    private maxCapacity: number;
+    private backpressureResolve: (() => void) | null = null;
+
+    constructor(maxCapacity: number = Infinity) {
+        this.maxCapacity = maxCapacity;
+    }
+
+    /** Push an item. If at capacity, waits until space is available. */
+    async write(item: T): Promise<void> {
+        if (this.completed) {
+            throw new Error('Cannot write to a completed queue.');
+        }
+
+        if (this.waitingResolve) {
+            const resolve = this.waitingResolve;
+            this.waitingResolve = null;
+            resolve({ value: item, done: false });
+            return;
+        }
+
+        if (this.queue.length >= this.maxCapacity) {
+            await new Promise<void>((resolve) => {
+                this.backpressureResolve = resolve;
+            });
+        }
+
+        this.queue.push(item);
+    }
+
+    /** Push an item synchronously (no backpressure wait). */
+    tryWrite(item: T): boolean {
+        if (this.completed) return false;
+
+        if (this.waitingResolve) {
+            const resolve = this.waitingResolve;
+            this.waitingResolve = null;
+            resolve({ value: item, done: false });
+            return true;
+        }
+
+        this.queue.push(item);
+        return true;
+    }
+
+    /** Signal that no more items will be written. */
+    complete(error?: Error): void {
+        if (this.completed) return;
+        this.completed = true;
+        this.completionError = error ?? null;
+
+        if (this.backpressureResolve) {
+            this.backpressureResolve();
+            this.backpressureResolve = null;
+        }
+
+        if (this.waitingResolve) {
+            const resolve = this.waitingResolve;
+            this.waitingResolve = null;
+            resolve({ value: undefined as any, done: true });
+        }
+    }
+
+    get error(): Error | null {
+        return this.completionError;
+    }
+
+    /** Async iterator for consuming items. */
+    async *[Symbol.asyncIterator](): AsyncGenerator<T> {
+        while (true) {
+            if (this.backpressureResolve && this.queue.length < this.maxCapacity) {
+                const resolve = this.backpressureResolve;
+                this.backpressureResolve = null;
+                resolve();
+            }
+
+            if (this.queue.length > 0) {
+                yield this.queue.shift()!;
+                continue;
+            }
+
+            if (this.completed) {
+                if (this.completionError) {
+                    throw this.completionError;
+                }
+                return;
+            }
+
+            const result = await new Promise<IteratorResult<T>>((resolve) => {
+                this.waitingResolve = resolve;
+            });
+
+            if (result.done) {
+                if (this.completionError) {
+                    throw this.completionError;
+                }
+                return;
+            }
+
+            yield result.value;
+        }
+    }
+}
+
+/**
+ * Client for real-time audio streaming ASR (Automatic Speech Recognition).
+ * Audio data from a microphone (or other source) is pushed in as PCM chunks,
+ * and transcription results are returned as an async iterable.
+ *
+ * Mirrors the C# LiveAudioTranscriptionSession.
+ */
+export class LiveAudioTranscriptionClient {
+    private modelId: string;
+    private coreInterop: CoreInterop;
+
+    private sessionHandle: string | null = null;
+    private started = false;
+    private stopped = false;
+
+    private outputQueue: AsyncQueue<LiveAudioTranscriptionResult> | null = null;
+    private pushQueue: AsyncQueue<Uint8Array> | null = null;
+    private pushLoopPromise: Promise<void> | null = null;
+    private activeSettings: LiveAudioTranscriptionSettings | null = null;
+    private sessionAbortController: AbortController | null = null;
+
+    /**
+     * Configuration settings for the streaming session.
+     * Must be configured before calling start(). Settings are frozen after start().
+     */
+    public settings = new LiveAudioTranscriptionSettings();
+
+    /**
+     * @internal
+     * Users should create clients via Model.createLiveTranscriptionClient().
+     */
+    constructor(modelId: string, coreInterop: CoreInterop) {
+        this.modelId = modelId;
+        this.coreInterop = coreInterop;
+    }
+
+    /**
+     * Open a real-time audio streaming session on native core.
+     * Call this before pushAudioData() or getTranscriptionStream().
+     * The session uses the settings.snapshot() taken here; a second call without stop() throws.
+     */
+    public async start(): Promise<void> {
+        if (this.started) {
+            throw new Error('Streaming session already started. Call stop() first.');
+        }
+
+        const frozen = this.settings.snapshot();
+        this.activeSettings = frozen;
+        const results = new AsyncQueue<LiveAudioTranscriptionResult>();
+        this.outputQueue = results;
+        this.pushQueue = new AsyncQueue<Uint8Array>(frozen.pushQueueCapacity);
+
+        // Command parameters are all strings, so numeric settings are stringified.
+        const params: Record<string, string> = {
+            Model: this.modelId,
+            SampleRate: frozen.sampleRate.toString(),
+            Channels: frozen.channels.toString(),
+            BitsPerSample: frozen.bitsPerSample.toString(),
+        };
+        if (frozen.language) {
+            params['Language'] = frozen.language;
+        }
+
+        try {
+            const handle = this.coreInterop.executeCommand("audio_stream_start", { Params: params });
+            this.sessionHandle = handle;
+            if (!handle) {
+                throw new Error('Native core did not return a session handle.');
+            }
+        } catch (error) {
+            // Complete the result queue before reporting the failure to the caller.
+            results.complete();
+            const reason = error instanceof Error ? error.message : String(error);
+            throw new Error(`Error starting audio stream session: ${reason}`, { cause: error });
+        }
+
+        // Only mark the session active once a handle has been obtained.
+        this.started = true;
+        this.stopped = false;
+
+        // pushLoop() is started without awaiting; stop() awaits the stored promise.
+        this.sessionAbortController = new AbortController();
+        this.pushLoopPromise = this.pushLoop();
+    }
+
+    /**
+     * Queue one chunk of raw PCM audio for the active streaming session.
+     * The bytes are copied before queuing, so the caller may reuse its buffer;
+     * the push loop later forwards queued chunks to native core one at a time.
+     *
+     * @param pcmData - Raw PCM audio bytes matching the configured format.
+     */
+    public async pushAudioData(pcmData: Uint8Array): Promise<void> {
+        const sessionActive = this.started && !this.stopped;
+        if (!sessionActive) {
+            throw new Error('No active streaming session. Call start() first.');
+        }
+
+        // Constructing from a typed array copies its elements into a fresh buffer.
+        const chunk = new Uint8Array(pcmData);
+        await this.pushQueue!.write(chunk);
+    }
+
+    /**
+     * Drains the push queue, issuing one "audio_stream_push" per chunk; a failed push completes the output queue with the error and ends the loop.
+     * NOTE(review): the command carries only SessionHandle and AudioDataLength, not the chunk bytes — confirm how native core receives the payload.
+     * @internal
+     */
+    private async pushLoop(): Promise<void> {
+        try {
+            for await (const audioData of this.pushQueue!) {
+                if (this.sessionAbortController?.signal.aborted) {
+                    break;
+                }
+
+                try {
+                    this.coreInterop.executeCommand("audio_stream_push", {
+                        Params: {
+                            SessionHandle: this.sessionHandle!,
+                            AudioDataLength: audioData.length.toString()
+                        }
+                    });
+                } catch (error) {
+                    const errorMsg = error instanceof Error ? error.message : String(error);
+                    const errorInfo = tryParseCoreError(errorMsg);
+
+                    const fatalError = new Error(
+                        `Push failed (code=${errorInfo?.code ?? 'UNKNOWN'}): ${errorMsg}`,
+                        { cause: error }
+                    );
+                    console.error('Terminating push loop due to push failure:', errorMsg);
+                    this.outputQueue?.complete(fatalError);
+                    return;
+                }
+            }
+        } catch (error) {
+            if (this.sessionAbortController?.signal.aborted) {
+                return;
+            }
+            const err = error instanceof Error ? error : new Error(String(error));
+            console.error('Push loop terminated with unexpected error:', err.message);
+            this.outputQueue?.complete(new Error('Push loop terminated unexpectedly.', { cause: err }));
+        }
+    }
+
+    /**
+     * Async generator over the session's transcription results.
+     * Yields each item from the output queue until that queue completes.
+     *
+     * Usage:
+     * ```ts
+     * for await (const result of client.getTranscriptionStream()) {
+     *     console.log(result.text);
+     * }
+     * ```
+     */
+    public async *getTranscriptionStream(): AsyncGenerator<LiveAudioTranscriptionResult> {
+        const results = this.outputQueue;
+        if (results === null) {
+            throw new Error('No active streaming session. Call start() first.');
+        }
+        for await (const transcription of results) {
+            yield transcription;
+        }
+    }
+
+    /**
+     * End the streaming session: close the push queue, wait for the push loop to exit,
+     * then send "audio_stream_stop" to native core and complete the transcription stream.
+     * Does nothing when no session is active; a native stop failure is rethrown after cleanup.
+     */
+    public async stop(): Promise<void> {
+        const sessionActive = this.started && !this.stopped;
+        if (!sessionActive) {
+            return;
+        }
+
+        // Flip the flag first so pushAudioData() rejects new chunks from here on.
+        this.stopped = true;
+
+        this.pushQueue?.complete();
+        if (this.pushLoopPromise) {
+            await this.pushLoopPromise;
+        }
+        this.sessionAbortController?.abort();
+
+        let failure: Error | null = null;
+        try {
+            this.coreInterop.executeCommand("audio_stream_stop", {
+                Params: { SessionHandle: this.sessionHandle! }
+            });
+        } catch (error) {
+            // Remember the failure; it is rethrown only after local state is reset.
+            failure = error instanceof Error ? error : new Error(String(error));
+            console.error('Error stopping audio stream session:', failure.message);
+        }
+
+        this.sessionHandle = null;
+        this.started = false;
+        this.sessionAbortController = null;
+        this.outputQueue?.complete();
+
+        if (failure === null) {
+            return;
+        }
+
+        const summary = `Error stopping audio stream session: ${failure.message}`;
+        throw new Error(summary, { cause: failure });
+    }
+
+    /**
+     * Stop the active session, if any, logging (not throwing) any stop() failure.
+     * Safe to call multiple times.
+     */
+    public async dispose(): Promise<void> {
+        if (!this.started || this.stopped) {
+            return;
+        }
+        await this.stop().catch((error: unknown) => {
+            const detail = error instanceof Error ? error.message : String(error);
+            console.warn('Error during dispose cleanup:', detail);
+        });
+    }
+}
diff --git a/sdk/js/src/openai/liveAudioTranscriptionTypes.ts b/sdk/js/src/openai/liveAudioTranscriptionTypes.ts
new file mode 100644
index 00000000..eb521cbd
--- /dev/null
+++ b/sdk/js/src/openai/liveAudioTranscriptionTypes.ts
@@ -0,0 +1,49 @@
+/**
+ * Types for real-time audio streaming transcription results and structured errors.
+ * Mirrors the C# LiveAudioTranscriptionResponse and CoreErrorResponse.
+ */
+
+/**
+ * A transcription result from a real-time audio streaming session.
+ * Mirrors the C# LiveAudioTranscriptionResponse which extends AudioCreateTranscriptionResponse.
+ */
+export interface LiveAudioTranscriptionResult {
+    /** True when this is the final result for the segment; false for a partial (interim) one. */
+    is_final: boolean;
+    /** The transcribed text. */
+    text: string;
+    /** Start time offset of this segment in the audio stream (seconds); may be omitted or null. */
+    start_time?: number | null;
+    /** End time offset of this segment in the audio stream (seconds); may be omitted or null. */
+    end_time?: number | null;
+}
+
+/**
+ * Structured error response from native core audio streaming commands.
+ * @internal
+ */
+export interface CoreErrorResponse {
+    /** Machine-readable error code. */
+    code: string;
+    /** Human-readable error message. Note: tryParseCoreError does not type-check this field. */
+    message: string;
+    /** Whether this error is transient and may succeed on retry. */
+    isTransient: boolean;
+}
+
+/**
+ * Parse a native error string into a CoreErrorResponse when it is JSON with the expected fields.
+ * Only `code` (string) and `isTransient` (boolean) are checked; any other input yields null.
+ * @internal
+ */
+export function tryParseCoreError(errorString: string): CoreErrorResponse | null {
+    try {
+        const payload = JSON.parse(errorString);
+        // Optional chaining lets a JSON `null` payload fall through to the null result.
+        const hasCode = typeof payload?.code === 'string';
+        const hasTransientFlag = typeof payload?.isTransient === 'boolean';
+        return hasCode && hasTransientFlag ? (payload as CoreErrorResponse) : null;
+    } catch {
+        // The input was not valid JSON.
+        return null;
+    }
+}

From 5287519772b5a95814ebf4017527cdab1ce19c29 Mon Sep 17 00:00:00 2001
From: ruiren_microsoft <ruiren@microsoft.com>
Date: Tue, 24 Mar 2026 12:16:51 -0700
Subject: [PATCH 19/22] Remove leftover sdk_v2/ directory

---
 .../cs/src/Microsoft.AI.Foundry.Local.csproj  | 128 ------------------
 sdk_v2/js/src/index.ts                        |  17 ---
 2 files changed, 145 deletions(-)
 delete mode 100644 sdk_v2/cs/src/Microsoft.AI.Foundry.Local.csproj
 delete mode 100644 sdk_v2/js/src/index.ts

diff --git a/sdk_v2/cs/src/Microsoft.AI.Foundry.Local.csproj b/sdk_v2/cs/src/Microsoft.AI.Foundry.Local.csproj
deleted file mode 100644
index ffc83a94..00000000
--- a/sdk_v2/cs/src/Microsoft.AI.Foundry.Local.csproj
+++ /dev/null
@@ -1,128 +0,0 @@
-﻿<Project Sdk="Microsoft.NET.Sdk">
-    <PropertyGroup>
-      <AssemblyTitle>Microsoft AI Foundry Local</AssemblyTitle>
-      <Description>Microsoft Foundry Local SDK</Description>
-      <Authors>Microsoft</Authors>
-      <Company>Microsoft Corporation</Company>
-      <Copyright>© Microsoft Corporation. All rights reserved.</Copyright>
-      <PackageLicenseFile>LICENSE.txt</PackageLicenseFile>
-      <PackageProjectUrl>https://github.com/microsoft/Foundry-Local</PackageProjectUrl>
-      <PackageDescription>Microsoft AI Foundry Local SDK for .NET</PackageDescription>
-      <PackageTags>Microsoft AI Foundry SDK</PackageTags>
-      <PackageReadmeFile>README.md</PackageReadmeFile>
-      <RepositoryUrl>https://github.com/microsoft/Foundry-Local</RepositoryUrl>
-      <RepositoryType>git</RepositoryType>
-
-      <TargetFramework>net8.0</TargetFramework>
-      <RuntimeIdentifiers>win-x64;win-arm64;linux-x64;linux-arm64;osx-arm64</RuntimeIdentifiers> 
-
-      <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
-      <GeneratePackageOnBuild>False</GeneratePackageOnBuild>
-      <ImplicitUsings>enable</ImplicitUsings>
-      <IsAotCompatible>True</IsAotCompatible>
-      <IsTrimmable>True</IsTrimmable>
-      <Nullable>enable</Nullable>
-      <WarningsAsErrors />
-
-      <IncludeSymbols>true</IncludeSymbols>
-      <SymbolPackageFormat>snupkg</SymbolPackageFormat>
-      <GenerateDocumentationFile Condition="'$(Configuration)' == 'Release'">true</GenerateDocumentationFile>
-
-      <!-- 
-      Set to 'true' to use OnnxRuntimeGenAI.WinML with EP download on Windows
-      Command line value overrides the default value here.
-       -->
-      <UseWinML>false</UseWinML>
-      <RuntimeIdentifiers Condition="'$(UseWinML)' == 'true'">win-x64;win-arm64</RuntimeIdentifiers>
-    </PropertyGroup>
-
-    <!-- 
-      set a default Version so if someone creates a nuget package locally it has a meaningful name.
-      CI builds will set the version using .pipelines\sdk_v2\templates\sdk-version.yml
-      We only set this if we're actually generating a package (build or pack) so that Visual Studio doesn't 
-      constantly restore packages. Assuming it considers the csproj to be 'changed' due to the dynamic timestamp.
-      -->
-    <PropertyGroup Condition="'$(GeneratePackageOnBuild)' == 'true' Or '$(IsPacking)' == 'true'">
-      <BuildTimestamp>$([System.DateTime]::Now.ToString("yyyyMMddHHmmss"))</BuildTimestamp>
-      <Version>0.5.0-dev.local.$(BuildTimestamp)</Version>
-    </PropertyGroup>
-
-    <PropertyGroup>
-        <IsWindows Condition="$([MSBuild]::IsOSPlatform('Windows'))">true</IsWindows>
-        <IsOSX Condition="$([MSBuild]::IsOSPlatform('OSX'))">true</IsOSX>
-        <IsLinux Condition="$([MSBuild]::IsOSPlatform('Linux'))">true</IsLinux>
-
-        <!-- these aren't used in the code yet but most likely will be required -->
-        <DefineConstants Condition="('$(IsWindows)'=='true')">$(DefineConstants);IS_WINDOWS</DefineConstants>
-        <DefineConstants Condition="('$(IsOSX)'=='true')">$(DefineConstants);IS_OSX</DefineConstants>
-        <DefineConstants Condition="('$(IsLinux)'=='true')">$(DefineConstants);IS_LINUX</DefineConstants>
-    </PropertyGroup>
-
-    <Target Name="DumpValues" BeforeTargets="Build">
-        <Message Importance="Normal" Text="FoundryLocalCoreVersion: $(FoundryLocalCoreVersion)" />
-        <Message Importance="Normal" Text="RuntimeIdentifiers: $(RuntimeIdentifiers)" />
-        <Message Importance="Normal" Text="RuntimeIdentifier: $(RuntimeIdentifier)" />
-        <Message Importance="Normal" Text="DefineConstants: $(DefineConstants)" />
-        <Message Importance="Normal" Text="IsWindows: $(IsWindows)" />
-        <Message Importance="Normal" Text="IsOSX: $(IsOSX)" />
-        <Message Importance="Normal" Text="IsLinux: $(IsLinux)" />
-        <Message Importance="Normal" Text="UseWinML: $(UseWinML)" />
-    </Target>
-
-    <!-- This target runs automatically after package assets are resolved and prints the exact version of the Core package that was selected. -->
-    <Target Name="PrintResolvedVersions" AfterTargets="ResolvePackageAssets">
-        <Message Importance="High" Text="Resolved Dependencies:" />
-        <Message Importance="High" Text="  %(PackageDependencies.Identity) : %(PackageDependencies.Version)" 
-                 Condition="$([System.String]::Copy('%(PackageDependencies.Identity)').StartsWith('Microsoft.AI.Foundry.Local.Core'))" />
-    </Target>
-      
-    <ItemGroup>
-        <None Include="$(MSBuildThisFileDirectory)../README.md" Pack="true" PackagePath="" />
-        <None Include="$(MSBuildThisFileDirectory)../LICENSE.txt" Pack="true" PackagePath="" />
-    </ItemGroup>
-
-    <!-- override some values if we're doing a WinML build -->
-    <PropertyGroup Condition="'$(UseWinML)' == 'true'">
-      <AssemblyTitle>Microsoft AI Foundry Local for WinML</AssemblyTitle>
-      <Description>Microsoft Foundry Local SDK for WinML</Description>
-      <PackageId>Microsoft.AI.Foundry.Local.WinML</PackageId>
-      <AssemblyName>Microsoft.AI.Foundry.Local.WinML</AssemblyName>
-      <TargetFramework>net8.0-windows10.0.26100.0</TargetFramework> <!-- override -->
-      <RuntimeIdentifiers>win-x64;win-arm64</RuntimeIdentifiers>
-      <!-- TODO: Should we define this here to make it explicit? What minimnum is actually supported? -->
-      <TargetPlatformMinVersion>10.0.17763.0</TargetPlatformMinVersion>
-      
-      <!-- we don't pass any types across the WinRT ABI -->
-      <NoWarn>$(NoWarn);CsWinRT1028</NoWarn>
-    </PropertyGroup>
-    
-    <PropertyGroup>
-      <!-- default version unless overridden during dotnet build/restore command -->
-      <FoundryLocalCoreWinMLVersion Condition="'$(FoundryLocalCoreVersion)' != ''">$(FoundryLocalCoreVersion)</FoundryLocalCoreWinMLVersion>
-      <FoundryLocalCoreWinMLVersion Condition="'$(FoundryLocalCoreVersion)' == ''">0.9.0-dev-20260227T230631-2a3af92</FoundryLocalCoreWinMLVersion>
-      <FoundryLocalCoreVersion Condition="'$(FoundryLocalCoreVersion)' == ''">0.9.0-dev-20260227T222239-2a3af92</FoundryLocalCoreVersion>    </PropertyGroup>
-
-    <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'">
-      <TreatWarningsAsErrors>True</TreatWarningsAsErrors>
-    </PropertyGroup>
-    <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|AnyCPU'">
-      <TreatWarningsAsErrors>True</TreatWarningsAsErrors>
-    </PropertyGroup>
-
-    <PropertyGroup>
-      <!-- NU1604: Transitive dependency Microsoft.ML.OnnxRuntime.Gpu.Linux lacks an inclusive lower bound.
-             This comes from the Microsoft.AI.Foundry.Local.Core package and cannot be fixed here. -->
-      <NoWarn>$(NoWarn);NU1604</NoWarn>
-    </PropertyGroup>
-    <ItemGroup>
-      <PackageReference Condition="'$(UseWinML)' == 'true'"
-                        Include="Microsoft.AI.Foundry.Local.Core.WinML" Version="$(FoundryLocalCoreWinMLVersion)" />
-      <PackageReference Condition="'$(UseWinML)' != 'true'"
-                        Include="Microsoft.AI.Foundry.Local.Core" Version="$(FoundryLocalCoreVersion)" />
-
-      <PackageReference Include="Betalgo.Ranul.OpenAI" Version="9.1.0" />
-      <PackageReference Include="Microsoft.Extensions.Logging" Version="9.0.9" />
-      <!-- specify PrivateAssets to exclude from nuget dependencies -->
-      <PackageReference Include="IDisposableAnalyzers" Version="4.0.8" PrivateAssets="all"/>
-    </ItemGroup>
-</Project>
\ No newline at end of file
diff --git a/sdk_v2/js/src/index.ts b/sdk_v2/js/src/index.ts
deleted file mode 100644
index 63f971fd..00000000
--- a/sdk_v2/js/src/index.ts
+++ /dev/null
@@ -1,17 +0,0 @@
-export { FoundryLocalManager } from './foundryLocalManager.js';
-export type { FoundryLocalConfig } from './configuration.js';
-export { Catalog } from './catalog.js';
-export { Model } from './model.js';
-export { ModelVariant } from './modelVariant.js';
-export type { IModel } from './imodel.js';
-export { ChatClient, ChatClientSettings } from './openai/chatClient.js';
-export { AudioClient, AudioClientSettings } from './openai/audioClient.js';
-export { LiveAudioTranscriptionClient, LiveAudioTranscriptionSettings } from './openai/liveAudioTranscriptionClient.js';
-export type { LiveAudioTranscriptionResult, CoreErrorResponse } from './openai/liveAudioTranscriptionTypes.js';
-export { ResponsesClient, ResponsesClientSettings, getOutputText } from './openai/responsesClient.js';
-export { ModelLoadManager } from './detail/modelLoadManager.js';
-/** @internal */
-export { CoreInterop } from './detail/coreInterop.js';
-/** @internal */
-export { Configuration } from './configuration.js';
-export * from './types.js';

From 57ce4608abe5d2adf1d7dc9f119d5ce523dfbfb8 Mon Sep 17 00:00:00 2001
From: ruiren_microsoft <ruiren@microsoft.com>
Date: Tue, 24 Mar 2026 12:31:18 -0700
Subject: [PATCH 20/22] Update Core version to 0.9.0 in JS install script

---
 sdk/js/script/install.cjs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sdk/js/script/install.cjs b/sdk/js/script/install.cjs
index 3db771b8..a058c5f3 100644
--- a/sdk/js/script/install.cjs
+++ b/sdk/js/script/install.cjs
@@ -54,14 +54,14 @@ const CORE_FEED = useNightly ? ORT_NIGHTLY_FEED : NUGET_FEED;
 
 const FOUNDRY_LOCAL_CORE_ARTIFACT = {
     name: 'Microsoft.AI.Foundry.Local.Core',
-    version: '0.9.0.8-rc3',
+    version: '0.9.0',
     feed: ORT_NIGHTLY_FEED,
     nightly: useNightly
 }
 
 const FOUNDRY_LOCAL_CORE_WINML_ARTIFACT = {
     name: 'Microsoft.AI.Foundry.Local.Core.WinML',
-    version: '0.9.0.8-rc3',
+    version: '0.9.0',
     feed: ORT_NIGHTLY_FEED,
     nightly: useNightly
 }

From 18389cb489a1a90bf329f64c3267242033cac5c4 Mon Sep 17 00:00:00 2001
From: ruiren_microsoft <ruiren@microsoft.com>
Date: Tue, 24 Mar 2026 14:39:56 -0700
Subject: [PATCH 21/22] Update Core version to 0.9.0 in JS install script

---
 sdk/js/script/install.cjs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sdk/js/script/install.cjs b/sdk/js/script/install.cjs
index a058c5f3..a08dccb4 100644
--- a/sdk/js/script/install.cjs
+++ b/sdk/js/script/install.cjs
@@ -55,14 +55,14 @@ const CORE_FEED = useNightly ? ORT_NIGHTLY_FEED : NUGET_FEED;
 const FOUNDRY_LOCAL_CORE_ARTIFACT = {
     name: 'Microsoft.AI.Foundry.Local.Core',
     version: '0.9.0',
-    feed: ORT_NIGHTLY_FEED,
+    feed: CORE_FEED,
     nightly: useNightly
 }
 
 const FOUNDRY_LOCAL_CORE_WINML_ARTIFACT = {
     name: 'Microsoft.AI.Foundry.Local.Core.WinML',
     version: '0.9.0',
-    feed: ORT_NIGHTLY_FEED,
+    feed: CORE_FEED,
     nightly: useNightly
 }
 

From 10bbcb8a88cbae1b8960a08de5f3cd3051163fca Mon Sep 17 00:00:00 2001
From: ruiren_microsoft <ruiren@microsoft.com>
Date: Tue, 24 Mar 2026 19:54:45 -0700
Subject: [PATCH 22/22] update the npkg

---
 sdk/cs/src/Microsoft.AI.Foundry.Local.csproj | 4 ++--
 sdk/js/script/install.cjs                    | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/sdk/cs/src/Microsoft.AI.Foundry.Local.csproj b/sdk/cs/src/Microsoft.AI.Foundry.Local.csproj
index dc600e28..9f203a9b 100644
--- a/sdk/cs/src/Microsoft.AI.Foundry.Local.csproj
+++ b/sdk/cs/src/Microsoft.AI.Foundry.Local.csproj
@@ -99,8 +99,8 @@
     <PropertyGroup>
       <!-- default version unless overridden during dotnet build/restore command -->
       <FoundryLocalCoreWinMLVersion Condition="'$(FoundryLocalCoreVersion)' != ''">$(FoundryLocalCoreVersion)</FoundryLocalCoreWinMLVersion>
-      <FoundryLocalCoreWinMLVersion Condition="'$(FoundryLocalCoreVersion)' == ''">0.9.0</FoundryLocalCoreWinMLVersion>
-      <FoundryLocalCoreVersion Condition="'$(FoundryLocalCoreVersion)' == ''">0.9.0</FoundryLocalCoreVersion>    </PropertyGroup>
+      <FoundryLocalCoreWinMLVersion Condition="'$(FoundryLocalCoreVersion)' == ''">0.9.0-dev</FoundryLocalCoreWinMLVersion>
+      <FoundryLocalCoreVersion Condition="'$(FoundryLocalCoreVersion)' == ''">0.9.0-dev</FoundryLocalCoreVersion>    </PropertyGroup>
 
     <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'">
       <TreatWarningsAsErrors>True</TreatWarningsAsErrors>
diff --git a/sdk/js/script/install.cjs b/sdk/js/script/install.cjs
index a058c5f3..600741ae 100644
--- a/sdk/js/script/install.cjs
+++ b/sdk/js/script/install.cjs
@@ -49,19 +49,19 @@ const ORT_FEED = 'https://pkgs.dev.azure.com/aiinfra/PublicPackages/_packaging/O
 const ORT_NIGHTLY_FEED = 'https://pkgs.dev.azure.com/aiinfra/PublicPackages/_packaging/ORT-Nightly/nuget/v3/index.json';
 
 // If nightly is requested, pull Core/GenAI from the ORT-Nightly feed where nightly builds are published.
-// Otherwise use the standard NuGet.org feed.
+// Otherwise use NUGET_FEED, the standard NuGet.org feed.
 const CORE_FEED = useNightly ? ORT_NIGHTLY_FEED : NUGET_FEED;
 
 const FOUNDRY_LOCAL_CORE_ARTIFACT = {
     name: 'Microsoft.AI.Foundry.Local.Core',
-    version: '0.9.0',
+    version: '0.9.0-dev',
     feed: ORT_NIGHTLY_FEED,
     nightly: useNightly
 }
 
 const FOUNDRY_LOCAL_CORE_WINML_ARTIFACT = {
     name: 'Microsoft.AI.Foundry.Local.Core.WinML',
-    version: '0.9.0',
+    version: '0.9.0-dev',
     feed: ORT_NIGHTLY_FEED,
     nightly: useNightly
 }