From 9423862dd5d59251e65f835fadeaaec847c7dbfc Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Fri, 22 May 2026 11:50:51 -0400 Subject: [PATCH 1/6] Fix flaky SDK E2E tests Avoid waiting for full assistant completions in tool-filter tests when the assertions only need captured CAPI requests. Replace fake echo MCP server configs with the shared Node MCP test server and wait for MCP connectivity before prompts in real MCP E2E tests. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- dotnet/test/E2E/PreMcpToolCallHookE2ETests.cs | 8 +- dotnet/test/E2E/RpcMcpAndSkillsE2ETests.cs | 36 ++---- dotnet/test/E2E/SessionConfigE2ETests.cs | 17 ++- dotnet/test/E2E/SessionE2ETests.cs | 68 ++++++----- .../E2E/SessionMcpAndAgentConfigE2ETests.cs | 111 ++--------------- dotnet/test/Harness/E2ETestBase.cs | 94 +++++++++++++++ go/internal/e2e/mcp_and_agents_e2e_test.go | 79 ++---------- go/internal/e2e/mcp_server_helpers_test.go | 59 +++++++++ .../e2e/rpc_mcp_and_skills_e2e_test.go | 9 +- go/internal/e2e/session_config_e2e_test.go | 9 +- go/internal/e2e/session_e2e_test.go | 45 +------ go/internal/e2e/testharness/context.go | 25 ++++ .../github/copilot/sdk/E2ETestContext.java | 7 ++ .../github/copilot/sdk/McpAndAgentsTest.java | 60 ++++++---- nodejs/test/e2e/mcp_and_agents.e2e.test.ts | 112 +++++++++--------- .../test/e2e/rpc_mcp_and_skills.e2e.test.ts | 58 +++++++-- nodejs/test/e2e/session.e2e.test.ts | 77 +++++++----- nodejs/test/e2e/session_config.e2e.test.ts | 28 +++-- python/e2e/test_mcp_and_agents_e2e.py | 87 +++++++------- python/e2e/test_rpc_mcp_and_skills_e2e.py | 48 ++++++-- python/e2e/test_session_config_e2e.py | 16 +-- python/e2e/test_session_e2e.py | 60 +++++----- python/e2e/testharness/context.py | 17 +++ rust/tests/e2e/rpc_mcp_and_skills.rs | 51 ++++---- rust/tests/e2e/session.rs | 18 ++- .../accept_mcp_server_config_on_resume.yaml | 4 - ...erver_configuration_on_session_resume.yaml | 4 - 27 files changed, 678 insertions(+), 529 deletions(-) create mode 100644 go/internal/e2e/mcp_server_helpers_test.go diff --git a/dotnet/test/E2E/PreMcpToolCallHookE2ETests.cs b/dotnet/test/E2E/PreMcpToolCallHookE2ETests.cs index f825c4254..8e5240a9a 100644 --- a/dotnet/test/E2E/PreMcpToolCallHookE2ETests.cs +++ b/dotnet/test/E2E/PreMcpToolCallHookE2ETests.cs @@ -15,7 +15,7 @@ namespace GitHub.Copilot.Test.E2E; public class PreMcpToolCallHookE2ETests(E2ETestFixture fixture, ITestOutputHelper output) : E2ETestBase(fixture, "pre_mcp_tool_call_hook", output) { - private static string FindTestHarnessDir() + private static string FindMetaEchoTestHarnessDir() { var dir = new DirectoryInfo(AppContext.BaseDirectory); while (dir != null) @@ -42,7 +42,7 @@ private static string FindTestHarnessDir() [Fact] public async Task Should_Set_Meta_Via_PreMcpToolCall_Hook() { - var testHarnessDir = FindTestHarnessDir(); + var testHarnessDir = FindMetaEchoTestHarnessDir(); var hookInputs = new List(); var session = await CreateSessionAsync(new SessionConfig @@ -84,7 +84,7 @@ public async Task Should_Set_Meta_Via_PreMcpToolCall_Hook() [Fact] public async Task Should_Replace_Meta_Via_PreMcpToolCall_Hook() { - var testHarnessDir = FindTestHarnessDir(); + var testHarnessDir = FindMetaEchoTestHarnessDir(); var hookInputs = new List(); var session = await CreateSessionAsync(new SessionConfig @@ -125,7 +125,7 @@ public async Task Should_Replace_Meta_Via_PreMcpToolCall_Hook() [Fact] public async Task Should_Remove_Meta_Via_PreMcpToolCall_Hook() { - var testHarnessDir = FindTestHarnessDir(); + var testHarnessDir = FindMetaEchoTestHarnessDir(); var hookInputs = new List(); var session = await CreateSessionAsync(new SessionConfig diff --git a/dotnet/test/E2E/RpcMcpAndSkillsE2ETests.cs b/dotnet/test/E2E/RpcMcpAndSkillsE2ETests.cs index a3d08a1cb..0ba7d620e 100644 --- a/dotnet/test/E2E/RpcMcpAndSkillsE2ETests.cs +++ b/dotnet/test/E2E/RpcMcpAndSkillsE2ETests.cs @@ -2,6 +2,7 @@ * Copyright (c) Microsoft Corporation. All rights reserved. *--------------------------------------------------------------------------------------------*/ +using GitHub.Copilot.Rpc; using Xunit; using Xunit.Abstractions; using RpcSkill = GitHub.Copilot.Rpc.Skill; @@ -67,21 +68,14 @@ public async Task Should_List_Mcp_Servers_With_Configured_Server() const string serverName = "rpc-list-mcp-server"; var session = await CreateSessionAsync(new SessionConfig { - McpServers = new Dictionary - { - [serverName] = new McpStdioServerConfig - { - Command = "echo", - Args = ["rpc-list-mcp-server"], - Tools = ["*"], - }, - }, + McpServers = CreateTestMcpServers(serverName), }); + await WaitForMcpServerStatusAsync(session, serverName, McpServerStatus.Connected); var result = await session.Rpc.Mcp.ListAsync(); var server = Assert.Single(result.Servers, server => string.Equals(server.Name, serverName, StringComparison.Ordinal)); - Assert.False(string.IsNullOrWhiteSpace(server.Status.Value)); + Assert.Equal(McpServerStatus.Connected, server.Status); } [Fact] @@ -143,16 +137,9 @@ public async Task Should_Report_Error_When_Mcp_Oauth_Server_Is_Not_Configured() { var session = await CreateSessionAsync(new SessionConfig { - McpServers = new Dictionary - { - ["configured-stdio-server"] = new McpStdioServerConfig - { - Command = "echo", - Args = ["configured-stdio-server"], - Tools = ["*"], - }, - }, + McpServers = CreateTestMcpServers("configured-stdio-server"), }); + await WaitForMcpServerStatusAsync(session, "configured-stdio-server", McpServerStatus.Connected); await AssertFailureAsync( () => session.Rpc.Mcp.Oauth.LoginAsync("missing-server"), @@ -165,16 +152,9 @@ public async Task Should_Report_Error_When_Mcp_Oauth_Server_Is_Not_Remote() const string serverName = "configured-stdio-server"; var session = await CreateSessionAsync(new SessionConfig { - McpServers = new Dictionary - { - [serverName] = new McpStdioServerConfig - { - Command = "echo", - Args = [serverName], - Tools = ["*"], - }, - }, + McpServers = CreateTestMcpServers(serverName), }); + await WaitForMcpServerStatusAsync(session, serverName, McpServerStatus.Connected); await AssertFailureAsync( () => session.Rpc.Mcp.Oauth.LoginAsync(serverName, forceReauth: true, clientName: "SDK E2E", callbackSuccessMessage: "Done"), diff --git a/dotnet/test/E2E/SessionConfigE2ETests.cs b/dotnet/test/E2E/SessionConfigE2ETests.cs index 64f5518fb..a763b6b72 100644 --- a/dotnet/test/E2E/SessionConfigE2ETests.cs +++ b/dotnet/test/E2E/SessionConfigE2ETests.cs @@ -435,12 +435,17 @@ public async Task Should_Apply_AvailableTools_On_Session_Resume() AvailableTools = ["view"], }); - await session2.SendAndWaitAsync(new MessageOptions { Prompt = "What is 1+1?" }); - - var exchange = Assert.Single(await Ctx.GetExchangesAsync()); - Assert.Equal(["view"], GetToolNames(exchange)); - - await session2.DisposeAsync(); + try + { + var exchange = Assert.Single(await SendAndWaitForExchangesAsync( + session2, + new MessageOptions { Prompt = "What is 1+1?" })); + Assert.Equal(["view"], GetToolNames(exchange)); + } + finally + { + await session2.DisposeAsync(); + } } [Fact] diff --git a/dotnet/test/E2E/SessionE2ETests.cs b/dotnet/test/E2E/SessionE2ETests.cs index d2c611e07..cfa361793 100644 --- a/dotnet/test/E2E/SessionE2ETests.cs +++ b/dotnet/test/E2E/SessionE2ETests.cs @@ -130,16 +130,20 @@ public async Task Should_Create_A_Session_With_AvailableTools() AvailableTools = ["view", "edit"] }); - await session.SendAsync(new MessageOptions { Prompt = "What is 1+1?" }); - await TestHelper.GetFinalAssistantMessageAsync(session); - - var traffic = await Ctx.GetExchangesAsync(); - Assert.NotEmpty(traffic); - - var toolNames = GetToolNames(traffic[0]); - Assert.Equal(2, toolNames.Count); - Assert.Contains("view", toolNames); - Assert.Contains("edit", toolNames); + try + { + var traffic = await SendAndWaitForExchangesAsync( + session, + new MessageOptions { Prompt = "What is 1+1?" }); + var toolNames = GetToolNames(traffic[0]); + Assert.Equal(2, toolNames.Count); + Assert.Contains("view", toolNames); + Assert.Contains("edit", toolNames); + } + finally + { + await session.DisposeAsync(); + } } [Fact] @@ -150,16 +154,20 @@ public async Task Should_Create_A_Session_With_ExcludedTools() ExcludedTools = ["view"] }); - await session.SendAsync(new MessageOptions { Prompt = "What is 1+1?" }); - await TestHelper.GetFinalAssistantMessageAsync(session); - - var traffic = await Ctx.GetExchangesAsync(); - Assert.NotEmpty(traffic); - - var toolNames = GetToolNames(traffic[0]); - Assert.DoesNotContain("view", toolNames); - Assert.Contains("edit", toolNames); - Assert.Contains("grep", toolNames); + try + { + var traffic = await SendAndWaitForExchangesAsync( + session, + new MessageOptions { Prompt = "What is 1+1?" }); + var toolNames = GetToolNames(traffic[0]); + Assert.DoesNotContain("view", toolNames); + Assert.Contains("edit", toolNames); + Assert.Contains("grep", toolNames); + } + finally + { + await session.DisposeAsync(); + } } [Fact] @@ -180,14 +188,18 @@ public async Task Should_Create_A_Session_With_DefaultAgent_ExcludedTools() }, }); - await session.SendAsync(new MessageOptions { Prompt = "What is 1+1?" }); - await TestHelper.GetFinalAssistantMessageAsync(session); - - var traffic = await Ctx.GetExchangesAsync(); - Assert.NotEmpty(traffic); - - var toolNames = GetToolNames(traffic[0]); - Assert.DoesNotContain("secret_tool", toolNames); + try + { + var traffic = await SendAndWaitForExchangesAsync( + session, + new MessageOptions { Prompt = "What is 1+1?" }); + var toolNames = GetToolNames(traffic[0]); + Assert.DoesNotContain("secret_tool", toolNames); + } + finally + { + await session.DisposeAsync(); + } } [Fact] diff --git a/dotnet/test/E2E/SessionMcpAndAgentConfigE2ETests.cs b/dotnet/test/E2E/SessionMcpAndAgentConfigE2ETests.cs index 9567c98be..18a8835a6 100644 --- a/dotnet/test/E2E/SessionMcpAndAgentConfigE2ETests.cs +++ b/dotnet/test/E2E/SessionMcpAndAgentConfigE2ETests.cs @@ -14,22 +14,13 @@ public class SessionMcpAndAgentConfigE2ETests(E2ETestFixture fixture, ITestOutpu [Fact] public async Task Should_Accept_MCP_Server_Configuration_On_Session_Create() { - var mcpServers = new Dictionary - { - ["test-server"] = new McpStdioServerConfig - { - Command = "echo", - Args = ["hello"], - Tools = ["*"] - } - }; - var session = await CreateSessionAsync(new SessionConfig { - McpServers = mcpServers + McpServers = CreateTestMcpServers("test-server") }); Assert.Matches(@"^[a-f0-9-]+$", session.SessionId); + await WaitForMcpServerStatusAsync(session, "test-server", McpServerStatus.Connected); // Simple interaction to verify session works await session.SendAsync(new MessageOptions { Prompt = "What is 2+2?" }); @@ -48,7 +39,7 @@ public async Task Should_Accept_MCP_Server_Configuration_Without_Args() { ["test-server"] = new McpStdioServerConfig { - Command = "true", + Command = "dotnet", Tools = ["*"] } }; @@ -60,12 +51,6 @@ public async Task Should_Accept_MCP_Server_Configuration_Without_Args() Assert.Matches(@"^[a-f0-9-]+$", session.SessionId); - await session.SendAsync(new MessageOptions { Prompt = "What is 2+2?" }); - - var message = await TestHelper.GetFinalAssistantMessageAsync(session); - Assert.NotNull(message); - Assert.Contains("4", message!.Data.Content); - await session.DisposeAsync(); } @@ -79,26 +64,13 @@ public async Task Should_Accept_MCP_Server_Configuration_On_Session_Resume() await session1.DisposeAsync(); // Resume with MCP servers - var mcpServers = new Dictionary - { - ["test-server"] = new McpStdioServerConfig - { - Command = "echo", - Args = ["hello"], - Tools = ["*"] - } - }; - var session2 = await ResumeSessionAsync(sessionId, new ResumeSessionConfig { - McpServers = mcpServers + McpServers = CreateTestMcpServers("test-server") }); Assert.Equal(sessionId, session2.SessionId); - - var message = await session2.SendAndWaitAsync(new MessageOptions { Prompt = "What is 3+3?" }); - Assert.NotNull(message); - Assert.Contains("6", message!.Data.Content); + await WaitForMcpServerStatusAsync(session2, "test-server", McpServerStatus.Connected); await session2.DisposeAsync(); } @@ -106,28 +78,14 @@ public async Task Should_Accept_MCP_Server_Configuration_On_Session_Resume() [Fact] public async Task Should_Handle_Multiple_MCP_Servers() { - var mcpServers = new Dictionary - { - ["server1"] = new McpStdioServerConfig - { - Command = "echo", - Args = ["server1"], - Tools = ["*"] - }, - ["server2"] = new McpStdioServerConfig - { - Command = "echo", - Args = ["server2"], - Tools = ["*"] - } - }; - var session = await CreateSessionAsync(new SessionConfig { - McpServers = mcpServers + McpServers = CreateTestMcpServers("server1", "server2") }); Assert.Matches(@"^[a-f0-9-]+$", session.SessionId); + await WaitForMcpServerStatusAsync(session, "server1", McpServerStatus.Connected); + await WaitForMcpServerStatusAsync(session, "server2", McpServerStatus.Connected); await session.DisposeAsync(); } @@ -234,15 +192,7 @@ public async Task Should_Handle_Custom_Agent_With_MCP_Servers() DisplayName = "MCP Agent", Description = "An agent with its own MCP servers", Prompt = "You are an agent with MCP servers.", - McpServers = new Dictionary - { - ["agent-server"] = new McpStdioServerConfig - { - Command = "echo", - Args = ["agent-mcp"], - Tools = ["*"] - } - } + McpServers = CreateTestMcpServers("agent-server") } }; @@ -401,16 +351,6 @@ await File.WriteAllTextAsync( [Fact] public async Task Should_Accept_Both_MCP_Servers_And_Custom_Agents() { - var mcpServers = new Dictionary - { - ["shared-server"] = new McpStdioServerConfig - { - Command = "echo", - Args = ["shared"], - Tools = ["*"] - } - }; - var customAgents = new List { new CustomAgentConfig @@ -424,42 +364,13 @@ public async Task Should_Accept_Both_MCP_Servers_And_Custom_Agents() var session = await CreateSessionAsync(new SessionConfig { - McpServers = mcpServers, + McpServers = CreateTestMcpServers("shared-server"), CustomAgents = customAgents }); Assert.Matches(@"^[a-f0-9-]+$", session.SessionId); + await WaitForMcpServerStatusAsync(session, "shared-server", McpServerStatus.Connected); await session.DisposeAsync(); } - private static string FindTestHarnessDir() - { - var dir = new DirectoryInfo(AppContext.BaseDirectory); - while (dir != null) - { - var candidate = Path.Combine(dir.FullName, "test", "harness", "test-mcp-server.mjs"); - if (File.Exists(candidate)) - return Path.GetDirectoryName(candidate)!; - dir = dir.Parent; - } - throw new InvalidOperationException("Could not find test/harness/test-mcp-server.mjs"); - } - - private static async Task WaitForMcpServerStatusAsync( - CopilotSession session, - string serverName, - McpServerStatus expectedStatus) - { - await TestHelper.WaitForConditionAsync( - async () => - { - var result = await session.Rpc.Mcp.ListAsync(); - return result.Servers.Any(server => - string.Equals(server.Name, serverName, StringComparison.Ordinal) - && server.Status == expectedStatus); - }, - timeout: TimeSpan.FromSeconds(60), - pollInterval: TimeSpan.FromMilliseconds(200), - timeoutMessage: $"{serverName} reaching {expectedStatus}"); - } } diff --git a/dotnet/test/Harness/E2ETestBase.cs b/dotnet/test/Harness/E2ETestBase.cs index 592253bda..2bcd3d81e 100644 --- a/dotnet/test/Harness/E2ETestBase.cs +++ b/dotnet/test/Harness/E2ETestBase.cs @@ -2,6 +2,7 @@ * Copyright (c) Microsoft Corporation. All rights reserved. *--------------------------------------------------------------------------------------------*/ +using GitHub.Copilot.Rpc; using GitHub.Copilot.Test.Harness; using Microsoft.Extensions.Logging; using System.Data; @@ -107,4 +108,97 @@ protected static List GetToolNames(ParsedHttpExchange exchange) { return exchange.Request.Tools?.Select(t => t.Function.Name).ToList() ?? []; } + + protected async Task> WaitForExchangesAsync(int minimumCount = 1) + { + List exchanges = []; + await TestHelper.WaitForConditionAsync( + async () => + { + exchanges = await Ctx.GetExchangesAsync(); + return exchanges.Count >= minimumCount; + }, + timeoutMessage: $"Timed out waiting for {minimumCount} chat completion request(s)"); + return exchanges; + } + + protected async Task> SendAndWaitForExchangesAsync( + CopilotSession session, + MessageOptions options, + int minimumCount = 1) + { + using var cts = new CancellationTokenSource(); + var sendTask = session.SendAndWaitAsync(options, TimeSpan.FromMinutes(3), cts.Token); + var exchangesTask = WaitForExchangesAsync(minimumCount); + + try + { + var completedTask = await Task.WhenAny(exchangesTask, sendTask); + if (completedTask == sendTask) + { + await sendTask; + } + + return await exchangesTask; + } + finally + { + if (!sendTask.IsCompleted) + { + cts.Cancel(); + try + { + await sendTask; + } + catch (OperationCanceledException) when (cts.IsCancellationRequested) + { + } + } + } + } + + protected static Dictionary CreateTestMcpServers(params string[] serverNames) + { + var testHarnessDir = FindTestHarnessDir(); + return serverNames.ToDictionary( + name => name, + _ => (McpServerConfig)new McpStdioServerConfig + { + Command = "node", + Args = [Path.Combine(testHarnessDir, "test-mcp-server.mjs")], + WorkingDirectory = testHarnessDir, + Tools = ["*"] + }); + } + + protected static string FindTestHarnessDir() + { + var dir = new DirectoryInfo(AppContext.BaseDirectory); + while (dir != null) + { + var candidate = Path.Combine(dir.FullName, "test", "harness", "test-mcp-server.mjs"); + if (File.Exists(candidate)) + return Path.GetDirectoryName(candidate)!; + dir = dir.Parent; + } + throw new InvalidOperationException("Could not find test/harness/test-mcp-server.mjs"); + } + + protected static async Task WaitForMcpServerStatusAsync( + CopilotSession session, + string serverName, + McpServerStatus expectedStatus) + { + await TestHelper.WaitForConditionAsync( + async () => + { + var result = await session.Rpc.Mcp.ListAsync(); + return result.Servers.Any(server => + string.Equals(server.Name, serverName, StringComparison.Ordinal) + && server.Status == expectedStatus); + }, + timeout: TimeSpan.FromSeconds(60), + pollInterval: TimeSpan.FromMilliseconds(200), + timeoutMessage: $"{serverName} reaching {expectedStatus}"); + } } diff --git a/go/internal/e2e/mcp_and_agents_e2e_test.go b/go/internal/e2e/mcp_and_agents_e2e_test.go index 87b25a533..4c7f29bc8 100644 --- a/go/internal/e2e/mcp_and_agents_e2e_test.go +++ b/go/internal/e2e/mcp_and_agents_e2e_test.go @@ -7,6 +7,7 @@ import ( copilot "github.com/github/copilot-sdk/go" "github.com/github/copilot-sdk/go/internal/e2e/testharness" + "github.com/github/copilot-sdk/go/rpc" ) func TestMCPServersE2E(t *testing.T) { @@ -17,13 +18,7 @@ func TestMCPServersE2E(t *testing.T) { t.Run("accept MCP server config on create", func(t *testing.T) { ctx.ConfigureForTest(t) - mcpServers := map[string]copilot.MCPServerConfig{ - "test-server": copilot.MCPStdioServerConfig{ - Command: "echo", - Args: []string{"hello"}, - Tools: &[]string{"*"}, - }, - } + mcpServers := testMCPServers(t, "test-server") session, err := client.CreateSession(t.Context(), &copilot.SessionConfig{ OnPermissionRequest: copilot.PermissionHandler.ApproveAll, @@ -36,6 +31,7 @@ func TestMCPServersE2E(t *testing.T) { if session.SessionID == "" { t.Error("Expected non-empty session ID") } + waitForMCPServerStatus(t, session, "test-server", rpc.McpServerStatusConnected) // Simple interaction to verify session works _, err = session.Send(t.Context(), copilot.MessageOptions{ @@ -62,7 +58,7 @@ func TestMCPServersE2E(t *testing.T) { mcpServers := map[string]copilot.MCPServerConfig{ "test-server": copilot.MCPStdioServerConfig{ - Command: "echo", + Command: "git", Tools: &[]string{"*"}, }, } @@ -79,22 +75,6 @@ func TestMCPServersE2E(t *testing.T) { t.Error("Expected non-empty session ID") } - _, err = session.Send(t.Context(), copilot.MessageOptions{ - Prompt: "What is 2+2?", - }) - if err != nil { - t.Fatalf("Failed to send message: %v", err) - } - - message, err := testharness.GetFinalAssistantMessage(t.Context(), session) - if err != nil { - t.Fatalf("Failed to get final message: %v", err) - } - - if md, ok := message.Data.(*copilot.AssistantMessageData); !ok || !strings.Contains(md.Content, "4") { - t.Errorf("Expected message to contain '4', got: %v", message.Data) - } - session.Disconnect() }) @@ -114,13 +94,7 @@ func TestMCPServersE2E(t *testing.T) { } // Resume with MCP servers - mcpServers := map[string]copilot.MCPServerConfig{ - "test-server": copilot.MCPStdioServerConfig{ - Command: "echo", - Args: []string{"hello"}, - Tools: &[]string{"*"}, - }, - } + mcpServers := testMCPServers(t, "test-server") session2, err := client.ResumeSessionWithOptions(t.Context(), sessionID, &copilot.ResumeSessionConfig{ OnPermissionRequest: copilot.PermissionHandler.ApproveAll, @@ -133,15 +107,7 @@ func TestMCPServersE2E(t *testing.T) { if session2.SessionID != sessionID { t.Errorf("Expected session ID %s, got %s", sessionID, session2.SessionID) } - - message, err := session2.SendAndWait(t.Context(), copilot.MessageOptions{Prompt: "What is 3+3?"}) - if err != nil { - t.Fatalf("Failed to send message: %v", err) - } - - if md, ok := message.Data.(*copilot.AssistantMessageData); !ok || !strings.Contains(md.Content, "6") { - t.Errorf("Expected message to contain '6', got: %v", message.Data) - } + waitForMCPServerStatus(t, session2, "test-server", rpc.McpServerStatusConnected) session2.Disconnect() }) @@ -176,6 +142,7 @@ func TestMCPServersE2E(t *testing.T) { if session.SessionID == "" { t.Error("Expected non-empty session ID") } + waitForMCPServerStatus(t, session, "env-echo", rpc.McpServerStatusConnected) message, err := session.SendAndWait(t.Context(), copilot.MessageOptions{ Prompt: "Use the env-echo/get_env tool to read the TEST_SECRET environment variable. Reply with just the value, nothing else.", @@ -194,18 +161,7 @@ func TestMCPServersE2E(t *testing.T) { t.Run("handle multiple MCP servers", func(t *testing.T) { ctx.ConfigureForTest(t) - mcpServers := map[string]copilot.MCPServerConfig{ - "server1": copilot.MCPStdioServerConfig{ - Command: "echo", - Args: []string{"server1"}, - Tools: &[]string{"*"}, - }, - "server2": copilot.MCPStdioServerConfig{ - Command: "echo", - Args: []string{"server2"}, - Tools: &[]string{"*"}, - }, - } + mcpServers := testMCPServers(t, "server1", "server2") session, err := client.CreateSession(t.Context(), &copilot.SessionConfig{ OnPermissionRequest: copilot.PermissionHandler.ApproveAll, @@ -218,6 +174,8 @@ func TestMCPServersE2E(t *testing.T) { if session.SessionID == "" { t.Error("Expected non-empty session ID") } + waitForMCPServerStatus(t, session, "server1", rpc.McpServerStatusConnected) + waitForMCPServerStatus(t, session, "server2", rpc.McpServerStatusConnected) session.Disconnect() }) @@ -362,13 +320,7 @@ func TestCustomAgentsE2E(t *testing.T) { DisplayName: "MCP Agent", Description: "An agent with its own MCP servers", Prompt: "You are an agent with MCP servers.", - MCPServers: map[string]copilot.MCPServerConfig{ - "agent-server": copilot.MCPStdioServerConfig{ - Command: "echo", - Args: []string{"agent-mcp"}, - Tools: &[]string{"*"}, - }, - }, + MCPServers: testMCPServers(t, "agent-server"), }, } @@ -433,13 +385,7 @@ func TestCombinedConfigurationE2E(t *testing.T) { t.Run("accept MCP servers and custom agents", func(t *testing.T) { ctx.ConfigureForTest(t) - mcpServers := map[string]copilot.MCPServerConfig{ - "shared-server": copilot.MCPStdioServerConfig{ - Command: "echo", - Args: []string{"shared"}, - Tools: &[]string{"*"}, - }, - } + mcpServers := testMCPServers(t, "shared-server") customAgents := []copilot.CustomAgentConfig{ { @@ -462,6 +408,7 @@ func TestCombinedConfigurationE2E(t *testing.T) { if session.SessionID == "" { t.Error("Expected non-empty session ID") } + waitForMCPServerStatus(t, session, "shared-server", rpc.McpServerStatusConnected) session.Disconnect() }) diff --git a/go/internal/e2e/mcp_server_helpers_test.go b/go/internal/e2e/mcp_server_helpers_test.go new file mode 100644 index 000000000..f6cf2ad6b --- /dev/null +++ b/go/internal/e2e/mcp_server_helpers_test.go @@ -0,0 +1,59 @@ +package e2e + +import ( + "path/filepath" + "testing" + "time" + + copilot "github.com/github/copilot-sdk/go" + "github.com/github/copilot-sdk/go/rpc" +) + +func testMCPServers(t *testing.T, serverNames ...string) map[string]copilot.MCPServerConfig { + t.Helper() + + mcpServerPath, err := filepath.Abs("../../../test/harness/test-mcp-server.mjs") + if err != nil { + t.Fatalf("Failed to resolve test-mcp-server path: %v", err) + } + + mcpServerDir := filepath.Dir(mcpServerPath) + mcpServers := make(map[string]copilot.MCPServerConfig, len(serverNames)) + for _, serverName := range serverNames { + mcpServers[serverName] = copilot.MCPStdioServerConfig{ + Command: "node", + Args: []string{mcpServerPath}, + Tools: &[]string{"*"}, + WorkingDirectory: mcpServerDir, + } + } + return mcpServers +} + +func waitForMCPServerStatus(t *testing.T, session *copilot.Session, serverName string, expectedStatus rpc.McpServerStatus) { + t.Helper() + + var lastStatus string + deadline := time.Now().Add(60 * time.Second) + for time.Now().Before(deadline) { + result, err := session.RPC.Mcp.List(t.Context()) + if err != nil { + lastStatus = err.Error() + } else { + lastStatus = "" + for _, server := range result.Servers { + if server.Name != serverName { + continue + } + if server.Status == expectedStatus { + return + } + lastStatus = string(server.Status) + break + } + } + time.Sleep(200 * time.Millisecond) + } + + t.Fatalf("%s did not reach %s; last status was %s", serverName, expectedStatus, lastStatus) +} diff --git a/go/internal/e2e/rpc_mcp_and_skills_e2e_test.go b/go/internal/e2e/rpc_mcp_and_skills_e2e_test.go index c636201da..6063dd162 100644 --- a/go/internal/e2e/rpc_mcp_and_skills_e2e_test.go +++ b/go/internal/e2e/rpc_mcp_and_skills_e2e_test.go @@ -108,18 +108,13 @@ func TestRpcMcpAndSkillsE2E(t *testing.T) { const serverName = "rpc-list-mcp-server" session, err := client.CreateSession(t.Context(), &copilot.SessionConfig{ OnPermissionRequest: copilot.PermissionHandler.ApproveAll, - MCPServers: map[string]copilot.MCPServerConfig{ - serverName: copilot.MCPStdioServerConfig{ - Command: "echo", - Args: []string{"rpc-list-mcp-server"}, - Tools: &[]string{"*"}, - }, - }, + MCPServers: testMCPServers(t, serverName), }) if err != nil { t.Fatalf("CreateSession failed: %v", err) } + waitForMCPServerStatus(t, session, serverName, rpc.McpServerStatusConnected) result, err := session.RPC.Mcp.List(t.Context()) if err != nil { t.Fatalf("Mcp.List failed: %v", err) diff --git a/go/internal/e2e/session_config_e2e_test.go b/go/internal/e2e/session_config_e2e_test.go index d932ae31b..e5daf931b 100644 --- a/go/internal/e2e/session_config_e2e_test.go +++ b/go/internal/e2e/session_config_e2e_test.go @@ -635,15 +635,12 @@ func TestSessionConfigExtrasE2E(t *testing.T) { } t.Cleanup(func() { _ = session2.Disconnect() }) - _, err = session2.SendAndWait(t.Context(), copilot.MessageOptions{Prompt: "What is 1+1?"}) + _, err = session2.Send(t.Context(), copilot.MessageOptions{Prompt: "What is 1+1?"}) if err != nil { - t.Fatalf("SendAndWait failed: %v", err) + t.Fatalf("Send failed: %v", err) } - exchanges, err := ctx.GetExchanges() - if err != nil { - t.Fatalf("GetExchanges failed: %v", err) - } + exchanges := ctx.WaitForExchanges(t, 1) if len(exchanges) != 1 { t.Fatalf("Expected exactly 1 exchange, got %d", len(exchanges)) } diff --git a/go/internal/e2e/session_e2e_test.go b/go/internal/e2e/session_e2e_test.go index f45a74616..fb5ecbd6e 100644 --- a/go/internal/e2e/session_e2e_test.go +++ b/go/internal/e2e/session_e2e_test.go @@ -245,26 +245,15 @@ func TestSessionE2E(t *testing.T) { if err != nil { t.Fatalf("Failed to create session: %v", err) } + t.Cleanup(func() { _ = session.Disconnect() }) _, err = session.Send(t.Context(), copilot.MessageOptions{Prompt: "What is 1+1?"}) if err != nil { t.Fatalf("Failed to send message: %v", err) } - _, err = testharness.GetFinalAssistantMessage(t.Context(), session) - if err != nil { - t.Fatalf("Failed to get assistant message: %v", err) - } - // Validate that only the specified tools are present - traffic, err := ctx.GetExchanges() - if err != nil { - t.Fatalf("Failed to get exchanges: %v", err) - } - if len(traffic) == 0 { - t.Fatal("Expected at least one exchange") - } - + traffic := ctx.WaitForExchanges(t, 1) toolNames := getToolNames(traffic[0]) if len(toolNames) != 2 { t.Errorf("Expected exactly 2 tools, got %d: %v", len(toolNames), toolNames) @@ -284,26 +273,15 @@ func TestSessionE2E(t *testing.T) { if err != nil { t.Fatalf("Failed to create session: %v", err) } + t.Cleanup(func() { _ = session.Disconnect() }) _, err = session.Send(t.Context(), copilot.MessageOptions{Prompt: "What is 1+1?"}) if err != nil { t.Fatalf("Failed to send message: %v", err) } - _, err = testharness.GetFinalAssistantMessage(t.Context(), session) - if err != nil { - t.Fatalf("Failed to get assistant message: %v", err) - } - // Validate that excluded tool is not present but others are - traffic, err := ctx.GetExchanges() - if err != nil { - t.Fatalf("Failed to get exchanges: %v", err) - } - if len(traffic) == 0 { - t.Fatal("Expected at least one exchange") - } - + traffic := ctx.WaitForExchanges(t, 1) toolNames := getToolNames(traffic[0]) if contains(toolNames, "view") { t.Errorf("Expected 'view' to be excluded, got %v", toolNames) @@ -338,26 +316,15 @@ func TestSessionE2E(t *testing.T) { if err != nil { t.Fatalf("Failed to create session: %v", err) } + t.Cleanup(func() { _ = session.Disconnect() }) _, err = session.Send(t.Context(), copilot.MessageOptions{Prompt: "What is 1+1?"}) if err != nil { t.Fatalf("Failed to send message: %v", err) } - _, err = testharness.GetFinalAssistantMessage(t.Context(), session) - if err != nil { - t.Fatalf("Failed to get assistant message: %v", err) - } - // The real assertion: verify the runtime excluded the tool from the CAPI request - traffic, err := ctx.GetExchanges() - if err != nil { - t.Fatalf("Failed to get exchanges: %v", err) - } - if len(traffic) == 0 { - t.Fatal("Expected at least one exchange") - } - + traffic := ctx.WaitForExchanges(t, 1) toolNames := getToolNames(traffic[0]) if contains(toolNames, "secret_tool") { t.Errorf("Expected 'secret_tool' to be excluded from default agent, got %v", toolNames) diff --git a/go/internal/e2e/testharness/context.go b/go/internal/e2e/testharness/context.go index c1331fbe9..cae966667 100644 --- a/go/internal/e2e/testharness/context.go +++ b/go/internal/e2e/testharness/context.go @@ -8,6 +8,7 @@ import ( "strings" "sync" "testing" + "time" copilot "github.com/github/copilot-sdk/go" ) @@ -169,6 +170,30 @@ func (c *TestContext) GetExchanges() ([]ParsedHttpExchange, error) { return c.proxy.GetExchanges() } +// WaitForExchanges waits until the proxy has captured at least the requested exchanges. +func (c *TestContext) WaitForExchanges(t *testing.T, minimumCount int) []ParsedHttpExchange { + t.Helper() + + deadline := time.Now().Add(120 * time.Second) + var lastErr error + var exchanges []ParsedHttpExchange + for time.Now().Before(deadline) { + var err error + exchanges, err = c.GetExchanges() + if err == nil && len(exchanges) >= minimumCount { + return exchanges + } + lastErr = err + time.Sleep(100 * time.Millisecond) + } + + if lastErr != nil { + t.Fatalf("Timed out waiting for %d chat completion request(s): %v", minimumCount, lastErr) + } + t.Fatalf("Timed out waiting for %d chat completion request(s); captured %d", minimumCount, len(exchanges)) + return nil +} + // SetCopilotUserByToken registers a per-token user configuration on the proxy. func (c *TestContext) SetCopilotUserByToken(token string, response map[string]interface{}) error { return c.proxy.SetCopilotUserByToken(token, response) diff --git a/java/src/test/java/com/github/copilot/sdk/E2ETestContext.java b/java/src/test/java/com/github/copilot/sdk/E2ETestContext.java index 9680148ff..f75eaa689 100644 --- a/java/src/test/java/com/github/copilot/sdk/E2ETestContext.java +++ b/java/src/test/java/com/github/copilot/sdk/E2ETestContext.java @@ -121,6 +121,13 @@ public Path getWorkDir() { return workDir; } + /** + * Gets the repository root for locating shared test assets. + */ + public Path getRepoRoot() { + return repoRoot; + } + /** * Gets the proxy URL. */ diff --git a/java/src/test/java/com/github/copilot/sdk/McpAndAgentsTest.java b/java/src/test/java/com/github/copilot/sdk/McpAndAgentsTest.java index a7d81646b..474b97782 100644 --- a/java/src/test/java/com/github/copilot/sdk/McpAndAgentsTest.java +++ b/java/src/test/java/com/github/copilot/sdk/McpAndAgentsTest.java @@ -6,6 +6,7 @@ import static org.junit.jupiter.api.Assertions.*; +import java.nio.file.Path; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -17,6 +18,7 @@ import org.junit.jupiter.api.Test; import com.github.copilot.sdk.generated.AssistantMessageEvent; +import com.github.copilot.sdk.generated.rpc.McpServerStatus; import com.github.copilot.sdk.json.CustomAgentConfig; import com.github.copilot.sdk.json.DefaultAgentConfig; import com.github.copilot.sdk.json.McpServerConfig; @@ -51,9 +53,33 @@ static void teardown() throws Exception { } } - // Helper method to create an MCP stdio server configuration - private McpStdioServerConfig createLocalMcpServer(String command, List args) { - return new McpStdioServerConfig().setCommand(command).setArgs(args).setTools(List.of("*")); + private Map createTestMcpServers(String... serverNames) { + Map servers = new HashMap<>(); + for (String serverName : serverNames) { + servers.put(serverName, createTestMcpServer()); + } + return servers; + } + + private McpStdioServerConfig createTestMcpServer() { + Path harnessDir = ctx.getRepoRoot().resolve("test").resolve("harness"); + return new McpStdioServerConfig().setCommand("node") + .setArgs(List.of(harnessDir.resolve("test-mcp-server.mjs").toString())) + .setWorkingDirectory(harnessDir.toString()).setTools(List.of("*")); + } + + private void waitForMcpServerStatus(CopilotSession session, String serverName, McpServerStatus expectedStatus) + throws Exception { + long deadline = System.nanoTime() + TimeUnit.SECONDS.toNanos(60); + while (System.nanoTime() < deadline) { + var result = session.getRpc().mcp.list().get(5, TimeUnit.SECONDS); + if (result.servers() != null && result.servers().stream() + .anyMatch(server -> serverName.equals(server.name()) && expectedStatus == server.status())) { + return; + } + Thread.sleep(200); + } + fail(serverName + " did not reach " + expectedStatus); } // ============ MCP Server Tests ============ @@ -68,8 +94,7 @@ private McpStdioServerConfig createLocalMcpServer(String command, List a void testShouldAcceptMcpServerConfigurationOnSessionCreate() throws Exception { ctx.configureForTest("mcp_and_agents", "should_accept_mcp_server_configuration_on_session_create"); - var mcpServers = new HashMap(); - mcpServers.put("test-server", createLocalMcpServer("echo", List.of("hello"))); + var mcpServers = createTestMcpServers("test-server"); try (CopilotClient client = ctx.createClient()) { CopilotSession session = client.createSession( @@ -77,6 +102,7 @@ void testShouldAcceptMcpServerConfigurationOnSessionCreate() throws Exception { .get(); assertNotNull(session.getSessionId()); + waitForMcpServerStatus(session, "test-server", McpServerStatus.CONNECTED); // Simple interaction to verify session works AssistantMessageEvent response = session.sendAndWait(new MessageOptions().setPrompt("What is 2+2?")).get(60, @@ -108,20 +134,13 @@ void testShouldAcceptMcpServerConfigurationOnSessionResume() throws Exception { session1.sendAndWait(new MessageOptions().setPrompt("What is 1+1?")).get(60, TimeUnit.SECONDS); // Resume with MCP servers - var mcpServers = new HashMap(); - mcpServers.put("test-server", createLocalMcpServer("echo", List.of("hello"))); + var mcpServers = createTestMcpServers("test-server"); CopilotSession session2 = client.resumeSession(sessionId, new ResumeSessionConfig() .setMcpServers(mcpServers).setOnPermissionRequest(PermissionHandler.APPROVE_ALL)).get(); assertEquals(sessionId, session2.getSessionId()); - - AssistantMessageEvent response = session2.sendAndWait(new MessageOptions().setPrompt("What is 3+3?")) - .get(60, TimeUnit.SECONDS); - - assertNotNull(response); - assertTrue(response.getData().content().contains("6"), - "Response should contain 6: " + response.getData().content()); + waitForMcpServerStatus(session2, "test-server", McpServerStatus.CONNECTED); session2.close(); } @@ -139,9 +158,7 @@ void testShouldHandleMultipleMcpServers() throws Exception { // count ctx.configureForTest("mcp_and_agents", "should_accept_mcp_server_configuration_on_session_create"); - var mcpServers = new HashMap(); - mcpServers.put("server1", createLocalMcpServer("echo", List.of("server1"))); - mcpServers.put("server2", createLocalMcpServer("echo", List.of("server2"))); + var mcpServers = createTestMcpServers("server1", "server2"); try (CopilotClient client = ctx.createClient()) { CopilotSession session = client.createSession( @@ -149,6 +166,8 @@ void testShouldHandleMultipleMcpServers() throws Exception { .get(); assertNotNull(session.getSessionId()); + waitForMcpServerStatus(session, "server1", McpServerStatus.CONNECTED); + waitForMcpServerStatus(session, "server2", McpServerStatus.CONNECTED); session.close(); } } @@ -261,8 +280,7 @@ void testShouldAcceptCustomAgentWithMcpServers() throws Exception { // Use combined snapshot since this uses both MCP servers and custom agents ctx.configureForTest("mcp_and_agents", "should_accept_both_mcp_servers_and_custom_agents"); - var agentMcpServers = new HashMap(); - agentMcpServers.put("agent-server", createLocalMcpServer("echo", List.of("agent-mcp"))); + var agentMcpServers = createTestMcpServers("agent-server"); List customAgents = List.of(new CustomAgentConfig().setName("mcp-agent") .setDisplayName("MCP Agent").setDescription("An agent with its own MCP servers") @@ -315,8 +333,7 @@ void testShouldAcceptMultipleCustomAgents() throws Exception { void testShouldAcceptBothMcpServersAndCustomAgents() throws Exception { ctx.configureForTest("mcp_and_agents", "should_accept_both_mcp_servers_and_custom_agents"); - var mcpServers = new HashMap(); - mcpServers.put("shared-server", createLocalMcpServer("echo", List.of("shared"))); + var mcpServers = createTestMcpServers("shared-server"); List customAgents = List.of(new CustomAgentConfig().setName("combined-agent") .setDisplayName("Combined Agent").setDescription("An agent using shared MCP servers") @@ -327,6 +344,7 @@ void testShouldAcceptBothMcpServersAndCustomAgents() throws Exception { .setCustomAgents(customAgents).setOnPermissionRequest(PermissionHandler.APPROVE_ALL)).get(); assertNotNull(session.getSessionId()); + waitForMcpServerStatus(session, "shared-server", McpServerStatus.CONNECTED); AssistantMessageEvent response = session.sendAndWait(new MessageOptions().setPrompt("What is 7+7?")).get(60, TimeUnit.SECONDS); diff --git a/nodejs/test/e2e/mcp_and_agents.e2e.test.ts b/nodejs/test/e2e/mcp_and_agents.e2e.test.ts index 93a8df7a4..a593ff988 100644 --- a/nodejs/test/e2e/mcp_and_agents.e2e.test.ts +++ b/nodejs/test/e2e/mcp_and_agents.e2e.test.ts @@ -6,27 +6,62 @@ import { dirname, resolve } from "path"; import { fileURLToPath } from "url"; import { describe, expect, it } from "vitest"; import { z } from "zod"; -import type { CustomAgentConfig, MCPStdioServerConfig, MCPServerConfig } from "../../src/index.js"; +import type { + CopilotSession, + CustomAgentConfig, + MCPStdioServerConfig, + MCPServerConfig, +} from "../../src/index.js"; import { approveAll, defineTool } from "../../src/index.js"; import { createSdkTestContext } from "./harness/sdkTestContext.js"; const __filename = fileURLToPath(import.meta.url); const __dirname = dirname(__filename); const TEST_MCP_SERVER = resolve(__dirname, "../../../test/harness/test-mcp-server.mjs"); +const TEST_HARNESS_DIR = dirname(TEST_MCP_SERVER); + +function createTestMcpServers(...serverNames: string[]): Record { + return Object.fromEntries( + serverNames.map((name) => [ + name, + { + type: "local", + command: "node", + args: [TEST_MCP_SERVER], + workingDirectory: TEST_HARNESS_DIR, + tools: ["*"], + } as MCPStdioServerConfig, + ]) + ); +} + +async function waitForMcpServerStatus( + session: CopilotSession, + serverName: string, + expectedStatus = "connected" +): Promise { + const deadline = Date.now() + 60_000; + let lastStatus = ""; + + while (Date.now() < deadline) { + const result = await session.rpc.mcp.list(); + const server = result.servers.find((s) => s.name === serverName); + if (server?.status === expectedStatus) { + return; + } + lastStatus = server?.status ?? ""; + await new Promise((resolve) => setTimeout(resolve, 200)); + } + + throw new Error(`${serverName} did not reach ${expectedStatus}; last status was ${lastStatus}`); +} describe("MCP Servers and Custom Agents", async () => { const { copilotClient: client, openAiEndpoint } = await createSdkTestContext(); describe("MCP Servers", () => { it("should accept MCP server configuration on session create", async () => { - const mcpServers: Record = { - "test-server": { - type: "local", - command: "echo", - args: ["hello"], - tools: ["*"], - } as MCPStdioServerConfig, - }; + const mcpServers = createTestMcpServers("test-server"); const session = await client.createSession({ onPermissionRequest: approveAll, @@ -34,6 +69,7 @@ describe("MCP Servers and Custom Agents", async () => { }); expect(session.sessionId).toBeDefined(); + await waitForMcpServerStatus(session, "test-server"); // Simple interaction to verify session works const message = await session.sendAndWait({ @@ -48,7 +84,7 @@ describe("MCP Servers and Custom Agents", async () => { const mcpServers: Record = { "test-server": { type: "local", - command: "echo", + command: "git", tools: ["*"], } as MCPStdioServerConfig, }; @@ -60,11 +96,6 @@ describe("MCP Servers and Custom Agents", async () => { expect(session.sessionId).toBeDefined(); - const message = await session.sendAndWait({ - prompt: "What is 2+2?", - }); - expect(message?.data.content).toContain("4"); - await session.disconnect(); }); @@ -75,14 +106,7 @@ describe("MCP Servers and Custom Agents", async () => { await session1.sendAndWait({ prompt: "What is 1+1?" }); // Resume with MCP servers - const mcpServers: Record = { - "test-server": { - type: "local", - command: "echo", - args: ["hello"], - tools: ["*"], - } as MCPStdioServerConfig, - }; + const mcpServers = createTestMcpServers("test-server"); const session2 = await client.resumeSession(sessionId, { onPermissionRequest: approveAll, @@ -90,30 +114,13 @@ describe("MCP Servers and Custom Agents", async () => { }); expect(session2.sessionId).toBe(sessionId); - - const message = await session2.sendAndWait({ - prompt: "What is 3+3?", - }); - expect(message?.data.content).toContain("6"); + await waitForMcpServerStatus(session2, "test-server"); await session2.disconnect(); }); it("should handle multiple MCP servers", async () => { - const mcpServers: Record = { - server1: { - type: "local", - command: "echo", - args: ["server1"], - tools: ["*"], - } as MCPStdioServerConfig, - server2: { - type: "local", - command: "echo", - args: ["server2"], - tools: ["*"], - } as MCPStdioServerConfig, - }; + const mcpServers = createTestMcpServers("server1", "server2"); const session = await client.createSession({ onPermissionRequest: approveAll, @@ -121,6 +128,8 @@ describe("MCP Servers and Custom Agents", async () => { }); expect(session.sessionId).toBeDefined(); + await waitForMcpServerStatus(session, "server1"); + await waitForMcpServerStatus(session, "server2"); await session.disconnect(); }); @@ -132,6 +141,7 @@ describe("MCP Servers and Custom Agents", async () => { args: [TEST_MCP_SERVER], tools: ["*"], env: { TEST_SECRET: "hunter2" }, + workingDirectory: TEST_HARNESS_DIR, } as MCPStdioServerConfig, }; @@ -141,6 +151,7 @@ describe("MCP Servers and Custom Agents", async () => { }); expect(session.sessionId).toBeDefined(); + await waitForMcpServerStatus(session, "env-echo"); const message = await session.sendAndWait({ prompt: "Use the env-echo/get_env tool to read the TEST_SECRET environment variable. Reply with just the value, nothing else.", @@ -239,12 +250,7 @@ describe("MCP Servers and Custom Agents", async () => { description: "An agent with its own MCP servers", prompt: "You are an agent with MCP servers.", mcpServers: { - "agent-server": { - type: "local", - command: "echo", - args: ["agent-mcp"], - tools: ["*"], - } as MCPStdioServerConfig, + ...createTestMcpServers("agent-server"), }, }, ]; @@ -287,14 +293,7 @@ describe("MCP Servers and Custom Agents", async () => { describe("Combined Configuration", () => { it("should accept both MCP servers and custom agents", async () => { - const mcpServers: Record = { - "shared-server": { - type: "local", - command: "echo", - args: ["shared"], - tools: ["*"], - } as MCPStdioServerConfig, - }; + const mcpServers = createTestMcpServers("shared-server"); const customAgents: CustomAgentConfig[] = [ { @@ -312,6 +311,7 @@ describe("MCP Servers and Custom Agents", async () => { }); expect(session.sessionId).toBeDefined(); + await waitForMcpServerStatus(session, "shared-server"); await session.disconnect(); }); diff --git a/nodejs/test/e2e/rpc_mcp_and_skills.e2e.test.ts b/nodejs/test/e2e/rpc_mcp_and_skills.e2e.test.ts index 91e23200c..5d171c778 100644 --- a/nodejs/test/e2e/rpc_mcp_and_skills.e2e.test.ts +++ b/nodejs/test/e2e/rpc_mcp_and_skills.e2e.test.ts @@ -4,11 +4,19 @@ import * as fs from "fs"; import * as path from "path"; +import { fileURLToPath } from "url"; import { describe, expect, it } from "vitest"; import { approveAll, RuntimeConnection } from "../../src/index.js"; -import type { MCPServerConfig } from "../../src/index.js"; +import type { CopilotSession, MCPServerConfig, MCPStdioServerConfig } from "../../src/index.js"; import { createSdkTestContext } from "./harness/sdkTestContext.js"; +const __filename = fileURLToPath(import.meta.url); +const TEST_MCP_SERVER = path.resolve( + path.dirname(__filename), + "../../../test/harness/test-mcp-server.mjs" +); +const TEST_HARNESS_DIR = path.dirname(TEST_MCP_SERVER); + describe("Session MCP and skills RPC", async () => { // --yolo auto-approves extension permission gates at the CLI level, // preventing breakage from new gates (e.g., extension-permission-access). @@ -34,6 +42,44 @@ describe("Session MCP and skills RPC", async () => { return skillsDir; } + function createTestMcpServers(...serverNames: string[]): Record { + return Object.fromEntries( + serverNames.map((name) => [ + name, + { + type: "stdio", + command: "node", + args: [TEST_MCP_SERVER], + workingDirectory: TEST_HARNESS_DIR, + tools: ["*"], + } as MCPStdioServerConfig, + ]) + ); + } + + async function waitForMcpServerStatus( + session: CopilotSession, + serverName: string, + expectedStatus = "connected" + ): Promise { + const deadline = Date.now() + 60_000; + let lastStatus = ""; + + while (Date.now() < deadline) { + const result = await session.rpc.mcp.list(); + const server = result.servers.find((s) => s.name === serverName); + if (server?.status === expectedStatus) { + return; + } + lastStatus = server?.status ?? ""; + await new Promise((resolve) => setTimeout(resolve, 200)); + } + + throw new Error( + `${serverName} did not reach ${expectedStatus}; last status was ${lastStatus}` + ); + } + async function expectFailure( action: () => Promise, expectedMessage: string @@ -106,20 +152,14 @@ describe("Session MCP and skills RPC", async () => { it("should list mcp servers with configured server", async () => { const serverName = "rpc-list-mcp-server"; - const mcpServers: Record = { - [serverName]: { - type: "stdio", - command: "echo", - args: ["rpc-list-mcp-server"], - tools: ["*"], - }, - }; + const mcpServers = createTestMcpServers(serverName); const session = await client.createSession({ onPermissionRequest: approveAll, mcpServers, }); + await waitForMcpServerStatus(session, serverName); const result = await session.rpc.mcp.list(); const server = result.servers.find((s) => s.name === serverName); expect(server).toBeDefined(); diff --git a/nodejs/test/e2e/session.e2e.test.ts b/nodejs/test/e2e/session.e2e.test.ts index fe6d4b9b2..bb935f829 100644 --- a/nodejs/test/e2e/session.e2e.test.ts +++ b/nodejs/test/e2e/session.e2e.test.ts @@ -3,7 +3,7 @@ import { describe, expect, it, onTestFinished, vi } from "vitest"; import { ParsedHttpExchange } from "../../../test/harness/replayingCapiProxy.js"; import { CopilotClient, approveAll, defineTool, RuntimeConnection } from "../../src/index.js"; import { createSdkTestContext, isCI } from "./harness/sdkTestContext.js"; -import { getFinalAssistantMessage, getNextEventOfType } from "./harness/sdkTestHelper.js"; +import { getFinalAssistantMessage, getNextEventOfType, retry } from "./harness/sdkTestHelper.js"; describe("Sessions", async () => { const { @@ -14,6 +14,18 @@ describe("Sessions", async () => { env, } = await createSdkTestContext(); + async function waitForExchanges(minimumCount = 1) { + await retry( + `capture ${minimumCount} chat completion request(s)`, + async () => { + const exchanges = await openAiEndpoint.getExchanges(); + expect(exchanges.length).toBeGreaterThanOrEqual(minimumCount); + }, + 1_200 + ); + return openAiEndpoint.getExchanges(); + } + it.each([ ["stdio", () => RuntimeConnection.forStdio({ path: process.env.COPILOT_CLI_PATH })], ["tcp", () => RuntimeConnection.forTcp({ path: process.env.COPILOT_CLI_PATH })], @@ -233,14 +245,18 @@ describe("Sessions", async () => { availableTools: ["view", "edit"], }); - await session.sendAndWait({ prompt: "What is 1+1?" }); + try { + await session.send({ prompt: "What is 1+1?" }); - // It only tells the model about the specified tools and no others - const traffic = await openAiEndpoint.getExchanges(); - expect(traffic[0].request.tools).toMatchObject([ - { function: { name: "view" } }, - { function: { name: "edit" } }, - ]); + // It only tells the model about the specified tools and no others + const traffic = await waitForExchanges(); + expect(traffic[0].request.tools).toMatchObject([ + { function: { name: "view" } }, + { function: { name: "edit" } }, + ]); + } finally { + await session.disconnect(); + } }); it("should create a session with excludedTools", async () => { @@ -249,16 +265,20 @@ describe("Sessions", async () => { excludedTools: ["view"], }); - await session.sendAndWait({ prompt: "What is 1+1?" }); + try { + await session.send({ prompt: "What is 1+1?" }); - // It has other tools, but not the one we excluded - const traffic = await openAiEndpoint.getExchanges(); - const functionNames = traffic[0].request.tools?.map( - (t) => (t as { function: { name: string } }).function.name - ); - expect(functionNames).toContain("edit"); - expect(functionNames).toContain("grep"); - expect(functionNames).not.toContain("view"); + // It has other tools, but not the one we excluded + const traffic = await waitForExchanges(); + const functionNames = traffic[0].request.tools?.map( + (t) => (t as { function: { name: string } }).function.name + ); + expect(functionNames).toContain("edit"); + expect(functionNames).toContain("grep"); + expect(functionNames).not.toContain("view"); + } finally { + await session.disconnect(); + } }); it("should create a session with defaultAgent excludedTools", async () => { @@ -280,18 +300,19 @@ describe("Sessions", async () => { }, }); - await session.sendAndWait({ prompt: "What is 1+1?" }); - - // The secret_tool should be registered with the runtime but not advertised - // to the default agent's underlying model call. - const traffic = await openAiEndpoint.getExchanges(); - expect(traffic.length).toBeGreaterThan(0); - const functionNames = traffic[0].request.tools?.map( - (t) => (t as { function: { name: string } }).function.name - ); - expect(functionNames).not.toContain("secret_tool"); + try { + await session.send({ prompt: "What is 1+1?" }); - await session.disconnect(); + // The secret_tool should be registered with the runtime but not advertised + // to the default agent's underlying model call. + const traffic = await waitForExchanges(); + const functionNames = traffic[0].request.tools?.map( + (t) => (t as { function: { name: string } }).function.name + ); + expect(functionNames).not.toContain("secret_tool"); + } finally { + await session.disconnect(); + } }); // TODO: This test shows there's a race condition inside client.ts. If createSession is called diff --git a/nodejs/test/e2e/session_config.e2e.test.ts b/nodejs/test/e2e/session_config.e2e.test.ts index b86c3fa51..acb31f058 100644 --- a/nodejs/test/e2e/session_config.e2e.test.ts +++ b/nodejs/test/e2e/session_config.e2e.test.ts @@ -3,10 +3,23 @@ import { writeFile, mkdir } from "fs/promises"; import { join } from "path"; import { approveAll } from "../../src/index.js"; import { createSdkTestContext } from "./harness/sdkTestContext.js"; +import { retry } from "./harness/sdkTestHelper.js"; describe("Session Configuration", async () => { const { copilotClient: client, workDir, openAiEndpoint } = await createSdkTestContext(); + async function waitForExchanges(minimumCount = 1) { + await retry( + `capture ${minimumCount} chat completion request(s)`, + async () => { + const exchanges = await openAiEndpoint.getExchanges(); + expect(exchanges.length).toBeGreaterThanOrEqual(minimumCount); + }, + 1_200 + ); + return openAiEndpoint.getExchanges(); + } + it("should use workingDirectory for tool execution", async () => { const subDir = join(workDir, "subproject"); await mkdir(subDir, { recursive: true }); @@ -428,13 +441,14 @@ describe("Session Configuration", async () => { availableTools: ["view"], }); - await session2.sendAndWait({ prompt: "What is 1+1?" }); - - const exchanges = await openAiEndpoint.getExchanges(); - expect(exchanges.length).toBeGreaterThan(0); - const toolNames = getToolNames(exchanges[exchanges.length - 1]); - expect(toolNames).toEqual(["view"]); + try { + await session2.send({ prompt: "What is 1+1?" }); - await session2.disconnect(); + const exchanges = await waitForExchanges(); + const toolNames = getToolNames(exchanges[exchanges.length - 1]); + expect(toolNames).toEqual(["view"]); + } finally { + await session2.disconnect(); + } }); }); diff --git a/python/e2e/test_mcp_and_agents_e2e.py b/python/e2e/test_mcp_and_agents_e2e.py index 5033524f2..be017a1e5 100644 --- a/python/e2e/test_mcp_and_agents_e2e.py +++ b/python/e2e/test_mcp_and_agents_e2e.py @@ -2,10 +2,13 @@ Tests for MCP servers and custom agents functionality """ +import asyncio +import time from pathlib import Path import pytest +from copilot.generated.rpc import McpServerStatus from copilot.session import CustomAgentConfig, MCPServerConfig, PermissionHandler from .testharness import E2ETestContext @@ -18,24 +21,50 @@ pytestmark = pytest.mark.asyncio(loop_scope="module") +def _test_mcp_servers(*server_names: str) -> dict[str, MCPServerConfig]: + return { + server_name: { + "command": "node", + "args": [TEST_MCP_SERVER], + "tools": ["*"], + "working_directory": TEST_HARNESS_DIR, + } + for server_name in server_names + } + + +async def _wait_for_mcp_server_status( + session, server_name: str, expected_status: McpServerStatus = McpServerStatus.CONNECTED +) -> None: + deadline = time.monotonic() + 60 + last_status = "" + + while time.monotonic() < deadline: + result = await session.rpc.mcp.list() + server = next((s for s in result.servers if s.name == server_name), None) + if server is not None and server.status == expected_status: + return + last_status = server.status if server is not None else "" + await asyncio.sleep(0.2) + + raise AssertionError( + f"{server_name} did not reach {expected_status.value}; last status was {last_status}" + ) + + class TestMCPServers: async def test_should_accept_mcp_server_configuration_on_session_create( self, ctx: E2ETestContext ): """Test that MCP server configuration is accepted on session create""" - mcp_servers: dict[str, MCPServerConfig] = { - "test-server": { - "command": "echo", - "args": ["hello"], - "tools": ["*"], - } - } + mcp_servers = _test_mcp_servers("test-server") session = await ctx.client.create_session( on_permission_request=PermissionHandler.approve_all, mcp_servers=mcp_servers ) assert session.session_id is not None + await _wait_for_mcp_server_status(session, "test-server") # Simple interaction to verify session works message = await session.send_and_wait("What is 2+2?") @@ -48,7 +77,7 @@ async def test_should_accept_mcp_server_configuration_without_args(self, ctx: E2 """Test that MCP server configuration works without args field""" mcp_servers: dict[str, MCPServerConfig] = { "test-server": { - "command": "echo", + "command": "git", "tools": ["*"], } } @@ -59,10 +88,6 @@ async def test_should_accept_mcp_server_configuration_without_args(self, ctx: E2 assert session.session_id is not None - message = await session.send_and_wait("What is 2+2?") - assert message is not None - assert "4" in message.data.content - await session.disconnect() async def test_should_accept_mcp_server_configuration_on_session_resume( @@ -77,13 +102,7 @@ async def test_should_accept_mcp_server_configuration_on_session_resume( await session1.send_and_wait("What is 1+1?") # Resume with MCP servers - mcp_servers: dict[str, MCPServerConfig] = { - "test-server": { - "command": "echo", - "args": ["hello"], - "tools": ["*"], - } - } + mcp_servers = _test_mcp_servers("test-server") session2 = await ctx.client.resume_session( session_id, @@ -92,10 +111,7 @@ async def test_should_accept_mcp_server_configuration_on_session_resume( ) assert session2.session_id == session_id - - message = await session2.send_and_wait("What is 3+3?") - assert message is not None - assert "6" in message.data.content + await _wait_for_mcp_server_status(session2, "test-server") await session2.disconnect() @@ -118,6 +134,7 @@ async def test_should_pass_literal_env_values_to_mcp_server_subprocess( ) assert session.session_id is not None + await _wait_for_mcp_server_status(session, "env-echo") message = await session.send_and_wait( "Use the env-echo/get_env tool to read the TEST_SECRET " @@ -194,10 +211,7 @@ async def test_should_accept_custom_agent_configuration_on_session_resume( async def test_should_handle_multiple_mcp_servers(self, ctx: E2ETestContext): """Multiple MCP servers can be configured at once.""" - mcp_servers: dict[str, MCPServerConfig] = { - "server1": {"command": "echo", "args": ["server1"], "tools": ["*"]}, - "server2": {"command": "echo", "args": ["server2"], "tools": ["*"]}, - } + mcp_servers = _test_mcp_servers("server1", "server2") session = await ctx.client.create_session( on_permission_request=PermissionHandler.approve_all, @@ -205,6 +219,8 @@ async def test_should_handle_multiple_mcp_servers(self, ctx: E2ETestContext): ) try: assert session.session_id is not None + await _wait_for_mcp_server_status(session, "server1") + await _wait_for_mcp_server_status(session, "server2") import re assert re.match(r"^[a-f0-9-]+$", session.session_id) @@ -215,13 +231,7 @@ async def test_should_handle_multiple_mcp_servers(self, ctx: E2ETestContext): class TestCombinedConfiguration: async def test_should_accept_both_mcp_servers_and_custom_agents(self, ctx: E2ETestContext): """Test that both MCP servers and custom agents can be configured together""" - mcp_servers: dict[str, MCPServerConfig] = { - "shared-server": { - "command": "echo", - "args": ["shared"], - "tools": ["*"], - } - } + mcp_servers = _test_mcp_servers("shared-server") custom_agents: list[CustomAgentConfig] = [ { @@ -239,6 +249,7 @@ async def test_should_accept_both_mcp_servers_and_custom_agents(self, ctx: E2ETe ) assert session.session_id is not None + await _wait_for_mcp_server_status(session, "shared-server") await session.disconnect() @@ -275,13 +286,7 @@ async def test_should_handle_custom_agent_with_mcp_servers(self, ctx: E2ETestCon "display_name": "MCP Agent", "description": "An agent with its own MCP servers", "prompt": "You are an agent with MCP servers.", - "mcp_servers": { - "agent-server": { - "command": "echo", - "args": ["agent-mcp"], - "tools": ["*"], - } - }, + "mcp_servers": _test_mcp_servers("agent-server"), } ] diff --git a/python/e2e/test_rpc_mcp_and_skills_e2e.py b/python/e2e/test_rpc_mcp_and_skills_e2e.py index 6c7d66208..dee98b1dd 100644 --- a/python/e2e/test_rpc_mcp_and_skills_e2e.py +++ b/python/e2e/test_rpc_mcp_and_skills_e2e.py @@ -7,7 +7,9 @@ from __future__ import annotations +import asyncio import os +import time import uuid from pathlib import Path @@ -19,6 +21,7 @@ ExtensionsEnableRequest, MCPDisableRequest, MCPEnableRequest, + McpServerStatus, SkillsDisableRequest, SkillsEnableRequest, ) @@ -28,6 +31,11 @@ pytestmark = pytest.mark.asyncio(loop_scope="module") +TEST_MCP_SERVER = str( + (Path(__file__).parents[2] / "test" / "harness" / "test-mcp-server.mjs").resolve() +) +TEST_HARNESS_DIR = str((Path(__file__).parents[2] / "test" / "harness").resolve()) + # --yolo auto-approves extension permission gates at the CLI level, # preventing breakage from new gates (e.g., extension-permission-access). @@ -62,6 +70,37 @@ def _create_skill_directory(work_dir: str, skill_name: str, description: str) -> return str(skills_dir) +def _test_mcp_servers(*server_names: str) -> dict: + return { + server_name: { + "command": "node", + "args": [TEST_MCP_SERVER], + "tools": ["*"], + "working_directory": TEST_HARNESS_DIR, + } + for server_name in server_names + } + + +async def _wait_for_mcp_server_status( + session, server_name: str, expected_status: McpServerStatus = McpServerStatus.CONNECTED +) -> None: + deadline = time.monotonic() + 60 + last_status = "" + + while time.monotonic() < deadline: + result = await session.rpc.mcp.list() + server = next((s for s in result.servers if s.name == server_name), None) + if server is not None and server.status == expected_status: + return + last_status = server.status if server is not None else "" + await asyncio.sleep(0.2) + + raise AssertionError( + f"{server_name} did not reach {expected_status.value}; last status was {last_status}" + ) + + def _assert_skill(skills, skill_name: str, *, enabled: bool): matching = [s for s in skills if s.name == skill_name] assert len(matching) == 1, f"Expected exactly one skill named {skill_name!r}" @@ -130,15 +169,10 @@ async def test_should_list_mcp_servers_with_configured_server(self, ctx: E2ETest server_name = "rpc-list-mcp-server" session = await ctx.client.create_session( on_permission_request=PermissionHandler.approve_all, - mcp_servers={ - server_name: { - "command": "echo", - "args": ["rpc-list-mcp-server"], - "tools": ["*"], - } - }, + mcp_servers=_test_mcp_servers(server_name), ) try: + await _wait_for_mcp_server_status(session, server_name) result = await session.rpc.mcp.list() matching = [s for s in result.servers if s.name == server_name] assert len(matching) == 1 diff --git a/python/e2e/test_session_config_e2e.py b/python/e2e/test_session_config_e2e.py index 1fd2cd0a2..fb8a35c4d 100644 --- a/python/e2e/test_session_config_e2e.py +++ b/python/e2e/test_session_config_e2e.py @@ -422,11 +422,11 @@ async def test_should_apply_availabletools_on_session_resume(self, ctx: E2ETestC available_tools=["view"], ) - await session2.send_and_wait("What is 1+1?") - - exchanges = await ctx.get_exchanges() - assert exchanges - assert _get_tool_names(exchanges[-1]) == ["view"] - - await session2.disconnect() - await session1.disconnect() + try: + await session2.send("What is 1+1?") + + exchanges = await ctx.wait_for_exchanges() + assert _get_tool_names(exchanges[-1]) == ["view"] + finally: + await session2.disconnect() + await session1.disconnect() diff --git a/python/e2e/test_session_e2e.py b/python/e2e/test_session_e2e.py index d5a0c970e..eefb23146 100644 --- a/python/e2e/test_session_e2e.py +++ b/python/e2e/test_session_e2e.py @@ -119,32 +119,36 @@ async def test_should_create_a_session_with_availableTools(self, ctx: E2ETestCon available_tools=["view", "edit"], ) - await session.send("What is 1+1?") - await get_final_assistant_message(session) - - # It only tells the model about the specified tools and no others - traffic = await ctx.get_exchanges() - tools = traffic[0]["request"]["tools"] - tool_names = [t["function"]["name"] for t in tools] - assert len(tool_names) == 2 - assert "view" in tool_names - assert "edit" in tool_names + try: + await session.send("What is 1+1?") + + # It only tells the model about the specified tools and no others + traffic = await ctx.wait_for_exchanges() + tools = traffic[0]["request"]["tools"] + tool_names = [t["function"]["name"] for t in tools] + assert len(tool_names) == 2 + assert "view" in tool_names + assert "edit" in tool_names + finally: + await session.disconnect() async def test_should_create_a_session_with_excludedTools(self, ctx: E2ETestContext): session = await ctx.client.create_session( on_permission_request=PermissionHandler.approve_all, excluded_tools=["view"] ) - await session.send("What is 1+1?") - await get_final_assistant_message(session) - - # It has other tools, but not the one we excluded - traffic = await ctx.get_exchanges() - tools = traffic[0]["request"]["tools"] - tool_names = [t["function"]["name"] for t in tools] - assert "edit" in tool_names - assert "grep" in tool_names - assert "view" not in tool_names + try: + await session.send("What is 1+1?") + + # It has other tools, but not the one we excluded + traffic = await ctx.wait_for_exchanges() + tools = traffic[0]["request"]["tools"] + tool_names = [t["function"]["name"] for t in tools] + assert "edit" in tool_names + assert "grep" in tool_names + assert "view" not in tool_names + finally: + await session.disconnect() async def test_should_create_a_session_with_defaultAgent_excludedTools( self, ctx: E2ETestContext @@ -165,14 +169,16 @@ async def test_should_create_a_session_with_defaultAgent_excludedTools( default_agent={"excluded_tools": ["secret_tool"]}, ) - await session.send("What is 1+1?") - await get_final_assistant_message(session) + try: + await session.send("What is 1+1?") - # The real assertion: verify the runtime excluded the tool from the CAPI request - traffic = await ctx.get_exchanges() - tools = traffic[0]["request"]["tools"] - tool_names = [t["function"]["name"] for t in tools] - assert "secret_tool" not in tool_names + # The real assertion: verify the runtime excluded the tool from the CAPI request + traffic = await ctx.wait_for_exchanges() + tools = traffic[0]["request"]["tools"] + tool_names = [t["function"]["name"] for t in tools] + assert "secret_tool" not in tool_names + finally: + await session.disconnect() # TODO: This test shows there's a race condition inside client.ts. If createSession # is called concurrently and autoStart is on, it may start multiple child processes. diff --git a/python/e2e/testharness/context.py b/python/e2e/testharness/context.py index d67311598..4479e9035 100644 --- a/python/e2e/testharness/context.py +++ b/python/e2e/testharness/context.py @@ -4,11 +4,13 @@ Provides isolated directories and a replaying proxy for testing the SDK. """ +import asyncio import contextlib import os import re import shutil import tempfile +import time from pathlib import Path from typing import Any @@ -177,3 +179,18 @@ async def get_exchanges(self): if not self._proxy: raise RuntimeError("Proxy not started") return await self._proxy.get_exchanges() + + async def wait_for_exchanges( + self, minimum_count: int = 1, timeout: float = 120.0 + ) -> list[dict[str, Any]]: + """Wait until the proxy has captured at least the requested exchanges.""" + deadline = time.monotonic() + timeout + exchanges: list[dict[str, Any]] = [] + while time.monotonic() < deadline: + exchanges = await self.get_exchanges() + if len(exchanges) >= minimum_count: + return exchanges + await asyncio.sleep(0.1) + raise TimeoutError( + f"Timed out waiting for {minimum_count} chat completion request(s)" + ) diff --git a/rust/tests/e2e/rpc_mcp_and_skills.rs b/rust/tests/e2e/rpc_mcp_and_skills.rs index 932ac35a3..60493d6fb 100644 --- a/rust/tests/e2e/rpc_mcp_and_skills.rs +++ b/rust/tests/e2e/rpc_mcp_and_skills.rs @@ -1,4 +1,5 @@ use std::collections::HashMap; +use std::path::Path; use github_copilot_sdk::generated::api_types::{ ExtensionsDisableRequest, ExtensionsEnableRequest, McpDisableRequest, McpEnableRequest, @@ -136,7 +137,7 @@ async fn should_list_mcp_servers_with_configured_server() { let session = client .create_session( ctx.approve_all_session_config() - .with_mcp_servers(test_mcp_servers(server_name)), + .with_mcp_servers(test_mcp_servers(ctx.repo_root(), server_name)), ) .await .expect("create session"); @@ -280,10 +281,9 @@ async fn should_report_error_when_mcp_oauth_server_is_not_configured() { ctx.set_default_copilot_user(); let client = ctx.start_client().await; let session = client - .create_session( - ctx.approve_all_session_config() - .with_mcp_servers(test_mcp_servers("configured-stdio-server")), - ) + .create_session(ctx.approve_all_session_config().with_mcp_servers( + test_mcp_servers(ctx.repo_root(), "configured-stdio-server"), + )) .await .expect("create session"); @@ -319,7 +319,7 @@ async fn should_report_error_when_mcp_oauth_server_is_not_remote() { let session = client .create_session( ctx.approve_all_session_config() - .with_mcp_servers(test_mcp_servers(server_name)), + .with_mcp_servers(test_mcp_servers(ctx.repo_root(), server_name)), ) .await .expect("create session"); @@ -434,13 +434,24 @@ fn assert_skill( skill } -fn test_mcp_servers(message: &str) -> HashMap { +fn test_mcp_servers(repo_root: &Path, server_name: &str) -> HashMap { + let harness_dir = repo_root.join("test").join("harness"); + let server_path = harness_dir + .join("test-mcp-server.mjs") + .to_string_lossy() + .to_string(); + HashMap::from([( - message.to_string(), + server_name.to_string(), McpServerConfig::Stdio(McpStdioServerConfig { tools: Some(vec!["*".to_string()]), - command: echo_command(), - args: echo_args(message), + command: if cfg!(windows) { + "node.exe".to_string() + } else { + "node".to_string() + }, + args: vec![server_path], + working_directory: Some(harness_dir.to_string_lossy().to_string()), ..McpStdioServerConfig::default() }), )]) @@ -461,23 +472,3 @@ async fn expect_err_contains( "expected error to contain {expected:?}, got {err}" ); } - -#[cfg(windows)] -fn echo_command() -> String { - "cmd".to_string() -} - -#[cfg(not(windows))] -fn echo_command() -> String { - "echo".to_string() -} - -#[cfg(windows)] -fn echo_args(message: &str) -> Vec { - vec!["/C".to_string(), "echo".to_string(), message.to_string()] -} - -#[cfg(not(windows))] -fn echo_args(message: &str) -> Vec { - vec![message.to_string()] -} diff --git a/rust/tests/e2e/session.rs b/rust/tests/e2e/session.rs index ce07bf7f7..0bb08587e 100644 --- a/rust/tests/e2e/session.rs +++ b/rust/tests/e2e/session.rs @@ -273,7 +273,11 @@ async fn should_create_a_session_with_availabletools() { .await .expect("create session"); - session.send_and_wait("What is 1+1?").await.expect("send"); + session.send("What is 1+1?").await.expect("send"); + wait_for_condition("captured CAPI exchange", || async { + !ctx.exchanges().is_empty() + }) + .await; let exchanges = ctx.exchanges(); let tool_names = get_tool_names(&exchanges[0]); assert_eq!(tool_names.len(), 2); @@ -305,7 +309,11 @@ async fn should_create_a_session_with_excludedtools() { .await .expect("create session"); - session.send_and_wait("What is 1+1?").await.expect("send"); + session.send("What is 1+1?").await.expect("send"); + wait_for_condition("captured CAPI exchange", || async { + !ctx.exchanges().is_empty() + }) + .await; let exchanges = ctx.exchanges(); let tool_names = get_tool_names(&exchanges[0]); assert!(!tool_names.contains(&"view".to_string())); @@ -342,7 +350,11 @@ async fn should_create_a_session_with_defaultagent_excludedtools() { .await .expect("create session"); - session.send_and_wait("What is 1+1?").await.expect("send"); + session.send("What is 1+1?").await.expect("send"); + wait_for_condition("captured CAPI exchange", || async { + !ctx.exchanges().is_empty() + }) + .await; let exchanges = ctx.exchanges(); let tool_names = get_tool_names(&exchanges[0]); assert!(!tool_names.contains(&"secret_tool".to_string())); diff --git a/test/snapshots/mcp_and_agents/accept_mcp_server_config_on_resume.yaml b/test/snapshots/mcp_and_agents/accept_mcp_server_config_on_resume.yaml index 8c3e28542..f9918fa13 100644 --- a/test/snapshots/mcp_and_agents/accept_mcp_server_config_on_resume.yaml +++ b/test/snapshots/mcp_and_agents/accept_mcp_server_config_on_resume.yaml @@ -8,7 +8,3 @@ conversations: content: What is 1+1? - role: assistant content: 1+1 equals 2. - - role: user - content: What is 3+3? - - role: assistant - content: 3+3 equals 6. diff --git a/test/snapshots/mcp_and_agents/should_accept_mcp_server_configuration_on_session_resume.yaml b/test/snapshots/mcp_and_agents/should_accept_mcp_server_configuration_on_session_resume.yaml index 82c9917c3..250402101 100644 --- a/test/snapshots/mcp_and_agents/should_accept_mcp_server_configuration_on_session_resume.yaml +++ b/test/snapshots/mcp_and_agents/should_accept_mcp_server_configuration_on_session_resume.yaml @@ -8,7 +8,3 @@ conversations: content: What is 1+1? - role: assistant content: 1 + 1 = 2 - - role: user - content: What is 3+3? - - role: assistant - content: 3 + 3 = 6 From 961a6dafa365ba31d23b7f335d7e4bf8a4f690c8 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Fri, 22 May 2026 11:54:11 -0400 Subject: [PATCH 2/6] Format Python E2E harness Apply Ruff formatting to the new exchange polling helper. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- python/e2e/testharness/context.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/python/e2e/testharness/context.py b/python/e2e/testharness/context.py index 4479e9035..b0a7c6f6c 100644 --- a/python/e2e/testharness/context.py +++ b/python/e2e/testharness/context.py @@ -191,6 +191,4 @@ async def wait_for_exchanges( if len(exchanges) >= minimum_count: return exchanges await asyncio.sleep(0.1) - raise TimeoutError( - f"Timed out waiting for {minimum_count} chat completion request(s)" - ) + raise TimeoutError(f"Timed out waiting for {minimum_count} chat completion request(s)") From 6deb78aabc011d94bac4e13352fda899f3b40d10 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Fri, 22 May 2026 12:05:08 -0400 Subject: [PATCH 3/6] Address CodeQL review comments Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- dotnet/test/Harness/E2ETestBase.cs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dotnet/test/Harness/E2ETestBase.cs b/dotnet/test/Harness/E2ETestBase.cs index 2bcd3d81e..9d8037765 100644 --- a/dotnet/test/Harness/E2ETestBase.cs +++ b/dotnet/test/Harness/E2ETestBase.cs @@ -152,6 +152,7 @@ protected async Task> SendAndWaitForExchangesAsync( } catch (OperationCanceledException) when (cts.IsCancellationRequested) { + // Expected when cleanup cancels the send task. } } } @@ -165,7 +166,7 @@ protected static Dictionary CreateTestMcpServers(params _ => (McpServerConfig)new McpStdioServerConfig { Command = "node", - Args = [Path.Combine(testHarnessDir, "test-mcp-server.mjs")], + Args = [Path.Join(testHarnessDir, "test-mcp-server.mjs")], WorkingDirectory = testHarnessDir, Tools = ["*"] }); @@ -173,10 +174,11 @@ protected static Dictionary CreateTestMcpServers(params protected static string FindTestHarnessDir() { + var relativePath = Path.Join("test", "harness", "test-mcp-server.mjs"); var dir = new DirectoryInfo(AppContext.BaseDirectory); while (dir != null) { - var candidate = Path.Combine(dir.FullName, "test", "harness", "test-mcp-server.mjs"); + var candidate = Path.Combine(dir.FullName, relativePath); if (File.Exists(candidate)) return Path.GetDirectoryName(candidate)!; dir = dir.Parent; From 8874d2090d69cf6358e04d0e9dda5540c994e22c Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Fri, 22 May 2026 12:15:52 -0400 Subject: [PATCH 4/6] Avoid rooted path combine in test harness lookup Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- dotnet/test/Harness/E2ETestBase.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dotnet/test/Harness/E2ETestBase.cs b/dotnet/test/Harness/E2ETestBase.cs index 9d8037765..ddd1b894b 100644 --- a/dotnet/test/Harness/E2ETestBase.cs +++ b/dotnet/test/Harness/E2ETestBase.cs @@ -178,7 +178,7 @@ protected static string FindTestHarnessDir() var dir = new DirectoryInfo(AppContext.BaseDirectory); while (dir != null) { - var candidate = Path.Combine(dir.FullName, relativePath); + var candidate = Path.Join(dir.FullName, relativePath); if (File.Exists(candidate)) return Path.GetDirectoryName(candidate)!; dir = dir.Parent; From 86a613914baea66e339fc971285c6fa3059d0f74 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Fri, 22 May 2026 12:56:44 -0400 Subject: [PATCH 5/6] Stabilize custom config dir E2E test Use SendAndWaitAsync so the test subscribes for response events before sending the prompt, and dispose the session after the custom config dir check. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- dotnet/test/E2E/SessionE2ETests.cs | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/dotnet/test/E2E/SessionE2ETests.cs b/dotnet/test/E2E/SessionE2ETests.cs index cfa361793..88cfb1707 100644 --- a/dotnet/test/E2E/SessionE2ETests.cs +++ b/dotnet/test/E2E/SessionE2ETests.cs @@ -551,11 +551,17 @@ public async Task Should_Create_Session_With_Custom_Config_Dir() Assert.Matches(@"^[a-f0-9-]+$", session.SessionId); - // Session should work normally with custom config dir - await session.SendAsync(new MessageOptions { Prompt = "What is 1+1?" }); - var assistantMessage = await TestHelper.GetFinalAssistantMessageAsync(session); - Assert.NotNull(assistantMessage); - Assert.Contains("2", assistantMessage!.Data.Content); + try + { + // Session should work normally with custom config dir. + var assistantMessage = await session.SendAndWaitAsync(new MessageOptions { Prompt = "What is 1+1?" }); + Assert.NotNull(assistantMessage); + Assert.Contains("2", assistantMessage!.Data.Content); + } + finally + { + await session.DisposeAsync(); + } } [Fact] From 0811aebe6e33735a08839184561209a3ed71b6a3 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Fri, 22 May 2026 13:20:55 -0400 Subject: [PATCH 6/6] Stabilize permission handler E2E test Start the send-and-wait operation before awaiting permission callbacks so response events are observed by the operation's subscription. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- dotnet/test/E2E/PermissionE2ETests.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dotnet/test/E2E/PermissionE2ETests.cs b/dotnet/test/E2E/PermissionE2ETests.cs index 953ab1469..d2ff119d2 100644 --- a/dotnet/test/E2E/PermissionE2ETests.cs +++ b/dotnet/test/E2E/PermissionE2ETests.cs @@ -49,14 +49,14 @@ public async Task Should_Invoke_Permission_Handler_For_Write_Operations() await File.WriteAllTextAsync(Path.Combine(Ctx.WorkDir, "test.txt"), "original content"); - await session.SendAsync(new MessageOptions + var sendTask = session.SendAndWaitAsync(new MessageOptions { Prompt = "Edit test.txt and replace 'original' with 'modified'" }); var readRequest = await readPermissionRequestReceived.Task.WaitAsync(TimeSpan.FromSeconds(30)); var writeRequest = await writePermissionRequestReceived.Task.WaitAsync(TimeSpan.FromSeconds(30)); - await TestHelper.GetFinalAssistantMessageAsync(session); + await sendTask; List observedPermissionRequests; lock (permissionRequestsLock)