Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 31 additions & 9 deletions dotnet/test/E2E/PermissionE2ETests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -226,24 +226,46 @@ await session2.SendAndWaitAsync(new MessageOptions
[Fact]
public async Task Should_Handle_Permission_Handler_Errors_Gracefully()
{
var permissionRequestReceived =
new TaskCompletionSource<PermissionRequest>(TaskCreationOptions.RunContinuationsAsynchronously);
var session = await CreateSessionAsync(new SessionConfig
{
OnPermissionRequest = (request, invocation) =>
{
// Simulate an error in the handler
permissionRequestReceived.TrySetResult(request);
throw new InvalidOperationException("Handler error");
}
});

await session.SendAsync(new MessageOptions
try
{
Prompt = "Run 'echo test'. If you can't, say 'failed'."
});

var message = await TestHelper.GetFinalAssistantMessageAsync(session);

// Should handle the error and deny permission
Assert.Matches("fail|cannot|unable|permission", message?.Data.Content?.ToLowerInvariant() ?? string.Empty);
var exchanges = await SendAndWaitForExchangesAsync(
session,
new MessageOptions
{
Prompt = "Run 'echo test'. If you can't, say 'failed'."
},
minimumCount: 2);

var permissionRequest = await permissionRequestReceived.Task.WaitAsync(TimeSpan.FromSeconds(30));
Assert.IsType<PermissionRequestShell>(permissionRequest);

var toolResultMessage = exchanges
.SelectMany(exchange => exchange.Request.Messages)
.LastOrDefault(message =>
message.Role == "tool" &&
message.StringContent?.Contains("Permission denied", StringComparison.OrdinalIgnoreCase) == true);

Assert.NotNull(toolResultMessage);
Assert.Contains(
"could not request permission",
toolResultMessage.StringContent ?? string.Empty,
StringComparison.OrdinalIgnoreCase);
}
finally
{
await session.DisposeAsync();
}
}

[Fact]
Expand Down
17 changes: 17 additions & 0 deletions dotnet/test/E2E/RpcTasksAndHandlersE2ETests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,22 @@ public async Task Should_Start_Background_Agent_And_Report_Task_Details()
});
Assert.Contains("TASK_AGENT_READY", ready?.Data.Content ?? string.Empty, StringComparison.Ordinal);

var taskCompletionNotification =
new TaskCompletionSource<AssistantMessageEvent>(TaskCreationOptions.RunContinuationsAsynchronously);
using var subscription = session.On<SessionEvent>(evt =>
{
switch (evt)
{
case AssistantMessageEvent assistantMessage
when assistantMessage.Data.Content?.Contains("TASK_AGENT_DONE", StringComparison.Ordinal) == true:
taskCompletionNotification.TrySetResult(assistantMessage);
break;
case SessionErrorEvent error:
taskCompletionNotification.TrySetException(new Exception(error.Data.Message ?? "session error"));
break;
}
});

var started = await session.Rpc.Tasks.StartAgentAsync(
agentType: "general-purpose",
prompt: "Reply with TASK_AGENT_DONE exactly.",
Expand Down Expand Up @@ -123,6 +139,7 @@ await TestHelper.WaitForConditionAsync(

Assert.NotNull(task);
Assert.Contains("TASK_AGENT_DONE", task.LatestResponse ?? task.Result ?? string.Empty);
await taskCompletionNotification.Task.WaitAsync(TimeSpan.FromSeconds(30));

if (task.Status == GitHub.Copilot.Rpc.TaskStatus.Idle)
{
Expand Down
32 changes: 24 additions & 8 deletions python/e2e/test_permissions_e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,22 +236,38 @@ def on_permission_request(

async def test_should_handle_permission_handler_errors_gracefully(self, ctx: E2ETestContext):
"""Test that permission handler errors are handled gracefully"""
permission_request_received = asyncio.get_running_loop().create_future()

def on_permission_request(
request: PermissionRequest, invocation: dict
) -> PermissionRequestResult:
if not permission_request_received.done():
permission_request_received.set_result(request)
raise RuntimeError("Handler error")

session = await ctx.client.create_session(on_permission_request=on_permission_request)
try:
await session.send("Run 'echo test'. If you can't, say 'failed'.")

message = await session.send_and_wait("Run 'echo test'. If you can't, say 'failed'.")

# Should handle the error and deny permission
assert message is not None
content_lower = message.data.content.lower()
assert any(word in content_lower for word in ["fail", "cannot", "unable", "permission"])

await session.disconnect()
permission_request = await asyncio.wait_for(
permission_request_received,
timeout=30,
)
assert permission_request.kind == "shell"

exchanges = await ctx.wait_for_exchanges(2, timeout=60)
tool_messages = [
message
for exchange in exchanges
for message in exchange["request"]["messages"]
if message.get("role") == "tool"
and "Permission denied" in str(message.get("content", ""))
]

assert tool_messages
assert "could not request permission" in tool_messages[-1]["content"]
finally:
await session.disconnect()

async def test_should_receive_toolcallid_in_permission_requests(self, ctx: E2ETestContext):
"""Test that toolCallId is included in permission requests"""
Expand Down
18 changes: 18 additions & 0 deletions python/e2e/test_rpc_tasks_and_handlers_e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
UIElicitationResponseAction,
UIHandlePendingElicitationRequest,
)
from copilot.generated.session_events import AssistantMessageData, SessionErrorData
from copilot.session import PermissionHandler

from .testharness import E2ETestContext
Expand Down Expand Up @@ -211,6 +212,21 @@ async def test_should_start_background_agent_and_report_task_details(self, ctx:
session = await ctx.client.create_session(
on_permission_request=PermissionHandler.approve_all,
)
task_completion_notification = asyncio.get_running_loop().create_future()

def on_event(event):
if isinstance(event.data, AssistantMessageData) and "TASK_AGENT_DONE" in (
event.data.content or ""
):
if not task_completion_notification.done():
task_completion_notification.set_result(event)
elif isinstance(event.data, SessionErrorData):
if not task_completion_notification.done():
task_completion_notification.set_exception(
RuntimeError(event.data.message or "session error")
)

unsubscribe = session.on(on_event)
try:
ready = await session.send_and_wait(
"Reply with TASK_AGENT_READY exactly.",
Expand Down Expand Up @@ -262,6 +278,7 @@ async def test_should_start_background_agent_and_report_task_details(self, ctx:
)
assert found_task is not None, f"Task {task_id} disappeared before it completed"
assert "TASK_AGENT_DONE" in (found_task.latest_response or found_task.result or "")
await asyncio.wait_for(task_completion_notification, timeout=30.0)

if found_task.status == TaskInfoStatus.IDLE:
cancel = await session.rpc.tasks.cancel(TasksCancelRequest(id=task_id))
Expand All @@ -274,4 +291,5 @@ async def test_should_start_background_agent_and_report_task_details(self, ctx:
after_remove = await session.rpc.tasks.list()
assert not any(t.id == task_id for t in (after_remove.tasks or []))
finally:
unsubscribe()
await session.disconnect()
86 changes: 86 additions & 0 deletions test/harness/replayingCapiProxy.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,38 @@ describe("ReplayingCapiProxy", () => {
expect(result.conversations[0].messages[0].content).toBe("What is 2+2?");
});

test("normalizes task completion notification wording", async () => {
const unreadNotification = [
"<system_notification>",
'Agent "sdk-background-agent" (general-purpose) has completed successfully. Use read_agent with agent_id "sdk-background-agent" to retrieve unread results.',
"</system_notification>",
].join("\n");
const fullNotification = [
"<system_notification>",
'Agent "sdk-background-agent" (general-purpose) has completed successfully. Use read_agent with agent_id "sdk-background-agent" to retrieve the full results.',
"</system_notification>",
].join("\n");

const requestBody = JSON.stringify({
messages: [
{
role: "user",
content: unreadNotification,
},
],
});
const responseBody = JSON.stringify({
choices: [{ message: { role: "assistant", content: "Done" } }],
});

const outputPath = await createProxy([
{ url: "/chat/completions", requestBody, responseBody },
]);

const result = await readYamlOutput(outputPath);
expect(result.conversations[0].messages[0].content).toBe(fullNotification);
});

test("strips agent_instructions from user messages", async () => {
const requestBody = JSON.stringify({
messages: [
Expand Down Expand Up @@ -811,6 +843,60 @@ Always include PINEAPPLE_COCONUT_42.
}
});

test("matches cached task completion notification wording variants", async () => {
const cachePath = path.join(tempDir, "cache.yaml");
const unreadNotification = [
"<system_notification>",
'Agent "read-file" (explore) has completed successfully. Use read_agent with agent_id "read-file" to retrieve unread results.',
"</system_notification>",
].join("\n");

const cacheContent = yaml.stringify({
models: ["test-model"],
conversations: [
{
messages: [
{ role: "system", content: "${system}" },
{ role: "user", content: "Hello" },
{ role: "assistant", content: "Hi!" },
{ role: "user", content: unreadNotification },
{ role: "assistant", content: "Read agent completed." },
],
},
],
} satisfies NormalizedData);
await writeFile(cachePath, cacheContent);

const proxy = new ReplayingCapiProxy(
"http://localhost:9999",
cachePath,
workDir,
);
const proxyUrl = await proxy.start();

try {
const response = await makeRequest(proxyUrl, "/chat/completions", {
body: {
model: "test-model",
messages: [
{ role: "system", content: "Be helpful" },
{ role: "user", content: "Hello" },
{ role: "assistant", content: "Hi!" },
{ role: "user", content: unreadNotification },
],
},
});

expect(response.status).toBe(200);
expect(
(JSON.parse(response.body) as ChatCompletion).choices[0].message
.content,
).toBe("Read agent completed.");
} finally {
await proxy.stop();
}
});

test("matches parallel tool results regardless of arrival order", async () => {
const cachePath = path.join(tempDir, "cache.yaml");
const cacheContent = yaml.stringify({
Expand Down
20 changes: 20 additions & 0 deletions test/harness/replayingCapiProxy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ export class ReplayingCapiProxy extends CapturingHttpProxy {
const content = await readFile(this.state.filePath, "utf-8");
this.state.storedData = yaml.parse(content) as NormalizedData;
normalizeToolResultOrder(this.state.storedData.conversations);
normalizeStoredUserMessages(this.state.storedData.conversations);
}
}

Expand Down Expand Up @@ -1034,6 +1035,7 @@ function transformOpenAIRequestMessage(

function normalizeUserMessage(content: string): string {
return normalizeSkillContextFrontmatter(content)
.replace(taskCompletionNotificationPattern, taskCompletionNotificationReplacement)
.replace(/<current_datetime>.*?<\/current_datetime>/g, "")
.replace(/<reminder>[\s\S]*?<\/reminder>/g, "")
.replace(/<system_reminder>[\s\S]*?<\/system_reminder>/g, "")
Expand All @@ -1045,6 +1047,24 @@ function normalizeUserMessage(content: string): string {
.trim();
}

const taskCompletionNotificationPattern =
/Use read_agent with agent_id "([^"]+)" to retrieve unread results\./g;
const taskCompletionNotificationReplacement =
'Use read_agent with agent_id "$1" to retrieve the full results.';

function normalizeStoredUserMessages(conversations: NormalizedConversation[]) {
for (const conversation of conversations) {
for (const message of conversation.messages) {
if (message.role === "user" && typeof message.content === "string") {
message.content = message.content.replace(
taskCompletionNotificationPattern,
taskCompletionNotificationReplacement,
);
}
}
}
}

function normalizeSkillContextFrontmatter(content: string): string {
// Runtime versions may include or omit SKILL.md metadata in the prompt context.
return content.replace(
Expand Down
Loading