From fce935f76d586cb7dcd9de2041648f174b9c9abe Mon Sep 17 00:00:00 2001 From: David Gageot Date: Sat, 14 Mar 2026 09:02:43 +0100 Subject: [PATCH] Extract compaction logic into dedicated pkg/compaction package Move session compaction helpers (EstimateMessageTokens, ShouldCompact, HasConversationMessages, BuildPrompt, SystemPrompt) out of pkg/runtime into the new pkg/compaction package so the logic can be reused and tested independently. - Add pkg/compaction with compaction.go, compaction_test.go, and embedded prompt files. - Remove sessionCompactor struct; replace with doCompact method on LocalRuntime and a standalone runSummarization function. - Simplify compactIfNeeded and RunStream to call compaction.ShouldCompact. - Delete pkg/runtime/prompts/compaction-{system,user}.txt (moved). - Remove estimateMessageTokens and its tests from pkg/runtime. Assisted-By: docker-agent --- pkg/compaction/compaction.go | 134 +++++++++ pkg/compaction/compaction_test.go | 280 ++++++++++++++++++ .../prompts/compaction-system.txt | 0 .../prompts/compaction-user.txt | 0 pkg/runtime/loop.go | 15 +- pkg/runtime/runtime.go | 8 +- pkg/runtime/runtime_test.go | 85 ------ pkg/runtime/session_compaction.go | 126 +++----- pkg/runtime/streaming.go | 42 --- 9 files changed, 466 insertions(+), 224 deletions(-) create mode 100644 pkg/compaction/compaction.go create mode 100644 pkg/compaction/compaction_test.go rename pkg/{runtime => compaction}/prompts/compaction-system.txt (100%) rename pkg/{runtime => compaction}/prompts/compaction-user.txt (100%) diff --git a/pkg/compaction/compaction.go b/pkg/compaction/compaction.go new file mode 100644 index 000000000..d2af35d99 --- /dev/null +++ b/pkg/compaction/compaction.go @@ -0,0 +1,134 @@ +// Package compaction provides conversation compaction (summarization) for +// chat sessions that approach their model's context window limit. +// +// It is designed as a standalone component that can be used independently of +// the runtime loop. 
The package exposes: +// +// - [BuildPrompt]: prepares a conversation for summarization by appending +// the compaction prompt and sanitizing message costs. +// - [ShouldCompact]: decides whether a session needs compaction based on +// token usage and context window limits. +// - [EstimateMessageTokens]: a fast heuristic for estimating the token +// count of a single chat message. +// - [HasConversationMessages]: checks whether a message list contains any +// non-system messages worth summarizing. +package compaction + +import ( + _ "embed" + "time" + + "github.com/docker/docker-agent/pkg/chat" +) + +var ( + //go:embed prompts/compaction-system.txt + SystemPrompt string + + //go:embed prompts/compaction-user.txt + userPrompt string +) + +// contextThreshold is the fraction of the context window at which compaction +// is triggered. When the estimated token usage exceeds this fraction of the +// context limit, compaction is recommended. +const contextThreshold = 0.9 + +// Result holds the outcome of a compaction operation. +type Result struct { + // Summary is the generated summary text. + Summary string + + // InputTokens is the token count reported by the summarization model, + // used as an approximation of the new context size after compaction. + InputTokens int64 + + // Cost is the cost of the summarization request in dollars. + Cost float64 +} + +// BuildPrompt prepares the messages for a summarization request. +// It clones the conversation (zeroing per-message costs so they aren't +// double-counted), then appends a user message containing the compaction +// prompt. If additionalPrompt is non-empty it is included as extra +// instructions. +// +// Callers should first check [HasConversationMessages] to avoid sending +// an empty conversation to the model. 
+func BuildPrompt(messages []chat.Message, additionalPrompt string) []chat.Message { + prompt := userPrompt + if additionalPrompt != "" { + prompt += "\n\nAdditional instructions from user: " + additionalPrompt + } + + out := make([]chat.Message, len(messages), len(messages)+1) + for i, msg := range messages { + cloned := msg + cloned.Cost = 0 + out[i] = cloned + } + out = append(out, chat.Message{ + Role: chat.MessageRoleUser, + Content: prompt, + CreatedAt: time.Now().Format(time.RFC3339), + }) + + return out +} + +// ShouldCompact reports whether a session's context usage has crossed the +// compaction threshold. It returns true when the estimated total token count +// (input + output + addedTokens) exceeds [contextThreshold] (90%) of +// contextLimit. A non-positive contextLimit is treated as unlimited and +// always returns false. +func ShouldCompact(inputTokens, outputTokens, addedTokens, contextLimit int64) bool { + if contextLimit <= 0 { + return false + } + estimated := inputTokens + outputTokens + addedTokens + return estimated > int64(float64(contextLimit)*contextThreshold) +} + +// EstimateMessageTokens returns a rough token-count estimate for a single +// chat message based on its text length. This is a coarse heuristic (not a +// strict upper bound) meant to let proactive compaction fire before we hit the limit. +// +// The estimate accounts for message content, multi-content text parts, +// reasoning content, tool call arguments, and a small per-message overhead +// for role/metadata tokens. +func EstimateMessageTokens(msg *chat.Message) int64 { + // charsPerToken: average characters per token. 4 is a widely-used + // heuristic for English; slightly underestimates for code/JSON (~3.5). + const charsPerToken = 4 + + // perMessageOverhead: role, ToolCallID, delimiters, etc. 
+ const perMessageOverhead = 5 + + var chars int + chars += len(msg.Content) + for _, part := range msg.MultiContent { + chars += len(part.Text) + } + chars += len(msg.ReasoningContent) + for _, tc := range msg.ToolCalls { + chars += len(tc.Function.Arguments) + chars += len(tc.Function.Name) + } + + if chars == 0 { + return perMessageOverhead + } + return int64(chars/charsPerToken) + perMessageOverhead +} + +// HasConversationMessages reports whether messages contains at least one +// non-system message. A session with only system prompts has no conversation +// to summarize. +func HasConversationMessages(messages []chat.Message) bool { + for _, msg := range messages { + if msg.Role != chat.MessageRoleSystem { + return true + } + } + return false +} diff --git a/pkg/compaction/compaction_test.go b/pkg/compaction/compaction_test.go new file mode 100644 index 000000000..37103aa39 --- /dev/null +++ b/pkg/compaction/compaction_test.go @@ -0,0 +1,280 @@ +package compaction + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/docker/docker-agent/pkg/chat" + "github.com/docker/docker-agent/pkg/tools" +) + +func TestEstimateMessageTokens(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + msg chat.Message + expected int64 + }{ + { + name: "empty message returns overhead only", + msg: chat.Message{}, + expected: 5, // perMessageOverhead + }, + { + name: "text-only message", + msg: chat.Message{Content: "Hello, world!"}, // 13 chars → 13/4 = 3 + 5 = 8 + expected: 8, + }, + { + name: "multi-content text parts", + msg: chat.Message{ + MultiContent: []chat.MessagePart{ + {Type: chat.MessagePartTypeText, Text: "first part"}, // 10 chars + {Type: chat.MessagePartTypeText, Text: "second part"}, // 11 chars + }, + }, + // 21 total chars → 21/4 = 5 + 5 overhead = 10 + expected: 10, + }, + { + name: "message with tool calls", + msg: chat.Message{ + ToolCalls: []tools.ToolCall{ + { + Function: 
tools.FunctionCall{ + Name: "read_file", // 9 chars + Arguments: `{"path":"/tmp/test.txt"}`, // 24 chars + }, + }, + }, + }, + // 33 chars → 33/4 = 8 + 5 overhead = 13 + expected: 13, + }, + { + name: "message with reasoning content", + msg: chat.Message{ + Content: "answer", // 6 chars + ReasoningContent: "Let me think about this carefully step by step", // 46 chars + }, + // 52 chars → 52/4 = 13 + 5 overhead = 18 + expected: 18, + }, + { + name: "combined content types", + msg: chat.Message{ + Content: "result", // 6 chars + ReasoningContent: "thinking", // 8 chars + MultiContent: []chat.MessagePart{{Text: "extra detail"}}, // 12 chars + ToolCalls: []tools.ToolCall{ + {Function: tools.FunctionCall{Name: "cmd", Arguments: `{"x":"y"}`}}, // 3 + 9 = 12 chars + }, + }, + // 38 total chars → 38/4 = 9 + 5 overhead = 14 + expected: 14, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + got := EstimateMessageTokens(&tt.msg) + assert.Equal(t, tt.expected, got) + }) + } +} + +func TestShouldCompact(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + input int64 + output int64 + added int64 + contextLimit int64 + want bool + }{ + { + name: "below threshold", + input: 5000, + output: 2000, + added: 0, + contextLimit: 100000, + want: false, + }, + { + name: "exactly at 90% boundary", + input: 90000, + output: 0, + added: 0, + contextLimit: 100000, + want: false, // 90000 == int64(100000*0.9), need > not >= + }, + { + name: "just above 90% threshold", + input: 90001, + output: 0, + added: 0, + contextLimit: 100000, + want: true, + }, + { + name: "tool results push past threshold", + input: 70000, + output: 10000, + added: 15000, + contextLimit: 100000, + want: true, // 95000 > 90000 + }, + { + name: "zero context limit means unlimited", + input: 999999, + output: 999999, + added: 999999, + contextLimit: 0, + want: false, + }, + { + name: "negative context limit means unlimited", + input: 999999, + output: 999999, + 
added: 999999, + contextLimit: -1, + want: false, + }, + { + name: "all zeros", + input: 0, + output: 0, + added: 0, + contextLimit: 100000, + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + got := ShouldCompact(tt.input, tt.output, tt.added, tt.contextLimit) + assert.Equal(t, tt.want, got) + }) + } +} + +func TestHasConversationMessages(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + messages []chat.Message + want bool + }{ + { + name: "empty", + messages: nil, + want: false, + }, + { + name: "system only", + messages: []chat.Message{ + {Role: chat.MessageRoleSystem, Content: "You are helpful."}, + }, + want: false, + }, + { + name: "system and user", + messages: []chat.Message{ + {Role: chat.MessageRoleSystem, Content: "You are helpful."}, + {Role: chat.MessageRoleUser, Content: "Hello"}, + }, + want: true, + }, + { + name: "only user", + messages: []chat.Message{ + {Role: chat.MessageRoleUser, Content: "Hello"}, + }, + want: true, + }, + { + name: "assistant message", + messages: []chat.Message{ + {Role: chat.MessageRoleAssistant, Content: "Hi there"}, + }, + want: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + got := HasConversationMessages(tt.messages) + assert.Equal(t, tt.want, got) + }) + } +} + +func TestBuildPrompt(t *testing.T) { + t.Parallel() + + messages := []chat.Message{ + {Role: chat.MessageRoleSystem, Content: "You are helpful."}, + {Role: chat.MessageRoleUser, Content: "Hello", Cost: 0.05}, + {Role: chat.MessageRoleAssistant, Content: "Hi!", Cost: 0.10}, + } + + t.Run("basic", func(t *testing.T) { + t.Parallel() + + out := BuildPrompt(messages, "") + + // Original messages + appended summarization prompt. + require.Len(t, out, 4) + + // Costs are zeroed. + for _, msg := range out[:3] { + assert.Zero(t, msg.Cost, "cost should be zeroed for %q", msg.Content) + } + + // Last message is the summarization prompt. 
+ last := out[len(out)-1] + assert.Equal(t, chat.MessageRoleUser, last.Role) + assert.Contains(t, last.Content, "summary") + assert.NotEmpty(t, last.CreatedAt) + }) + + t.Run("with additional prompt", func(t *testing.T) { + t.Parallel() + + out := BuildPrompt(messages, "focus on code changes") + + last := out[len(out)-1] + assert.Contains(t, last.Content, "Additional instructions from user: focus on code changes") + }) + + t.Run("does not modify original messages", func(t *testing.T) { + t.Parallel() + + original := []chat.Message{ + {Role: chat.MessageRoleUser, Content: "Hello", Cost: 0.05}, + } + + _ = BuildPrompt(original, "") + + assert.InDelta(t, 0.05, original[0].Cost, 1e-9) + assert.Len(t, original, 1) + }) +} + +func TestPromptsAreEmbedded(t *testing.T) { + t.Parallel() + + assert.NotEmpty(t, SystemPrompt, "compaction system prompt should be embedded") + assert.NotEmpty(t, userPrompt, "compaction user prompt should be embedded") + assert.Contains(t, SystemPrompt, "summary") + assert.Contains(t, userPrompt, "summary") +} diff --git a/pkg/runtime/prompts/compaction-system.txt b/pkg/compaction/prompts/compaction-system.txt similarity index 100% rename from pkg/runtime/prompts/compaction-system.txt rename to pkg/compaction/prompts/compaction-system.txt diff --git a/pkg/runtime/prompts/compaction-user.txt b/pkg/compaction/prompts/compaction-user.txt similarity index 100% rename from pkg/runtime/prompts/compaction-user.txt rename to pkg/compaction/prompts/compaction-user.txt diff --git a/pkg/runtime/loop.go b/pkg/runtime/loop.go index 5cb801c90..10a570ebf 100644 --- a/pkg/runtime/loop.go +++ b/pkg/runtime/loop.go @@ -16,6 +16,7 @@ import ( "github.com/docker/docker-agent/pkg/agent" "github.com/docker/docker-agent/pkg/chat" + "github.com/docker/docker-agent/pkg/compaction" "github.com/docker/docker-agent/pkg/model/provider" "github.com/docker/docker-agent/pkg/model/provider/options" "github.com/docker/docker-agent/pkg/modelerrors" @@ -252,11 +253,8 @@ func (r 
*LocalRuntime) RunStream(ctx context.Context, sess *session.Session) <-c contextLimit = int64(m.Limit.Context) } - if m != nil && r.sessionCompaction { - contextLength := sess.InputTokens + sess.OutputTokens - if contextLength > int64(float64(contextLimit)*0.9) { - r.Summarize(ctx, sess, "", events) - } + if m != nil && r.sessionCompaction && compaction.ShouldCompact(sess.InputTokens, sess.OutputTokens, 0, contextLimit) { + r.Summarize(ctx, sess, "", events) } messages := sess.GetMessages(a) @@ -466,11 +464,10 @@ func (r *LocalRuntime) compactIfNeeded( newMessages := sess.GetAllMessages()[messageCountBefore:] var addedTokens int64 for _, msg := range newMessages { - addedTokens += estimateMessageTokens(&msg.Message) + addedTokens += compaction.EstimateMessageTokens(&msg.Message) } - estimatedTotal := sess.InputTokens + sess.OutputTokens + addedTokens - if estimatedTotal <= int64(float64(contextLimit)*0.9) { + if !compaction.ShouldCompact(sess.InputTokens, sess.OutputTokens, addedTokens, contextLimit) { return } @@ -479,7 +476,7 @@ func (r *LocalRuntime) compactIfNeeded( "input_tokens", sess.InputTokens, "output_tokens", sess.OutputTokens, "added_estimated_tokens", addedTokens, - "estimated_total", estimatedTotal, + "estimated_total", sess.InputTokens+sess.OutputTokens+addedTokens, "context_limit", contextLimit, ) r.Summarize(ctx, sess, "", events) diff --git a/pkg/runtime/runtime.go b/pkg/runtime/runtime.go index fd7e35e7a..5d01d55f8 100644 --- a/pkg/runtime/runtime.go +++ b/pkg/runtime/runtime.go @@ -193,7 +193,6 @@ type LocalRuntime struct { elicitationEventsChannel chan Event // Current events channel for sending elicitation requests elicitationEventsChannelMux sync.RWMutex // Protects elicitationEventsChannel ragInitialized atomic.Bool - sessionCompactor *sessionCompactor sessionStore session.Store workingDir string // Working directory for hooks execution env []string // Environment variables for hooks execution @@ -327,13 +326,10 @@ func 
NewLocalRuntime(agents *team.Team, opts ...Opt) (*LocalRuntime, error) { return nil, err } - model := defaultAgent.Model() - if model == nil { + if defaultAgent.Model() == nil { return nil, fmt.Errorf("agent %s has no valid model", defaultAgent.Name()) } - r.sessionCompactor = newSessionCompactor(model, r.sessionStore) - // Register runtime-managed tool handlers once during construction. // This avoids concurrent map writes when multiple goroutines call // RunStream on the same runtime (e.g. background agent sessions). @@ -1008,7 +1004,7 @@ func (r *LocalRuntime) startSpan(ctx context.Context, name string, opts ...trace // for the summarization (e.g., "focus on code changes" or "include action items"). func (r *LocalRuntime) Summarize(ctx context.Context, sess *session.Session, additionalPrompt string, events chan Event) { a := r.resolveSessionAgent(sess) - r.sessionCompactor.Compact(ctx, sess, additionalPrompt, events, a.Name()) + r.doCompact(ctx, sess, a, additionalPrompt, events) // Emit a TokenUsageEvent so the sidebar immediately reflects the // compaction: tokens drop to the summary size, context % drops, and diff --git a/pkg/runtime/runtime_test.go b/pkg/runtime/runtime_test.go index 88e5c9824..916f9d3fa 100644 --- a/pkg/runtime/runtime_test.go +++ b/pkg/runtime/runtime_test.go @@ -1823,91 +1823,6 @@ func TestStripImageContent(t *testing.T) { } } -func TestEstimateMessageTokens(t *testing.T) { - t.Parallel() - - tests := []struct { - name string - msg chat.Message - expected int64 - }{ - { - name: "empty message returns overhead only", - msg: chat.Message{}, - expected: 5, // perMessageOverhead - }, - { - name: "text-only message", - msg: chat.Message{Content: "Hello, world!"}, // 13 chars → 13/4 = 3 + 5 overhead = 8 - expected: 8, - }, - { - name: "multi-content text parts", - msg: chat.Message{ - MultiContent: []chat.MessagePart{ - {Type: chat.MessagePartTypeText, Text: "first part"}, // 10 chars - {Type: chat.MessagePartTypeText, Text: "second part"}, // 
11 chars - }, - }, - // 21 total chars → 21/4 = 5 + 5 overhead = 10 - expected: 10, - }, - { - name: "message with tool calls", - msg: chat.Message{ - ToolCalls: []tools.ToolCall{ - { - Function: tools.FunctionCall{ - Name: "read_file", // 9 chars - Arguments: `{"path":"/tmp/test.txt"}`, // 24 chars - }, - }, - }, - }, - // 33 chars → 33/4 = 8 + 5 overhead = 13 - expected: 13, - }, - { - name: "message with reasoning content", - msg: chat.Message{ - Content: "answer", // 6 chars - ReasoningContent: "Let me think about this carefully step by step", // 47 chars - }, - // 53 chars → 53/4 = 13 + 5 overhead = 18 - expected: 18, - }, - { - name: "combined content types", - msg: chat.Message{ - Content: "result", // 6 chars - ReasoningContent: "thinking", // 8 chars - MultiContent: []chat.MessagePart{{Text: "extra detail"}}, // 12 chars - ToolCalls: []tools.ToolCall{ - {Function: tools.FunctionCall{Name: "cmd", Arguments: `{"x":"y"}`}}, // 3 + 9 = 12 chars - }, - }, - // 38 chars → 38/4 = 9 + 5 overhead = 14 - expected: 14, - }, - { - name: "large tool result", - msg: chat.Message{ - Content: string(make([]byte, 40000)), // 40000 null bytes - }, - // 40000/4 = 10000 + 5 overhead = 10005 - expected: 10005, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - t.Parallel() - got := estimateMessageTokens(&tt.msg) - assert.Equal(t, tt.expected, got, "estimateMessageTokens mismatch") - }) - } -} - // TestResolveSessionAgent_PinnedAgent verifies that resolveSessionAgent returns // the session-pinned agent when AgentName is set, even though the runtime's // currentAgent points elsewhere (root). 
Before the fix, the shared currentAgent diff --git a/pkg/runtime/session_compaction.go b/pkg/runtime/session_compaction.go index 62b3d2404..72db1e184 100644 --- a/pkg/runtime/session_compaction.go +++ b/pkg/runtime/session_compaction.go @@ -2,121 +2,83 @@ package runtime import ( "context" - _ "embed" "log/slog" - "time" "github.com/docker/docker-agent/pkg/agent" "github.com/docker/docker-agent/pkg/chat" + "github.com/docker/docker-agent/pkg/compaction" "github.com/docker/docker-agent/pkg/model/provider" "github.com/docker/docker-agent/pkg/model/provider/options" "github.com/docker/docker-agent/pkg/session" "github.com/docker/docker-agent/pkg/team" ) -//go:embed prompts/compaction-system.txt -var compactionSystemPrompt string +// runSummarization sends the prepared messages through a one-shot runtime +// and returns the model's summary together with the output token count and +// cost. The runtime is created with compaction disabled so it cannot recurse. +func runSummarization(ctx context.Context, model provider.Provider, messages []chat.Message) (compaction.Result, error) { + summaryModel := provider.CloneWithOptions(ctx, model, options.WithStructuredOutput(nil)) + root := agent.New("root", compaction.SystemPrompt, agent.WithModel(summaryModel)) + t := team.New(team.WithAgents(root)) -//go:embed prompts/compaction-user.txt -var compactionUserPrompt string - -type sessionCompactor struct { - model provider.Provider - sessionStore session.Store -} + sess := session.New() + sess.Title = "Generating summary..." 
+ for _, msg := range messages { + sess.AddMessage(&session.Message{Message: msg}) + } -func newSessionCompactor(model provider.Provider, sessionStore session.Store) *sessionCompactor { - return &sessionCompactor{ - model: model, - sessionStore: sessionStore, + rt, err := New(t, WithSessionCompaction(false)) + if err != nil { + return compaction.Result{}, err + } + if _, err = rt.Run(ctx, sess); err != nil { + return compaction.Result{}, err } + + return compaction.Result{ + Summary: sess.GetLastAssistantMessageContent(), + InputTokens: sess.OutputTokens, + Cost: sess.TotalCost(), + }, nil } -func (c *sessionCompactor) Compact(ctx context.Context, sess *session.Session, additionalPrompt string, events chan Event, agentName string) { +// doCompact runs compaction on a session and applies the result (events, +// persistence, token count updates). The agent is used to extract the +// conversation from the session and to obtain the model for summarization. +func (r *LocalRuntime) doCompact(ctx context.Context, sess *session.Session, a *agent.Agent, additionalPrompt string, events chan Event) { slog.Debug("Generating summary for session", "session_id", sess.ID) - events <- SessionCompaction(sess.ID, "started", agentName) + events <- SessionCompaction(sess.ID, "started", a.Name()) defer func() { - events <- SessionCompaction(sess.ID, "completed", agentName) + events <- SessionCompaction(sess.ID, "completed", a.Name()) }() - summaryModel := provider.CloneWithOptions(ctx, c.model, options.WithStructuredOutput(nil)) - root := agent.New("root", compactionSystemPrompt, agent.WithModel(summaryModel)) - newTeam := team.New(team.WithAgents(root)) - - messages := sess.GetMessages(root) - if !hasConversationMessages(messages) { - events <- Warning("Session is empty. Start a conversation before compacting.", agentName) + messages := sess.GetMessages(a) + if !compaction.HasConversationMessages(messages) { + if additionalPrompt == "" { + events <- Warning("Session is empty. 
Start a conversation before compacting.", a.Name()) + } return } - summarySession := session.New() - summarySession.Title = "Generating summary..." - for _, msg := range messages { - // Copy messages without their cost — the summary session should - // only track the cost of generating the summary itself, not the - // original conversation costs (which are already accounted for - // in the parent session). - cloned := msg - cloned.Cost = 0 - summarySession.AddMessage(&session.Message{Message: cloned}) - } - - prompt := compactionUserPrompt - if additionalPrompt != "" { - prompt += "\n\nAdditional instructions from user: " + additionalPrompt - } - summarySession.AddMessage(&session.Message{ - Message: chat.Message{ - Role: chat.MessageRoleUser, - Content: prompt, - CreatedAt: time.Now().Format(time.RFC3339), - }, - }) - - summaryRuntime, err := New(newTeam, WithSessionCompaction(false)) - if err != nil { - slog.Error("Failed to create summary generator runtime", "error", err) - events <- Error(err.Error()) - return - } + prepared := compaction.BuildPrompt(messages, additionalPrompt) - _, err = summaryRuntime.Run(ctx, summarySession) + result, err := runSummarization(ctx, a.Model(), prepared) if err != nil { slog.Error("Failed to generate session summary", "error", err) events <- Error(err.Error()) return } - - summary := summarySession.GetLastAssistantMessageContent() - if summary == "" { + if result.Summary == "" { return } - compactionCost := summarySession.TotalCost() - - // Store the compaction cost on the summary item so that TotalCost() - // can discover it when walking the session tree. - sess.Messages = append(sess.Messages, session.Item{Summary: summary, Cost: compactionCost}) - - // Update the parent session's token counts to reflect the compacted - // context. The summary model's output tokens approximate the new - // context size (system prompt + summary). The old counts reflected - // the pre-compaction context and are no longer meaningful. 
- sess.InputTokens = summarySession.OutputTokens + sess.Messages = append(sess.Messages, session.Item{Summary: result.Summary, Cost: result.Cost}) + sess.InputTokens = result.InputTokens sess.OutputTokens = 0 - _ = c.sessionStore.UpdateSession(ctx, sess) - - slog.Debug("Generated session summary", "session_id", sess.ID, "summary_length", len(summary), "compaction_cost", compactionCost) - events <- SessionSummary(sess.ID, summary, agentName) -} + _ = r.sessionStore.UpdateSession(ctx, sess) -func hasConversationMessages(messages []chat.Message) bool { - for _, msg := range messages { - if msg.Role != chat.MessageRoleSystem { - return true - } - } - return false + slog.Debug("Generated session summary", "session_id", sess.ID, "summary_length", len(result.Summary), "compaction_cost", result.Cost) + events <- SessionSummary(sess.ID, result.Summary, a.Name()) } diff --git a/pkg/runtime/streaming.go b/pkg/runtime/streaming.go index 3d06a8916..62b132076 100644 --- a/pkg/runtime/streaming.go +++ b/pkg/runtime/streaming.go @@ -232,45 +232,3 @@ func stripImageContent(messages []chat.Message) []chat.Message { } return result } - -// charsPerToken is the average number of characters per token used for -// estimation. A value of 4 is a widely-used heuristic for English text; -// it slightly overestimates token counts for code/JSON (which is ~3.5), -// making compaction trigger earlier — the safe direction. -const charsPerToken = 4 - -// estimateMessageTokens returns a rough token-count estimate for a single -// chat message based on its text length. This is intentionally conservative -// (overestimates) so that proactive compaction fires before we hit the limit. -// The estimate includes the message content, multi-content text parts, and -// a small overhead per message for role/metadata tokens. -func estimateMessageTokens(msg *chat.Message) int64 { - var chars int - - // Primary text content. 
- chars += len(msg.Content) - - // Multi-content parts (e.g., tool results with image descriptions). - for _, part := range msg.MultiContent { - chars += len(part.Text) - } - - // Reasoning / thinking content. - chars += len(msg.ReasoningContent) - - // Tool call arguments (they count toward input tokens on the next turn). - for _, tc := range msg.ToolCalls { - chars += len(tc.Function.Arguments) - chars += len(tc.Function.Name) - } - - // Per-message overhead: role, ToolCallID, delimiters, etc. - // Models typically use 3-7 tokens for message framing. - const perMessageOverhead = 5 - - if chars == 0 { - return perMessageOverhead - } - - return int64(chars/charsPerToken) + perMessageOverhead -}