From fce935f76d586cb7dcd9de2041648f174b9c9abe Mon Sep 17 00:00:00 2001
From: David Gageot <david.gageot@docker.com>
Date: Sat, 14 Mar 2026 09:02:43 +0100
Subject: [PATCH] Extract compaction logic into dedicated pkg/compaction
 package

Move session compaction helpers (EstimateMessageTokens, ShouldCompact,
HasConversationMessages, BuildPrompt, SystemPrompt) out of pkg/runtime
into the new pkg/compaction package so the logic can be reused and
tested independently.

- Add pkg/compaction with compaction.go, compaction_test.go, and
  embedded prompt files.
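- Remove sessionCompactor struct; replace with doCompact method on
  LocalRuntime and a standalone runSummarization function. Compaction now
  reads the conversation through, and summarizes with the model of, the
  agent resolved for the session, instead of the default agent's model
  captured when the runtime was constructed.
- Simplify compactIfNeeded and RunStream to call compaction.ShouldCompact,
  which treats a non-positive context limit as unlimited.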
- Delete pkg/runtime/prompts/compaction-{system,user}.txt (moved).
- Remove estimateMessageTokens and its tests from pkg/runtime.

Assisted-By: docker-agent
---
 pkg/compaction/compaction.go                  | 134 +++++++++
 pkg/compaction/compaction_test.go             | 280 ++++++++++++++++++
 .../prompts/compaction-system.txt             |   0
 .../prompts/compaction-user.txt               |   0
 pkg/runtime/loop.go                           |  15 +-
 pkg/runtime/runtime.go                        |   8 +-
 pkg/runtime/runtime_test.go                   |  85 ------
 pkg/runtime/session_compaction.go             | 126 +++-----
 pkg/runtime/streaming.go                      |  42 ---
 9 files changed, 466 insertions(+), 224 deletions(-)
 create mode 100644 pkg/compaction/compaction.go
 create mode 100644 pkg/compaction/compaction_test.go
 rename pkg/{runtime => compaction}/prompts/compaction-system.txt (100%)
 rename pkg/{runtime => compaction}/prompts/compaction-user.txt (100%)

diff --git a/pkg/compaction/compaction.go b/pkg/compaction/compaction.go
new file mode 100644
index 000000000..d2af35d99
--- /dev/null
+++ b/pkg/compaction/compaction.go
@@ -0,0 +1,134 @@
+// Package compaction provides conversation compaction (summarization) for
+// chat sessions that approach their model's context window limit.
+//
+// It is designed as a standalone component that can be used independently of
+// the runtime loop. The package exposes:
+//
+//   - [BuildPrompt]: prepares a conversation for summarization by appending
+//     the compaction prompt and sanitizing message costs.
+//   - [ShouldCompact]: decides whether a session needs compaction based on
+//     token usage and context window limits.
+//   - [EstimateMessageTokens]: a fast heuristic for estimating the token
+//     count of a single chat message.
+//   - [HasConversationMessages]: checks whether a message list contains any
+//     non-system messages worth summarizing.
+package compaction
+
+import (
+	_ "embed"
+	"time"
+
+	"github.com/docker/docker-agent/pkg/chat"
+)
+
+var (
+	//go:embed prompts/compaction-system.txt
+	SystemPrompt string // SystemPrompt is the embedded system prompt for the summarization model.
+
+	//go:embed prompts/compaction-user.txt
+	userPrompt string // userPrompt is the instruction BuildPrompt appends as the final user message.
+)
+
+// contextThreshold is the fraction of the context limit used by ShouldCompact.
+// Compaction is recommended only when the estimated token usage is strictly
+// greater than this fraction of the limit.
+const contextThreshold = 0.9
+
+// Result holds the outcome of a compaction operation.
+type Result struct {
+	// Summary is the generated summary text.
+	Summary string
+
+	// InputTokens approximates the session's input size after compaction;
+	// callers fill it with the summarization run's output token count.
+	InputTokens int64
+
+	// Cost is the cost of the summarization request in dollars.
+	Cost float64
+}
+
+// BuildPrompt assembles the message list for a summarization request.
+// Every input message is copied by value with its Cost reset to zero, so
+// earlier spend is not counted again, and one user message holding the
+// compaction prompt is appended at the end. A non-empty additionalPrompt
+// is attached to that prompt as extra instructions from the user.
+//
+// Check [HasConversationMessages] first: BuildPrompt itself does not
+// reject a conversation that has nothing to summarize.
+func BuildPrompt(messages []chat.Message, additionalPrompt string) []chat.Message {
+	instructions := userPrompt
+	if additionalPrompt != "" {
+		instructions += "\n\nAdditional instructions from user: " + additionalPrompt
+	}
+
+	prepared := make([]chat.Message, 0, len(messages)+1)
+	for i := range messages {
+		// Value copy: the caller's slice elements keep their cost.
+		sanitized := messages[i]
+		sanitized.Cost = 0
+		prepared = append(prepared, sanitized)
+	}
+
+	return append(prepared, chat.Message{
+		Role:      chat.MessageRoleUser,
+		Content:   instructions,
+		CreatedAt: time.Now().Format(time.RFC3339),
+	})
+}
+
+// ShouldCompact reports whether context usage has passed the compaction
+// threshold: it is true when inputTokens + outputTokens + addedTokens is
+// strictly greater than [contextThreshold] (90%) of contextLimit, with the
+// threshold truncated to a whole token count. A contextLimit of zero or
+// less means "no limit" and always yields false.
+func ShouldCompact(inputTokens, outputTokens, addedTokens, contextLimit int64) bool {
+	if contextLimit <= 0 {
+		return false
+	}
+	threshold := int64(float64(contextLimit) * contextThreshold)
+	return inputTokens+outputTokens+addedTokens > threshold
+}
+
+// EstimateMessageTokens returns a rough token-count estimate for a single
+// chat message: its text length in bytes divided by charsPerToken, rounded
+// down, plus a fixed overhead for role/metadata tokens. It is a heuristic,
+// not an upper bound, so dense text such as code/JSON can be undercounted.
+//
+// The text counted is the message content, multi-content text parts,
+// reasoning content, and tool call names and arguments.
+func EstimateMessageTokens(msg *chat.Message) int64 {
+	// charsPerToken: average characters per token. 4 is a widely-used
+	// heuristic for English; code/JSON (~3.5) is slightly underestimated.
+	const charsPerToken = 4
+
+	// perMessageOverhead: role, ToolCallID, delimiters, etc.
+	const perMessageOverhead = 5
+
+	var chars int
+	chars += len(msg.Content)
+	for _, part := range msg.MultiContent {
+		chars += len(part.Text)
+	}
+	chars += len(msg.ReasoningContent)
+	for _, tc := range msg.ToolCalls {
+		chars += len(tc.Function.Arguments)
+		chars += len(tc.Function.Name)
+	}
+
+	if chars == 0 {
+		return perMessageOverhead
+	}
+	return int64(chars/charsPerToken) + perMessageOverhead
+}
+
+// HasConversationMessages reports whether messages holds at least one entry
+// whose role is not system, i.e. whether there is anything to summarize.
+func HasConversationMessages(messages []chat.Message) bool {
+	for i := range messages {
+		if messages[i].Role == chat.MessageRoleSystem {
+			continue
+		}
+		return true
+	}
+	return false
+}
diff --git a/pkg/compaction/compaction_test.go b/pkg/compaction/compaction_test.go
new file mode 100644
index 000000000..37103aa39
--- /dev/null
+++ b/pkg/compaction/compaction_test.go
@@ -0,0 +1,280 @@
+package compaction
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+
+	"github.com/docker/docker-agent/pkg/chat"
+	"github.com/docker/docker-agent/pkg/tools"
+)
+
+func TestEstimateMessageTokens(t *testing.T) {
+	t.Parallel()
+
+	tests := []struct {
+		name     string
+		msg      chat.Message
+		expected int64
+	}{
+		{
+			name:     "empty message returns overhead only",
+			msg:      chat.Message{},
+			expected: 5, // perMessageOverhead
+		},
+		{
+			name:     "text-only message",
+			msg:      chat.Message{Content: "Hello, world!"}, // 13 chars → 13/4 = 3 + 5 = 8
+			expected: 8,
+		},
+		{
+			name: "multi-content text parts",
+			msg: chat.Message{
+				MultiContent: []chat.MessagePart{
+					{Type: chat.MessagePartTypeText, Text: "first part"},  // 10 chars
+					{Type: chat.MessagePartTypeText, Text: "second part"}, // 11 chars
+				},
+			},
+			// 21 total chars → 21/4 = 5 + 5 overhead = 10
+			expected: 10,
+		},
+		{
+			name: "message with tool calls",
+			msg: chat.Message{
+				ToolCalls: []tools.ToolCall{
+					{
+						Function: tools.FunctionCall{
+							Name:      "read_file",                // 9 chars
+							Arguments: `{"path":"/tmp/test.txt"}`, // 24 chars
+						},
+					},
+				},
+			},
+			// 33 chars → 33/4 = 8 + 5 overhead = 13
+			expected: 13,
+		},
+		{
+			name: "message with reasoning content",
+			msg: chat.Message{
+				Content:          "answer",                                         // 6 chars
+				ReasoningContent: "Let me think about this carefully step by step", // 46 chars
+			},
+			// 52 chars → 52/4 = 13 + 5 overhead = 18
+			expected: 18,
+		},
+		{
+			name: "combined content types",
+			msg: chat.Message{
+				Content:          "result",                                   // 6 chars
+				ReasoningContent: "thinking",                                 // 8 chars
+				MultiContent:     []chat.MessagePart{{Text: "extra detail"}}, // 12 chars
+				ToolCalls: []tools.ToolCall{
+					{Function: tools.FunctionCall{Name: "cmd", Arguments: `{"x":"y"}`}}, // 3 + 9 = 12 chars
+				},
+			},
+			// 38 total chars → 38/4 = 9 + 5 overhead = 14
+			expected: 14,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			t.Parallel()
+			got := EstimateMessageTokens(&tt.msg)
+			assert.Equal(t, tt.expected, got)
+		})
+	}
+}
+
+func TestShouldCompact(t *testing.T) {
+	t.Parallel()
+
+	tests := []struct {
+		name         string
+		input        int64
+		output       int64
+		added        int64
+		contextLimit int64
+		want         bool
+	}{
+		{
+			name:         "below threshold",
+			input:        5000,
+			output:       2000,
+			added:        0,
+			contextLimit: 100000,
+			want:         false, // 7000 <= 90000
+		},
+		{
+			name:         "exactly at 90% boundary",
+			input:        90000,
+			output:       0,
+			added:        0,
+			contextLimit: 100000,
+			want:         false, // 90000 == int64(100000*0.9), need > not >=
+		},
+		{
+			name:         "just above 90% threshold",
+			input:        90001,
+			output:       0,
+			added:        0,
+			contextLimit: 100000,
+			want:         true, // 90001 > 90000
+		},
+		{
+			name:         "tool results push past threshold",
+			input:        70000,
+			output:       10000,
+			added:        15000,
+			contextLimit: 100000,
+			want:         true, // 95000 > 90000
+		},
+		{
+			name:         "zero context limit means unlimited",
+			input:        999999,
+			output:       999999,
+			added:        999999,
+			contextLimit: 0,
+			want:         false, // limit <= 0 returns before any comparison
+		},
+		{
+			name:         "negative context limit means unlimited",
+			input:        999999,
+			output:       999999,
+			added:        999999,
+			contextLimit: -1,
+			want:         false, // limit <= 0 returns before any comparison
+		},
+		{
+			name:         "all zeros",
+			input:        0,
+			output:       0,
+			added:        0,
+			contextLimit: 100000,
+			want:         false, // 0 <= 90000
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			t.Parallel()
+			got := ShouldCompact(tt.input, tt.output, tt.added, tt.contextLimit)
+			assert.Equal(t, tt.want, got)
+		})
+	}
+}
+
+func TestHasConversationMessages(t *testing.T) {
+	t.Parallel()
+
+	cases := []struct {
+		name     string
+		messages []chat.Message
+		expect   bool
+	}{
+		{
+			name:     "empty",
+			messages: nil,
+			expect:   false,
+		},
+		{
+			name: "system only",
+			messages: []chat.Message{
+				{Role: chat.MessageRoleSystem, Content: "You are helpful."},
+			},
+			expect: false,
+		},
+		{
+			name: "system and user",
+			messages: []chat.Message{
+				{Role: chat.MessageRoleSystem, Content: "You are helpful."},
+				{Role: chat.MessageRoleUser, Content: "Hello"},
+			},
+			expect: true,
+		},
+		{
+			name: "only user",
+			messages: []chat.Message{
+				{Role: chat.MessageRoleUser, Content: "Hello"},
+			},
+			expect: true,
+		},
+		{
+			name: "assistant message",
+			messages: []chat.Message{
+				{Role: chat.MessageRoleAssistant, Content: "Hi there"},
+			},
+			expect: true,
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			t.Parallel()
+			hasConversation := HasConversationMessages(tc.messages)
+			assert.Equal(t, tc.expect, hasConversation)
+		})
+	}
+}
+
+func TestBuildPrompt(t *testing.T) {
+	t.Parallel()
+
+	conversation := []chat.Message{
+		{Role: chat.MessageRoleSystem, Content: "You are helpful."},
+		{Role: chat.MessageRoleUser, Content: "Hello", Cost: 0.05},
+		{Role: chat.MessageRoleAssistant, Content: "Hi!", Cost: 0.10},
+	}
+
+	t.Run("basic", func(t *testing.T) {
+		t.Parallel()
+
+		prepared := BuildPrompt(conversation, "")
+
+		// The three input messages are followed by the compaction prompt.
+		require.Len(t, prepared, 4)
+
+		// None of the copied messages carries a cost any more.
+		for i := range prepared[:3] {
+			assert.Zero(t, prepared[i].Cost, "cost should be zeroed for %q", prepared[i].Content)
+		}
+
+		// The appended message is a timestamped user turn asking for a summary.
+		appended := prepared[len(prepared)-1]
+		assert.Equal(t, chat.MessageRoleUser, appended.Role)
+		assert.Contains(t, appended.Content, "summary")
+		assert.NotEmpty(t, appended.CreatedAt)
+	})
+
+	t.Run("with additional prompt", func(t *testing.T) {
+		t.Parallel()
+
+		prepared := BuildPrompt(conversation, "focus on code changes")
+
+		appended := prepared[len(prepared)-1]
+		assert.Contains(t, appended.Content, "Additional instructions from user: focus on code changes")
+	})
+
+	t.Run("does not modify original messages", func(t *testing.T) {
+		t.Parallel()
+
+		input := []chat.Message{
+			{Role: chat.MessageRoleUser, Content: "Hello", Cost: 0.05},
+		}
+
+		_ = BuildPrompt(input, "")
+
+		assert.Len(t, input, 1)
+		assert.InDelta(t, 0.05, input[0].Cost, 1e-9)
+	})
+}
+
+func TestPromptsAreEmbedded(t *testing.T) {
+	t.Parallel()
+
+	assert.NotEmpty(t, SystemPrompt, "compaction system prompt should be embedded")
+	assert.Contains(t, SystemPrompt, "summary")
+	assert.NotEmpty(t, userPrompt, "compaction user prompt should be embedded")
+	assert.Contains(t, userPrompt, "summary")
+}
diff --git a/pkg/runtime/prompts/compaction-system.txt b/pkg/compaction/prompts/compaction-system.txt
similarity index 100%
rename from pkg/runtime/prompts/compaction-system.txt
rename to pkg/compaction/prompts/compaction-system.txt
diff --git a/pkg/runtime/prompts/compaction-user.txt b/pkg/compaction/prompts/compaction-user.txt
similarity index 100%
rename from pkg/runtime/prompts/compaction-user.txt
rename to pkg/compaction/prompts/compaction-user.txt
diff --git a/pkg/runtime/loop.go b/pkg/runtime/loop.go
index 5cb801c90..10a570ebf 100644
--- a/pkg/runtime/loop.go
+++ b/pkg/runtime/loop.go
@@ -16,6 +16,7 @@ import (
 
 	"github.com/docker/docker-agent/pkg/agent"
 	"github.com/docker/docker-agent/pkg/chat"
+	"github.com/docker/docker-agent/pkg/compaction"
 	"github.com/docker/docker-agent/pkg/model/provider"
 	"github.com/docker/docker-agent/pkg/model/provider/options"
 	"github.com/docker/docker-agent/pkg/modelerrors"
@@ -252,11 +253,8 @@ func (r *LocalRuntime) RunStream(ctx context.Context, sess *session.Session) <-c
 				contextLimit = int64(m.Limit.Context)
 			}
 
-			if m != nil && r.sessionCompaction {
-				contextLength := sess.InputTokens + sess.OutputTokens
-				if contextLength > int64(float64(contextLimit)*0.9) {
-					r.Summarize(ctx, sess, "", events)
-				}
+			if m != nil && r.sessionCompaction && compaction.ShouldCompact(sess.InputTokens, sess.OutputTokens, 0, contextLimit) {
+				r.Summarize(ctx, sess, "", events)
 			}
 
 			messages := sess.GetMessages(a)
@@ -466,11 +464,10 @@ func (r *LocalRuntime) compactIfNeeded(
 	newMessages := sess.GetAllMessages()[messageCountBefore:]
 	var addedTokens int64
 	for _, msg := range newMessages {
-		addedTokens += estimateMessageTokens(&msg.Message)
+		addedTokens += compaction.EstimateMessageTokens(&msg.Message)
 	}
 
-	estimatedTotal := sess.InputTokens + sess.OutputTokens + addedTokens
-	if estimatedTotal <= int64(float64(contextLimit)*0.9) {
+	if !compaction.ShouldCompact(sess.InputTokens, sess.OutputTokens, addedTokens, contextLimit) {
 		return
 	}
 
@@ -479,7 +476,7 @@ func (r *LocalRuntime) compactIfNeeded(
 		"input_tokens", sess.InputTokens,
 		"output_tokens", sess.OutputTokens,
 		"added_estimated_tokens", addedTokens,
-		"estimated_total", estimatedTotal,
+		"estimated_total", sess.InputTokens+sess.OutputTokens+addedTokens,
 		"context_limit", contextLimit,
 	)
 	r.Summarize(ctx, sess, "", events)
diff --git a/pkg/runtime/runtime.go b/pkg/runtime/runtime.go
index fd7e35e7a..5d01d55f8 100644
--- a/pkg/runtime/runtime.go
+++ b/pkg/runtime/runtime.go
@@ -193,7 +193,6 @@ type LocalRuntime struct {
 	elicitationEventsChannel    chan Event             // Current events channel for sending elicitation requests
 	elicitationEventsChannelMux sync.RWMutex           // Protects elicitationEventsChannel
 	ragInitialized              atomic.Bool
-	sessionCompactor            *sessionCompactor
 	sessionStore                session.Store
 	workingDir                  string   // Working directory for hooks execution
 	env                         []string // Environment variables for hooks execution
@@ -327,13 +326,10 @@ func NewLocalRuntime(agents *team.Team, opts ...Opt) (*LocalRuntime, error) {
 		return nil, err
 	}
 
-	model := defaultAgent.Model()
-	if model == nil {
+	if defaultAgent.Model() == nil {
 		return nil, fmt.Errorf("agent %s has no valid model", defaultAgent.Name())
 	}
 
-	r.sessionCompactor = newSessionCompactor(model, r.sessionStore)
-
 	// Register runtime-managed tool handlers once during construction.
 	// This avoids concurrent map writes when multiple goroutines call
 	// RunStream on the same runtime (e.g. background agent sessions).
@@ -1008,7 +1004,7 @@ func (r *LocalRuntime) startSpan(ctx context.Context, name string, opts ...trace
 // for the summarization (e.g., "focus on code changes" or "include action items").
 func (r *LocalRuntime) Summarize(ctx context.Context, sess *session.Session, additionalPrompt string, events chan Event) {
 	a := r.resolveSessionAgent(sess)
-	r.sessionCompactor.Compact(ctx, sess, additionalPrompt, events, a.Name())
+	r.doCompact(ctx, sess, a, additionalPrompt, events)
 
 	// Emit a TokenUsageEvent so the sidebar immediately reflects the
 	// compaction: tokens drop to the summary size, context % drops, and
diff --git a/pkg/runtime/runtime_test.go b/pkg/runtime/runtime_test.go
index 88e5c9824..916f9d3fa 100644
--- a/pkg/runtime/runtime_test.go
+++ b/pkg/runtime/runtime_test.go
@@ -1823,91 +1823,6 @@ func TestStripImageContent(t *testing.T) {
 	}
 }
 
-func TestEstimateMessageTokens(t *testing.T) {
-	t.Parallel()
-
-	tests := []struct {
-		name     string
-		msg      chat.Message
-		expected int64
-	}{
-		{
-			name:     "empty message returns overhead only",
-			msg:      chat.Message{},
-			expected: 5, // perMessageOverhead
-		},
-		{
-			name:     "text-only message",
-			msg:      chat.Message{Content: "Hello, world!"}, // 13 chars → 13/4 = 3 + 5 overhead = 8
-			expected: 8,
-		},
-		{
-			name: "multi-content text parts",
-			msg: chat.Message{
-				MultiContent: []chat.MessagePart{
-					{Type: chat.MessagePartTypeText, Text: "first part"},  // 10 chars
-					{Type: chat.MessagePartTypeText, Text: "second part"}, // 11 chars
-				},
-			},
-			// 21 total chars → 21/4 = 5 + 5 overhead = 10
-			expected: 10,
-		},
-		{
-			name: "message with tool calls",
-			msg: chat.Message{
-				ToolCalls: []tools.ToolCall{
-					{
-						Function: tools.FunctionCall{
-							Name:      "read_file",                // 9 chars
-							Arguments: `{"path":"/tmp/test.txt"}`, // 24 chars
-						},
-					},
-				},
-			},
-			// 33 chars → 33/4 = 8 + 5 overhead = 13
-			expected: 13,
-		},
-		{
-			name: "message with reasoning content",
-			msg: chat.Message{
-				Content:          "answer",                                         // 6 chars
-				ReasoningContent: "Let me think about this carefully step by step", // 47 chars
-			},
-			// 53 chars → 53/4 = 13 + 5 overhead = 18
-			expected: 18,
-		},
-		{
-			name: "combined content types",
-			msg: chat.Message{
-				Content:          "result",                                   // 6 chars
-				ReasoningContent: "thinking",                                 // 8 chars
-				MultiContent:     []chat.MessagePart{{Text: "extra detail"}}, // 12 chars
-				ToolCalls: []tools.ToolCall{
-					{Function: tools.FunctionCall{Name: "cmd", Arguments: `{"x":"y"}`}}, // 3 + 9 = 12 chars
-				},
-			},
-			// 38 chars → 38/4 = 9 + 5 overhead = 14
-			expected: 14,
-		},
-		{
-			name: "large tool result",
-			msg: chat.Message{
-				Content: string(make([]byte, 40000)), // 40000 null bytes
-			},
-			// 40000/4 = 10000 + 5 overhead = 10005
-			expected: 10005,
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			t.Parallel()
-			got := estimateMessageTokens(&tt.msg)
-			assert.Equal(t, tt.expected, got, "estimateMessageTokens mismatch")
-		})
-	}
-}
-
 // TestResolveSessionAgent_PinnedAgent verifies that resolveSessionAgent returns
 // the session-pinned agent when AgentName is set, even though the runtime's
 // currentAgent points elsewhere (root). Before the fix, the shared currentAgent
diff --git a/pkg/runtime/session_compaction.go b/pkg/runtime/session_compaction.go
index 62b3d2404..72db1e184 100644
--- a/pkg/runtime/session_compaction.go
+++ b/pkg/runtime/session_compaction.go
@@ -2,121 +2,83 @@ package runtime
 
 import (
 	"context"
-	_ "embed"
 	"log/slog"
-	"time"
 
 	"github.com/docker/docker-agent/pkg/agent"
 	"github.com/docker/docker-agent/pkg/chat"
+	"github.com/docker/docker-agent/pkg/compaction"
 	"github.com/docker/docker-agent/pkg/model/provider"
 	"github.com/docker/docker-agent/pkg/model/provider/options"
 	"github.com/docker/docker-agent/pkg/session"
 	"github.com/docker/docker-agent/pkg/team"
 )
 
-//go:embed prompts/compaction-system.txt
-var compactionSystemPrompt string
+// runSummarization sends the prepared messages through a one-shot runtime
+// and returns the model's summary together with the output token count and
+// cost. The runtime is created with compaction disabled so it cannot recurse.
+func runSummarization(ctx context.Context, model provider.Provider, messages []chat.Message) (compaction.Result, error) {
+	summaryModel := provider.CloneWithOptions(ctx, model, options.WithStructuredOutput(nil))
+	root := agent.New("root", compaction.SystemPrompt, agent.WithModel(summaryModel))
+	t := team.New(team.WithAgents(root))
 
-//go:embed prompts/compaction-user.txt
-var compactionUserPrompt string
-
-type sessionCompactor struct {
-	model        provider.Provider
-	sessionStore session.Store
-}
+	sess := session.New()
+	sess.Title = "Generating summary..."
+	for _, msg := range messages {
+		sess.AddMessage(&session.Message{Message: msg})
+	}
 
-func newSessionCompactor(model provider.Provider, sessionStore session.Store) *sessionCompactor {
-	return &sessionCompactor{
-		model:        model,
-		sessionStore: sessionStore,
+	rt, err := New(t, WithSessionCompaction(false))
+	if err != nil {
+		return compaction.Result{}, err
+	}
+	if _, err = rt.Run(ctx, sess); err != nil {
+		return compaction.Result{}, err
 	}
+
+	return compaction.Result{
+		Summary:     sess.GetLastAssistantMessageContent(),
+		InputTokens: sess.OutputTokens,
+		Cost:        sess.TotalCost(),
+	}, nil
 }
 
-func (c *sessionCompactor) Compact(ctx context.Context, sess *session.Session, additionalPrompt string, events chan Event, agentName string) {
+// doCompact runs compaction on a session and applies the result (events,
+// persistence, token count updates). The agent is used to extract the
+// conversation from the session and to obtain the model for summarization.
+func (r *LocalRuntime) doCompact(ctx context.Context, sess *session.Session, a *agent.Agent, additionalPrompt string, events chan Event) {
 	slog.Debug("Generating summary for session", "session_id", sess.ID)
 
-	events <- SessionCompaction(sess.ID, "started", agentName)
+	events <- SessionCompaction(sess.ID, "started", a.Name())
 	defer func() {
-		events <- SessionCompaction(sess.ID, "completed", agentName)
+		events <- SessionCompaction(sess.ID, "completed", a.Name())
 	}()
 
-	summaryModel := provider.CloneWithOptions(ctx, c.model, options.WithStructuredOutput(nil))
-	root := agent.New("root", compactionSystemPrompt, agent.WithModel(summaryModel))
-	newTeam := team.New(team.WithAgents(root))
-
-	messages := sess.GetMessages(root)
-	if !hasConversationMessages(messages) {
-		events <- Warning("Session is empty. Start a conversation before compacting.", agentName)
+	messages := sess.GetMessages(a)
+	if !compaction.HasConversationMessages(messages) {
+		// Always warn, whether or not additionalPrompt is set: otherwise an
+		// empty session compacted with instructions would return silently.
+		events <- Warning("Session is empty. Start a conversation before compacting.", a.Name())
 		return
 	}
 
-	summarySession := session.New()
-	summarySession.Title = "Generating summary..."
-	for _, msg := range messages {
-		// Copy messages without their cost — the summary session should
-		// only track the cost of generating the summary itself, not the
-		// original conversation costs (which are already accounted for
-		// in the parent session).
-		cloned := msg
-		cloned.Cost = 0
-		summarySession.AddMessage(&session.Message{Message: cloned})
-	}
-
-	prompt := compactionUserPrompt
-	if additionalPrompt != "" {
-		prompt += "\n\nAdditional instructions from user: " + additionalPrompt
-	}
-	summarySession.AddMessage(&session.Message{
-		Message: chat.Message{
-			Role:      chat.MessageRoleUser,
-			Content:   prompt,
-			CreatedAt: time.Now().Format(time.RFC3339),
-		},
-	})
-
-	summaryRuntime, err := New(newTeam, WithSessionCompaction(false))
-	if err != nil {
-		slog.Error("Failed to create summary generator runtime", "error", err)
-		events <- Error(err.Error())
-		return
-	}
+	prepared := compaction.BuildPrompt(messages, additionalPrompt)
 
-	_, err = summaryRuntime.Run(ctx, summarySession)
+	result, err := runSummarization(ctx, a.Model(), prepared)
 	if err != nil {
 		slog.Error("Failed to generate session summary", "error", err)
 		events <- Error(err.Error())
 		return
 	}
-
-	summary := summarySession.GetLastAssistantMessageContent()
-	if summary == "" {
+	if result.Summary == "" {
 		return
 	}
 
-	compactionCost := summarySession.TotalCost()
-
-	// Store the compaction cost on the summary item so that TotalCost()
-	// can discover it when walking the session tree.
-	sess.Messages = append(sess.Messages, session.Item{Summary: summary, Cost: compactionCost})
-
-	// Update the parent session's token counts to reflect the compacted
-	// context. The summary model's output tokens approximate the new
-	// context size (system prompt + summary). The old counts reflected
-	// the pre-compaction context and are no longer meaningful.
-	sess.InputTokens = summarySession.OutputTokens
+	sess.Messages = append(sess.Messages, session.Item{Summary: result.Summary, Cost: result.Cost})
+	sess.InputTokens = result.InputTokens
 	sess.OutputTokens = 0
 
-	_ = c.sessionStore.UpdateSession(ctx, sess)
-
-	slog.Debug("Generated session summary", "session_id", sess.ID, "summary_length", len(summary), "compaction_cost", compactionCost)
-	events <- SessionSummary(sess.ID, summary, agentName)
-}
+	_ = r.sessionStore.UpdateSession(ctx, sess)
 
-func hasConversationMessages(messages []chat.Message) bool {
-	for _, msg := range messages {
-		if msg.Role != chat.MessageRoleSystem {
-			return true
-		}
-	}
-	return false
+	slog.Debug("Generated session summary", "session_id", sess.ID, "summary_length", len(result.Summary), "compaction_cost", result.Cost)
+	events <- SessionSummary(sess.ID, result.Summary, a.Name())
 }
diff --git a/pkg/runtime/streaming.go b/pkg/runtime/streaming.go
index 3d06a8916..62b132076 100644
--- a/pkg/runtime/streaming.go
+++ b/pkg/runtime/streaming.go
@@ -232,45 +232,3 @@ func stripImageContent(messages []chat.Message) []chat.Message {
 	}
 	return result
 }
-
-// charsPerToken is the average number of characters per token used for
-// estimation. A value of 4 is a widely-used heuristic for English text;
-// it slightly overestimates token counts for code/JSON (which is ~3.5),
-// making compaction trigger earlier — the safe direction.
-const charsPerToken = 4
-
-// estimateMessageTokens returns a rough token-count estimate for a single
-// chat message based on its text length. This is intentionally conservative
-// (overestimates) so that proactive compaction fires before we hit the limit.
-// The estimate includes the message content, multi-content text parts, and
-// a small overhead per message for role/metadata tokens.
-func estimateMessageTokens(msg *chat.Message) int64 {
-	var chars int
-
-	// Primary text content.
-	chars += len(msg.Content)
-
-	// Multi-content parts (e.g., tool results with image descriptions).
-	for _, part := range msg.MultiContent {
-		chars += len(part.Text)
-	}
-
-	// Reasoning / thinking content.
-	chars += len(msg.ReasoningContent)
-
-	// Tool call arguments (they count toward input tokens on the next turn).
-	for _, tc := range msg.ToolCalls {
-		chars += len(tc.Function.Arguments)
-		chars += len(tc.Function.Name)
-	}
-
-	// Per-message overhead: role, ToolCallID, delimiters, etc.
-	// Models typically use 3-7 tokens for message framing.
-	const perMessageOverhead = 5
-
-	if chars == 0 {
-		return perMessageOverhead
-	}
-
-	return int64(chars/charsPerToken) + perMessageOverhead
-}