Skip to content

Commit b7d4563

Browse files
StackMemory Bot (CLI) and claude committed
fix(build): improve build command validation, types, and codex integration
CLI improvements:
- Require task description (no misleading default)
- Add --dry-run flag, clarify --execute default
- Add proper types (HarnessResult, PlanStep) to printInteractionLog
- Extract duplicate task resolution logic

Harness improvements:
- Make heuristicPlan generic (not hardcoded to multi-agent harness)
- Implement all plan steps in prompt (not just first one)
- Increase max_tokens from 1200 to 4096

Provider improvements:
- Fix codex binary detection (check codex-sm, codex, codex-cli)
- Use first available binary instead of hardcoded codex-sm
- Fix unsafe type coercion with proper type guard

Wrapper improvements:
- Use exec for full TTY control in claude-code-wrapper.sh
- Fix argument parsing for wrapper-specific flags

Also renames mm-spike paths to build for consistency.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent d885cfb commit b7d4563

7 files changed

Lines changed: 248 additions & 102 deletions

File tree

docs/SETUP.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ CLI equivalents for quick checks:
9797

9898
```bash
9999
# Quiet JSON (UI-friendly)
100-
stackmemory mm-spike --task "Refactor config loader" --json
100+
stackmemory build --task "Refactor config loader" --json
101101

102102
# Execute implementer and record as frame
103103
stackmemory skills spike --task "Refactor" --execute --max-iters 3 --json --record-frame

docs/mcp.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,14 +69,14 @@ Response content is a single `text` item containing a JSON string:
6969

7070
## Notes
7171
- Implementer `codex` calls the first Codex binary found on PATH (`codex-sm`, `codex`, or `codex-cli`). Use `--execute` in CLI, or `execute: true` in MCP, to actually run it; otherwise it’s a dry‑run.
72-
- Audit files are saved to `.stackmemory/mm-spike/spike-<timestamp>.json` to support review/debugging.
72+
- Audit files are saved to `.stackmemory/build/spike-<timestamp>.json` to support review/debugging.
7373
- You can compare models:
7474
- Planner/critic: override with `STACKMEMORY_MM_PLANNER_MODEL` / `STACKMEMORY_MM_REVIEWER_MODEL`.
7575
- Implementer: set to `claude` to A/B against Codex, or keep `codex` (default).
7676

7777
## CLI equivalents (for quick checks)
7878
- Quiet JSON output:
79-
- `stackmemory mm-spike --task "Refactor config loader" --json`
79+
- `stackmemory build "Refactor config loader" --json`
8080
- `stackmemory skills spike --task "Refactor config loader" --json`
8181
- Execute implementer and record as frame:
8282
- `stackmemory skills spike --task "Refactor" --execute --max-iters 3 --json --record-frame`
@@ -154,7 +154,7 @@ Response (content[0].text is a JSON string):
154154
Notes:
155155
- `recordFrame: true` creates a real StackMemory frame + anchors (plan summary, commands, issues, suggestions).
156156
- `execute: true` actually invokes the implementer; otherwise it’s a dry‑run.
157-
- Approval IDs are persisted to `.stackmemory/mm-spike/pending.json` so editor restarts don’t lose pending approvals.
157+
- Approval IDs are persisted to `.stackmemory/build/pending.json` so editor restarts don’t lose pending approvals.
158158

159159
### Optional helper tools
160160
- `plan_only`: Returns a plan JSON without running code.

scripts/claude-code-wrapper.sh

Lines changed: 18 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -3,62 +3,50 @@
33
# Claude Code wrapper with StackMemory integration
44
# Usage: Add alias to ~/.zshrc: alias claude='~/Dev/stackmemory/scripts/claude-code-wrapper.sh'
55

6-
# Check for auto-sync flag
6+
# Check for auto-sync flag and filter wrapper-specific args
77
AUTO_SYNC=false
88
SYNC_INTERVAL=5
9+
CLAUDE_ARGS=()
10+
911
for arg in "$@"; do
1012
case $arg in
1113
--auto-sync)
1214
AUTO_SYNC=true
13-
shift
1415
;;
1516
--sync-interval=*)
1617
SYNC_INTERVAL="${arg#*=}"
17-
shift
18+
;;
19+
*)
20+
CLAUDE_ARGS+=("$arg")
1821
;;
1922
esac
2023
done
2124

22-
# Start Linear auto-sync in background if requested
23-
SYNC_PID=""
25+
# Start Linear auto-sync in background if requested (survives exec)
2426
if [ "$AUTO_SYNC" = true ] && [ -n "$LINEAR_API_KEY" ]; then
2527
echo "🔄 Starting Linear auto-sync (${SYNC_INTERVAL}min intervals)..."
26-
(
28+
nohup bash -c "
2729
while true; do
2830
sleep $((SYNC_INTERVAL * 60))
29-
if [ -d ".stackmemory" ]; then
31+
if [ -d \"$PWD/.stackmemory\" ]; then
3032
stackmemory linear sync --quiet 2>/dev/null || true
3133
fi
3234
done
33-
) &
34-
SYNC_PID=$!
35+
" > /dev/null 2>&1 &
36+
disown
3537
fi
3638

37-
cleanup() {
38-
echo "📝 Saving StackMemory context..."
39-
40-
# Kill auto-sync if running
41-
if [ -n "$SYNC_PID" ] && kill -0 $SYNC_PID 2>/dev/null; then
42-
echo "🛑 Stopping auto-sync..."
43-
kill $SYNC_PID 2>/dev/null || true
44-
fi
45-
46-
# Check if in a git repo with stackmemory
47-
if [ -d ".stackmemory" ] && [ -f "stackmemory.json" ]; then
48-
# Save current context (without sync)
49-
stackmemory status 2>/dev/null || true
50-
echo "✅ StackMemory context saved"
51-
fi
52-
}
39+
# Note: Cleanup is now handled by Claude hooks instead of this wrapper
40+
# See: stackmemory setup-hooks --cleanup
5341

54-
# Set trap for exit signals
55-
trap cleanup EXIT INT TERM
42+
# Run Claude Code with exec for full TTY control (interactive mode)
43+
# This replaces the shell process, ensuring stdin works properly
44+
# Note: cleanup trap won't run with exec - use Claude hooks for session cleanup instead
5645

57-
# Run Claude Code (try multiple possible command names)
5846
if command -v claude-code &> /dev/null; then
59-
claude-code "$@"
47+
exec claude-code "${CLAUDE_ARGS[@]}"
6048
elif command -v claude &> /dev/null; then
61-
claude "$@"
49+
exec claude "${CLAUDE_ARGS[@]}"
6250
else
6351
echo "❌ Claude Code not found. Please install it first."
6452
echo " Visit: https://github.com/anthropics/claude-code"

src/cli/index.ts

Lines changed: 138 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -67,9 +67,12 @@ import Database from 'better-sqlite3';
6767
import { join } from 'path';
6868
import { existsSync, mkdirSync } from 'fs';
6969
import inquirer from 'inquirer';
70-
import chalk from 'chalk';
7170
import { enableChromaDB } from '../core/config/storage-config.js';
7271
import { spawn } from 'child_process';
72+
import type {
73+
HarnessResult,
74+
PlanStep,
75+
} from '../orchestrators/multimodal/types.js';
7376
import { homedir } from 'os';
7477

7578
// Read version from package.json - works from both src/ and dist/src/
@@ -903,16 +906,124 @@ program
903906
}
904907
});
905908

906-
function printInteractionLog(
907-
meta: {
908-
task: string;
909-
plannerModel: string;
910-
reviewerModel: string;
911-
implementer: string;
912-
execute: boolean;
913-
},
914-
result: any
915-
) {
909+
// Alias: build (same behavior as mm-spike)
/**
 * `stackmemory build [task]` — runs the multimodal harness (`runSpike`) for a
 * task and prints the result in one of several formats.
 *
 * Task resolution: the positional argument wins when it is a non-empty
 * string; otherwise `--task` is used. If neither is provided the command
 * prints an error and exits with status 1.
 *
 * Output selection (first match wins):
 *   1. `--json`    -> single-line JSON of the whole result
 *   2. `--verbose` -> sectioned plan / iterations / implementation / critique
 *   3. `--quiet`   -> no output
 *   4. `--log`     -> pretty interaction log (this is the default, since
 *                     `--log` is declared with a default of `true`)
 *   5. otherwise   -> one-line summary
 *
 * Explicit format flags are checked before `--log` on purpose: `--log`
 * defaults to `true`, so testing it first would make `--json`, `--verbose`
 * and `--quiet` unreachable.
 */
program
  .command('build')
  .description(
    'Plan + code: planner (Claude), implementer (Codex/Claude), critic (Claude) with optional log/json output'
  )
  .argument('[task]', 'Task description (positional)')
  .option(
    '-t, --task <desc>',
    'Task description (required if no positional arg)'
  )
  .option(
    '--planner-model <name>',
    'Claude model for planning',
    'claude-3-5-sonnet-latest'
  )
  .option(
    '--reviewer-model <name>',
    'Claude model for review',
    'claude-3-5-sonnet-latest'
  )
  .option('--execute', 'Execute implementer (default: true)', true)
  .option('--dry-run', 'Skip execution, show commands only')
  .option('--implementer <name>', 'codex|claude', 'codex')
  .option('--max-iters <n>', 'Retry loop iterations', '2')
  .option('--audit-dir <path>', 'Persist spike results to directory')
  .option('--record-frame', 'Record as real frame with anchors')
  .option('--record', 'Record plan & critique into StackMemory context')
  .option('--json', 'Emit single JSON result (UI-friendly)')
  .option('--quiet', 'Minimal output')
  .option('--verbose', 'Verbose sectioned output')
  .option('--log', 'Pretty print interaction log (default: true)', true)
  .action(async (taskArg, opts) => {
    try {
      // Resolve task from positional arg or --task option
      const task =
        typeof taskArg === 'string' && taskArg.length > 0 ? taskArg : opts.task;

      if (!task) {
        console.error(
          chalk.red(
            'Error: Task description required. Provide as argument or --task option.'
          )
        );
        console.error(
          chalk.gray('  Example: stackmemory build "Add user authentication"')
        );
        process.exit(1);
      }

      // Parse with an explicit radix and fail fast on non-numeric input
      // instead of handing NaN to the harness.
      const maxIters = Number.parseInt(opts.maxIters, 10);
      if (Number.isNaN(maxIters)) {
        console.error(
          chalk.red(
            `Error: --max-iters must be an integer (got "${opts.maxIters}").`
          )
        );
        process.exit(1);
      }

      // Loaded lazily so the harness is only imported when `build` runs.
      const { runSpike } =
        await import('../orchestrators/multimodal/harness.js');

      // `--dry-run` disables execution. `opts.execute` defaults to true, so
      // the `=== false` arm only matters if a caller/negating flag sets it.
      const dryRun = opts.dryRun === true || opts.execute === false;

      const result = await runSpike(
        { task, repoPath: process.cwd() },
        {
          plannerModel: opts.plannerModel,
          reviewerModel: opts.reviewerModel,
          implementer: opts.implementer,
          maxIters,
          dryRun,
          auditDir: opts.auditDir,
          recordFrame: Boolean(opts.recordFrame),
          record: Boolean(opts.record),
        }
      );

      // Explicit output flags take priority over the default-on --log.
      if (opts.json) {
        console.log(JSON.stringify(result));
        return;
      }

      if (opts.verbose) {
        console.log('\n=== Plan ===');
        console.log(JSON.stringify(result.plan, null, 2));
        console.log('\n=== Iterations ===');
        (result.iterations || []).forEach((it, i) => {
          console.log(`\n[Attempt ${i + 1}] ${it.command}`);
          console.log('OK:', it.ok);
          console.log('Critique:', JSON.stringify(it.critique));
        });
        console.log('\n=== Implementation ===');
        console.log(JSON.stringify(result.implementation, null, 2));
        console.log('\n=== Critique ===');
        console.log(JSON.stringify(result.critique, null, 2));
        return;
      }

      if (opts.quiet) {
        return;
      }

      if (opts.log) {
        printInteractionLog(
          {
            task,
            plannerModel: opts.plannerModel,
            reviewerModel: opts.reviewerModel,
            implementer: opts.implementer,
            execute: !dryRun,
          },
          result
        );
        return;
      }

      console.log(
        `Plan steps: ${result.plan.steps.length}, Approved: ${result.critique.approved}`
      );
    } catch (error) {
      console.error('build failed:', (error as Error).message);
      process.exit(1);
    }
  });
1017+
1018+
interface BuildLogMeta {
1019+
task: string;
1020+
plannerModel: string;
1021+
reviewerModel: string;
1022+
implementer: string;
1023+
execute: boolean;
1024+
}
1025+
1026+
function printInteractionLog(meta: BuildLogMeta, result: HarnessResult): void {
9161027
const divider = chalk.gray(
9171028
'────────────────────────────────────────────────'
9181029
);
@@ -928,65 +1039,55 @@ function printInteractionLog(
9281039
console.log(divider);
9291040

9301041
// Plan summary
931-
if (result?.plan) {
1042+
if (result.plan) {
9321043
console.log(
9331044
chalk.bold('Plan Summary: '),
9341045
result.plan.summary || '(no summary)'
9351046
);
936-
const steps = Array.isArray(result.plan.steps)
937-
? result.plan.steps.slice(0, 6)
938-
: [];
1047+
const steps: PlanStep[] = result.plan.steps.slice(0, 6);
9391048
if (steps.length) {
9401049
console.log(chalk.bold('\nSteps:'));
941-
steps.forEach((s: any, idx: number) => {
1050+
steps.forEach((s, idx) => {
9421051
console.log(`${chalk.gray(String(idx + 1) + '.')} ${s.title || s.id}`);
943-
const ac = Array.isArray(s.acceptanceCriteria)
944-
? s.acceptanceCriteria
945-
: [];
1052+
const ac = s.acceptanceCriteria || [];
9461053
if (ac.length) {
947-
ac.slice(0, 3).forEach((c: string) =>
948-
console.log(chalk.gray(` - ${c}`))
949-
);
1054+
ac.slice(0, 3).forEach((c) => console.log(chalk.gray(` - ${c}`)));
9501055
if (ac.length > 3) console.log(chalk.gray(' - ...'));
9511056
}
9521057
});
9531058
}
954-
if (Array.isArray(result.plan.risks) && result.plan.risks.length) {
1059+
if (result.plan.risks?.length) {
9551060
console.log(chalk.bold('\nRisks:'));
9561061
result.plan.risks
9571062
.slice(0, 5)
958-
.forEach((r: string) => console.log(chalk.gray(` - ${r}`)));
1063+
.forEach((r) => console.log(chalk.gray(` - ${r}`)));
9591064
}
9601065
}
9611066

9621067
console.log(`\n${divider}`);
963-
const iters = Array.isArray(result?.iterations) ? result.iterations : [];
1068+
const iters = result.iterations || [];
9641069
if (iters.length) {
965-
iters.forEach((it: any, i: number) => {
1070+
iters.forEach((it, i) => {
9661071
console.log(chalk.magenta(`Attempt ${i + 1}`));
9671072
console.log(`${chalk.gray('Command:')} ${it.command}`);
9681073
console.log(
9691074
`${chalk.gray('OK:')} ${it.ok ? chalk.green('true') : chalk.red('false')}`
9701075
);
971-
const issues = it?.critique?.issues || [];
972-
const sugg = it?.critique?.suggestions || [];
1076+
const issues = it.critique?.issues || [];
1077+
const sugg = it.critique?.suggestions || [];
9731078
if (issues.length) {
9741079
console.log(chalk.bold('Issues:'));
975-
issues
976-
.slice(0, 5)
977-
.forEach((x: string) => console.log(chalk.red(` - ${x}`)));
1080+
issues.slice(0, 5).forEach((x) => console.log(chalk.red(` - ${x}`)));
9781081
}
9791082
if (sugg.length) {
9801083
console.log(chalk.bold('Suggestions:'));
981-
sugg
982-
.slice(0, 5)
983-
.forEach((x: string) => console.log(chalk.yellow(` - ${x}`)));
1084+
sugg.slice(0, 5).forEach((x) => console.log(chalk.yellow(` - ${x}`)));
9841085
}
9851086
console.log(divider);
9861087
});
9871088
}
9881089

989-
const approved = !!result?.critique?.approved;
1090+
const approved = result.critique?.approved ?? false;
9901091
console.log(
9911092
`${chalk.bold('Final:')} ${
9921093
approved ? chalk.green('Approved') : chalk.yellow('Needs changes')
@@ -999,7 +1100,7 @@ function printInteractionLog(
9991100
program
10001101
.command('pending:list')
10011102
.description(
1002-
'List pending approval-gated plans (from .stackmemory/mm-spike/pending.json)'
1103+
'List pending approval-gated plans (from .stackmemory/build/pending.json)'
10031104
)
10041105
.option('--task-contains <substr>', 'Filter tasks containing this substring')
10051106
.option('--older-than-ms <number>', 'Only items older than this age (ms)')
@@ -1012,7 +1113,7 @@ program
10121113
const storePath = path.join(
10131114
process.cwd(),
10141115
'.stackmemory',
1015-
'mm-spike',
1116+
'build',
10161117
'pending.json'
10171118
);
10181119
let pending: Record<string, any> = {};

0 commit comments

Comments
 (0)