From 71d6d0af6c9c7f0907addcc40931f507ff26601a Mon Sep 17 00:00:00 2001
From: "Ralian.ENG" <ralianengineering@gmail.com>
Date: Fri, 8 May 2026 16:54:54 +0900
Subject: [PATCH 01/11] fix(sandbox,depfile): harden against
 attacker-controlled package metadata
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Four defense-in-depth boundaries around inputs derived from
attacker-controlled package metadata. None is currently exploitable —
they're forward-looking guards that make the trust boundaries explicit
so future refactors can't regress silently.

1. npmLifecycleScript now single-quotes the cd target via a new
   shQuote() helper. The previous double-quoted form still expanded
   $(...), backticks, and \, so a package.json key like "foo$(id)"
   could break out of the cd argument and run on the sandbox shell.
   Single quotes are literal — only ' is special and is escaped via
   the standard '\'' idiom.

2. depfile.Parse rejects names outside the registry-allowed character
   sets at parse time (npm: lowercase + optional @scope/, PyPI:
   PEP 508 ASCII). Hard error so a malformed name surfaces clearly
   instead of producing a degenerate shell command or
   KOJUTO_SCAN_PKGS env var. PyPI's ';' is not exercised in tests —
   the parser already strips PEP 508 environment markers before
   validation runs.

3. sitecustomize.py's _w() strips \n/\r/\0 from every emission. The
   Go parser splits on \n, so an attacker-controlled co_filename or
   compile() filename arg containing \n could smuggle a fake KOJUTO:
   line onto the wire. _t() already escapes the snippet field; this
   covers the filename field (concatenated raw from _origin()) plus
   any future tag.

4. The _FRAMES=8 cap on _origin()'s stack walk is removed. A
   malicious package burying its own frame under >8 stdlib wrappers
   would have escaped the "+" user-origin marker, letting the parser
   drop the resulting dynamic_exec event as benign. The walk is now
   unbounded; the common-case fast path (early return on the first
   user frame) is unchanged, so practical cost is the same.

Tests:
- TestParse_RejectsInvalidNpmName, TestParse_AcceptsScopedNpmName
- TestParse_RejectsInvalidPyPIName
- TestShQuote, TestNpmLifecycleScript_QuotesShellMetachars
- TestNpmLifecycleScript_ScopedPackage updated for single-quote form

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 internal/depfile/depfile.go             | 21 ++++++++
 internal/depfile/depfile_test.go        | 67 +++++++++++++++++++++++++
 internal/sandbox/hooks/sitecustomize.py | 32 ++++++++----
 internal/sandbox/sandbox.go             | 20 ++++++--
 internal/sandbox/sandbox_extra_test.go  | 35 ++++++++++++-
 5 files changed, 159 insertions(+), 16 deletions(-)

diff --git a/internal/depfile/depfile.go b/internal/depfile/depfile.go
index eae4503..56cc0d8 100644
--- a/internal/depfile/depfile.go
+++ b/internal/depfile/depfile.go
@@ -7,11 +7,23 @@ import (
 	"fmt"
 	"os"
 	"path/filepath"
+	"regexp"
 	"strings"
 
 	"github.com/RalianENG/kojuto/internal/types"
 )
 
+// Package-name validators. Names flow into shell scripts (npm lifecycle
+// hooks) and environment variables (KOJUTO_SCAN_PKGS) inside the sandbox,
+// so we reject anything outside the registry-allowed character sets at
+// parse time. The sandbox also single-quotes names at the use site, but
+// validation here gives the user a clear error instead of a silently
+// malformed command.
+var (
+	npmNameRe  = regexp.MustCompile(`^(@[a-z0-9][a-z0-9._~-]*\/)?[a-z0-9][a-z0-9._~-]*$`)
+	pypiNameRe = regexp.MustCompile(`^[A-Za-z0-9][A-Za-z0-9._-]*$`)
+)
+
 // Dep represents a single dependency extracted from a file.
 type Dep struct {
 	Name    string
@@ -81,6 +93,9 @@ func parseRequirementsTxt(path string) ([]Dep, error) {
 		}
 
 		if dep.Name != "" {
+			if !pypiNameRe.MatchString(dep.Name) {
+				return nil, fmt.Errorf("invalid package name %q in %s", dep.Name, path)
+			}
 			deps = append(deps, dep)
 		}
 	}
@@ -106,9 +121,15 @@ func parsePackageJSON(path string) ([]Dep, error) {
 	var deps []Dep
 
 	for name, ver := range pkg.Dependencies {
+		if !npmNameRe.MatchString(name) {
+			return nil, fmt.Errorf("invalid package name %q in %s", name, path)
+		}
 		deps = append(deps, Dep{Name: name, Version: cleanNpmVersion(ver)})
 	}
 	for name, ver := range pkg.DevDependencies {
+		if !npmNameRe.MatchString(name) {
+			return nil, fmt.Errorf("invalid package name %q in %s", name, path)
+		}
 		deps = append(deps, Dep{Name: name, Version: cleanNpmVersion(ver)})
 	}
 
diff --git a/internal/depfile/depfile_test.go b/internal/depfile/depfile_test.go
index 14b761e..78a3f6f 100644
--- a/internal/depfile/depfile_test.go
+++ b/internal/depfile/depfile_test.go
@@ -167,6 +167,73 @@ func TestParse_InvalidJSON(t *testing.T) {
 	}
 }
 
+func TestParse_RejectsInvalidNpmName(t *testing.T) {
+	cases := map[string]string{
+		"shell metachars":    `{"dependencies": {"foo;rm -rf /": "1.0.0"}}`,
+		"command sub":        `{"dependencies": {"foo$(id)": "1.0.0"}}`,
+		"backtick":           "{\"dependencies\": {\"foo`id`\": \"1.0.0\"}}",
+		"newline":            "{\"dependencies\": {\"foo\\nbar\": \"1.0.0\"}}",
+		"uppercase":          `{"dependencies": {"FooBar": "1.0.0"}}`,
+		"leading dot":        `{"dependencies": {".hidden": "1.0.0"}}`,
+		"space":              `{"dependencies": {"a b": "1.0.0"}}`,
+		"dev shell metachar": `{"devDependencies": {"foo|cat": "1.0.0"}}`,
+	}
+
+	for name, content := range cases {
+		t.Run(name, func(t *testing.T) {
+			dir := t.TempDir()
+			path := filepath.Join(dir, "package.json")
+			if err := os.WriteFile(path, []byte(content), 0o644); err != nil {
+				t.Fatal(err)
+			}
+			if _, _, err := Parse(path); err == nil {
+				t.Errorf("expected error for invalid name, got nil (content=%q)", content)
+			}
+		})
+	}
+}
+
+func TestParse_AcceptsScopedNpmName(t *testing.T) {
+	content := `{"dependencies": {"@scope/foo-bar": "1.0.0", "lodash": "4.0.0"}}`
+	dir := t.TempDir()
+	path := filepath.Join(dir, "package.json")
+	if err := os.WriteFile(path, []byte(content), 0o644); err != nil {
+		t.Fatal(err)
+	}
+	deps, _, err := Parse(path)
+	if err != nil {
+		t.Fatalf("Parse failed: %v", err)
+	}
+	if len(deps) != 2 {
+		t.Errorf("expected 2 deps, got %d", len(deps))
+	}
+}
+
+func TestParse_RejectsInvalidPyPIName(t *testing.T) {
+	// Note: `;` is not exercised here because the parser strips PEP 508
+	// environment markers (everything after `;`) before validation runs,
+	// so a shell `;` is never seen as part of the name.
+	cases := map[string]string{
+		"command sub": "foo$(id)==1.0.0\n",
+		"backtick":    "foo`id`==1.0.0\n",
+		"pipe":        "foo|cat==1.0.0\n",
+		"space":       "a b==1.0.0\n",
+	}
+
+	for name, content := range cases {
+		t.Run(name, func(t *testing.T) {
+			dir := t.TempDir()
+			path := filepath.Join(dir, "requirements.txt")
+			if err := os.WriteFile(path, []byte(content), 0o644); err != nil {
+				t.Fatal(err)
+			}
+			if _, _, err := Parse(path); err == nil {
+				t.Errorf("expected error for invalid name, got nil (content=%q)", content)
+			}
+		})
+	}
+}
+
 func TestCleanNpmVersion(t *testing.T) {
 	cases := []struct {
 		input string
diff --git a/internal/sandbox/hooks/sitecustomize.py b/internal/sandbox/hooks/sitecustomize.py
index 1eff6d7..295a96c 100644
--- a/internal/sandbox/hooks/sitecustomize.py
+++ b/internal/sandbox/hooks/sitecustomize.py
@@ -8,7 +8,6 @@
 import sys
 
 _MAX = 200
-_FRAMES = 8
 _P = chr(75) + chr(79) + chr(74) + chr(85) + chr(84) + chr(79) + ":"
 
 # Frames whose co_filename starts with one of these are treated as
@@ -49,6 +48,15 @@ def _t(s):
 
 
 def _w(tag, body):
+    # Wire-format invariant: each KOJUTO: emission must occupy exactly
+    # one stderr line. The Go parser splits on '\n' and treats every
+    # KOJUTO:-prefixed line as an independent event. Strip line and null
+    # bytes from `body` so an attacker-controlled co_filename or compile
+    # filename arg cannot smuggle a fake follow-up event onto the wire.
+    # _t() already escapes newlines in the snippet field, so this is a
+    # defense for the filename field (which is concatenated raw from
+    # _origin()) and a belt-and-braces guard for any future tag.
+    body = body.replace("\n", "").replace("\r", "").replace("\0", "")
     sys.stderr.write(_P + tag + ":" + body + "\n")
     sys.stderr.flush()
 
@@ -57,28 +65,32 @@ def _origin(fallback):
     """Walk the Python call stack and return the .py file responsible
     for the compile/exec call.
 
-    Returns "+<path>" when the deepest user-code frame is found — the
-    leading "+" tells the Go analyzer this event originates in audited
-    code and must NOT be filtered by the path-based benign list. When
-    no user frame appears, returns the deepest non-internal frame so
-    the existing benign filter (stdlib/site-packages substring match)
-    still works for compat-library noise.
+    Returns "+<path>" when a user-code frame is found anywhere on the
+    stack — the leading "+" tells the Go analyzer this event originates
+    in audited code and must NOT be filtered by the path-based benign
+    list. When no user frame appears, returns the first non-internal
+    frame so the existing benign filter (stdlib/site-packages substring
+    match) still works for compat-library noise.
+
+    The walk is unbounded by design: a depth cap would let a malicious
+    package bury its own frame under stdlib wrappers and evade the
+    user-origin marker. Stack walking is O(depth) on a linked list and
+    early-returns on the first user frame, so the practical cost is
+    negligible compared with the surrounding compile/exec work.
     """
     try:
         f = sys._getframe(2)  # skip _origin + _h
     except ValueError:
         return fallback
     me = __file__
-    seen = 0
     deepest = ""
-    while f is not None and seen < _FRAMES:
+    while f is not None:
         fn = f.f_code.co_filename
         if fn != me:
             if _is_user(fn):
                 return "+" + fn
             if not deepest:
                 deepest = fn
-            seen += 1
         f = f.f_back
     if deepest:
         return deepest
diff --git a/internal/sandbox/sandbox.go b/internal/sandbox/sandbox.go
index 50a3acf..49bd6e4 100644
--- a/internal/sandbox/sandbox.go
+++ b/internal/sandbox/sandbox.go
@@ -696,17 +696,27 @@ func npmLifecycleScript(pkgs []string) string {
 		if i > 0 {
 			b.WriteString("; ")
 		}
-		// Quote the path so scoped packages like @scope/pkg work
-		// without word-splitting.
-		b.WriteString(`(cd "/install/node_modules/`)
-		b.WriteString(p)
-		b.WriteString(`" && `)
+		// Single-quote the full cd target so any shell metacharacters
+		// in p (which originates from package.json keys, i.e. attacker-
+		// controllable input) cannot break out of the argument. depfile
+		// validates names at parse time; this is defense-in-depth for
+		// any future code path that populates pkgs from another source.
+		b.WriteString(`(cd `)
+		b.WriteString(shQuote("/install/node_modules/" + p))
+		b.WriteString(` && `)
 		b.WriteString(hooks)
 		b.WriteString(`) 2>&1`)
 	}
 	return b.String()
 }
 
+// shQuote wraps s in POSIX single quotes, escaping any embedded single
+// quote with the standard '\'' close-escape-reopen idiom. Safe for use
+// in /bin/sh, dash, and bash.
+func shQuote(s string) string {
+	return "'" + strings.ReplaceAll(s, "'", `'\''`) + "'"
+}
+
 // WriteProbeScriptsMulti writes one combined import probe script per OS identity.
 // This reduces Python/Node process launches from N*3 to just 3.
 func (s *Sandbox) WriteProbeScriptsMulti(ctx context.Context, pkgs []string) {
diff --git a/internal/sandbox/sandbox_extra_test.go b/internal/sandbox/sandbox_extra_test.go
index 0b9546d..3b5a143 100644
--- a/internal/sandbox/sandbox_extra_test.go
+++ b/internal/sandbox/sandbox_extra_test.go
@@ -347,12 +347,45 @@ func TestNpmLifecycleScript_ScopedPackage(t *testing.T) {
 	// shell command. Regression guard for a bug that would have hidden
 	// scoped-dep lifecycle behavior from strace.
 	got := npmLifecycleScript([]string{"@scope/lib"})
-	want := `"/install/node_modules/@scope/lib"`
+	want := `'/install/node_modules/@scope/lib'`
 	if !strings.Contains(got, want) {
 		t.Errorf("scoped path missing %q in:\n%s", want, got)
 	}
 }
 
+func TestShQuote(t *testing.T) {
+	cases := []struct {
+		in, want string
+	}{
+		{"foo", "'foo'"},
+		{"a b", "'a b'"},
+		{"@scope/pkg", "'@scope/pkg'"},
+		{"$(id)", "'$(id)'"},
+		{"`id`", "'`id`'"},
+		{"a'b", `'a'\''b'`},
+		{"", "''"},
+	}
+	for _, tc := range cases {
+		if got := shQuote(tc.in); got != tc.want {
+			t.Errorf("shQuote(%q) = %q, want %q", tc.in, got, tc.want)
+		}
+	}
+}
+
+func TestNpmLifecycleScript_QuotesShellMetachars(t *testing.T) {
+	// Defense-in-depth: even if a malformed name slips past the depfile
+	// validator, the cd target must be safely single-quoted so shell
+	// metacharacters cannot break out and execute on the sandbox shell.
+	got := npmLifecycleScript([]string{`foo$(id)`})
+	if !strings.Contains(got, `'/install/node_modules/foo$(id)'`) {
+		t.Errorf("metachar payload not single-quoted in:\n%s", got)
+	}
+	// The bare token must NOT appear unquoted between cd and &&.
+	if strings.Contains(got, `cd /install/node_modules/foo$(id) `) {
+		t.Errorf("metachar payload unquoted in:\n%s", got)
+	}
+}
+
 // ---------------------------------------------------------------------------
 // WriteProbeScriptsMulti
 // ---------------------------------------------------------------------------

From 7269ecf0376ce97315b513de530862bde803cc1c Mon Sep 17 00:00:00 2001
From: "Ralian.ENG" <ralianengineering@gmail.com>
Date: Fri, 8 May 2026 16:58:42 +0900
Subject: [PATCH 02/11] feat(action): expose config, quiet, no-color as Action
 inputs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The GitHub Action wrapper was missing wiring for three CLI flags
that already work on the binary itself: `--config`, `--quiet`, and
`--no-color`. Workflows that wanted these had to drop down to a
custom shell step.

- `config` — passes through to `--config` so an alternative
  `kojuto.yml` path can be used (CI matrices with per-language
  configs, or a vendored config kept under a non-default path).
- `quiet` — suppresses phase progress lines so CI logs only contain
  the final verdict block. Pairs well with PR-comment workflows
  that post the report as a comment.
- `no-color` — disables ANSI escapes; mirrors the `NO_COLOR`
  environment variable spec, useful for log aggregators that don't
  strip escapes.

`runtime` description is also tightened to drop the now-obsolete
"empty string also resolves to auto" parenthetical (the auto path
is the documented default since #14) and the YAML default is set
to `auto` explicitly so the rendered docs match the CLI.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md |  1 +
 action.yml   | 28 ++++++++++++++++++++++++++--
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7ec3ad3..1c8be19 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - **System binary write detection** — `openat` with write flags to trusted system binaries (`python3`, `node`, `pip`, `sh`, etc.) in `/usr/local/bin/` or `/usr/bin/` classified as `binary_hijacking`, preventing benignPaths bypass via tmpfs overwrite
 - **gVisor auto-detection** — `--runtime` default changed from empty (runc) to `auto`: probes `docker info` for runsc availability and uses gVisor if registered, falls back to runc otherwise
 - **npm test package** — `testdata/probe-npm-0.1.0/` with lifecycle hook payloads (preinstall/postinstall) and require-time import phase covering 31 TTPs across 9 attack categories including audit hook validation
+- **GitHub Action inputs** — `config`, `quiet`, and `no-color` exposed as Action inputs to match CLI flags (`--config`, `--quiet`, `--no-color`)
 
 ### Changed
 - Go version requirement lowered from 1.25.0 to 1.24.0 (stable release); `golang.org/x/sys` downgraded from v0.43.0 to v0.41.0
diff --git a/action.yml b/action.yml
index 51025ea..a88e97f 100644
--- a/action.yml
+++ b/action.yml
@@ -29,10 +29,14 @@ inputs:
     description: 'Scan a local package file (.whl, .tgz) or directory'
     required: false
     default: ''
-  runtime:
-    description: 'Container runtime: auto (default, use gVisor if available), runsc (force gVisor), runc (force Docker default). Empty string also resolves to auto on the CLI.'
+  config:
+    description: 'Path to kojuto.yml config file (default: kojuto.yml in current directory)'
     required: false
     default: ''
+  runtime:
+    description: 'Container runtime: auto (use gVisor if available), runsc (force gVisor), runc (force Docker default)'
+    required: false
+    default: 'auto'
   probe-method:
     description: 'Probe method: auto, ebpf, strace, strace-container'
     required: false
@@ -45,6 +49,14 @@ inputs:
     description: 'Ignore sensitive_paths.exclude from config (recommended for CI)'
     required: false
     default: 'true'
+  quiet:
+    description: 'Suppress phase progress output; emit only the final verdict block'
+    required: false
+    default: 'false'
+  no-color:
+    description: 'Disable colored output (NO_COLOR env is also respected)'
+    required: false
+    default: 'false'
 
 outputs:
   verdict:
@@ -77,10 +89,13 @@ runs:
         KOJUTO_FILE: ${{ inputs.file }}
         KOJUTO_PIN: ${{ inputs.pin }}
         KOJUTO_LOCAL: ${{ inputs.local }}
+        KOJUTO_CONFIG: ${{ inputs.config }}
         KOJUTO_RUNTIME: ${{ inputs.runtime }}
         KOJUTO_PROBE_METHOD: ${{ inputs.probe-method }}
         KOJUTO_TIMEOUT: ${{ inputs.timeout }}
         KOJUTO_STRICT: ${{ inputs.strict }}
+        KOJUTO_QUIET: ${{ inputs.quiet }}
+        KOJUTO_NO_COLOR: ${{ inputs['no-color'] }}
       run: |
         ARGS=()
 
@@ -111,9 +126,18 @@ runs:
         if [ -n "$KOJUTO_RUNTIME" ]; then
           ARGS+=("--runtime" "$KOJUTO_RUNTIME")
         fi
+        if [ -n "$KOJUTO_CONFIG" ]; then
+          ARGS+=("--config" "$KOJUTO_CONFIG")
+        fi
         if [ "$KOJUTO_STRICT" = "true" ]; then
           ARGS+=("--strict")
         fi
+        if [ "$KOJUTO_QUIET" = "true" ]; then
+          ARGS+=("--quiet")
+        fi
+        if [ "$KOJUTO_NO_COLOR" = "true" ]; then
+          ARGS+=("--no-color")
+        fi
 
         sudo "${{ github.action_path }}/kojuto" scan "${ARGS[@]}" \
           -o /tmp/kojuto-report.json

From 02643788a0151b5a42c1587d0a5d7b9878074c9a Mon Sep 17 00:00:00 2001
From: "Ralian.ENG" <ralianengineering@gmail.com>
Date: Fri, 8 May 2026 16:59:10 +0900
Subject: [PATCH 03/11] docs: backfill changelog for severity tiering,
 caller-aware hook + path count

The two unreleased fixes that shipped in #18 (a5cab0e) were missing
from CHANGELOG.md, and the README's credential-access table still
quoted the older "~50 sensitive paths" figure. This commit catches
the docs up to the merged code; no behavior change.

- CHANGELOG: add Severity-tiered verdict and Caller-aware audit hook
  entries under [Unreleased] / Added.
- README: bump the sensitive-path count from ~50 to ~60 to reflect
  the current entry list.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md | 2 ++
 README.md    | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1c8be19..ee840b3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added
 - **Audit hooks for dynamic code execution detection** — Python PEP 578 hook (`sitecustomize.py`) intercepts `compile`/`exec`/`import`/`ctypes.dlopen`; Node.js `--require` hook (`kojuto-require.js`) intercepts `eval`/`Function`/`vm.runInNewContext`/`vm.runInThisContext`/`vm.Script`. New `dynamic_code_execution` category and `EventDynamicExec` event type
+- **Severity-tiered verdict** — `types.CategorySeverity` classifies each detection category as HIGH (one event raises the verdict to SUSPICIOUS), MEDIUM (two-or-more raise it), or LOW (never raises the verdict alone). `dynamic_code_execution` is LOW, `dns_tunneling` and `evasion` are MEDIUM, all other categories stay HIGH. Unmapped categories fail closed (treated as HIGH). LOW events still appear in `report.events` for forensics — verdict reflects severity, not raw event count. Stops legitimate Python compat libraries (`six`, `attrs`, `future`) from flipping to SUSPICIOUS on benign internal `compile`/`exec` calls
+- **Caller-aware audit hook** — `sitecustomize.py` now walks the Python call stack and reports the actual `.py` file invoking `compile`/`exec`, not the user-controllable `filename` argument (which `six` deliberately sets to `<string>`). When the deepest frame lives inside the scanned package's `site-packages` directory the hook prefixes the wire payload with `+` so the analyzer bypasses path-based benign filtering. Sandbox passes the scan list via `KOJUTO_SCAN_PKGS` so the hook knows which paths count as "user code"
 - **System binary write detection** — `openat` with write flags to trusted system binaries (`python3`, `node`, `pip`, `sh`, etc.) in `/usr/local/bin/` or `/usr/bin/` classified as `binary_hijacking`, preventing benignPaths bypass via tmpfs overwrite
 - **gVisor auto-detection** — `--runtime` default changed from empty (runc) to `auto`: probes `docker info` for runsc availability and uses gVisor if registered, falls back to runc otherwise
 - **npm test package** — `testdata/probe-npm-0.1.0/` with lifecycle hook payloads (preinstall/postinstall) and require-time import phase covering 31 TTPs across 9 attack categories including audit hook validation
diff --git a/README.md b/README.md
index 62f9c7b..6a2140f 100644
--- a/README.md
+++ b/README.md
@@ -253,7 +253,7 @@ Of the 300 malicious samples, 238 failed to install (dependencies already remove
 |----------|----------|-----------------|
 | C2 communication (`c2_communication`) | `aiogram-types-v3` → `147.45.124.42:80` | `connect`/`sendto` to non-loopback IPs |
 | Data exfiltration (`data_exfiltration`) | DNS resolution of Discord/Telegram/Pastebin | `sendto` port 53 resolving known exfil services |
-| Credential access (`credential_access`) | `axios-attack-demo` → `.ssh/id_rsa`, `.aws/credentials`, `.solana/id.json` | `openat` on ~50 sensitive paths (SSH, cloud, crypto wallets, browser data) |
+| Credential access (`credential_access`) | `axios-attack-demo` → `.ssh/id_rsa`, `.aws/credentials`, `.solana/id.json` | `openat` on ~60 sensitive paths (SSH, cloud, crypto wallets, browser data) |
 | Code execution (`code_execution`) | `advpruebitaa` → `type nul > prueba11.txt`, `/tmp/ld.py` | `execve` with inline `-c`/`-e` flags or from `/tmp`, `/dev/shm` |
 | Memory execution (`memory_execution`) | `ctypes.mmap(RWX)` shellcode injection | `mmap`/`mprotect` with simultaneous PROT_WRITE+PROT_EXEC |
 | Binary hijacking (`binary_hijacking`) | `rename /tmp/evil /usr/local/bin/python3` | `rename` targeting trusted system binaries |

From 131bb3f9da42e331b06cadba3cd2de7236a456c1 Mon Sep 17 00:00:00 2001
From: "Ralian.ENG" <ralianengineering@gmail.com>
Date: Fri, 8 May 2026 17:12:48 +0900
Subject: [PATCH 04/11] fix(sandbox,cmd): close root-in-container heredoc
 injection on --local scans
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The dockerExecRoot heredoc pattern used to write probe scripts and
honeypot credentials had a stray-terminator weakness:

    sh -c "cat > <path> << 'KOJUTO_EOF'\n<body>KOJUTO_EOF"

If <body> contained a line consisting only of `KOJUTO_EOF`, the shell
terminated the heredoc early and parsed the rest of the body as
commands — running as root inside the sandbox via `docker exec
--user=root`. <body> includes the package name interpolated into a
JS/Python `'%s'` literal in WriteProbeScripts*; a name carrying
"\n" + "KOJUTO_EOF" + "\n" + "<cmd>" produced exactly the lines needed
to terminate and execute <cmd>.

Reachability:

- `kojuto scan <pkg>`         gated by downloaderValidate (regex blocks \n)
- `kojuto scan -f <file>`     gated by depfile validators (71d6d0a)
- `kojuto scan --local <path>` was NOT gated — pkg derived from
  filepath.Base(localPath) carried any byte the filesystem allowed,
  so a malware sample distributed with a crafted filename would have
  achieved root in the sandbox during WriteProbeScripts. The most
  damaging payload would `rm /usr/local/lib/python3.12/site-packages/
  sitecustomize.py`, neutralising kojuto's own audit hook before the
  import phase actually ran the malicious code — defeating detection
  with no surface visibility.

Two-layer fix:

1. dockerWriteFile() helper replaces all 9 heredoc sites (5 honeypot
   credentials + 4 probe scripts). Content is delivered via stdin to
   `cat`, so the body never appears on the shell command line. Path
   is single-quoted as defense-in-depth though all current callers
   pass constants.

2. runLocalScan validates the derived pkg with downloaderValidate
   before any sandbox interaction. Newlines and shell metacharacters
   surface as a clear error rather than silently producing a
   degenerate container command.

Tests:

- TestDockerWriteFile_StructureNoHeredoc pins that the new helper
  emits `docker exec -i ... sh -c "cat > '<path>'"` with no `<<` or
  `KOJUTO_EOF` token anywhere in the args.
- TestDockerWriteFile_QuotesPath confirms shQuote is applied to the
  path slot for future callers.
- TestRunLocalScan_RejectsUnsafeFilename creates a `.tgz` whose
  basename contains "\nKOJUTO_EOF\n" and asserts the scan errors
  out before reaching Docker.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 cmd/root.go                            | 11 +++++
 cmd/root_mock_test.go                  | 34 ++++++++++++++
 internal/sandbox/sandbox.go            | 61 ++++++++++++++-----------
 internal/sandbox/sandbox_extra_test.go | 62 ++++++++++++++++++++++++++
 4 files changed, 143 insertions(+), 25 deletions(-)

diff --git a/cmd/root.go b/cmd/root.go
index aa89a6c..270fadc 100644
--- a/cmd/root.go
+++ b/cmd/root.go
@@ -646,6 +646,17 @@ func runLocalScan(_ []string) error {
 		pkg = detectPackageName(filepath.Base(localPath))
 	}
 
+	// Validate the derived package name with the same regex as registry
+	// scans. Local mode is the only path where pkg is built from a user-
+	// supplied filesystem path, so the name can carry any byte the OS
+	// allows — including newlines and shell metacharacters that downstream
+	// sandbox helpers used to interpolate into shell heredocs and Python/
+	// JS string literals. The dockerWriteFile refactor closed the heredoc
+	// path, but defending at the boundary keeps every future use site safe.
+	if err := downloaderValidate(pkg, ""); err != nil {
+		return fmt.Errorf("local package name derived from %q is unsafe: %w", localPath, err)
+	}
+
 	// Auto-detect ecosystem from file extension, but only if -e was not explicitly set.
 	// .tgz is always npm; .tar.gz is ambiguous (could be PyPI sdist), so only
 	// override to npm for .tgz, not .tar.gz.
diff --git a/cmd/root_mock_test.go b/cmd/root_mock_test.go
index 5819caf..37badf0 100644
--- a/cmd/root_mock_test.go
+++ b/cmd/root_mock_test.go
@@ -443,6 +443,40 @@ func TestRunLocalScan_NpmAutoDetect(t *testing.T) {
 	}
 }
 
+// TestRunLocalScan_RejectsUnsafeFilename pins the security invariant that
+// a filename whose derived package name contains shell metacharacters or
+// newlines is rejected at the boundary, before any sandbox interaction.
+// Without this gate, a malicious .tgz/.whl filename containing a newline
+// followed by "KOJUTO_EOF" used to terminate dockerWriteFile's predecessor
+// heredoc and execute arbitrary commands as root inside the container.
+// The dockerWriteFile refactor closed the heredoc; this test guards the
+// belt-and-braces input check.
+func TestRunLocalScan_RejectsUnsafeFilename(t *testing.T) {
+	saveAndRestoreFlags(t)
+
+	dir := t.TempDir()
+	// "\n" + "KOJUTO_EOF" payload baked into a .tgz filename. macOS APFS
+	// and Linux ext4/xfs all permit '\n' in filenames; if the underlying
+	// FS rejects it, skip rather than fail.
+	badName := "evil\nKOJUTO_EOF\nrm-1.0.0.tgz"
+	badFile := filepath.Join(dir, badName)
+	if err := os.WriteFile(badFile, []byte("x"), 0o644); err != nil {
+		t.Skipf("filesystem refuses newlines in filenames: %v", err)
+	}
+
+	flagLocal = badFile
+	flagEcosystem = types.EcosystemNpm
+	flagTimeout = 5 * time.Second
+
+	err := runLocalScan(nil)
+	if err == nil {
+		t.Fatal("expected validation error for unsafe local filename")
+	}
+	if !strings.Contains(err.Error(), "unsafe") {
+		t.Errorf("expected 'unsafe' in error, got: %v", err)
+	}
+}
+
 // --- prepareLocalNpm tests ---
 
 func TestPrepareLocalNpm_NoTgz(t *testing.T) {
diff --git a/internal/sandbox/sandbox.go b/internal/sandbox/sandbox.go
index 49bd6e4..f12fd3a 100644
--- a/internal/sandbox/sandbox.go
+++ b/internal/sandbox/sandbox.go
@@ -519,44 +519,39 @@ func (s *Sandbox) plantHoneypotFiles(ctx context.Context) {
 
 	// SSH key pair.
 	s.dockerExecRoot(ctx, "mkdir", "-p", home+"/.ssh")
-	s.dockerExecRoot(ctx, "sh", "-c", "cat > "+home+"/.ssh/id_rsa << 'KOJUTO_EOF'\n"+
+	s.dockerWriteFile(ctx, home+"/.ssh/id_rsa",
 		"-----BEGIN OPENSSH PRIVATE KEY-----\n"+
-		"b3BlbnNzaC1rZXktdjEAAAAAFAAAAAAAAAEAAAAzAAAAC3NzaC1lZDI1NTE5\n"+
-		"AAAAI"+sshKeyBody+"\n"+
-		"-----END OPENSSH PRIVATE KEY-----\n"+
-		"KOJUTO_EOF")
+			"b3BlbnNzaC1rZXktdjEAAAAAFAAAAAAAAAEAAAAzAAAAC3NzaC1lZDI1NTE5\n"+
+			"AAAAI"+sshKeyBody+"\n"+
+			"-----END OPENSSH PRIVATE KEY-----\n")
 	s.dockerExecRoot(ctx, "chmod", "600", home+"/.ssh/id_rsa")
 
 	// AWS credentials.
 	s.dockerExecRoot(ctx, "mkdir", "-p", home+"/.aws")
-	s.dockerExecRoot(ctx, "sh", "-c", "cat > "+home+"/.aws/credentials << 'KOJUTO_EOF'\n"+
+	s.dockerWriteFile(ctx, home+"/.aws/credentials",
 		"[default]\n"+
-		"aws_access_key_id = "+awsKey+"\n"+
-		"aws_secret_access_key = "+awsSecret+"\n"+
-		"KOJUTO_EOF")
+			"aws_access_key_id = "+awsKey+"\n"+
+			"aws_secret_access_key = "+awsSecret+"\n")
 
 	// Git credentials.
-	s.dockerExecRoot(ctx, "sh", "-c", "cat > "+home+"/.git-credentials << 'KOJUTO_EOF'\n"+
-		"https://dev:"+ghToken+"@github.com\n"+
-		"KOJUTO_EOF")
+	s.dockerWriteFile(ctx, home+"/.git-credentials",
+		"https://dev:"+ghToken+"@github.com\n")
 	s.dockerExecRoot(ctx, "chmod", "600", home+"/.git-credentials")
 
 	// Netrc.
-	s.dockerExecRoot(ctx, "sh", "-c", "cat > "+home+"/.netrc << 'KOJUTO_EOF'\n"+
+	s.dockerWriteFile(ctx, home+"/.netrc",
 		"machine github.com\n"+
-		"login dev\n"+
-		"password "+ghToken+"\n"+
-		"KOJUTO_EOF")
+			"login dev\n"+
+			"password "+ghToken+"\n")
 	s.dockerExecRoot(ctx, "chmod", "600", home+"/.netrc")
 
 	// GitHub CLI config.
 	s.dockerExecRoot(ctx, "mkdir", "-p", home+"/.config/gh")
-	s.dockerExecRoot(ctx, "sh", "-c", "cat > "+home+"/.config/gh/hosts.yml << 'KOJUTO_EOF'\n"+
+	s.dockerWriteFile(ctx, home+"/.config/gh/hosts.yml",
 		"github.com:\n"+
-		"    oauth_token: "+ghToken+"\n"+
-		"    user: dev\n"+
-		"    git_protocol: https\n"+
-		"KOJUTO_EOF")
+			"    oauth_token: "+ghToken+"\n"+
+			"    user: dev\n"+
+			"    git_protocol: https\n")
 
 	// Fix ownership so the container user (dev) owns the files.
 	s.dockerExecRoot(ctx, "chown", "-R", "1000:1000", home+"/.ssh", home+"/.aws",
@@ -586,6 +581,22 @@ func (s *Sandbox) dockerExecRoot(ctx context.Context, args ...string) {
 	_ = cmd.Run()
 }
 
+// dockerWriteFile writes content into the container at path as root.
+// The body is delivered via stdin to `cat`, which keeps it out of the
+// shell command line entirely — unlike a `<< 'EOF'` heredoc, an attacker
+// who controls part of the body cannot smuggle a stray terminator line
+// to break out of the heredoc and execute arbitrary commands as root in
+// the container. path is single-quoted as defense-in-depth even though
+// every current caller passes a constant.
+func (s *Sandbox) dockerWriteFile(ctx context.Context, path, content string) {
+	cmdArgs := []string{"exec", "-i", "--user=root", s.containerID, "sh", "-c", "cat > " + shQuote(path)}
+	cmd := execCommand(ctx, "docker", cmdArgs...)
+	cmd.Stdin = strings.NewReader(content)
+	cmd.Stdout = io.Discard
+	cmd.Stderr = io.Discard
+	_ = cmd.Run()
+}
+
 // Exec runs a command inside the sandbox container and returns the combined output.
 func (s *Sandbox) Exec(ctx context.Context, command []string) ([]byte, error) {
 	args := append([]string{"exec", s.containerID}, command...)
@@ -733,7 +744,7 @@ func (s *Sandbox) WriteProbeScriptsMulti(ctx context.Context, pkgs []string) {
 				p, requires.String(),
 			)
 			filename := "/tmp/_kojuto_probe_all_" + p + ".js"
-			s.dockerExecRoot(ctx, "sh", "-c", "cat > "+filename+" << 'KOJUTO_EOF'\n"+script+"KOJUTO_EOF")
+			s.dockerWriteFile(ctx, filename, script)
 		}
 		return
 	}
@@ -775,7 +786,7 @@ func (s *Sandbox) WriteProbeScriptsMulti(ctx context.Context, pkgs []string) {
 			p.sep, p.pathsep, p.linesep, imports.String(),
 		)
 		filename := "/tmp/_kojuto_probe_all_" + p.sysplatform + ".py"
-		s.dockerExecRoot(ctx, "sh", "-c", "cat > "+filename+" << 'KOJUTO_EOF'\n"+script+"KOJUTO_EOF")
+		s.dockerWriteFile(ctx, filename, script)
 	}
 }
 
@@ -860,7 +871,7 @@ func (s *Sandbox) WriteProbeScripts(ctx context.Context) {
 			p.sep, p.pathsep, p.linesep, importName,
 		)
 		filename := "/tmp/_kojuto_probe_" + p.sysplatform + ".py"
-		s.dockerExecRoot(ctx, "sh", "-c", "cat > "+filename+" << 'KOJUTO_EOF'\n"+script+"KOJUTO_EOF")
+		s.dockerWriteFile(ctx, filename, script)
 	}
 
 	jsPlatforms := []string{"linux", "win32", "darwin"}
@@ -872,7 +883,7 @@ func (s *Sandbox) WriteProbeScripts(ctx context.Context) {
 			p, s.pkg,
 		)
 		filename := "/tmp/_kojuto_probe_" + p + ".js"
-		s.dockerExecRoot(ctx, "sh", "-c", "cat > "+filename+" << 'KOJUTO_EOF'\n"+script+"KOJUTO_EOF")
+		s.dockerWriteFile(ctx, filename, script)
 	}
 }
 
diff --git a/internal/sandbox/sandbox_extra_test.go b/internal/sandbox/sandbox_extra_test.go
index 3b5a143..9735504 100644
--- a/internal/sandbox/sandbox_extra_test.go
+++ b/internal/sandbox/sandbox_extra_test.go
@@ -2,6 +2,8 @@ package sandbox
 
 import (
 	"context"
+	"os/exec"
+	"reflect"
 	"strings"
 	"testing"
 
@@ -539,3 +541,63 @@ func TestFakeTokens_NotHexOnly(t *testing.T) {
 		t.Error("10 consecutive fakeAWSKeyID tokens were all hex-only")
 	}
 }
+
+// TestDockerWriteFile_StructureNoHeredoc pins the security invariant that
+// dockerWriteFile delivers content via stdin (`docker exec -i`) instead of
+// the previous `cat << 'KOJUTO_EOF'` heredoc. The heredoc form was a root-
+// in-container injection sink: an attacker-controlled package name with
+// an embedded "\nKOJUTO_EOF\n<cmd>" sequence used to terminate the
+// heredoc early and run <cmd> as root. Stdin delivery removes the body
+// from the shell command line entirely.
+func TestDockerWriteFile_StructureNoHeredoc(t *testing.T) {
+	var captured []string
+	orig := execCommand
+	execCommand = func(ctx context.Context, name string, args ...string) *exec.Cmd {
+		captured = append([]string{name}, args...)
+		return exec.CommandContext(ctx, "true")
+	}
+	t.Cleanup(func() { execCommand = orig })
+
+	sb := &Sandbox{containerID: "test-container"}
+	sb.dockerWriteFile(context.Background(), "/tmp/probe.js",
+		"any content with KOJUTO_EOF baked in\nstill not parsed by shell")
+
+	want := []string{
+		"docker", "exec", "-i", "--user=root", "test-container",
+		"sh", "-c", "cat > '/tmp/probe.js'",
+	}
+	if !reflect.DeepEqual(captured, want) {
+		t.Fatalf("docker args mismatch:\n got %q\nwant %q", captured, want)
+	}
+
+	for _, a := range captured {
+		if strings.Contains(a, "<<") {
+			t.Errorf("heredoc syntax leaked into arg %q", a)
+		}
+		if strings.Contains(a, "KOJUTO_EOF") {
+			t.Errorf("KOJUTO_EOF terminator leaked into arg %q", a)
+		}
+	}
+}
+
+// TestDockerWriteFile_QuotesPath confirms the path is single-quoted so a
+// future caller passing a path with shell metacharacters cannot break the
+// command. Every current caller passes a constant path; this test pins
+// the defense-in-depth contract for future code.
+func TestDockerWriteFile_QuotesPath(t *testing.T) {
+	var captured []string
+	orig := execCommand
+	execCommand = func(ctx context.Context, name string, args ...string) *exec.Cmd {
+		captured = append([]string{name}, args...)
+		return exec.CommandContext(ctx, "true")
+	}
+	t.Cleanup(func() { execCommand = orig })
+
+	sb := &Sandbox{containerID: "test-container"}
+	sb.dockerWriteFile(context.Background(), "/tmp/$(rm -rf /)/x", "x")
+
+	last := captured[len(captured)-1]
+	if !strings.Contains(last, `'/tmp/$(rm -rf /)/x'`) {
+		t.Errorf("path not single-quoted in shell arg: %q", last)
+	}
+}

From ba9449b66c380bb1609f6e8e63359a5dbc239d56 Mon Sep 17 00:00:00 2001
From: "Ralian.ENG" <ralianengineering@gmail.com>
Date: Fri, 8 May 2026 17:20:59 +0900
Subject: [PATCH 05/11] fix(sandbox): sanitize getHostUsername to match
 getHostHostname
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

getHostUsername read USER/USERNAME/LOGNAME and returned the raw value;
getHostHostname routed os.Hostname through sanitizeDockerArg before
returning. The two are used in symmetric slots —

  s.mountPoint = "/home/" + hostUser + "/projects"
  hostname     = sanitizeDockerArg(hostHostname)

— and mountPoint then flows into a `-v <packageDir>:<mountPoint>:ro`
docker volume spec and a `pip install --no-index --no-deps
--no-build-isolation <mountPoint>/*` shell glob. A USER value
containing `:` would break the volume separator, and one containing
shell metacharacters would derail the glob expansion.

Env vars are normally trusted, so this is not a concrete vulnerability
in any threat model kojuto currently supports — but the inconsistency
between the two helpers was load-bearing only because nobody set USER
to anything weird. Mirroring the hostname sanitizer removes the
implicit trust dependency at zero cost.

Side change: sanitizeDockerArg now takes an explicit fallback string
instead of hardcoding "localhost". The hostname caller passes
"localhost"; the new username caller passes "user". Removes the
hostname-specific bias from a function that is otherwise a generic
charset filter, and lets the test parameterise both fallback paths.

Tests: TestGetHostUsername_Sanitizes pins the new sanitization with
the same metacharacter cases the hostname test already covered, plus
non-ASCII fallback. TestSanitizeDockerArg expanded to exercise
parameterised fallback and the volume-spec / glob metacharacters.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 internal/sandbox/sandbox.go            | 23 +++++++++++-------
 internal/sandbox/sandbox_extra_test.go | 26 ++++++++++++++++++++
 internal/sandbox/sandbox_test.go       | 33 ++++++++++++++++----------
 3 files changed, 62 insertions(+), 20 deletions(-)

diff --git a/internal/sandbox/sandbox.go b/internal/sandbox/sandbox.go
index f12fd3a..f51d4e1 100644
--- a/internal/sandbox/sandbox.go
+++ b/internal/sandbox/sandbox.go
@@ -223,14 +223,17 @@ func (s *Sandbox) containerArgs() ([]string, error) {
 // The result is sanitized to prevent Docker flag injection via hostile hostnames.
 func getHostHostname() string {
 	if h, err := os.Hostname(); err == nil && h != "" {
-		return sanitizeDockerArg(h)
+		return sanitizeDockerArg(h, "localhost")
 	}
 	return "localhost"
 }
 
-// sanitizeDockerArg strips characters that could break Docker CLI arguments.
-// Docker hostnames must match [a-zA-Z0-9][a-zA-Z0-9_.-].
-func sanitizeDockerArg(s string) string {
+// sanitizeDockerArg strips characters that could break Docker CLI
+// arguments. Output is restricted to [a-zA-Z0-9._-] — the intersection
+// of the Docker hostname rule and the chars that are safe to interpolate
+// into shell command lines and -v <src>:<dst> volume specs without
+// quoting. fallback is returned when every byte was stripped.
+func sanitizeDockerArg(s, fallback string) string {
 	var b strings.Builder
 	for _, c := range s {
 		if (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '-' || c == '.' || c == '_' {
@@ -238,7 +241,7 @@ func sanitizeDockerArg(s string) string {
 		}
 	}
 	if b.Len() == 0 {
-		return "localhost"
+		return fallback
 	}
 	return b.String()
 }
@@ -285,12 +288,16 @@ func getHostResources() (cpus, memory string) {
 	return cpus, memory
 }
 
-// getHostUsername returns the current OS username.
+// getHostUsername returns the current OS username, sanitized to the same
+// charset as getHostHostname. The result flows into mountPoint =
+// "/home/<user>/projects", which is interpolated into a -v <src>:<dst>:ro
+// volume spec and a `pip install ... <mountPoint>/*` shell glob. Even
+// though env vars are normally trusted, mirroring the hostname sanitizer
+// removes any case where a future runner sets USER to something exotic.
 func getHostUsername() string {
-	// Try common env vars (works on Linux, macOS, Windows).
 	for _, key := range []string{"USER", "USERNAME", "LOGNAME"} {
 		if u := os.Getenv(key); u != "" {
-			return u
+			return sanitizeDockerArg(u, "user")
 		}
 	}
 	return "user"
diff --git a/internal/sandbox/sandbox_extra_test.go b/internal/sandbox/sandbox_extra_test.go
index 9735504..8c60b13 100644
--- a/internal/sandbox/sandbox_extra_test.go
+++ b/internal/sandbox/sandbox_extra_test.go
@@ -176,6 +176,32 @@ func TestGetHostUsername(t *testing.T) {
 	}
 }
 
+// TestGetHostUsername_Sanitizes pins that an exotic USER value (shell
+// metacharacters, path separators, non-ASCII) is normalised before it
+// flows into mountPoint and from there into -v volume specs and shell
+// command lines.
+func TestGetHostUsername_Sanitizes(t *testing.T) {
+	cases := []struct {
+		userEnv, want string
+	}{
+		{"alice", "alice"},
+		{"a;rm -rf /", "arm-rf"},
+		{"$(id)", "id"},
+		{"a:b", "ab"},
+		{"a/b", "ab"},
+		{"日本語", "user"},
+	}
+
+	for _, tc := range cases {
+		t.Setenv("USER", tc.userEnv)
+		t.Setenv("USERNAME", "")
+		t.Setenv("LOGNAME", "")
+		if got := getHostUsername(); got != tc.want {
+			t.Errorf("USER=%q: getHostUsername() = %q, want %q", tc.userEnv, got, tc.want)
+		}
+	}
+}
+
 func TestGetHostResources(t *testing.T) {
 	cpus, mem := getHostResources()
 
diff --git a/internal/sandbox/sandbox_test.go b/internal/sandbox/sandbox_test.go
index b95f325..1a9779f 100644
--- a/internal/sandbox/sandbox_test.go
+++ b/internal/sandbox/sandbox_test.go
@@ -128,23 +128,32 @@ func TestHoneypotEnvVars(t *testing.T) {
 
 func TestSanitizeDockerArg(t *testing.T) {
 	cases := []struct {
-		input string
-		want  string
+		input    string
+		fallback string
+		want     string
 	}{
-		{"my-host", "my-host"},
-		{"host.local", "host.local"},
-		{"host name", "hostname"},
-		{"--inject", "--inject"},
-		{"$(evil)", "evil"},
-		{"", "localhost"},
-		{"日本語ホスト", "localhost"},
-		{"valid_host-123.local", "valid_host-123.local"},
+		{"my-host", "localhost", "my-host"},
+		{"host.local", "localhost", "host.local"},
+		{"host name", "localhost", "hostname"},
+		{"--inject", "localhost", "--inject"},
+		{"$(evil)", "localhost", "evil"},
+		{"", "localhost", "localhost"},
+		{"日本語ホスト", "localhost", "localhost"},
+		{"valid_host-123.local", "localhost", "valid_host-123.local"},
+		// Verify the fallback is parameterised, not hardcoded to "localhost".
+		{"", "user", "user"},
+		{"$(evil)", "user", "evil"},
+		{"日本語", "user", "user"},
+		// Shell / volume-spec metacharacters get stripped.
+		{"a;rm -rf /", "user", "arm-rf"},
+		{"a/b", "user", "ab"},
+		{"a:b", "user", "ab"},
 	}
 
 	for _, tc := range cases {
-		got := sanitizeDockerArg(tc.input)
+		got := sanitizeDockerArg(tc.input, tc.fallback)
 		if got != tc.want {
-			t.Errorf("sanitizeDockerArg(%q) = %q, want %q", tc.input, got, tc.want)
+			t.Errorf("sanitizeDockerArg(%q, %q) = %q, want %q", tc.input, tc.fallback, got, tc.want)
 		}
 	}
 }

From 9fc198ff632983b4569c69940c25b7d73aed9c2e Mon Sep 17 00:00:00 2001
From: "Ralian.ENG" <ralianengineering@gmail.com>
Date: Fri, 8 May 2026 17:32:13 +0900
Subject: [PATCH 06/11] fix(config): warn on stderr when lenient mode honors
 kojuto.yml excludes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`--strict` defaults to false on the CLI but true in the GitHub Action.
Local interactive runs therefore silently honor `sensitive_paths.exclude`
in the cwd's kojuto.yml. An untrusted repo (or extracted tarball) can
plant a kojuto.yml that shrinks the detection surface without the user
noticing — a "clean" verdict from such a run is not actually clean for
the excluded categories.

`--strict` already exists as the right answer for that threat, but the
existing UX only surfaces exclusion activity when --strict is ON
(printing the paths it's overriding). The lenient path was silent.

Now preRunLoadConfig emits a stderr warning whenever exclusions are
honored, listing the count and the actual paths and pointing at
--strict. The honoring behavior is unchanged — this is purely a
visibility fix so an attacker-planted exclude appears in CI logs and
interactive output before the verdict is trusted.

Test: TestPreRunLoadConfig_LenientWarnsOnExclude captures stderr
across a temp config with two excluded paths and asserts the warning
contains the count, both paths, and the --strict pointer.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 cmd/root.go      | 19 +++++++++++++++----
 cmd/root_test.go | 49 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 64 insertions(+), 4 deletions(-)

diff --git a/cmd/root.go b/cmd/root.go
index 270fadc..713cbf3 100644
--- a/cmd/root.go
+++ b/cmd/root.go
@@ -165,10 +165,21 @@ func preRunLoadConfig(_ *cobra.Command, _ []string) error {
 		return fmt.Errorf("loading config %s: %w", cfgPath, err)
 	}
 
-	if flagStrict && len(cfg.SensitivePaths.Exclude) > 0 {
-		fmt.Fprintf(os.Stderr, "warning: --strict ignoring %d excluded path(s) from config: %v\n",
-			len(cfg.SensitivePaths.Exclude), cfg.SensitivePaths.Exclude)
-		cfg.SensitivePaths.Exclude = nil
+	if len(cfg.SensitivePaths.Exclude) > 0 {
+		if flagStrict {
+			fmt.Fprintf(os.Stderr, "warning: --strict ignoring %d excluded path(s) from config: %v\n",
+				len(cfg.SensitivePaths.Exclude), cfg.SensitivePaths.Exclude)
+			cfg.SensitivePaths.Exclude = nil
+		} else {
+			// Without --strict, an attacker-planted or otherwise
+			// untrusted kojuto.yml in the cwd silently shrinks the
+			// detection surface. Surface the exclusions on stderr so
+			// the user can see them in CI logs and interactive output
+			// before trusting a "clean" verdict.
+			fmt.Fprintf(os.Stderr, "warning: %s excludes %d sensitive path(s) from monitoring: %v\n",
+				cfgPath, len(cfg.SensitivePaths.Exclude), cfg.SensitivePaths.Exclude)
+			fmt.Fprintln(os.Stderr, "         pass --strict to ignore these exclusions")
+		}
 	}
 
 	paths := config.MergeSensitivePaths(cfg)
diff --git a/cmd/root_test.go b/cmd/root_test.go
index 7786b62..ec1cfcb 100644
--- a/cmd/root_test.go
+++ b/cmd/root_test.go
@@ -634,6 +634,55 @@ func TestPreRunLoadConfig_StrictIgnoresExclude(t *testing.T) {
 	}
 }
 
+// TestPreRunLoadConfig_LenientWarnsOnExclude pins the visibility
+// guarantee added for surface C: when --strict is OFF and the loaded
+// kojuto.yml carries `exclude` entries, a stderr warning surfaces the
+// reduction in coverage so a CI log or interactive run cannot silently
+// trust a "clean" verdict produced under a tampered config. Without
+// this, an attacker who plants kojuto.yml in the cwd of a target repo
+// can shrink the detection surface invisibly.
+func TestPreRunLoadConfig_LenientWarnsOnExclude(t *testing.T) {
+	origConfig := flagConfig
+	origStrict := flagStrict
+	defer func() { flagConfig = origConfig; flagStrict = origStrict }()
+
+	dir := t.TempDir()
+	cfgPath := filepath.Join(dir, "kojuto.yml")
+	if err := os.WriteFile(cfgPath, []byte("sensitive_paths:\n  exclude:\n    - \"/.ssh/\"\n    - \"/.aws/\"\n"), 0o644); err != nil {
+		t.Fatal(err)
+	}
+
+	flagConfig = cfgPath
+	flagStrict = false
+
+	oldStderr := os.Stderr
+	r, w, err := os.Pipe()
+	if err != nil {
+		t.Fatal(err)
+	}
+	os.Stderr = w
+	t.Cleanup(func() { os.Stderr = oldStderr })
+
+	runErr := preRunLoadConfig(nil, nil)
+	w.Close()
+
+	data, _ := io.ReadAll(r)
+	got := string(data)
+
+	if runErr != nil {
+		t.Fatalf("preRunLoadConfig returned error: %v", runErr)
+	}
+	if !strings.Contains(got, "excludes 2 sensitive path(s)") {
+		t.Errorf("stderr missing exclusion-count warning, got:\n%s", got)
+	}
+	if !strings.Contains(got, "/.ssh/") || !strings.Contains(got, "/.aws/") {
+		t.Errorf("stderr missing the actual excluded paths, got:\n%s", got)
+	}
+	if !strings.Contains(got, "--strict") {
+		t.Errorf("stderr missing pointer to --strict flag, got:\n%s", got)
+	}
+}
+
 func TestPreRunLoadConfig_InvalidConfig(t *testing.T) {
 	origConfig := flagConfig
 	origStrict := flagStrict

From f62339195d3a2abaf239ec56ef1bb558c702b012 Mon Sep 17 00:00:00 2001
From: "Ralian.ENG" <ralianengineering@gmail.com>
Date: Fri, 8 May 2026 17:39:39 +0900
Subject: [PATCH 07/11] fix(report,hooks): escape control bytes in
 attacker-controllable strings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A malicious package could embed ANSI escape sequences (or any C0
control byte) into source compiled via `compile()` or `exec()`. The
audit hook captured the snippet via repr() / str() and emitted it on
the wire; the Go side put it in `code_snippet` and json.Marshal
encoded it as `...`. Structurally the JSON was fine, but a
common downstream consumer pattern is `jq -r .events[].code_snippet`,
which decodes the JSON string literally — handing a real ESC byte to
the user's terminal. The attacker could then clear the screen and
paint a fake "verdict: clean" line in green, hiding the real
suspicious verdict.

Same vector exists for `comm` (a malicious process can `prctl(PR_SET_
NAME, "evil\x1b[31m")` with control bytes) and any future attacker-
reachable string field.

Two-layer fix:

1. sitecustomize.py's _t() now escapes every C0 control byte and DEL
   (0x00-0x1f, 0x7f) to a printable `\xNN` form, with the conventional
   \n / \r / \t shorthand for the common cases. The wire-side _w()
   already strips line terminators for framing safety; this extends
   the same idea to the snippet field's content for raw-consumer
   safety. Closes the gap at the source.

2. report.WriteJSON now walks every event and runs each string field
   through sanitizeControl before encoding. Reflection picks up every
   string field of types.SyscallEvent so future fields are covered
   automatically. kojuto-generated strings (Syscall, Category, Phase,
   etc.) never contain control bytes, so the pass is a no-op for them.

Tests:

- TestSanitizeControl exercises the helper across plain ASCII, UTF-8
  multibyte, individual control bytes, and a mixed-byte vector.
- TestWriteJSON_StripsAnsiFromAttackerFields plants control bytes in
  CodeSnippet, Comm, Cmdline, and FilePath; round-trips through
  WriteJSON + json.Unmarshal; asserts no decoded field contains a raw
  byte below 0x20 or 0x7f, and that the on-the-wire JSON contains a
  printable backslash-escaped form for the ESC byte.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 internal/report/report.go               | 71 ++++++++++++++++++++++-
 internal/report/report_test.go          | 76 +++++++++++++++++++++++++
 internal/sandbox/hooks/sitecustomize.py | 22 ++++++-
 3 files changed, 167 insertions(+), 2 deletions(-)

diff --git a/internal/report/report.go b/internal/report/report.go
index daae717..acb9fc7 100644
--- a/internal/report/report.go
+++ b/internal/report/report.go
@@ -4,6 +4,8 @@ import (
 	"encoding/json"
 	"fmt"
 	"io"
+	"reflect"
+	"strings"
 	"time"
 
 	"github.com/RalianENG/kojuto/internal/types"
@@ -29,8 +31,19 @@ func Generate(pkg, version, ecosystem, verdict, probeMethod string, events []typ
 	}
 }
 
-// WriteJSON writes the report as indented JSON to w.
+// WriteJSON writes the report as indented JSON to w. Every string field
+// of every event is run through sanitizeControl first so a downstream
+// consumer that decodes the JSON and prints a string field raw (e.g.
+// `jq -r .events[].code_snippet`) cannot be fed an attacker-controlled
+// ANSI escape sequence smuggled in via a malicious package's audit-hook
+// snippet, prctl-set comm name, or future attacker-reachable string
+// slot. The Python audit hook performs the same escape on its own side;
+// this is the host-side belt-and-braces guard.
 func WriteJSON(r *types.Report, w io.Writer) error {
+	for i := range r.Events {
+		sanitizeEventStrings(&r.Events[i])
+	}
+
 	enc := json.NewEncoder(w)
 	enc.SetIndent("", "  ")
 
@@ -40,3 +53,59 @@ func WriteJSON(r *types.Report, w io.Writer) error {
 
 	return nil
 }
+
+// sanitizeEventStrings rewrites every string field of e in place so any
+// C0 control byte (0x00-0x1f) or DEL (0x7f) that an attacker placed in
+// an event field is rendered as a printable `\xNN` escape rather than
+// passing through to the report consumer's terminal.
+//
+// Reflection is used so future string fields added to SyscallEvent are
+// covered automatically. Non-string fields are skipped. Fields kojuto
+// itself populates (Syscall, Category, Phase, etc.) are no-ops because
+// their values never contain control bytes.
+func sanitizeEventStrings(e *types.SyscallEvent) {
+	v := reflect.ValueOf(e).Elem()
+	for i := 0; i < v.NumField(); i++ {
+		f := v.Field(i)
+		if f.Kind() == reflect.String && f.CanSet() {
+			f.SetString(sanitizeControl(f.String()))
+		}
+	}
+}
+
+// sanitizeControl escapes C0 control bytes and DEL to printable forms.
+// Common whitespace bytes use the conventional \n / \r / \t shorthand;
+// all others render as \xNN. Any other byte (printable ASCII, UTF-8
+// multibyte, etc.) passes through unchanged.
+func sanitizeControl(s string) string {
+	if !needsControlEscape(s) {
+		return s
+	}
+	var b strings.Builder
+	b.Grow(len(s))
+	for _, r := range s {
+		switch {
+		case r == '\n':
+			b.WriteString(`\n`)
+		case r == '\r':
+			b.WriteString(`\r`)
+		case r == '\t':
+			b.WriteString(`\t`)
+		case r < 0x20 || r == 0x7f:
+			fmt.Fprintf(&b, `\x%02x`, r)
+		default:
+			b.WriteRune(r)
+		}
+	}
+	return b.String()
+}
+
+func needsControlEscape(s string) bool {
+	for i := 0; i < len(s); i++ {
+		c := s[i]
+		if c < 0x20 || c == 0x7f {
+			return true
+		}
+	}
+	return false
+}
diff --git a/internal/report/report_test.go b/internal/report/report_test.go
index c757610..88f73d0 100644
--- a/internal/report/report_test.go
+++ b/internal/report/report_test.go
@@ -85,3 +85,79 @@ func TestWriteJSON(t *testing.T) {
 		t.Errorf("expected clean, got %s", decoded.Verdict)
 	}
 }
+
+func TestSanitizeControl(t *testing.T) {
+	cases := []struct {
+		in, want string
+	}{
+		{"plain ascii", "plain ascii"},
+		{"日本語", "日本語"},
+		{"with\nnewline", `with\nnewline`},
+		{"with\ttab", `with\ttab`},
+		{"esc\x1b[2J", `esc\x1b[2J`},
+		{"\x00null", `\x00null`},
+		{"\x07bell", `\x07bell`},
+		{"del\x7f", `del\x7f`},
+		{"all\x00\x01\x1b\x7fclean", `all\x00\x01\x1b\x7fclean`},
+	}
+	for _, tc := range cases {
+		if got := sanitizeControl(tc.in); got != tc.want {
+			t.Errorf("sanitizeControl(%q) = %q, want %q", tc.in, got, tc.want)
+		}
+	}
+}
+
+// TestWriteJSON_StripsAnsiFromAttackerFields pins the security
+// invariant: any attacker-controlled string field (CodeSnippet, Comm,
+// Cmdline, etc.) carrying ANSI escape sequences is rendered into the
+// JSON as a printable backslash escape, so a downstream `jq -r` style
+// raw-string consumer cannot be fed terminal control bytes.
+func TestWriteJSON_StripsAnsiFromAttackerFields(t *testing.T) {
+	events := []types.SyscallEvent{
+		{
+			Timestamp:   time.Now(),
+			PID:         1,
+			Syscall:     types.EventDynamicExec,
+			AuditEvent:  "exec",
+			CodeSnippet: "payload\x1b[2J\x1b[Hverdict: clean",
+			Comm:        "evil\x1b[31m",
+			Cmdline:     "sh\x07-c\x00boom",
+			FilePath:    "/tmp/\x1bweird",
+		},
+	}
+	r := Generate("badpkg", "1.0.0", types.EcosystemPyPI, types.VerdictSuspicious, "ebpf", events, 0, 0, nil)
+
+	var buf bytes.Buffer
+	if err := WriteJSON(&r, &buf); err != nil {
+		t.Fatalf("WriteJSON failed: %v", err)
+	}
+
+	// The serialised JSON must not contain a literal ESC byte (0x1b)
+	// or BEL/NUL anywhere, even after JSON decoding.
+	var decoded types.Report
+	if err := json.Unmarshal(buf.Bytes(), &decoded); err != nil {
+		t.Fatalf("unmarshal: %v", err)
+	}
+
+	for _, e := range decoded.Events {
+		fields := []struct{ name, val string }{
+			{"CodeSnippet", e.CodeSnippet},
+			{"Comm", e.Comm},
+			{"Cmdline", e.Cmdline},
+			{"FilePath", e.FilePath},
+		}
+		for _, f := range fields {
+			for i := 0; i < len(f.val); i++ {
+				c := f.val[i]
+				if c < 0x20 || c == 0x7f {
+					t.Errorf("%s contains raw control byte 0x%02x: %q", f.name, c, f.val)
+					break
+				}
+			}
+		}
+		// Spot-check the encoded form is human-readable.
+		if !bytes.Contains(buf.Bytes(), []byte(`\\x1b`)) {
+			t.Errorf("expected printable backslash-escaped ESC in JSON output, got:\n%s", buf.String())
+		}
+	}
+}
diff --git a/internal/sandbox/hooks/sitecustomize.py b/internal/sandbox/hooks/sitecustomize.py
index 295a96c..a1924f7 100644
--- a/internal/sandbox/hooks/sitecustomize.py
+++ b/internal/sandbox/hooks/sitecustomize.py
@@ -41,7 +41,27 @@ def _is_user(fn):
 
 
 def _t(s):
-    s = str(s).replace("\n", "\\n").replace("\r", "\\r")
+    # Escape every C0 control byte (0x00-0x1f) and DEL (0x7f) to a
+    # printable form. The wire format only needs newline-stripping for
+    # framing safety (handled in _w too), but the snippet field
+    # eventually surfaces in report.json's code_snippet — a downstream
+    # consumer that decodes the JSON and prints the raw string (e.g.
+    # `jq -r .events[].code_snippet`) used to receive an unescaped
+    # ESC (0x1b) byte and let an attacker-supplied payload paint
+    # arbitrary text on the user's terminal via ANSI sequences.
+    out = []
+    for ch in str(s):
+        if ch == "\n":
+            out.append("\\n")
+        elif ch == "\r":
+            out.append("\\r")
+        elif ch == "\t":
+            out.append("\\t")
+        elif ord(ch) < 0x20 or ord(ch) == 0x7f:
+            out.append("\\x%02x" % ord(ch))
+        else:
+            out.append(ch)
+    s = "".join(out)
     if len(s) > _MAX:
         return s[:_MAX] + "..."
     return s

From 09f223ffca4be085d656b999495633cb917588da Mon Sep 17 00:00:00 2001
From: "Ralian.ENG" <ralianengineering@gmail.com>
Date: Fri, 8 May 2026 17:47:23 +0900
Subject: [PATCH 08/11] fix(cmd): owner-only mode for scan report and pinned
 manifest outputs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

gosec G306 flagged three output writers using 0o644 (or os.Create's
default 0o666 then umask-clipped). All three carry data that
shouldn't leak to other accounts on a multi-user host:

- openOutput / -o report.json: events embed attacker-supplied code
  snippets, dst addrs, file paths, and the full dependency tree of
  the scanned package — anything a malicious package touches at
  install/import time. Switched os.Create to os.OpenFile with an
  explicit 0o600 so the report is owner-only by default; users who
  want to share can chmod after the fact.

- writePinnedPyPI / writePinnedNpm: --pin output captures the
  resolved manifest from the last clean scan. Internal/private
  registry deps and version metadata can be load-bearing for
  inferring the user's stack, so default 0o600 too.

The other gosec G306 hits (sandbox/staging package.json files,
seccomp.json, the in-flight tarball copy) all stay at world-readable
intentionally — the docker daemon and the in-container `dev` user
need to read them, and the tests would silently miss every install-
phase event without the wider mode. Those sites are out of scope
here.

Tests: TestOpenOutput_File and a new TestOutputFiles_OwnerOnly
syscall.Umask(0) the test process and stat the resulting files,
asserting Mode().Perm() == 0o600.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 cmd/root.go      | 19 +++++++++++++++---
 cmd/root_test.go | 50 ++++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 64 insertions(+), 5 deletions(-)

diff --git a/cmd/root.go b/cmd/root.go
index 713cbf3..1842100 100644
--- a/cmd/root.go
+++ b/cmd/root.go
@@ -581,7 +581,12 @@ func writePinnedPyPI(path string, deps []pinnedDep) error {
 			fmt.Fprintf(&b, "%s\n", dep.Name)
 		}
 	}
-	return os.WriteFile(path, []byte(b.String()), 0o644)
+	// 0o600 — pinned files can carry private package names (internal
+	// registry deps) and embedded version metadata; default to user-
+	// only readable so a multi-user host doesn't leak the manifest to
+	// other accounts. Caller can chmod after the fact if they want to
+	// share.
+	return os.WriteFile(path, []byte(b.String()), 0o600)
 }
 
 func writePinnedNpm(path string, deps []pinnedDep) error {
@@ -606,7 +611,9 @@ func writePinnedNpm(path string, deps []pinnedDep) error {
 	}
 	jsonBytes = append(jsonBytes, '\n')
 
-	return os.WriteFile(path, jsonBytes, 0o644)
+	// 0o600 for the same reason as writePinnedPyPI — keep the pinned
+	// manifest from leaking to other users on a shared host.
+	return os.WriteFile(path, jsonBytes, 0o600)
 }
 
 // runLocalScan scans a local package file (.whl, .tgz) or directory.
@@ -1137,7 +1144,13 @@ func openOutput() (*os.File, error) {
 		return os.Stdout, nil
 	}
 
-	f, err := os.Create(flagOutput)
+	// 0o600 — the report can carry attacker-supplied code snippets,
+	// inferred file paths, and any internal package names from the
+	// scanned dependency tree. os.Create defaults to 0o666 (then
+	// umask-clipped, typically 0o644), which would leak the report
+	// to other users on a multi-user host. Owner-only by default;
+	// users who explicitly want to share can chmod after the fact.
+	f, err := os.OpenFile(flagOutput, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o600)
 	if err != nil {
 		return nil, fmt.Errorf("creating output file: %w", err)
 	}
diff --git a/cmd/root_test.go b/cmd/root_test.go
index ec1cfcb..678d118 100644
--- a/cmd/root_test.go
+++ b/cmd/root_test.go
@@ -8,6 +8,7 @@ import (
 	"os"
 	"path/filepath"
 	"strings"
+	"syscall"
 	"testing"
 
 	"github.com/RalianENG/kojuto/internal/types"
@@ -350,8 +351,53 @@ func TestOpenOutput_File(t *testing.T) {
 	}
 
 	// Verify the file was actually created.
-	if _, err := os.Stat(flagOutput); err != nil {
-		t.Errorf("output file was not created: %v", err)
+	info, err := os.Stat(flagOutput)
+	if err != nil {
+		t.Fatalf("output file was not created: %v", err)
+	}
+	// Owner-only mode: gosec G306. Reports embed attacker-supplied
+	// snippets / paths that shouldn't leak to other users on a shared
+	// host. Test under a known umask so we measure the requested mode
+	// rather than the system umask's clipping.
+	oldUmask := syscall.Umask(0)
+	defer syscall.Umask(oldUmask)
+	if mode := info.Mode().Perm(); mode != 0o600 {
+		t.Errorf("output file mode = %o, want 0o600", mode)
+	}
+}
+
+// TestOutputFiles_OwnerOnly pins the 0o600 contract for both pinned-
+// manifest writers as well — they share the same threat model as the
+// main report (multi-user host, package metadata leakage).
+func TestOutputFiles_OwnerOnly(t *testing.T) {
+	oldUmask := syscall.Umask(0)
+	defer syscall.Umask(oldUmask)
+
+	dir := t.TempDir()
+	deps := []pinnedDep{{Name: "lodash", Version: "4.17.21"}}
+
+	pyPath := filepath.Join(dir, "requirements.txt")
+	if err := writePinnedPyPI(pyPath, deps); err != nil {
+		t.Fatalf("writePinnedPyPI: %v", err)
+	}
+	pyInfo, err := os.Stat(pyPath)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if mode := pyInfo.Mode().Perm(); mode != 0o600 {
+		t.Errorf("pinned requirements.txt mode = %o, want 0o600", mode)
+	}
+
+	npmPath := filepath.Join(dir, "package.json")
+	if err := writePinnedNpm(npmPath, deps); err != nil {
+		t.Fatalf("writePinnedNpm: %v", err)
+	}
+	npmInfo, err := os.Stat(npmPath)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if mode := npmInfo.Mode().Perm(); mode != 0o600 {
+		t.Errorf("pinned package.json mode = %o, want 0o600", mode)
 	}
 }
 

From 64788b29c49d670eecad13a989ea651364c753d6 Mon Sep 17 00:00:00 2001
From: "Ralian.ENG" <ralianengineering@gmail.com>
Date: Fri, 8 May 2026 18:15:17 +0900
Subject: [PATCH 09/11] fix(probe,analyzer): close three detection-bypass paths
 in strace + eBPF
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Broad re-audit (post-PR-#18 sweep) surfaced three concrete bypasses
where a malicious package could escape kojuto's monitoring without
tripping any current detector. All three are now closed; each ships
with a regression test pinning the security invariant.

1. strace stderr scanner used bufio.Scanner with the default 64 KiB
   line cap and never checked scanner.Err(). The docker-exec stderr
   is a multiplex of strace trace lines AND the tracee's own writes
   to fd 2, so a single `process.stderr.write("A".repeat(70000))` in
   a preinstall script tripped ErrTooLong, the for-loop exited, and
   every subsequent rename / openat / connect / execve recorded by
   strace was silently dropped — yielding a CLEAN verdict on a
   package that did anything observable after the long write.

   Both call sites (container_strace.parseStraceOutput and
   fallback.StartWithPID) now `Buffer()` to a 16 MiB cap and treat
   any post-loop scanner.Err() as `dropped++`. The dropped counter
   already converts the verdict to inconclusive in cmd/root, so the
   bypass is closed without changing the verdict logic.

2. straceRenameRe / straceRenameatRe used `"([^"]+)"` to capture
   filenames, which fails on strace's C-style backslash-escaped
   quotes. A package that renames `/tmp/x"y` (a legal Unix filename
   containing `"`) over `/usr/local/bin/python3` produced an strace
   line `rename("/tmp/x\"y", "/usr/local/bin/python3") = 0` that the
   regex did not match — no event reached the analyzer, the binary
   hijack category never fired, and a subsequent execve of the
   hijacked path was filtered as benign by `isBenignExec`.

   The capture is now `((?:\\.|[^"\\])+)` for both `rename` and
   `renameat[2]` paths.

3. The eBPF vfs_rename probe writes `d_name` (a qstr basename) into
   evt.path / evt.path2, so eBPF mode delivers a rename event with
   `DstPath = "python3"` and no parent directory. `isBenignRename`
   computed `destDir = path.Dir("python3") + "/" = "./"` which never
   matched the allowed dirs (e.g. `/usr/local/bin/`) — the function
   fell through to `return true` and treated every rename as benign.
   Result: in `--probe-method=ebpf`, NO rename was ever flagged,
   regardless of its target.

   `isBenignRename` now fail-safes when DstPath has no leading "/":
   a basename matching a tracked binary stays suspicious. Strace
   mode emits absolute paths and is unaffected.

Tests:

- TestParseStraceOutput_LongLineFlipsToInconclusive feeds a
  straceMaxLine+1024-byte single line via a chunkReader and asserts
  cs.dropped > 0 after parseStraceOutput returns.
- TestParseStraceLine_RenameWithEscapedQuotes exercises rename and
  renameat lines whose src/dst contain `\"`, asserting the event is
  produced with the expected SrcPath/DstPath.
- TestAnalyze_RenameTrustedBinary gains three basename-only sub-cases
  covering python3/sh as suspicious and an unknown name as clean.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 internal/analyzer/analyzer.go             | 26 +++++++++---
 internal/analyzer/analyzer_test.go        | 10 +++++
 internal/probe/container_strace.go        | 21 +++++++++
 internal/probe/container_strace_test.go   | 52 +++++++++++++++++++++++
 internal/probe/fallback.go                |  9 ++++
 internal/probe/strace_parse.go            | 10 ++++-
 internal/probe/strace_parse_extra_test.go | 52 +++++++++++++++++++++++
 7 files changed, 172 insertions(+), 8 deletions(-)

diff --git a/internal/analyzer/analyzer.go b/internal/analyzer/analyzer.go
index c5a27de..9ee4392 100644
--- a/internal/analyzer/analyzer.go
+++ b/internal/analyzer/analyzer.go
@@ -942,13 +942,27 @@ var fileOpCommands = map[string]bool{
 // Renames to other destinations (e.g. pip installing a new CLI script) are benign.
 func isBenignRename(evt *types.SyscallEvent) bool {
 	destBase := path.Base(evt.DstPath)
-	destDir := path.Dir(evt.DstPath) + "/"
+	allowedDirs, ok := benignPaths[destBase]
+	if !ok {
+		return true
+	}
 
-	if allowedDirs, ok := benignPaths[destBase]; ok {
-		for _, d := range allowedDirs {
-			if destDir == d {
-				return false
-			}
+	// Basename-only DstPath (no leading "/") means the probe captured
+	// only the dentry name and not the parent directory — the eBPF
+	// probe takes this shape because vfs_rename's renamedata gives us
+	// d_name (a qstr basename) rather than a full path. Without the
+	// parent we cannot prove the rename targets a trusted directory,
+	// so fail-safe: a rename whose basename matches a tracked binary
+	// (python3, node, sh, ...) stays suspicious. Strace mode emits
+	// absolute paths and falls through to the directory check below.
+	if !strings.HasPrefix(evt.DstPath, "/") {
+		return false
+	}
+
+	destDir := path.Dir(evt.DstPath) + "/"
+	for _, d := range allowedDirs {
+		if destDir == d {
+			return false
 		}
 	}
 
diff --git a/internal/analyzer/analyzer_test.go b/internal/analyzer/analyzer_test.go
index 047ad55..50aac42 100644
--- a/internal/analyzer/analyzer_test.go
+++ b/internal/analyzer/analyzer_test.go
@@ -279,6 +279,16 @@ func TestAnalyze_RenameTrustedBinary(t *testing.T) {
 		{"new CLI script", "/usr/local/bin/my-tool", types.VerdictClean},
 		{"install dir", "/install/lib/module.so", types.VerdictClean},
 		{"tmp rename", "/tmp/a", types.VerdictClean},
+		// Basename-only DstPath: the eBPF probe captures only the
+		// dentry name from vfs_rename's renamedata, not the parent
+		// path. Without the parent we can't verify the rename is in
+		// a trusted dir, so a basename matching a system binary must
+		// stay suspicious. Otherwise an eBPF-mode scan of a malware
+		// sample that renames over /usr/local/bin/python3 silently
+		// reports CLEAN.
+		{"basename-only python3 (eBPF mode)", "python3", types.VerdictSuspicious},
+		{"basename-only sh (eBPF mode)", "sh", types.VerdictSuspicious},
+		{"basename-only unknown (eBPF mode)", "myapp", types.VerdictClean},
 	}
 
 	for _, tc := range cases {
diff --git a/internal/probe/container_strace.go b/internal/probe/container_strace.go
index f5655da..b50fc5e 100644
--- a/internal/probe/container_strace.go
+++ b/internal/probe/container_strace.go
@@ -6,11 +6,21 @@ import (
 	"errors"
 	"fmt"
 	"io"
+	"os"
 	"os/exec"
 
 	"github.com/RalianENG/kojuto/internal/types"
 )
 
+// straceMaxLine bounds the size of a single strace stderr line. With
+// `-s 256` the per-arg cap is 256 bytes, so realistic execve+argv lines
+// stay well under 1 MiB. We allow 16 MiB to accommodate exotic kernels
+// or tracee writes interleaved on the shared docker exec stderr; lines
+// longer than this are treated as adversarial (a malicious package
+// trying to overflow bufio.Scanner's default 64 KiB cap so the scan
+// loop exits and subsequent strace events are silently dropped).
+const straceMaxLine = 16 * 1024 * 1024
+
 // ContainerStrace monitors connect(2) syscalls by running strace inside the Docker container.
 // This works on all platforms where Docker is available (Linux, macOS, Windows).
 // ContainerStrace monitors connect(2) syscalls by running strace inside the Docker container.
@@ -96,6 +106,7 @@ func (c *ContainerStrace) parseStraceOutput(stderr io.ReadCloser, done chan<- st
 
 	state := NewParseState()
 	scanner := bufio.NewScanner(stderr)
+	scanner.Buffer(make([]byte, 64*1024), straceMaxLine)
 	for scanner.Scan() {
 		evt, ok := parseStraceLine(scanner.Text(), state)
 		if !ok {
@@ -112,6 +123,16 @@ func (c *ContainerStrace) parseStraceOutput(stderr io.ReadCloser, done chan<- st
 			c.dropped++
 		}
 	}
+	if err := scanner.Err(); err != nil {
+		// bufio.ErrTooLong here means the tracee wrote a >16 MiB
+		// chunk to the shared docker-exec stderr without a newline,
+		// most plausibly to disable parsing of subsequent strace
+		// trace lines. Any other error means the pipe died early.
+		// Either way we lost an unknown number of events, so flip
+		// the verdict to inconclusive via the dropped counter.
+		c.dropped++
+		fmt.Fprintf(os.Stderr, "warning: strace stderr scanner aborted: %v\n", err)
+	}
 }
 
 // drainReader reads from r up to 10MB and returns the content.
diff --git a/internal/probe/container_strace_test.go b/internal/probe/container_strace_test.go
index 92e36e3..77de5e2 100644
--- a/internal/probe/container_strace_test.go
+++ b/internal/probe/container_strace_test.go
@@ -265,3 +265,55 @@ func TestParseStraceOutput_Done(_ *testing.T) {
 
 	<-parseDone // should complete without hanging
 }
+
+// chunkReader returns the same chunk repeatedly until `remaining` bytes
+// have been served, then EOF. Used to feed bufio.Scanner a giant single
+// "line" without allocating it all up front.
+type chunkReader struct {
+	chunk     []byte
+	remaining int
+}
+
+func (r *chunkReader) Read(p []byte) (int, error) {
+	if r.remaining <= 0 {
+		return 0, io.EOF
+	}
+	n := copy(p, r.chunk)
+	if n > r.remaining {
+		n = r.remaining
+	}
+	r.remaining -= n
+	return n, nil
+}
+
+func (r *chunkReader) Close() error { return nil }
+
+// TestParseStraceOutput_LongLineFlipsToInconclusive pins the security
+// fix for the prior detection-bypass: a malicious package could write
+// >64 KiB without a newline to the shared docker-exec stderr (which
+// carries strace trace output too), trip bufio.Scanner's default cap,
+// silently terminate the parser loop, and let every subsequent strace
+// event evade the analyzer. The fix raises the cap to straceMaxLine
+// and turns any scanner error into a `dropped++` so the verdict logic
+// flips to inconclusive instead of trusting the truncated trace.
+func TestParseStraceOutput_LongLineFlipsToInconclusive(t *testing.T) {
+	// Slightly more than straceMaxLine, no newlines, so bufio.Scanner
+	// returns ErrTooLong. Use a chunkReader to avoid allocating the
+	// full payload — straceMaxLine is 16 MiB.
+	reader := &chunkReader{
+		chunk:     bytes.Repeat([]byte{'A'}, 4096),
+		remaining: straceMaxLine + 1024,
+	}
+
+	cs := &ContainerStrace{
+		events: make(chan types.SyscallEvent, 16),
+		done:   make(chan struct{}),
+	}
+	parseDone := make(chan struct{})
+	go cs.parseStraceOutput(reader, parseDone)
+	<-parseDone
+
+	if cs.dropped == 0 {
+		t.Error("expected dropped > 0 after scanner overflow, got 0")
+	}
+}
diff --git a/internal/probe/fallback.go b/internal/probe/fallback.go
index 3428967..da1633f 100644
--- a/internal/probe/fallback.go
+++ b/internal/probe/fallback.go
@@ -6,6 +6,7 @@ import (
 	"bufio"
 	"errors"
 	"fmt"
+	"os"
 	"os/exec"
 	"strconv"
 	"sync"
@@ -59,6 +60,7 @@ func (s *StraceFallback) StartWithPID(pid uint32) error {
 		defer close(s.events)
 		state := NewParseState()
 		scanner := bufio.NewScanner(stderr)
+		scanner.Buffer(make([]byte, 64*1024), straceMaxLine)
 		for scanner.Scan() {
 			evt, ok := parseStraceLine(scanner.Text(), state)
 			if ok {
@@ -71,6 +73,13 @@ func (s *StraceFallback) StartWithPID(pid uint32) error {
 				}
 			}
 		}
+		if err := scanner.Err(); err != nil {
+			// See straceMaxLine: an unrecoverable scanner failure
+			// here means subsequent trace lines are lost. Flip to
+			// inconclusive via dropped.
+			s.dropped++
+			fmt.Fprintf(os.Stderr, "warning: strace stderr scanner aborted: %v\n", err)
+		}
 	}()
 
 	return nil
diff --git a/internal/probe/strace_parse.go b/internal/probe/strace_parse.go
index 986e7d5..5e8fd3e 100644
--- a/internal/probe/strace_parse.go
+++ b/internal/probe/strace_parse.go
@@ -87,13 +87,19 @@ var (
 	)
 
 	// rename("/tmp/evil", "/usr/local/bin/python3") = 0.
+	// `(?:\\.|[^"\\])+` accepts strace's C-style backslash escapes
+	// inside the captured filename. The previous `[^"]+` failed on
+	// strace lines whose filename contained a literal `"` (rendered
+	// as `\"` by strace), causing the whole regex to drop the event
+	// — a malicious package could rename `/tmp/x"y` over a trusted
+	// system binary and leave no rename event for the analyzer.
 	straceRenameRe = regexp.MustCompile(
-		`rename\("([^"]+)",\s*"([^"]+)"\)`,
+		`rename\("((?:\\.|[^"\\])+)",\s*"((?:\\.|[^"\\])+)"\)`,
 	)
 
 	// renameat(AT_FDCWD, "old", AT_FDCWD, "new") or renameat2(...)
 	straceRenameatRe = regexp.MustCompile(
-		`renameat2?\([^,]+,\s*"([^"]+)",\s*[^,]+,\s*"([^"]+)"`,
+		`renameat2?\([^,]+,\s*"((?:\\.|[^"\\])+)",\s*[^,]+,\s*"((?:\\.|[^"\\])+)"`,
 	)
 
 	// mmap(NULL, 4096, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f...
diff --git a/internal/probe/strace_parse_extra_test.go b/internal/probe/strace_parse_extra_test.go
index fe11e70..114decd 100644
--- a/internal/probe/strace_parse_extra_test.go
+++ b/internal/probe/strace_parse_extra_test.go
@@ -869,3 +869,55 @@ func TestParseAuditHook_IntegratedWithParseStraceLine(t *testing.T) {
 		t.Errorf("audit_event = %q, want eval", evt.AuditEvent)
 	}
 }
+
+
+// TestParseStraceLine_RenameWithEscapedQuotes pins the security fix for
+// the prior `"([^"]+)"` capture pattern, which silently dropped the
+// rename event whenever a filename contained a literal `"` (escaped by
+// strace as `\"`). Without an event the analyzer never sees the hijack
+// and the verdict stays clean — a malicious package could rename
+// `/tmp/x"y` over `/usr/local/bin/python3` invisibly.
+func TestParseStraceLine_RenameWithEscapedQuotes(t *testing.T) {
+	cases := []struct {
+		name    string
+		line    string
+		wantSrc string
+		wantDst string
+	}{
+		{
+			name:    "rename with escaped quote in src",
+			line:    `[pid 100] rename("/tmp/x\"y", "/usr/local/bin/python3") = 0`,
+			wantSrc: `/tmp/x\"y`,
+			wantDst: "/usr/local/bin/python3",
+		},
+		{
+			name:    "renameat with escaped quote in dst",
+			line:    `[pid 101] renameat(AT_FDCWD, "/tmp/legit", AT_FDCWD, "/etc/x\"y") = 0`,
+			wantSrc: "/tmp/legit",
+			wantDst: `/etc/x\"y`,
+		},
+		{
+			name:    "plain rename still parses",
+			line:    `[pid 102] rename("/tmp/a", "/usr/local/bin/node") = 0`,
+			wantSrc: "/tmp/a",
+			wantDst: "/usr/local/bin/node",
+		},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			evt, ok := parseStraceLine(tc.line, NewParseState())
+			if !ok {
+				t.Fatalf("expected rename to parse, line=%q", tc.line)
+			}
+			if evt.Syscall != types.EventRename {
+				t.Errorf("syscall = %q, want %q", evt.Syscall, types.EventRename)
+			}
+			if evt.SrcPath != tc.wantSrc {
+				t.Errorf("SrcPath = %q, want %q", evt.SrcPath, tc.wantSrc)
+			}
+			if evt.DstPath != tc.wantDst {
+				t.Errorf("DstPath = %q, want %q", evt.DstPath, tc.wantDst)
+			}
+		})
+	}
+}

From e71d423ca9c05a4efdd4ddf44c71275db59895a9 Mon Sep 17 00:00:00 2001
From: "Ralian.ENG" <ralianengineering@gmail.com>
Date: Fri, 8 May 2026 18:28:27 +0900
Subject: [PATCH 10/11] fix: clear golangci-lint violations from the hardening
 series
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The hardening commits (71d6d0a..64788b2) collected nine linter
violations the local go test pass didn't surface. Local CI parity
run with golangci-lint v2.1.6 — the version pinned in .github/
workflows/ci.yml — turned them up; this commit clears them all.

- gocritic regexpSimplify in depfile.go: drop the unnecessary `\/`
  escape inside a non-character-class regex.
- gofmt in strace_parse_extra_test.go and sandbox.go: trailing
  blank line / shQuote comment alignment.
- govet shadow in cmd/root.go and cmd/root_test.go: rename the
  inner err binding so it doesn't shadow an outer one.
- intrange in report.go (×2) and report_test.go (×1): switch
  bounded for-loops to `for i := range N` (Go 1.22+).
- misspell in report_test.go: serialised → serialized.

Re-running `golangci-lint run` reports `0 issues.`. Cross-compile
across all 5 GOOS/GOARCH matrix entries, `go test -race ./...`,
`go mod verify`, and `govulncheck ./...` all pass locally too.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 cmd/root.go                               | 4 ++--
 cmd/root_test.go                          | 4 ++--
 internal/depfile/depfile.go               | 2 +-
 internal/probe/strace_parse_extra_test.go | 1 -
 internal/report/report.go                 | 4 ++--
 internal/report/report_test.go            | 4 ++--
 internal/sandbox/sandbox.go               | 2 +-
 7 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/cmd/root.go b/cmd/root.go
index 1842100..a45e40f 100644
--- a/cmd/root.go
+++ b/cmd/root.go
@@ -671,8 +671,8 @@ func runLocalScan(_ []string) error {
 	// sandbox helpers used to interpolate into shell heredocs and Python/
 	// JS string literals. The dockerWriteFile refactor closed the heredoc
 	// path, but defending at the boundary keeps every future use site safe.
-	if err := downloaderValidate(pkg, ""); err != nil {
-		return fmt.Errorf("local package name derived from %q is unsafe: %w", localPath, err)
+	if validateErr := downloaderValidate(pkg, ""); validateErr != nil {
+		return fmt.Errorf("local package name derived from %q is unsafe: %w", localPath, validateErr)
 	}
 
 	// Auto-detect ecosystem from file extension, but only if -e was not explicitly set.
diff --git a/cmd/root_test.go b/cmd/root_test.go
index 678d118..c0fe972 100644
--- a/cmd/root_test.go
+++ b/cmd/root_test.go
@@ -389,8 +389,8 @@ func TestOutputFiles_OwnerOnly(t *testing.T) {
 	}
 
 	npmPath := filepath.Join(dir, "package.json")
-	if err := writePinnedNpm(npmPath, deps); err != nil {
-		t.Fatalf("writePinnedNpm: %v", err)
+	if writeErr := writePinnedNpm(npmPath, deps); writeErr != nil {
+		t.Fatalf("writePinnedNpm: %v", writeErr)
 	}
 	npmInfo, err := os.Stat(npmPath)
 	if err != nil {
diff --git a/internal/depfile/depfile.go b/internal/depfile/depfile.go
index 56cc0d8..17e85cb 100644
--- a/internal/depfile/depfile.go
+++ b/internal/depfile/depfile.go
@@ -20,7 +20,7 @@ import (
 // validation here gives the user a clear error instead of a silently
 // malformed command.
 var (
-	npmNameRe  = regexp.MustCompile(`^(@[a-z0-9][a-z0-9._~-]*\/)?[a-z0-9][a-z0-9._~-]*$`)
+	npmNameRe  = regexp.MustCompile(`^(@[a-z0-9][a-z0-9._~-]*/)?[a-z0-9][a-z0-9._~-]*$`)
 	pypiNameRe = regexp.MustCompile(`^[A-Za-z0-9][A-Za-z0-9._-]*$`)
 )
 
diff --git a/internal/probe/strace_parse_extra_test.go b/internal/probe/strace_parse_extra_test.go
index 114decd..f27c6c0 100644
--- a/internal/probe/strace_parse_extra_test.go
+++ b/internal/probe/strace_parse_extra_test.go
@@ -870,7 +870,6 @@ func TestParseAuditHook_IntegratedWithParseStraceLine(t *testing.T) {
 	}
 }
 
-
 // TestParseStraceLine_RenameWithEscapedQuotes pins the security fix for
 // the prior `"([^"]+)"` capture pattern, which silently dropped the
 // rename event whenever a filename contained a literal `"` (escaped by
diff --git a/internal/report/report.go b/internal/report/report.go
index acb9fc7..02de4cb 100644
--- a/internal/report/report.go
+++ b/internal/report/report.go
@@ -65,7 +65,7 @@ func WriteJSON(r *types.Report, w io.Writer) error {
 // their values never contain control bytes.
 func sanitizeEventStrings(e *types.SyscallEvent) {
 	v := reflect.ValueOf(e).Elem()
-	for i := 0; i < v.NumField(); i++ {
+	for i := range v.NumField() {
 		f := v.Field(i)
 		if f.Kind() == reflect.String && f.CanSet() {
 			f.SetString(sanitizeControl(f.String()))
@@ -101,7 +101,7 @@ func sanitizeControl(s string) string {
 }
 
 func needsControlEscape(s string) bool {
-	for i := 0; i < len(s); i++ {
+	for i := range len(s) {
 		c := s[i]
 		if c < 0x20 || c == 0x7f {
 			return true
diff --git a/internal/report/report_test.go b/internal/report/report_test.go
index 88f73d0..f9e8e71 100644
--- a/internal/report/report_test.go
+++ b/internal/report/report_test.go
@@ -132,7 +132,7 @@ func TestWriteJSON_StripsAnsiFromAttackerFields(t *testing.T) {
 		t.Fatalf("WriteJSON failed: %v", err)
 	}
 
-	// The serialised JSON must not contain a literal ESC byte (0x1b)
+	// The serialized JSON must not contain a literal ESC byte (0x1b)
 	// or BEL/NUL anywhere, even after JSON decoding.
 	var decoded types.Report
 	if err := json.Unmarshal(buf.Bytes(), &decoded); err != nil {
@@ -147,7 +147,7 @@ func TestWriteJSON_StripsAnsiFromAttackerFields(t *testing.T) {
 			{"FilePath", e.FilePath},
 		}
 		for _, f := range fields {
-			for i := 0; i < len(f.val); i++ {
+			for i := range len(f.val) {
 				c := f.val[i]
 				if c < 0x20 || c == 0x7f {
 					t.Errorf("%s contains raw control byte 0x%02x: %q", f.name, c, f.val)
diff --git a/internal/sandbox/sandbox.go b/internal/sandbox/sandbox.go
index f51d4e1..75da4c0 100644
--- a/internal/sandbox/sandbox.go
+++ b/internal/sandbox/sandbox.go
@@ -729,7 +729,7 @@ func npmLifecycleScript(pkgs []string) string {
 }
 
 // shQuote wraps s in POSIX single quotes, escaping any embedded single
-// quote with the standard '\'' close-escape-reopen idiom. Safe for use
+// quote with the standard '\” close-escape-reopen idiom. Safe for use
 // in /bin/sh, dash, and bash.
 func shQuote(s string) string {
 	return "'" + strings.ReplaceAll(s, "'", `'\''`) + "'"

From 5e3cca3361283353ef33a7e76f4d63f0d841b178 Mon Sep 17 00:00:00 2001
From: "Ralian.ENG" <ralianengineering@gmail.com>
Date: Fri, 8 May 2026 18:37:05 +0900
Subject: [PATCH 11/11] fix(cmd): split file-mode tests behind !windows build
 tag
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CI on windows-latest failed to compile cmd/root_test.go because
syscall.Umask is undefined on Windows. The umask call exists in
TestOpenOutput_File and TestOutputFiles_OwnerOnly so the asserted
mode is the requested 0o600 rather than the user's umask-clipped
result — Windows has no umask and Mode().Perm() doesn't reflect
Unix-style bits there, so the assertion is meaningless on Windows
even if it built.

Move the perm-check tests to a new cmd/root_unix_test.go behind
`//go:build !windows`. TestOpenOutput_File stays in root_test.go
in its original cross-platform shape (verifies the file is created
and is not os.Stdout); the mode assertion now lives in the new
TestOpenOutput_FileMode in the unix file. TestOutputFiles_OwnerOnly
moves wholesale.

Verified locally:
- GOOS=windows GOARCH=amd64 go test -c ./cmd/ now compiles
- GOOS=linux   GOARCH=amd64 go test -c ./cmd/ still compiles
- go test -race ./cmd/ green at 72.6% coverage
- golangci-lint v2.1.6 reports 0 issues

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 cmd/root_test.go      | 55 ++++-------------------------
 cmd/root_unix_test.go | 80 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 86 insertions(+), 49 deletions(-)
 create mode 100644 cmd/root_unix_test.go

diff --git a/cmd/root_test.go b/cmd/root_test.go
index c0fe972..ba56d7f 100644
--- a/cmd/root_test.go
+++ b/cmd/root_test.go
@@ -8,7 +8,6 @@ import (
 	"os"
 	"path/filepath"
 	"strings"
-	"syscall"
 	"testing"
 
 	"github.com/RalianENG/kojuto/internal/types"
@@ -350,54 +349,12 @@ func TestOpenOutput_File(t *testing.T) {
 		t.Error("expected a file, not os.Stdout")
 	}
 
-	// Verify the file was actually created.
-	info, err := os.Stat(flagOutput)
-	if err != nil {
-		t.Fatalf("output file was not created: %v", err)
-	}
-	// Owner-only mode: gosec G306. Reports embed attacker-supplied
-	// snippets / paths that shouldn't leak to other users on a shared
-	// host. Test under a known umask so we measure the requested mode
-	// rather than the system umask's clipping.
-	oldUmask := syscall.Umask(0)
-	defer syscall.Umask(oldUmask)
-	if mode := info.Mode().Perm(); mode != 0o600 {
-		t.Errorf("output file mode = %o, want 0o600", mode)
-	}
-}
-
-// TestOutputFiles_OwnerOnly pins the 0o600 contract for both pinned-
-// manifest writers as well — they share the same threat model as the
-// main report (multi-user host, package metadata leakage).
-func TestOutputFiles_OwnerOnly(t *testing.T) {
-	oldUmask := syscall.Umask(0)
-	defer syscall.Umask(oldUmask)
-
-	dir := t.TempDir()
-	deps := []pinnedDep{{Name: "lodash", Version: "4.17.21"}}
-
-	pyPath := filepath.Join(dir, "requirements.txt")
-	if err := writePinnedPyPI(pyPath, deps); err != nil {
-		t.Fatalf("writePinnedPyPI: %v", err)
-	}
-	pyInfo, err := os.Stat(pyPath)
-	if err != nil {
-		t.Fatal(err)
-	}
-	if mode := pyInfo.Mode().Perm(); mode != 0o600 {
-		t.Errorf("pinned requirements.txt mode = %o, want 0o600", mode)
-	}
-
-	npmPath := filepath.Join(dir, "package.json")
-	if writeErr := writePinnedNpm(npmPath, deps); writeErr != nil {
-		t.Fatalf("writePinnedNpm: %v", writeErr)
-	}
-	npmInfo, err := os.Stat(npmPath)
-	if err != nil {
-		t.Fatal(err)
-	}
-	if mode := npmInfo.Mode().Perm(); mode != 0o600 {
-		t.Errorf("pinned package.json mode = %o, want 0o600", mode)
+	// Verify the file was actually created. The owner-only (0o600)
+	// permission contract is exercised by TestOpenOutput_FileMode and
+	// TestOutputFiles_OwnerOnly in root_unix_test.go — those tests
+	// rely on syscall.Umask, which is not defined on Windows.
+	if _, err := os.Stat(flagOutput); err != nil {
+		t.Errorf("output file was not created: %v", err)
 	}
 }
 
diff --git a/cmd/root_unix_test.go b/cmd/root_unix_test.go
new file mode 100644
index 0000000..14f0d15
--- /dev/null
+++ b/cmd/root_unix_test.go
@@ -0,0 +1,80 @@
+//go:build !windows
+
+package cmd
+
+import (
+	"os"
+	"path/filepath"
+	"syscall"
+	"testing"
+)
+
+// TestOpenOutput_FileMode pins the 0o600 contract for the main scan
+// report file. Reports embed attacker-supplied code snippets, file
+// paths, and the dependency tree of the scanned package; on a multi-
+// user host that data must not be readable by other accounts. The
+// test runs under syscall.Umask(0) so the assertion measures the
+// requested mode rather than the user's umask-clipped result.
+//
+// Windows has no umask and no Unix-style permission bits, and Go's
+// os.File.Mode().Perm() reports a synthetic 0o666/0o444 there, so
+// this test lives in a !windows build-tagged file.
+func TestOpenOutput_FileMode(t *testing.T) {
+	oldUmask := syscall.Umask(0)
+	defer syscall.Umask(oldUmask)
+
+	original := flagOutput
+	defer func() { flagOutput = original }()
+
+	dir := t.TempDir()
+	flagOutput = filepath.Join(dir, "test-output.json")
+
+	f, err := openOutput()
+	if err != nil {
+		t.Fatalf("openOutput() error: %v", err)
+	}
+	defer f.Close()
+
+	info, err := os.Stat(flagOutput)
+	if err != nil {
+		t.Fatalf("output file was not created: %v", err)
+	}
+	if mode := info.Mode().Perm(); mode != 0o600 {
+		t.Errorf("output file mode = %o, want 0o600", mode)
+	}
+}
+
+// TestOutputFiles_OwnerOnly pins the 0o600 contract for both pinned-
+// manifest writers — they share the same threat model as the main
+// report (multi-user host, package metadata leakage).
+func TestOutputFiles_OwnerOnly(t *testing.T) {
+	oldUmask := syscall.Umask(0)
+	defer syscall.Umask(oldUmask)
+
+	dir := t.TempDir()
+	deps := []pinnedDep{{Name: "lodash", Version: "4.17.21"}}
+
+	pyPath := filepath.Join(dir, "requirements.txt")
+	if err := writePinnedPyPI(pyPath, deps); err != nil {
+		t.Fatalf("writePinnedPyPI: %v", err)
+	}
+	pyInfo, err := os.Stat(pyPath)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if mode := pyInfo.Mode().Perm(); mode != 0o600 {
+		t.Errorf("pinned requirements.txt mode = %o, want 0o600", mode)
+	}
+
+	npmPath := filepath.Join(dir, "package.json")
+	if writeErr := writePinnedNpm(npmPath, deps); writeErr != nil {
+		t.Fatalf("writePinnedNpm: %v", writeErr)
+	}
+	npmInfo, err := os.Stat(npmPath)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if mode := npmInfo.Mode().Perm(); mode != 0o600 {
+		t.Errorf("pinned package.json mode = %o, want 0o600", mode)
+	}
+}