diff --git a/cmd/brief/main.go b/cmd/brief/main.go index 60b1924..0088f82 100644 --- a/cmd/brief/main.go +++ b/cmd/brief/main.go @@ -69,6 +69,7 @@ func cmdScan(args []string) { dir := fs.String("dir", "", "Directory to clone remote source into") scanDepth := fs.Int("scan-depth", 0, "Max directory depth for language detection (default 4)") skip := fs.String("skip", "", "Additional directories to skip, comma-separated") + tracked := fs.Bool("tracked", false, "Only consider files tracked by git") version := fs.Bool("version", false, "Print version and exit") _ = fs.Parse(args) @@ -93,12 +94,12 @@ func cmdScan(args []string) { os.Exit(1) } - code := runScan(src.Dir, *scanDepth, *skip, *category, *jsonFlag, *humanFlag, *markdownFlag, *verbose) + code := runScan(src.Dir, *scanDepth, *skip, *category, *tracked, *jsonFlag, *humanFlag, *markdownFlag, *verbose) src.Cleanup() os.Exit(code) } -func runScan(dir string, scanDepth int, skip, category string, jsonFlag, humanFlag, markdownFlag, verbose bool) int { +func runScan(dir string, scanDepth int, skip, category string, tracked, jsonFlag, humanFlag, markdownFlag, verbose bool) int { knowledgeBase, err := kb.Load(brief.KnowledgeFS) if err != nil { _, _ = fmt.Fprintf(os.Stderr, "error loading knowledge base: %v\n", err) @@ -107,6 +108,7 @@ func runScan(dir string, scanDepth int, skip, category string, jsonFlag, humanFl engine := detect.New(knowledgeBase, dir) engine.ScanDepth = scanDepth + engine.TrackedOnly = tracked if skip != "" { engine.SkipDirs = strings.Split(skip, ",") } diff --git a/detect/detect.go b/detect/detect.go index 3bdd20e..9f56639 100644 --- a/detect/detect.go +++ b/detect/detect.go @@ -42,6 +42,7 @@ type Engine struct { Root string ScanDepth int // max directory depth for recursive detection (0 = default 4) SkipDirs []string // additional directories to skip during walks + TrackedOnly bool // only consider files tracked by git filesChecked int toolsChecked int toolsMatched int @@ -49,7 +50,9 @@ type Engine struct { detectedEcosystems map[string]bool // ecosystems whose language was detected // Lazily populated caches - fileExts map[string]int // cached file extension counts in the project + tracked map[string]bool // git-tracked files relative to Root, nil when TrackedOnly is off + trackedDirs map[string]bool // directories that contain at least one tracked file + fileExts map[string]int // cached file extension counts in the project dirCache map[string][]string depsLoaded bool runtimeDeps map[string]bool // all runtime/unscoped dependency names @@ -111,6 +114,43 @@ var skipDirs = map[string]bool{ "coverage": true, } +// loadTracked populates the set of git-tracked files under Root by running +// git ls-files once. Paths are stored relative to Root using the OS separator. +func (e *Engine) loadTracked(abs string) error { + out, err := e.git(abs, "ls-files", "-z") + if err != nil { + return fmt.Errorf("-tracked: %s is not a git repository (or git is not installed)", abs) + } + e.tracked = make(map[string]bool) + e.trackedDirs = make(map[string]bool) + for p := range strings.SplitSeq(string(out), "\x00") { + if p == "" { + continue + } + p = filepath.FromSlash(p) + e.tracked[p] = true + for d := filepath.Dir(p); d != "."; d = filepath.Dir(d) { + if e.trackedDirs[d] { + break + } + e.trackedDirs[d] = true + } + } + return nil +} + +// isTracked reports whether a path relative to Root should be considered. +// Always true when TrackedOnly is off. The root itself is always allowed. +func (e *Engine) isTracked(rel string) bool { + if e.tracked == nil { + return true + } + if rel == "" || rel == "." { + return true + } + return e.tracked[rel] || e.trackedDirs[rel] +} + // shouldSkipDir returns true if a directory should be skipped during walks. func (e *Engine) shouldSkipDir(name string) bool { if strings.HasPrefix(name, ".") { @@ -149,6 +189,12 @@ func (e *Engine) Run() (*brief.Report, error) { return nil, fmt.Errorf("path is not a directory: %s", abs) } + if e.TrackedOnly { + if err := e.loadTracked(abs); err != nil { + return nil, err + } + } + report := &brief.Report{ Version: brief.Version, Path: abs, @@ -425,7 +471,7 @@ func (e *Engine) exists(pattern string) bool { return e.globMatches(dir, true) } info, err := os.Stat(filepath.Join(e.Root, dir)) - return err == nil && info.IsDir() + return err == nil && info.IsDir() && e.isTracked(filepath.FromSlash(dir)) } // Handle recursive glob patterns like "**/*.py" @@ -438,7 +484,7 @@ func (e *Engine) exists(pattern string) bool { } _, err := os.Stat(filepath.Join(e.Root, pattern)) - return err == nil + return err == nil && e.isTracked(filepath.FromSlash(pattern)) } // globMatches reports whether a root-level glob pattern matches at least one @@ -450,7 +496,14 @@ func (e *Engine) globMatches(pattern string, wantDir bool) bool { } for _, m := range matches { info, err := os.Stat(m) - if err == nil && info.IsDir() == wantDir { + if err != nil || info.IsDir() != wantDir { + continue + } + rel, err := filepath.Rel(e.Root, m) + if err != nil { + continue + } + if e.isTracked(rel) { return true } } @@ -482,16 +535,23 @@ func (e *Engine) recursiveGlob(pattern string) bool { if err != nil { return nil } + rel, _ := filepath.Rel(e.Root, path) if d.IsDir() { name := d.Name() if name != "." && e.shouldSkipDir(name) { return filepath.SkipDir } + if !e.isTracked(rel) { + return filepath.SkipDir + } return nil } if d.Type()&os.ModeSymlink != 0 { return nil } + if !e.isTracked(rel) { + return nil + } matched, _ := filepath.Match(suffix, d.Name()) if matched { found = true @@ -530,11 +590,17 @@ func (e *Engine) loadFileExts() { if depth > maxDepth { return filepath.SkipDir } + if !e.isTracked(strings.TrimPrefix(rel, string(filepath.Separator))) { + return filepath.SkipDir + } return nil } if d.Type()&os.ModeSymlink != 0 { return nil } + if !e.isTracked(strings.TrimPrefix(path[rootLen:], string(filepath.Separator))) { + return nil + } ext := filepath.Ext(d.Name()) if ext != "" { e.fileExts[ext]++ diff --git a/detect/detect_test.go b/detect/detect_test.go index cc2ee0a..c35621b 100644 --- a/detect/detect_test.go +++ b/detect/detect_test.go @@ -2,6 +2,7 @@ package detect import ( "os" + "os/exec" "path/filepath" "testing" @@ -813,6 +814,111 @@ func TestDirectoryGlobPattern(t *testing.T) { } } +func TestTrackedOnly(t *testing.T) { + if _, err := exec.LookPath("git"); err != nil { + t.Skip("git not installed") + } + + dir := t.TempDir() + gitCmd := func(args ...string) { + cmd := exec.Command("git", args...) + cmd.Dir = dir + cmd.Env = append(os.Environ(), + "GIT_AUTHOR_NAME=t", "GIT_AUTHOR_EMAIL=t@t", + "GIT_COMMITTER_NAME=t", "GIT_COMMITTER_EMAIL=t@t", + ) + if out, err := cmd.CombinedOutput(); err != nil { + t.Fatalf("git %v: %v\n%s", args, err, out) + } + } + write := func(name, body string) { + p := filepath.Join(dir, name) + if err := os.MkdirAll(filepath.Dir(p), 0o755); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(p, []byte(body), 0o644); err != nil { + t.Fatal(err) + } + } + + gitCmd("init", "-q") + write("main.go", "package main\n") + write("go.mod", "module example.com/m\n\ngo 1.22\n") + gitCmd("add", "main.go", "go.mod") + gitCmd("commit", "-q", "-m", "init") + + // Untracked noise that would normally trigger npm/JS detection. + write("package.json", `{"name":"x"}`) + write("package-lock.json", `{"name":"x","lockfileVersion":3}`) + write("htmlcov/index.html", "") + write("htmlcov/app.js", "var x = 1;") + + engine := New(loadKB(t), dir) + engine.TrackedOnly = true + r, err := engine.Run() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + for _, lang := range r.Languages { + if lang.Name == "JavaScript" { + t.Errorf("untracked .js files should not trigger JavaScript detection, got languages: %v", langNames(r)) + } + } + for _, pm := range r.PackageManagers { + if pm.Name == "npm" { + t.Errorf("untracked package.json should not trigger npm detection, got package managers: %v", pmNames(r)) + } + } + if len(r.Languages) == 0 || r.Languages[0].Name != "Go" { + t.Errorf("expected Go as primary language, got %v", langNames(r)) + } + + // Without TrackedOnly the untracked files should be picked up. + engine = New(loadKB(t), dir) + r, err = engine.Run() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + sawNpm := false + for _, pm := range r.PackageManagers { + if pm.Name == "npm" { + sawNpm = true + } + } + if !sawNpm { + t.Errorf("expected npm to be detected without -tracked, got %v", pmNames(r)) + } +} + +func TestTrackedOnlyNotARepo(t *testing.T) { + if _, err := exec.LookPath("git"); err != nil { + t.Skip("git not installed") + } + dir := t.TempDir() + engine := New(loadKB(t), dir) + engine.TrackedOnly = true + if _, err := engine.Run(); err == nil { + t.Error("expected error when -tracked is used outside a git repository") + } +} + +func langNames(r *brief.Report) []string { + names := make([]string, len(r.Languages)) + for i, l := range r.Languages { + names[i] = l.Name + } + return names +} + +func pmNames(r *brief.Report) []string { + names := make([]string, len(r.PackageManagers)) + for i, p := range r.PackageManagers { + names[i] = p.Name + } + return names +} + func assertToolDetected(t *testing.T, r *brief.Report, category, name string) { t.Helper() tools, ok := r.Tools[category]