google · andrew · Mar 15, 2026 · Mar 15, 2026 · Mar 15, 2026 · May 2, 2026
diff --git a/.golangci.yml b/.golangci.yml
@@ -0,0 +1,20 @@
+version: "2"
+
+linters:
+  enable:
+    - gocritic
+    - gocognit
+    - gocyclo
+    - maintidx
+    - dupl
+    - mnd
+    - unparam
+    - ireturn
+    - goconst
+    - errcheck
+  exclusions:
+    rules:
+      # golangci-lint v2 removed goconst's ignore-tests setting; skip test files with an exclusion rule.
+      - path: '(.+)_test\.go'
+        linters:
+          - goconst
diff --git a/README.md b/README.md
@@ -4,7 +4,7 @@ The `licensecheck` package scans source texts for known licenses.
 The design aims never to give a false positive.
 It also reports matches of known license URLs.
 
-See the [package documentation](https://pkg.go.dev/github.com/google/licensecheck)
+See the [package documentation](https://pkg.go.dev/github.com/git-pkgs/licensecheck)
 for API details.
 
 The license scanner recognizes nearly all the licenses gathered by the SPDX project,

diff --git a/builtin.dfa b/builtin.dfa
diff --git a/builtin.dfa.triv b/builtin.dfa.triv
diff --git a/data.gen.go b/data.gen.go
diff --git a/gen_data.go b/gen_data.go
@@ -2,9 +2,9 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-// +build ignore
+//go:build ignore
 
-// This file generates data.gen.go.
+// This file generates data.gen.go and builtin.dfa.
 // It embeds the text of all the licenses in the subdirectory "licenses"
 // and constructs the data structures to represent them.
 // Run by a "go:generate" comment in license.go.
@@ -16,14 +16,15 @@ import (
 	"flag"
 	"fmt"
 	"go/format"
-	"io/ioutil"
 	"log"
+	"os"
 	"path/filepath"
 	"sort"
 	"strings"
 	"text/template"
 
-	"github.com/google/licensecheck"
+	"github.com/git-pkgs/licensecheck"
+	"github.com/git-pkgs/licensecheck/internal/match"
 )
 
 var outFile = flag.String("o", "data.gen.go", "`file` to write")
@@ -59,7 +60,7 @@ func main() {
 
 	src, err := format.Source([]byte(code))
 	if err != nil {
-		fd, err1 := ioutil.TempFile("", "license-data")
+		fd, err1 := os.CreateTemp("", "license-data")
 		if err1 == nil {
 			_, err1 = fd.Write([]byte(code))
 			if err1 == nil {
@@ -69,10 +70,32 @@ func main() {
 		}
 		log.Fatal("parsing output:", err)
 	}
-	err = ioutil.WriteFile(*outFile, src, 0644)
+	err = os.WriteFile(*outFile, src, 0644)
 	if err != nil {
 		log.Fatal(err)
 	}
+
+	// Build and write the precomputed DFA.
+	d := new(match.Dict)
+	d.Insert("copyright")
+	d.Insert("http")
+	var lres []*match.LRE
+	for _, file := range builtLRE {
+		re, err := match.ParseLRE(d, file.Name, string(file.Data))
+		if err != nil {
+			log.Fatalf("parsing LRE %s for DFA: %v", file.Name, err)
+		}
+		lres = append(lres, re)
+	}
+	multi, err := match.NewMultiLRE(lres)
+	if err != nil {
+		log.Fatal("building MultiLRE:", err)
+	}
+	dfaData := match.MarshalMultiLRE(multi)
+	if err := os.WriteFile("builtin.dfa", dfaData, 0644); err != nil {
+		log.Fatal("writing builtin.dfa:", err)
+	}
+	log.Printf("builtin.dfa: %d bytes", len(dfaData))
 }
 
 // varName returns the basename of the file, sanitized for use as a variable name,

diff --git a/go.mod b/go.mod
@@ -1,3 +1,3 @@
-module github.com/google/licensecheck
+module github.com/git-pkgs/licensecheck
 
-go 1.12
+go 1.25
diff --git a/internal/match/dict.go b/internal/match/dict.go
@@ -383,11 +383,11 @@ func markdownAnchorSize(t string) int {
 		return 0
 	}
 	i := 2
-	for ; i < len(t); i++ {
+	for ; i < len(t) && i < 256; i++ {
 		switch t[i] {
 		case '}':
 			return i + 1
-		case ' ', '\r', '\n':
+		case ' ', '\r', '\n', '{':
 			return 0
 		}
 	}
@@ -419,9 +419,9 @@ func markdownLinkSize(t string) int {
 		return 0
 	}
 
-	for i := 2; i < len(t); i++ {
+	for i := 2; i < len(t) && i < 2048; i++ {
 		c := t[i]
-		if c == ' ' || c == '\t' || c == '\r' || c == '\n' {
+		if c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == ']' {
 			return 0
 		}
 		if c == ')' {

diff --git a/internal/match/dict_test.go b/internal/match/dict_test.go
@@ -5,11 +5,13 @@
 package match
 
 import (
-	"io/ioutil"
+	"os"
 	"path/filepath"
 	"reflect"
 	"regexp"
+	"strings"
 	"testing"
+	"time"
 )
 
 func TestDict(t *testing.T) {
@@ -18,7 +20,7 @@ func TestDict(t *testing.T) {
 	indexes := []WordID{0, 1, 2, 1, 3, 0}
 
 	var d Dict
-	for j := 0; j < 2; j++ {
+	for range 2 {
 		for i, w := range words {
 			id := d.Insert(w)
 			if id != indexes[i] {
@@ -100,6 +102,8 @@ var markdownAnchorSizeTests = []struct {
 	{"{#abc def}", 0},
 	{"{#abc\ndef}", 0},
 	{"{#abc\rdef}", 0},
+	{"{#abc{#def}", 0},
+	{"{#" + strings.Repeat("a", 300) + "}", 0},
 }
 
 func TestMarkdownAnchorSize(t *testing.T) {
@@ -111,6 +115,25 @@ func TestMarkdownAnchorSize(t *testing.T) {
 	}
 }
 
+var markdownLinkSizeTests = []struct {
+	in  string
+	out int
+}{
+	{"](http://abc)", 13},
+	{"](#abc)", 7},
+	{"](http://abc](http://def)", 0},
+	{"](http://" + strings.Repeat("a", 3000) + ")", 0},
+}
+
+// TestMarkdownLinkSize runs markdownLinkSize over every markdownLinkSizeTests case.
+func TestMarkdownLinkSize(t *testing.T) {
+	for _, tc := range markdownLinkSizeTests {
+		if got := markdownLinkSize(tc.in); got != tc.out {
+			t.Errorf("markdownLinkSize(%q) = %d want %d", tc.in, got, tc.out)
+		}
+	}
+}
+
 var insertSplitTests = []struct {
 	in  string
 	out string
@@ -245,6 +268,24 @@ func rot13(s string) string {
 	return string(b)
 }
 
+// TestSplitMarkdownQuadratic checks that Split stays fast on repeated trigger
+// sequences without closers, which used to make markdownAnchorSize and
+// markdownLinkSize scan to the end of the input on every call: O(n^2) total work.
+func TestSplitMarkdownQuadratic(t *testing.T) {
+	var dict Dict
+	for _, text := range []string{
+		strings.Repeat("{#x", 100000),
+		strings.Repeat("](http://x", 50000),
+	} {
+		began := time.Now()
+		dict.Split(text)
+		took := time.Since(began)
+		if took > time.Second {
+			t.Errorf("Split(%d bytes %q...) took %v, want < 1s", len(text), text[:10], took)
+		}
+	}
+}
+
 var bench struct {
 	data []byte
 	str  string
@@ -260,7 +301,7 @@ func benchSetup(b *testing.B) {
 		b.Fatal(err)
 	}
 	for _, file := range files {
-		data, err := ioutil.ReadFile(file)
+		data, err := os.ReadFile(file)
 		if err != nil {
 			b.Fatal(err)
 		}

diff --git a/internal/match/regexp_test.go b/internal/match/regexp_test.go
@@ -27,7 +27,7 @@ func TestMultiLREMatch(t *testing.T) {
 	for id, tt := range multiMatchTests {
 		t.Run(fmt.Sprint(id), func(t *testing.T) {
 			var list []*LRE
-			for _, expr := range strings.Split(tt.re, "/") {
+			for expr := range strings.SplitSeq(tt.re, "/") {
 				re, err := ParseLRE(&d, "x", expr)
 				if err != nil {
 					t.Fatalf("Parse(%q): %v", expr, err)

diff --git a/internal/match/rematch.go b/internal/match/rematch.go
@@ -200,6 +200,7 @@ package match
 import (
 	"encoding/binary"
 	"fmt"
+	"slices"
 	"sort"
 	"strings"
 )
@@ -436,12 +437,7 @@ func (c *reCompile) mergeCut(cut1, cut2 []reCut) []reCut {
 func canMatchEmpty(re *reSyntax) bool {
 	switch re.op {
 	case opAlternate:
-		for _, sub := range re.sub {
-			if canMatchEmpty(sub) {
-				return true
-			}
-		}
-		return false
+		return slices.ContainsFunc(re.sub, canMatchEmpty)
 
 	case opConcat:
 		for _, sub := range re.sub {
@@ -512,10 +508,8 @@ func (s *nfaState) add(prog reProg, pc int32) {
 	// where we are in the list. If this ever showed up as expensive
 	// on a profile, we could switch to a sparse set instead;
 	// see https://research.swtch.com/sparse.
-	for _, old := range *s {
-		if old == pc {
-			return
-		}
+	if slices.Contains(*s, pc) {
+		return
 	}
 
 	*s = append(*s, pc)
@@ -660,18 +654,17 @@ func (s nfaState) appendEncoding(enc []byte) []byte {
 //
 // The encoding of this state information is:
 //
-//	-  a one-word header M | N<<1, where M is 0 for a non-match, 1 for a match,
-//	   and N is the number of words in the table.
-//	   This header is conveniently also the number of words that follow in the encoding.
-//
-//	- if M == 1, a one-word value V that is the match value to report,
-//	  identifying which of a set of regexps has been matched.
+//   - a one-word header M | N<<1, where M is 0 for a non-match, 1 for a match,
+//     and N is the number of words in the table.
+//     This header is conveniently also the number of words that follow in the encoding.
 //
-//	- N two-word pairs W:NEXT indicating that if word W is seen, the DFA should
-//	  move to the state at offset NEXT. The pairs are sorted by W. An entry for W == AnyWord
-//	  is treated as matching any input word; an exact match later in the list takes priority.
-//	  The list is sorted by W, so AnyWord is always first if present.
+//   - if M == 1, a one-word value V that is the match value to report,
+//     identifying which of a set of regexps has been matched.
 //
+//   - N two-word pairs W:NEXT indicating that if word W is seen, the DFA should
+//     move to the state at offset NEXT. The pairs are sorted by W. An entry for W == AnyWord
+//     is treated as matching any input word; an exact match later in the list takes priority.
+//     The list is sorted by W, so AnyWord is always first if present.
 type reDFA []int32
 
 // A dfaBuilder holds state for building a DFA from a reProg.
@@ -784,7 +777,6 @@ func (dfa reDFA) string(d *Dict) string {
 //			off = dnext
 //		}
 //	}
-//
 func (dfa reDFA) stateAt(off int32) (match int32, delta []int32) {
 	hdr := dfa[off]
 	off++
@@ -908,10 +900,7 @@ Words:
 			// the last time we saw a matching state),
 			// print information about it.
 			if TraceDFA > 0 && i-end >= TraceDFA {
-				start := i - 10
-				if start < 0 {
-					start = 0
-				}
+				start := max(i-10, 0)
 				print("DFA mismatch at «",
 					text[words[start].Lo:words[i].Lo], "|",
 					text[words[i].Lo:words[i].Hi], "»\n")
@@ -932,25 +921,18 @@ Words:
 		end = len(words)
 	}
 	if i := len(words); TraceDFA > 0 && i-end >= TraceDFA {
-		start := i - 10
-		if start < 0 {
-			start = 0
-		}
+		start := max(i-10, 0)
 		println("DFA ran out of input at «", text[words[start].Lo:], "|", "EOF", "»\n")
 	}
 	return match, end
 }
 
 func sortInt32s(x []int32) {
-	sort.Slice(x, func(i, j int) bool {
-		return x[i] < x[j]
-	})
+	slices.Sort(x)
 }
 
 func sortWordIDs(x []WordID) {
-	sort.Slice(x, func(i, j int) bool {
-		return x[i] < x[j]
-	})
+	slices.Sort(x)
 }
 
 // canMisspell reports whether want can be misspelled as have.

diff --git a/internal/match/rematch_test.go b/internal/match/rematch_test.go
@@ -337,10 +337,10 @@ The name __10__ may not be used.
 
 func TestCompile(t *testing.T) {
 	var d Dict
-	for _, tt := range strings.Split(compileTests, "\n\n") {
+	for tt := range strings.SplitSeq(compileTests, "\n\n") {
 		tt = strings.TrimSpace(tt) + "\n"
-		i := strings.Index(tt, "\n")
-		in, want := tt[:i], tt[i+1:]
+		before, after, _ := strings.Cut(tt, "\n")
+		in, want := before, after
 
 		prog := testProg(t, &d, in)
 		if prog == nil {
@@ -356,7 +356,7 @@ func TestCompile(t *testing.T) {
 func testProg(t *testing.T, dict *Dict, expr string) reProg {
 	if strings.Contains(expr, "/") {
 		var list []reProg
-		for _, str := range strings.Split(expr, "/") {
+		for str := range strings.SplitSeq(expr, "/") {
 			re, err := reParse(dict, str, false)
 			if err != nil {
 				t.Errorf("Parse(%q): %v", expr, err)
@@ -567,10 +567,10 @@ The name __10__ may not be used
 
 func TestCompileDFA(t *testing.T) {
 	var d Dict
-	for _, tt := range strings.Split(compileDFATests, "\n\n") {
+	for tt := range strings.SplitSeq(compileDFATests, "\n\n") {
 		tt = strings.TrimSpace(tt) + "\n"
-		i := strings.Index(tt, "\n")
-		in, want := tt[:i], tt[i+1:]
+		before, after, _ := strings.Cut(tt, "\n")
+		in, want := before, after
 
 		prog := testProg(t, &d, in)
 		if prog == nil {