Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions .golangci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
version: "2"

linters:
enable:
- gocritic
- gocognit
- gocyclo
- maintidx
- dupl
- mnd
- unparam
- ireturn
- goconst
- errcheck
settings:
goconst:
ignore-tests: true
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ The `licensecheck` package scans source texts for known licenses.
The design aims never to give a false positive.
It also reports matches of known license URLs.

See the [package documentation](https://pkg.go.dev/github.com/google/licensecheck)
See the [package documentation](https://pkg.go.dev/github.com/git-pkgs/licensecheck)
for API details.

The license scanner recognizes nearly all the licenses gathered by the SPDX project,
Expand Down
Binary file added builtin.dfa
Binary file not shown.
Binary file added builtin.dfa.triv
Binary file not shown.
2 changes: 1 addition & 1 deletion data.gen.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

35 changes: 29 additions & 6 deletions gen_data.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build ignore
//go:build ignore

// This file generates data.gen.go.
// This file generates data.gen.go and builtin.dfa.
// It embeds the text of all the licenses in the subdirectory "licenses"
// and constructs the data structures to represent them.
// Run by a "go:generate" comment in license.go.
Expand All @@ -16,14 +16,15 @@ import (
"flag"
"fmt"
"go/format"
"io/ioutil"
"log"
"os"
"path/filepath"
"sort"
"strings"
"text/template"

"github.com/google/licensecheck"
"github.com/git-pkgs/licensecheck"
"github.com/git-pkgs/licensecheck/internal/match"
)

var outFile = flag.String("o", "data.gen.go", "`file` to write")
Expand Down Expand Up @@ -59,7 +60,7 @@ func main() {

src, err := format.Source([]byte(code))
if err != nil {
fd, err1 := ioutil.TempFile("", "license-data")
fd, err1 := os.CreateTemp("", "license-data")
if err1 == nil {
_, err1 = fd.Write([]byte(code))
if err1 == nil {
Expand All @@ -69,10 +70,32 @@ func main() {
}
log.Fatal("parsing output:", err)
}
err = ioutil.WriteFile(*outFile, src, 0644)
err = os.WriteFile(*outFile, src, 0644)
if err != nil {
log.Fatal(err)
}

// Build and write the precomputed DFA.
d := new(match.Dict)
d.Insert("copyright")
d.Insert("http")
var lres []*match.LRE
for _, file := range builtLRE {
re, err := match.ParseLRE(d, file.Name, string(file.Data))
if err != nil {
log.Fatalf("parsing LRE %s for DFA: %v", file.Name, err)
}
lres = append(lres, re)
}
multi, err := match.NewMultiLRE(lres)
if err != nil {
log.Fatal("building MultiLRE:", err)
}
dfaData := match.MarshalMultiLRE(multi)
if err := os.WriteFile("builtin.dfa", dfaData, 0644); err != nil {
log.Fatal("writing builtin.dfa:", err)
}
log.Printf("builtin.dfa: %d bytes", len(dfaData))
}

// varName returns the basename of the file, sanitized for use as a variable name,
Expand Down
4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
module github.com/google/licensecheck
module github.com/git-pkgs/licensecheck

go 1.12
go 1.25
8 changes: 4 additions & 4 deletions internal/match/dict.go
Original file line number Diff line number Diff line change
Expand Up @@ -383,11 +383,11 @@ func markdownAnchorSize(t string) int {
return 0
}
i := 2
for ; i < len(t); i++ {
for ; i < len(t) && i < 256; i++ {
switch t[i] {
case '}':
return i + 1
case ' ', '\r', '\n':
case ' ', '\r', '\n', '{':
return 0
}
}
Expand Down Expand Up @@ -419,9 +419,9 @@ func markdownLinkSize(t string) int {
return 0
}

for i := 2; i < len(t); i++ {
for i := 2; i < len(t) && i < 2048; i++ {
c := t[i]
if c == ' ' || c == '\t' || c == '\r' || c == '\n' {
if c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == ']' {
return 0
}
if c == ')' {
Expand Down
47 changes: 44 additions & 3 deletions internal/match/dict_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@
package match

import (
"io/ioutil"
"os"
"path/filepath"
"reflect"
"regexp"
"strings"
"testing"
"time"
)

func TestDict(t *testing.T) {
Expand All @@ -18,7 +20,7 @@ func TestDict(t *testing.T) {
indexes := []WordID{0, 1, 2, 1, 3, 0}

var d Dict
for j := 0; j < 2; j++ {
for range 2 {
for i, w := range words {
id := d.Insert(w)
if id != indexes[i] {
Expand Down Expand Up @@ -100,6 +102,8 @@ var markdownAnchorSizeTests = []struct {
{"{#abc def}", 0},
{"{#abc\ndef}", 0},
{"{#abc\rdef}", 0},
{"{#abc{#def}", 0},
{"{#" + strings.Repeat("a", 300) + "}", 0},
}

func TestMarkdownAnchorSize(t *testing.T) {
Expand All @@ -111,6 +115,25 @@ func TestMarkdownAnchorSize(t *testing.T) {
}
}

// markdownLinkSizeTests is the table consumed by TestMarkdownLinkSize.
// Each entry pairs an input that begins with "](" with the value
// markdownLinkSize is expected to return for it.
var markdownLinkSizeTests = []struct {
	in  string
	out int
}{
	// Inputs for which the expected size equals the full input length.
	{in: "](http://abc)", out: 13},
	{in: "](#abc)", out: 7},

	// A ']' shows up before any ')' is reached; expected result is 0.
	{in: "](http://abc](http://def)", out: 0},

	// The ')' sits more than 3000 bytes into the input; expected result is 0.
	{in: "](http://" + strings.Repeat("a", 3000) + ")", out: 0},
}

// TestMarkdownLinkSize calls markdownLinkSize on every input in
// markdownLinkSizeTests and reports each result that differs from
// the expected size recorded alongside it.
func TestMarkdownLinkSize(t *testing.T) {
	for i := range markdownLinkSizeTests {
		tc := &markdownLinkSizeTests[i]
		if got := markdownLinkSize(tc.in); got != tc.out {
			t.Errorf("markdownLinkSize(%q) = %d want %d", tc.in, got, tc.out)
		}
	}
}

var insertSplitTests = []struct {
in string
out string
Expand Down Expand Up @@ -245,6 +268,24 @@ func rot13(s string) string {
return string(b)
}

// TestSplitMarkdownQuadratic times Dict.Split on large inputs made of
// repeated markdown trigger sequences and fails if any single call
// takes longer than one second.
func TestSplitMarkdownQuadratic(t *testing.T) {
	// Trigger sequences that are never closed used to make
	// markdownAnchorSize and markdownLinkSize scan to the end of the
	// input on every call, so the total work grew as O(n^2).
	cases := []string{
		strings.Repeat("{#x", 100000),
		strings.Repeat("](http://x", 50000),
	}

	const limit = time.Second

	// One dictionary is shared by all inputs.
	var dict Dict
	for i := range cases {
		text := cases[i]

		began := time.Now()
		dict.Split(text)
		elapsed := time.Since(began)

		if elapsed <= limit {
			continue
		}
		t.Errorf("Split(%d bytes %q...) took %v, want < 1s", len(text), text[:10], elapsed)
	}
}

var bench struct {
data []byte
str string
Expand All @@ -260,7 +301,7 @@ func benchSetup(b *testing.B) {
b.Fatal(err)
}
for _, file := range files {
data, err := ioutil.ReadFile(file)
data, err := os.ReadFile(file)
if err != nil {
b.Fatal(err)
}
Expand Down
2 changes: 1 addition & 1 deletion internal/match/regexp_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ func TestMultiLREMatch(t *testing.T) {
for id, tt := range multiMatchTests {
t.Run(fmt.Sprint(id), func(t *testing.T) {
var list []*LRE
for _, expr := range strings.Split(tt.re, "/") {
for expr := range strings.SplitSeq(tt.re, "/") {
re, err := ParseLRE(&d, "x", expr)
if err != nil {
t.Fatalf("Parse(%q): %v", expr, err)
Expand Down
52 changes: 17 additions & 35 deletions internal/match/rematch.go
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@ package match
import (
"encoding/binary"
"fmt"
"slices"
"sort"
"strings"
)
Expand Down Expand Up @@ -436,12 +437,7 @@ func (c *reCompile) mergeCut(cut1, cut2 []reCut) []reCut {
func canMatchEmpty(re *reSyntax) bool {
switch re.op {
case opAlternate:
for _, sub := range re.sub {
if canMatchEmpty(sub) {
return true
}
}
return false
return slices.ContainsFunc(re.sub, canMatchEmpty)

case opConcat:
for _, sub := range re.sub {
Expand Down Expand Up @@ -512,10 +508,8 @@ func (s *nfaState) add(prog reProg, pc int32) {
// where we are in the list. If this ever showed up as expensive
// on a profile, we could switch to a sparse set instead;
// see https://research.swtch.com/sparse.
for _, old := range *s {
if old == pc {
return
}
if slices.Contains(*s, pc) {
return
}

*s = append(*s, pc)
Expand Down Expand Up @@ -660,18 +654,17 @@ func (s nfaState) appendEncoding(enc []byte) []byte {
//
// The encoding of this state information is:
//
// - a one-word header M | N<<1, where M is 0 for a non-match, 1 for a match,
// and N is the number of words in the table.
// This header is conveniently also the number of words that follow in the encoding.
//
// - if M == 1, a one-word value V that is the match value to report,
// identifying which of a set of regexps has been matched.
// - a one-word header M | N<<1, where M is 0 for a non-match, 1 for a match,
// and N is the number of words in the table.
// This header is conveniently also the number of words that follow in the encoding.
//
// - N two-word pairs W:NEXT indicating that if word W is seen, the DFA should
// move to the state at offset NEXT. The pairs are sorted by W. An entry for W == AnyWord
// is treated as matching any input word; an exact match later in the list takes priority.
// The list is sorted by W, so AnyWord is always first if present.
// - if M == 1, a one-word value V that is the match value to report,
// identifying which of a set of regexps has been matched.
//
// - N two-word pairs W:NEXT indicating that if word W is seen, the DFA should
// move to the state at offset NEXT. The pairs are sorted by W. An entry for W == AnyWord
// is treated as matching any input word; an exact match later in the list takes priority.
// The list is sorted by W, so AnyWord is always first if present.
type reDFA []int32

// A dfaBuilder holds state for building a DFA from a reProg.
Expand Down Expand Up @@ -784,7 +777,6 @@ func (dfa reDFA) string(d *Dict) string {
// off = dnext
// }
// }
//
func (dfa reDFA) stateAt(off int32) (match int32, delta []int32) {
hdr := dfa[off]
off++
Expand Down Expand Up @@ -908,10 +900,7 @@ Words:
// the last time we saw a matching state),
// print information about it.
if TraceDFA > 0 && i-end >= TraceDFA {
start := i - 10
if start < 0 {
start = 0
}
start := max(i-10, 0)
print("DFA mismatch at «",
text[words[start].Lo:words[i].Lo], "|",
text[words[i].Lo:words[i].Hi], "»\n")
Expand All @@ -932,25 +921,18 @@ Words:
end = len(words)
}
if i := len(words); TraceDFA > 0 && i-end >= TraceDFA {
start := i - 10
if start < 0 {
start = 0
}
start := max(i-10, 0)
println("DFA ran out of input at «", text[words[start].Lo:], "|", "EOF", "»\n")
}
return match, end
}

// sortInt32s sorts x in place into increasing order.
// A nil or empty slice is left untouched.
func sortInt32s(x []int32) {
	// slices.Sort compares the elements directly with <, so a single
	// call is enough; no comparison closure or extra pass is needed.
	slices.Sort(x)
}

// sortWordIDs sorts x in place into increasing WordID order.
// A nil or empty slice is left untouched.
func sortWordIDs(x []WordID) {
	// slices.Sort compares the elements directly with <, so a single
	// call is enough; no comparison closure or extra pass is needed.
	slices.Sort(x)
}

// canMisspell reports whether want can be misspelled as have.
Expand Down
14 changes: 7 additions & 7 deletions internal/match/rematch_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -337,10 +337,10 @@ The name __10__ may not be used.

func TestCompile(t *testing.T) {
var d Dict
for _, tt := range strings.Split(compileTests, "\n\n") {
for tt := range strings.SplitSeq(compileTests, "\n\n") {
tt = strings.TrimSpace(tt) + "\n"
i := strings.Index(tt, "\n")
in, want := tt[:i], tt[i+1:]
before, after, _ := strings.Cut(tt, "\n")
in, want := before, after

prog := testProg(t, &d, in)
if prog == nil {
Expand All @@ -356,7 +356,7 @@ func TestCompile(t *testing.T) {
func testProg(t *testing.T, dict *Dict, expr string) reProg {
if strings.Contains(expr, "/") {
var list []reProg
for _, str := range strings.Split(expr, "/") {
for str := range strings.SplitSeq(expr, "/") {
re, err := reParse(dict, str, false)
if err != nil {
t.Errorf("Parse(%q): %v", expr, err)
Expand Down Expand Up @@ -567,10 +567,10 @@ The name __10__ may not be used

func TestCompileDFA(t *testing.T) {
var d Dict
for _, tt := range strings.Split(compileDFATests, "\n\n") {
for tt := range strings.SplitSeq(compileDFATests, "\n\n") {
tt = strings.TrimSpace(tt) + "\n"
i := strings.Index(tt, "\n")
in, want := tt[:i], tt[i+1:]
before, after, _ := strings.Cut(tt, "\n")
in, want := before, after

prog := testProg(t, &d, in)
if prog == nil {
Expand Down
Loading