Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
Now allows UseTeddy when anchors are only `(?m)^` (no \b, $, etc).
`http_methods` on macOS ARM64: 89ms → **<1ms** (restored to v0.12.14 level).

- **Fix NFA candidate loop guard** — `IsComplete()` guard blocked prefilter
candidate loop for ALL incomplete prefilters, including prefix-only ones
where all alternation branches are represented. Now uses `partialCoverage`
flag (set only on overflow truncation) instead of `IsComplete()`. Pattern
` [5][0-9]{2} | [4][0-9]{2} ` (Kostya's `errors`): 1984ms → **109ms**.
Rust handles this by integrating prefilter as skip-ahead inside PikeVM
(not as an external correctness gate) — see `pikevm.rs:1293-1299`.

## [0.12.16] - 2026-03-21

### Performance
Expand Down
10 changes: 8 additions & 2 deletions literal/extractor.go
Original file line number Diff line number Diff line change
Expand Up @@ -267,10 +267,9 @@ func (e *Extractor) extractPrefixesAlternate(re *syntax.Regexp, depth int) *Seq
result := NewSeq(allLits...)

if overflowed || result.Len() > e.config.MaxLiterals {
// Either not all branches are represented (overflow) or too many literals.
// Trim to 3-byte prefixes + dedup to fit prefilter capacity.
// Mark ALL as inexact — prefilter is used for skip-ahead only,
// DFA/NFA verifies each candidate (safe with partial coverage).
// DFA/NFA verifies each candidate.
//
// Rust does the same: optimize_for_prefix_by_preference trims and deduplicates.
// A partial prefilter is much better than no prefilter — DFA with skip-ahead
Expand All @@ -281,6 +280,13 @@ func (e *Extractor) extractPrefixesAlternate(re *syntax.Regexp, depth int) *Seq
if result.Len() > e.config.MaxLiterals {
result.literals = result.literals[:e.config.MaxLiterals]
}
// Mark partial coverage when overflow truncated branches.
// Prefilter with partial coverage CANNOT be used in candidate loops
// (would miss unrepresented branches). Only safe as skip-ahead
// inside NFA/DFA engine (Rust approach: PikeVM integrates prefilter).
if overflowed {
result.partialCoverage = true
}
}

return result
Expand Down
16 changes: 15 additions & 1 deletion literal/seq.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,21 @@ func (l Literal) String() string {
// )
// fmt.Printf("Sequence has %d literals\n", seq.Len()) // Output: Sequence has 2 literals
type Seq struct {
literals []Literal
literals []Literal
partialCoverage bool // True when alternation overflow truncated branches
}

// IsPartialCoverage returns true if the literal set doesn't cover all
// alternation branches (due to overflow truncation). A partial-coverage
// prefilter cannot be used as a correctness gate in candidate loops —
// it would miss branches whose literals were not extracted.
// Rust avoids this by integrating prefilter as skip-ahead inside PikeVM,
// not as an external candidate loop.
func (s *Seq) IsPartialCoverage() bool {
if s == nil {
return false
}
return s.partialCoverage
}

// NewSeq creates a new sequence from the given literals.
Expand Down
1 change: 1 addition & 0 deletions meta/compile.go
Original file line number Diff line number Diff line change
Expand Up @@ -617,6 +617,7 @@ func CompileRegexp(re *syntax.Regexp, config Config) (*Engine, error) {
ahoCorasick: engines.ahoCorasick,
anchoredLiteralInfo: anchoredLiteralInfo,
prefilter: pf,
prefilterPartialCoverage: literals != nil && literals.IsPartialCoverage(),
strategy: strategy,
config: config,
onepass: onePassRes,
Expand Down
1 change: 1 addition & 0 deletions meta/engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ type Engine struct {
ahoCorasick *ahocorasick.Automaton // For large literal alternations (>32 patterns)
anchoredLiteralInfo *AnchoredLiteralInfo // For ^prefix.*suffix$ patterns (Issue #79)
prefilter prefilter.Prefilter
prefilterPartialCoverage bool // True when prefilter doesn't cover all alternation branches
strategy Strategy
config Config

Expand Down
34 changes: 20 additions & 14 deletions meta/find_indices.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,11 +119,16 @@ func (e *Engine) findIndicesNFA(haystack []byte) (int, int, bool) {
state := e.getSearchState()
defer e.putSearchState(state)

// Use prefilter candidate loop for skip-ahead — but ONLY when prefilter
// covers all possible match positions (IsComplete or all branches represented).
// Incomplete prefilters (partial case-fold coverage) cannot be used as
// correctness gates — they'd miss branches whose literals were truncated.
if e.prefilter != nil && e.prefilter.IsComplete() {
// Use prefilter for candidate skip-ahead if available.
// Prefilter finds PREFIX positions → NFA/BT verifies full match from there.
// Safe for both complete and incomplete prefilters — as long as all
// alternation branches are represented in the literal set.
//
// NOT safe for partial-coverage prefilters (overflow truncated branches):
// candidate loop would miss unrepresented branches entirely.
// Rust avoids this by integrating prefilter inside PikeVM as skip-ahead
// (not as an external correctness gate). See pikevm.rs:1293-1299.
if e.prefilter != nil && !e.prefilterPartialCoverage {
at := 0
for at < len(haystack) {
// Find next candidate position via prefilter
Expand Down Expand Up @@ -175,8 +180,8 @@ func (e *Engine) findIndicesNFAAt(haystack []byte, at int) (int, int, bool) {
state := e.getSearchState()
defer e.putSearchState(state)

// Use prefilter candidate loop — only safe with complete prefilter
if e.prefilter != nil && e.prefilter.IsComplete() {
// Use prefilter candidate loop — safe unless partial coverage (overflow)
if e.prefilter != nil && !e.prefilterPartialCoverage {
for at < len(haystack) {
pos := e.prefilter.Find(haystack, at)
if pos == -1 {
Expand Down Expand Up @@ -214,7 +219,7 @@ func (e *Engine) findIndicesNFAAt(haystack []byte, at int) (int, int, bool) {
func (e *Engine) findIndicesDFA(haystack []byte) (int, int, bool) {
atomic.AddUint64(&e.stats.DFASearches, 1)

// Literal fast path
// Literal fast path — requires complete prefilter
if e.prefilter != nil && e.prefilter.IsComplete() {
pos := e.prefilter.Find(haystack, 0)
if pos == -1 {
Expand Down Expand Up @@ -280,7 +285,8 @@ func (e *Engine) findIndicesDFA(haystack []byte) (int, int, bool) {
}

// Prefilter with non-greedy: use prefilter for rejection only, PikeVM for match.
if e.prefilter != nil {
// Not safe with partial coverage — would miss unrepresented branches.
if e.prefilter != nil && !e.prefilterPartialCoverage {
pos := e.prefilter.Find(haystack, 0)
if pos == -1 {
return -1, -1, false
Expand Down Expand Up @@ -308,7 +314,7 @@ func (e *Engine) findIndicesDFA(haystack []byte) (int, int, bool) {
func (e *Engine) findIndicesDFAAt(haystack []byte, at int) (int, int, bool) {
atomic.AddUint64(&e.stats.DFASearches, 1)

// Literal fast path
// Literal fast path — requires complete prefilter
if e.prefilter != nil && e.prefilter.IsComplete() {
pos := e.prefilter.Find(haystack, at)
if pos == -1 {
Expand All @@ -323,7 +329,7 @@ func (e *Engine) findIndicesDFAAt(haystack []byte, at int) (int, int, bool) {
}

// Prefilter skip: use prefix prefilter to jump to candidate position.
if e.prefilter != nil {
if e.prefilter != nil && e.prefilter.IsComplete() {
pos := e.prefilter.Find(haystack, at)
if pos == -1 {
return -1, -1, false
Expand Down Expand Up @@ -1028,9 +1034,9 @@ func (e *Engine) findIndicesNFAAtWithState(haystack []byte, at int, state *Searc
// BoundedBacktracker can be used for Find operations only when safe
useBT := e.boundedBacktracker != nil && !e.canMatchEmpty

// Use prefilter candidate loop — only safe with complete prefilter.
// Incomplete prefilters (partial case-fold coverage) would miss branches.
if e.prefilter != nil && e.prefilter.IsComplete() {
// Use prefilter candidate loop — safe unless partial coverage (overflow).
// Partial-coverage prefilters would miss unrepresented branches.
if e.prefilter != nil && !e.prefilterPartialCoverage {
for at < len(haystack) {
pos := e.prefilter.Find(haystack, at)
if pos == -1 {
Expand Down
Loading