Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 25 additions & 19 deletions collate/collate.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,13 @@ import (

// Collator provides functionality for comparing strings for a given
// collation order.
//
// A Collator is safe for concurrent use by multiple goroutines after
// construction. Compare and CompareString use goroutine-local state only.
type Collator struct {
// options is an embedded field, so its fields and methods are promoted to
// Collator.
options

sorter sorter

// _iter holds two iterators stored on the Collator itself; the iter method
// returns pointers into this array.
// NOTE(review): anything kept here is shared by every user of the same
// Collator, which does not fit the concurrency statement in the type comment
// above. Confirm whether this field is still needed.
_iter [2]iter
}

// iter returns a pointer to element i of the Collator's _iter array.
// The element lives on c, so repeated calls with the same index on the same
// Collator yield the same *iter.
func (c *Collator) iter(i int) *iter {
	// TODO: evaluate performance for making the second iterator optional.
	slot := &c._iter[i]
	return slot
}

// Supported returns the list of languages for which collating differs from its parent.
Expand Down Expand Up @@ -79,8 +75,6 @@ func (c *Collator) init() {
if c.numeric {
c.t = colltab.NewNumericWeighter(c.t)
}
c._iter[0].init(c)
c._iter[1].init(c)
}

// Buffer holds keys generated by Key and KeyString.
Expand All @@ -102,12 +96,16 @@ func (b *Buffer) Reset() {

// Compare returns an integer comparing the two byte slices.
// The result will be 0 if a==b, -1 if a < b, and +1 if a > b.
// Compare is safe for concurrent use by multiple goroutines.
func (c *Collator) Compare(a, b []byte) int {
// TODO: skip identical prefixes once we have a fast way to detect if a rune is
// part of a contraction. This would lead to roughly a 10% speedup for the colcmp regtest.
c.iter(0).SetInput(a)
c.iter(1).SetInput(b)
if res := c.compare(); res != 0 {
var ia, ib iter
ia.init(c)
ib.init(c)
ia.SetInput(a)
ib.SetInput(b)
if res := c.compareIters(&ia, &ib); res != 0 {
return res
}
if !c.ignore[colltab.Identity] {
Expand All @@ -118,12 +116,16 @@ func (c *Collator) Compare(a, b []byte) int {

// CompareString returns an integer comparing the two strings.
// The result will be 0 if a==b, -1 if a < b, and +1 if a > b.
// CompareString is safe for concurrent use by multiple goroutines.
func (c *Collator) CompareString(a, b string) int {
// TODO: skip identical prefixes once we have a fast way to detect if a rune is
// part of a contraction. This would lead to roughly a 10% speedup for the colcmp regtest.
c.iter(0).SetInputString(a)
c.iter(1).SetInputString(b)
if res := c.compare(); res != 0 {
var ia, ib iter
ia.init(c)
ib.init(c)
ia.SetInputString(a)
ib.SetInputString(b)
if res := c.compareIters(&ia, &ib); res != 0 {
return res
}
if !c.ignore[colltab.Identity] {
Expand Down Expand Up @@ -154,8 +156,7 @@ func compareLevel(f func(i *iter) int, a, b *iter) int {
return 0
}

func (c *Collator) compare() int {
ia, ib := c.iter(0), c.iter(1)
func (c *Collator) compareIters(ia, ib *iter) int {
// Process primary level
if c.alternate != altShifted {
// TODO: implement script reordering
Expand Down Expand Up @@ -216,22 +217,27 @@ func (c *Collator) key(buf *Buffer, w []colltab.Elem) []byte {
}

// getColElems runs an iterator over str until Next reports false and returns
// the iterator's Elems.
//
// The iterator is a variable local to this call and is initialised from c, so
// the call does not use any iterator stored on the Collator.
func (c *Collator) getColElems(str []byte) []colltab.Elem {
	var it iter
	it.init(c)
	it.SetInput(str)
	// Drain the iterator; the loop body is intentionally empty because only
	// the accumulated Elems are needed.
	for it.Next() {
	}
	return it.Elems
}

// getColElemsString is the string counterpart of getColElems: it runs an
// iterator over str until Next reports false and returns the iterator's Elems.
//
// The iterator is a variable local to this call and is initialised from c, so
// the call does not use any iterator stored on the Collator.
func (c *Collator) getColElemsString(str string) []colltab.Elem {
	var it iter
	it.init(c)
	it.SetInputString(str)
	// Drain the iterator; the loop body is intentionally empty because only
	// the accumulated Elems are needed.
	for it.Next() {
	}
	return it.Elems
}

type iter struct {
// wa is the goroutine-local scratch buffer for collation elements.
// 512 slots avoid heap fallback for virtually all real-world strings;
// the CPU only touches as many cache lines as the comparison actually needs.
wa [512]colltab.Elem

colltab.Iter
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module golang.org/x/text

go 1.25.0
go 1.24.0

require golang.org/x/tools v0.41.0 // tagx:ignore

Expand Down