Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 27 additions & 1 deletion db/queries/queries.go
Original file line number Diff line number Diff line change
Expand Up @@ -555,6 +555,10 @@ func isNumericType(colType string) bool {
// When allCols is provided, it returns concat_ws('|', col1_expr, col2_expr, ...)
// with numeric/decimal columns wrapped in trim_scale() to normalize trailing zeros.
// If allCols is nil/empty, falls back to the table-alias::text whole-row cast.
//
// PostgreSQL limits functions to 100 arguments. Since concat_ws uses 1 argument
// for the separator, at most 99 column expressions fit per call. For wider
// tables, the expressions are batched into nested concat_ws calls.
func buildRowTextExpr(tableAlias string, allCols []string, colTypes map[string]string) string {
if len(allCols) == 0 {
return tableAlias + "::text"
Expand All @@ -570,7 +574,29 @@ func buildRowTextExpr(tableAlias string, allCols []string, colTypes map[string]s
exprs[i] = fmt.Sprintf("COALESCE(%s::text, '')", qualifiedCol)
}
}
return fmt.Sprintf("concat_ws('|', %s)", strings.Join(exprs, ", "))
return concatWSBatched(exprs)
}

// concatWSBatched produces a concat_ws('|', ...) expression. When len(exprs)
// exceeds 99 (the max value-arguments per concat_ws call, since the separator
// takes one slot), it splits the expressions into batches and nests the calls.
func concatWSBatched(exprs []string) string {
const maxArgs = 99 // 100 total - 1 for the separator

if len(exprs) <= maxArgs {
return fmt.Sprintf("concat_ws('|', %s)", strings.Join(exprs, ", "))
}

// Split into batches, wrap each in its own concat_ws, then combine.
var batches []string
for i := 0; i < len(exprs); i += maxArgs {
end := i + maxArgs
if end > len(exprs) {
end = len(exprs)
}
batches = append(batches, fmt.Sprintf("concat_ws('|', %s)", strings.Join(exprs[i:end], ", ")))
}
return fmt.Sprintf("concat_ws('|', %s)", strings.Join(batches, ", "))
}

func BlockHashSQL(schema, table string, primaryKeyCols []string, mode string, includeLower, includeUpper bool, filter string, allCols []string, colTypes map[string]string) (string, error) {
Expand Down
58 changes: 58 additions & 0 deletions db/queries/queries_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
package queries

import (
"fmt"
"strings"
"testing"
)
Expand Down Expand Up @@ -409,3 +410,60 @@ func TestBlockHashSQL(t *testing.T) {
})
}
}

func TestConcatWSBatched(t *testing.T) {
t.Run("under limit is single concat_ws", func(t *testing.T) {
exprs := make([]string, 50)
for i := range exprs {
exprs[i] = fmt.Sprintf("col%d", i)
}
result := concatWSBatched(exprs)
// Should be a single concat_ws with all 50 expressions
if strings.Count(result, "concat_ws(") != 1 {
t.Errorf("expected 1 concat_ws call, got: %s", result)
}
})

t.Run("at limit of 99 is single concat_ws", func(t *testing.T) {
exprs := make([]string, 99)
for i := range exprs {
exprs[i] = fmt.Sprintf("col%d", i)
}
result := concatWSBatched(exprs)
if strings.Count(result, "concat_ws(") != 1 {
t.Errorf("expected 1 concat_ws call for exactly 99 exprs, got: %s", result)
}
})

t.Run("100 expressions nests into batches", func(t *testing.T) {
exprs := make([]string, 100)
for i := range exprs {
exprs[i] = fmt.Sprintf("col%d", i)
}
result := concatWSBatched(exprs)
// Should have 3 concat_ws calls: 1 outer + 2 inner (99 + 1)
if strings.Count(result, "concat_ws(") != 3 {
t.Errorf("expected 3 concat_ws calls for 100 exprs, got %d in: %s",
strings.Count(result, "concat_ws("), result)
}
// Every expression should be present
for _, e := range exprs {
if !strings.Contains(result, e) {
t.Errorf("missing expression %s in result", e)
}
}
})

t.Run("200 expressions nests into 3 inner batches", func(t *testing.T) {
exprs := make([]string, 200)
for i := range exprs {
exprs[i] = fmt.Sprintf("col%d", i)
}
result := concatWSBatched(exprs)
// 99 + 99 + 2 = 3 inner batches + 1 outer = 4 concat_ws calls
if strings.Count(result, "concat_ws(") != 4 {
t.Errorf("expected 4 concat_ws calls for 200 exprs, got %d in: %s",
strings.Count(result, "concat_ws("), result)
}
})
}
Loading