Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion sjsonnet/src/sjsonnet/ByteRenderer.scala
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ class ByteRenderer(out: OutputStream = new java.io.ByteArrayOutputStream(), inde
(vt: @scala.annotation.switch) match {
case 0 => // TAG_STR
val s = v.asInstanceOf[Val.Str]
if (s._asciiSafe) renderAsciiSafeString(s.str)
if (s.isInstanceOf[Val.AsciiSafeStr]) renderAsciiSafeString(s.str)
else renderQuotedString(s.str)
case 1 => // TAG_NUM
renderDouble(v.asDouble)
Expand Down
28 changes: 16 additions & 12 deletions sjsonnet/src/sjsonnet/Format.scala
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ object Format {
/**
* True when every literal segment (leading + inter-spec literals) contains only printable
* ASCII with no `"` or `\`. Computed once at parse time; combined at format time with the
* ASCII-safety of each interpolated value to set the result's [[Val.Str._asciiSafe]] flag.
* ASCII-safety of each interpolated value to decide whether the result is a
* [[Val.AsciiSafeStr]].
*/
val literalsAsciiSafe: Boolean)
extends CompiledFormat
Expand Down Expand Up @@ -868,27 +869,30 @@ object Format {

/**
* ASCII-safety predicate matching the output of [[simpleStringValue]] (used by the simple
* `%(name)s` fast path). Numeric/boolean/null literals are always ASCII; strings forward their
* cached `_asciiSafe` flag; complex types route through Renderer which may emit non-ASCII.
* `%(name)s` fast path). Numeric/boolean/null literals are always ASCII; strings are ASCII-safe
* exactly when they are a [[Val.AsciiSafeStr]]; complex types route through Renderer, which may
* emit non-ASCII.
*/
@inline private def simpleStringValueAsciiSafe(rawVal: Val): Boolean = rawVal match {
case vs: Val.Str => vs._asciiSafe
case _: Val.Num => true
case _: Val.True => true
case _: Val.False => true
case _: Val.Null => true
case _ => false
case _: Val.AsciiSafeStr => true
case _: Val.Str => false
case _: Val.Num => true
case _: Val.True => true
case _: Val.False => true
case _: Val.Null => true
case _ => false
}

/**
* ASCII-safety predicate for the output of a single format spec, used by the general [[format]]
* path. Mirrors the conversion logic below: strings forward their cached flag, numerics produce
* path. Mirrors the conversion logic below: strings are ASCII-safe only when they are a
* [[Val.AsciiSafeStr]], numerics produce ASCII (except `%c` which depends on the codepoint),
* other scalars are always ASCII, and Arr/Obj go through Renderer (which preserves non-ASCII
* source bytes).
*/
@inline private def specOutputAsciiSafe(rawVal: Val, conversion: Char): Boolean = rawVal match {
case vs: Val.Str => vs._asciiSafe
case vn: Val.Num =>
case _: Val.AsciiSafeStr => true
case _: Val.Str => false
case vn: Val.Num =>
conversion match {
case 'c' =>
val ch = vn.asDouble.toInt
Expand Down
6 changes: 3 additions & 3 deletions sjsonnet/src/sjsonnet/Parser.scala
Original file line number Diff line number Diff line change
Expand Up @@ -754,10 +754,10 @@ class Parser(
// cost more than the potential memory savings for strings that are unlikely
// to repeat (e.g., 600KB text block literals)
val unique = if (s.length > 1024) s else internedStrings.getOrElseUpdate(s, s)
val result = Val.Str(pos, unique)
if (unique.length > 1024 && CharSWAR.isAsciiJsonSafe(unique))
result._asciiSafe = true
result
Val.Str.asciiSafe(pos, unique)
else
Val.Str(pos, unique)
}

// Any `expr` that isn't naively left-recursive
Expand Down
36 changes: 19 additions & 17 deletions sjsonnet/src/sjsonnet/Val.scala
Original file line number Diff line number Diff line change
Expand Up @@ -328,9 +328,11 @@ object Val {
* strings). Concat nodes have `_str == null` and non-null children; the flat string is lazily
* computed on first `.str` access, then cached and children cleared for GC.
*
* Single monomorphic class ensures optimal JIT inlining — no virtual dispatch on `.str`.
* Subclassing: only [[Val.AsciiSafeStr]] extends this class, and it overrides nothing. Because
* `.str` therefore has a single implementation, the JIT can still devirtualize `.str` calls via
* class hierarchy analysis (CHA), while the marker subclass saves 8 bytes per instance compared
* to a boolean field plus alignment padding.
*/
final class Str private[sjsonnet] (var pos: Position, private[sjsonnet] var _str: String)
class Str private[sjsonnet] (var pos: Position, private[sjsonnet] var _str: String)
extends Literal {

// DO NOT CHANGE to separate _left/_right fields.
Expand All @@ -340,11 +342,6 @@ object Val {
// cold flatten path, which is amortized O(1) per character.
private[sjsonnet] var _children: Array[Str] = null

// Flag indicating this string is known to contain only printable ASCII (0x20-0x7E) with no
// characters requiring JSON escaping (no ", \, or control chars). When true, the renderer
// can skip SWAR escape scanning and UTF-8 encoding, writing bytes directly.
private[sjsonnet] var _asciiSafe: Boolean = false

def prettyName = "string"
private[sjsonnet] def valTag: Byte = TAG_STR

Expand Down Expand Up @@ -407,17 +404,23 @@ object Val {
override def toString: String = s"Str($pos, $str)"
}

/**
* String known to contain only printable ASCII (0x20-0x7E) with no characters requiring JSON
* escaping (no `"`, `\`, or control chars). [[ByteRenderer]] checks for this subclass to skip
* SWAR escape scanning and UTF-8 encoding, writing bytes directly.
*
* Marker subclass instead of a boolean field saves 8 bytes per instance (boolean + alignment
* padding) — significant for string-heavy workloads where Val.Str instances number in millions.
*/
final class AsciiSafeStr private[sjsonnet] (pos0: Position, str0: String) extends Str(pos0, str0)

object Str {

/** Create a leaf string node — zero overhead vs the old case class. */
def apply(pos: Position, s: String): Str = new Str(pos, s)

/** Create a leaf string node marked as ASCII-safe (no JSON escaping needed). */
def asciiSafe(pos: Position, s: String): Str = {
val v = new Str(pos, s)
v._asciiSafe = true
v
}
def asciiSafe(pos: Position, s: String): Str = new AsciiSafeStr(pos, s)

/** Backward-compatible extractor: `case Val.Str(pos, s) =>` still works. */
def unapply(s: Str): Option[(Position, String)] = Some((s.pos, s.str))
Expand All @@ -432,16 +435,15 @@ object Val {
// Empty string elimination
if (ls != null && ls.isEmpty) return right
if (rs != null && rs.isEmpty) return left
val bothSafe = left.isInstanceOf[AsciiSafeStr] && right.isInstanceOf[AsciiSafeStr]
// Small string eagerness: both flat and combined length <= 128
if (ls != null && rs != null && ls.length + rs.length <= 128) {
val result = new Str(pos, ls + rs)
if (left._asciiSafe && right._asciiSafe) result._asciiSafe = true
return result
val combined = ls + rs
return if (bothSafe) new AsciiSafeStr(pos, combined) else new Str(pos, combined)
}
// Rope node: O(1)
val node = new Str(pos, null)
val node = if (bothSafe) new AsciiSafeStr(pos, null) else new Str(pos, null)
node._children = Array(left, right)
if (left._asciiSafe && right._asciiSafe) node._asciiSafe = true
node
}
}
Expand Down
55 changes: 26 additions & 29 deletions sjsonnet/src/sjsonnet/stdlib/StringModule.scala
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ object StringModule extends AbstractFunctionModule {
(x.value match {
case v: Val.Str =>
val s = v.str
if (v._asciiSafe) s.length
if (v.isInstanceOf[Val.AsciiSafeStr]) s.length
else s.codePointCount(0, s.length)
case a: Val.Arr => a.length
case o: Val.Obj => o.visibleKeyNames.length
Expand Down Expand Up @@ -131,7 +131,7 @@ object StringModule extends AbstractFunctionModule {
def evalRhs(_s: Eval, from: Eval, len: Eval, ev: EvalScope, pos: Position): Val = {
val srcVal = _s.value
val str = srcVal.asString
val srcAsciiSafe = srcVal.isInstanceOf[Val.Str] && srcVal.asInstanceOf[Val.Str]._asciiSafe
val srcAsciiSafe = srcVal.isInstanceOf[Val.AsciiSafeStr]
val offset = from.value match {
case v: Val.Num => v.asPositiveInt
case _ => Error.fail("Expected a number for offset in substr, got " + from.value.prettyName)
Expand All @@ -148,11 +148,8 @@ object StringModule extends AbstractFunctionModule {
val safeOffset = math.min(offset, strLen)
val safeLength = math.min(length, strLen - safeOffset)
if (safeLength <= 0) Val.Str(pos, "")
else {
val result = Val.Str(pos, str.substring(safeOffset, safeOffset + safeLength))
result._asciiSafe = true
result
}
else
Val.Str.asciiSafe(pos, str.substring(safeOffset, safeOffset + safeLength))
} else {
val requestedEnd = offset.toLong + length.toLong
if (
Expand Down Expand Up @@ -242,8 +239,8 @@ object StringModule extends AbstractFunctionModule {
val toVal = to.value
val out = srcVal.asString.replace(fromForce, toVal.asString)
// Result is asciiSafe iff both src and `to` are asciiSafe (`from` is removed).
val srcSafe = srcVal.isInstanceOf[Val.Str] && srcVal.asInstanceOf[Val.Str]._asciiSafe
val toSafe = toVal.isInstanceOf[Val.Str] && toVal.asInstanceOf[Val.Str]._asciiSafe
val srcSafe = srcVal.isInstanceOf[Val.AsciiSafeStr]
val toSafe = toVal.isInstanceOf[Val.AsciiSafeStr]
if (srcSafe && toSafe) Val.Str.asciiSafe(pos, out) else Val.Str(pos, out)
}
}
Expand Down Expand Up @@ -386,8 +383,8 @@ object StringModule extends AbstractFunctionModule {
right = true
)
v match {
case vs: Val.Str if vs._asciiSafe => Val.Str.asciiSafe(pos, out)
case _ => Val.Str(pos, out)
case _: Val.AsciiSafeStr => Val.Str.asciiSafe(pos, out)
case _ => Val.Str(pos, out)
}
}
}
Expand All @@ -409,8 +406,8 @@ object StringModule extends AbstractFunctionModule {
right = false
)
v match {
case vs: Val.Str if vs._asciiSafe => Val.Str.asciiSafe(pos, out)
case _ => Val.Str(pos, out)
case _: Val.AsciiSafeStr => Val.Str.asciiSafe(pos, out)
case _ => Val.Str(pos, out)
}
}
}
Expand All @@ -432,8 +429,8 @@ object StringModule extends AbstractFunctionModule {
right = true
)
v match {
case vs: Val.Str if vs._asciiSafe => Val.Str.asciiSafe(pos, out)
case _ => Val.Str(pos, out)
case _: Val.AsciiSafeStr => Val.Str.asciiSafe(pos, out)
case _ => Val.Str(pos, out)
}
}
}
Expand Down Expand Up @@ -461,7 +458,7 @@ object StringModule extends AbstractFunctionModule {
if (resultLen > Int.MaxValue) Error.fail("String is too large to join")
if (count == 1) str
else {
val asciiSafe = str._asciiSafe && sep._asciiSafe
val asciiSafe = str.isInstanceOf[Val.AsciiSafeStr] && sep.isInstanceOf[Val.AsciiSafeStr]

val b = new java.lang.StringBuilder(resultLen.toInt)
if (s.length + sepStr.length <= 64) {
Expand Down Expand Up @@ -533,12 +530,12 @@ object StringModule extends AbstractFunctionModule {
case x: Val.Str =>
if (added) {
totalLen += sepLen
asciiSafe &&= sep._asciiSafe
asciiSafe &&= sep.isInstanceOf[Val.AsciiSafeStr]
}
val str = x.str
totalLen += str.length
if (totalLen > Int.MaxValue) Error.fail("String is too large to join")
asciiSafe &&= x._asciiSafe
asciiSafe &&= x.isInstanceOf[Val.AsciiSafeStr]
added = true
case x => Error.fail("Cannot join " + x.prettyName)
}
Expand Down Expand Up @@ -583,7 +580,7 @@ object StringModule extends AbstractFunctionModule {
case x: Val.Str =>
totalLen += x.str.length
if (totalLen > Int.MaxValue) Error.fail("String is too large to join")
asciiSafe &&= x._asciiSafe
asciiSafe &&= x.isInstanceOf[Val.AsciiSafeStr]
elemCount += 1
case _ => return null
}
Expand All @@ -593,7 +590,7 @@ object StringModule extends AbstractFunctionModule {
if (elemCount > 1) {
totalLen += sepLen.toLong * (elemCount - 1)
if (totalLen > Int.MaxValue) Error.fail("String is too large to join")
asciiSafe &&= sep._asciiSafe
asciiSafe &&= sep.isInstanceOf[Val.AsciiSafeStr]
}

val b = new java.lang.StringBuilder(totalLen.toInt)
Expand Down Expand Up @@ -648,11 +645,11 @@ object StringModule extends AbstractFunctionModule {
case x: Val.Str =>
if (added) {
b.append(s)
asciiSafe &&= sepStr._asciiSafe
asciiSafe &&= sepStr.isInstanceOf[Val.AsciiSafeStr]
}
added = true
b.append(x.str)
asciiSafe &&= x._asciiSafe
asciiSafe &&= x.isInstanceOf[Val.AsciiSafeStr]
case x => Error.fail("Cannot join " + x.prettyName)
}
i += 1
Expand Down Expand Up @@ -864,7 +861,7 @@ object StringModule extends AbstractFunctionModule {
private object Split extends Val.Builtin2("split", "str", "c") {
def evalRhs(str: Eval, c: Eval, ev: EvalScope, pos: Position): Val = {
val v = str.value
val safe = v.isInstanceOf[Val.Str] && v.asInstanceOf[Val.Str]._asciiSafe
val safe = v.isInstanceOf[Val.AsciiSafeStr]
Val.Arr(pos, splitLimit(pos, v.asString, c.value.asString, -1, safe))
}
}
Expand All @@ -882,7 +879,7 @@ object StringModule extends AbstractFunctionModule {
private object SplitLimit extends Val.Builtin3("splitLimit", "str", "c", "maxsplits") {
def evalRhs(str: Eval, c: Eval, maxSplits: Eval, ev: EvalScope, pos: Position): Val = {
val v = str.value
val safe = v.isInstanceOf[Val.Str] && v.asInstanceOf[Val.Str]._asciiSafe
val safe = v.isInstanceOf[Val.AsciiSafeStr]
Val.Arr(
pos,
splitLimit(pos, v.asString, c.value.asString, maxSplits.value.asInt, safe)
Expand All @@ -900,7 +897,7 @@ object StringModule extends AbstractFunctionModule {
private object SplitLimitR extends Val.Builtin3("splitLimitR", "str", "c", "maxsplits") {
def evalRhs(str: Eval, c: Eval, maxSplits: Eval, ev: EvalScope, pos: Position): Val = {
val v = str.value
val safe = v.isInstanceOf[Val.Str] && v.asInstanceOf[Val.Str]._asciiSafe
val safe = v.isInstanceOf[Val.AsciiSafeStr]
Val.Arr(
pos,
splitLimitR(pos, v.asString, c.value.asString, maxSplits.value.asInt, safe)
Expand Down Expand Up @@ -1051,8 +1048,8 @@ object StringModule extends AbstractFunctionModule {
val s = v.asString
val out = asciiUpper(s)
v match {
case vs: Val.Str if vs._asciiSafe => Val.Str.asciiSafe(pos, out)
case _ => Val.Str(pos, out)
case _: Val.AsciiSafeStr => Val.Str.asciiSafe(pos, out)
case _ => Val.Str(pos, out)
}
}
}
Expand All @@ -1070,8 +1067,8 @@ object StringModule extends AbstractFunctionModule {
val s = v.asString
val out = asciiLower(s)
v match {
case vs: Val.Str if vs._asciiSafe => Val.Str.asciiSafe(pos, out)
case _ => Val.Str(pos, out)
case _: Val.AsciiSafeStr => Val.Str.asciiSafe(pos, out)
case _ => Val.Str(pos, out)
}
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// Directional coverage for Format ASCII-safety propagation.
// Ensures format strings preserve correct values across paths that set Val.Str._asciiSafe:
// Ensures format strings preserve correct values across paths that produce Val.AsciiSafeStr:
// - simple %(name)s fast path with ASCII / non-ASCII literals and values
// - general format path with %s / %d / %c / %o / %x conversions
// - mixed ASCII literals + non-ASCII string interpolations (output must be correct)
Expand Down
8 changes: 4 additions & 4 deletions sjsonnet/test/src/sjsonnet/FormatTests.scala
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ object FormatTests extends TestSuite {
)
val result = fmt.evalRhs(obj, scope, pos).asInstanceOf[Val.Str]
result.str ==> "hello 3"
result._asciiSafe ==> true
result.isInstanceOf[Val.AsciiSafeStr] ==> true
}

test("simple named format does not mark unsafe string values ascii-safe") {
Expand All @@ -42,7 +42,7 @@ object FormatTests extends TestSuite {
)
val result = fmt.evalRhs(obj, scope, pos).asInstanceOf[Val.Str]
result.str ==> "hello \""
result._asciiSafe ==> false
result.isInstanceOf[Val.AsciiSafeStr] ==> false
}

test("simple named format does not mark unsafe static literals ascii-safe") {
Expand All @@ -53,7 +53,7 @@ object FormatTests extends TestSuite {
)
val result = fmt.evalRhs(obj, scope, pos).asInstanceOf[Val.Str]
result.str ==> "hello \"3"
result._asciiSafe ==> false
result.isInstanceOf[Val.AsciiSafeStr] ==> false
}

test("simple named format combines ascii-safety across multiple keys") {
Expand All @@ -67,7 +67,7 @@ object FormatTests extends TestSuite {
)
val result = fmt.evalRhs(obj, scope, pos).asInstanceOf[Val.Str]
result.str ==> "safe \\ safe"
result._asciiSafe ==> false
result.isInstanceOf[Val.AsciiSafeStr] ==> false
}
}
}
Loading