diff --git a/sjsonnet/src/sjsonnet/ByteRenderer.scala b/sjsonnet/src/sjsonnet/ByteRenderer.scala index ad65bdbd..11766386 100644 --- a/sjsonnet/src/sjsonnet/ByteRenderer.scala +++ b/sjsonnet/src/sjsonnet/ByteRenderer.scala @@ -193,7 +193,7 @@ class ByteRenderer(out: OutputStream = new java.io.ByteArrayOutputStream(), inde (vt: @scala.annotation.switch) match { case 0 => // TAG_STR val s = v.asInstanceOf[Val.Str] - if (s._asciiSafe) renderAsciiSafeString(s.str) + if (s.isInstanceOf[Val.AsciiSafeStr]) renderAsciiSafeString(s.str) else renderQuotedString(s.str) case 1 => // TAG_NUM renderDouble(v.asDouble) diff --git a/sjsonnet/src/sjsonnet/Format.scala b/sjsonnet/src/sjsonnet/Format.scala index e69c60c1..052599ea 100644 --- a/sjsonnet/src/sjsonnet/Format.scala +++ b/sjsonnet/src/sjsonnet/Format.scala @@ -50,7 +50,8 @@ object Format { /** * True when every literal segment (leading + inter-spec literals) contains only printable * ASCII with no `"` or `\`. Computed once at parse time; combined at format time with the - * ASCII-safety of each interpolated value to set the result's [[Val.Str._asciiSafe]] flag. + * ASCII-safety of each interpolated value to decide whether the result is a + * [[Val.AsciiSafeStr]]. */ val literalsAsciiSafe: Boolean) extends CompiledFormat @@ -868,27 +869,30 @@ object Format { /** * ASCII-safety predicate matching the output of [[simpleStringValue]] (used by the simple - * `%(name)s` fast path). Numeric/boolean/null literals are always ASCII; strings forward their - * cached `_asciiSafe` flag; complex types route through Renderer which may emit non-ASCII. + * `%(name)s` fast path). Numeric/boolean/null literals are always ASCII; strings are safe iff + * they are a [[Val.AsciiSafeStr]]; complex types route through Renderer which may emit + * non-ASCII. 
*/ @inline private def simpleStringValueAsciiSafe(rawVal: Val): Boolean = rawVal match { - case vs: Val.Str => vs._asciiSafe - case _: Val.Num => true - case _: Val.True => true - case _: Val.False => true - case _: Val.Null => true - case _ => false + case _: Val.AsciiSafeStr => true + case _: Val.Str => false + case _: Val.Num => true + case _: Val.True => true + case _: Val.False => true + case _: Val.Null => true + case _ => false } /** * ASCII-safety predicate for the output of a single format spec, used by the general [[format]] - * path. Mirrors the conversion logic below: strings forward their cached flag, numerics produce + * path. Mirrors the conversion logic below: strings are safe iff AsciiSafeStr, numerics produce * ASCII (except `%c` which depends on the codepoint), other scalars are always ASCII, and Arr/Obj * go through Renderer (which preserves non-ASCII source bytes). */ @inline private def specOutputAsciiSafe(rawVal: Val, conversion: Char): Boolean = rawVal match { - case vs: Val.Str => vs._asciiSafe - case vn: Val.Num => + case _: Val.AsciiSafeStr => true + case _: Val.Str => false + case vn: Val.Num => conversion match { case 'c' => val ch = vn.asDouble.toInt diff --git a/sjsonnet/src/sjsonnet/Parser.scala b/sjsonnet/src/sjsonnet/Parser.scala index 348d742e..30a2f0e6 100644 --- a/sjsonnet/src/sjsonnet/Parser.scala +++ b/sjsonnet/src/sjsonnet/Parser.scala @@ -754,10 +754,10 @@ class Parser( // cost more than the potential memory savings for strings that are unlikely // to repeat (e.g., 600KB text block literals) val unique = if (s.length > 1024) s else internedStrings.getOrElseUpdate(s, s) - val result = Val.Str(pos, unique) if (unique.length > 1024 && CharSWAR.isAsciiJsonSafe(unique)) - result._asciiSafe = true - result + Val.Str.asciiSafe(pos, unique) + else + Val.Str(pos, unique) } // Any `expr` that isn't naively left-recursive diff --git a/sjsonnet/src/sjsonnet/Val.scala b/sjsonnet/src/sjsonnet/Val.scala index 74429f8e..42698162 100644 --- 
a/sjsonnet/src/sjsonnet/Val.scala +++ b/sjsonnet/src/sjsonnet/Val.scala @@ -328,9 +328,11 @@ object Val { * strings). Concat nodes have `_str == null` and non-null children; the flat string is lazily * computed on first `.str` access, then cached and children cleared for GC. * - * Single monomorphic class ensures optimal JIT inlining — no virtual dispatch on `.str`. + * Subclassing: only [[Val.AsciiSafeStr]] extends this class. The two-class hierarchy lets the JIT + * still devirtualize `.str` access through CHA (the subclass does not override it) while saving + * 8 bytes per instance compared to a boolean field plus alignment padding. */ - final class Str private[sjsonnet] (var pos: Position, private[sjsonnet] var _str: String) + class Str private[sjsonnet] (var pos: Position, private[sjsonnet] var _str: String) extends Literal { // DO NOT CHANGE to separate _left/_right fields. @@ -340,11 +342,6 @@ object Val { // cold flatten path, which is amortized O(1) per character. private[sjsonnet] var _children: Array[Str] = null - // Flag indicating this string is known to contain only printable ASCII (0x20-0x7E) with no - // characters requiring JSON escaping (no ", \, or control chars). When true, the renderer - // can skip SWAR escape scanning and UTF-8 encoding, writing bytes directly. - private[sjsonnet] var _asciiSafe: Boolean = false - def prettyName = "string" private[sjsonnet] def valTag: Byte = TAG_STR @@ -407,17 +404,23 @@ object Val { override def toString: String = s"Str($pos, $str)" } + /** + * String known to contain only printable ASCII (0x20-0x7E) with no characters requiring JSON + * escaping (no `"`, `\`, or control chars). [[ByteRenderer]] checks for this subclass to skip + * SWAR escape scanning and UTF-8 encoding, writing bytes directly. + * + * Marker subclass instead of a boolean field saves 8 bytes per instance (boolean + alignment + * padding) — significant for string-heavy workloads where Val.Str instances number in millions. 
+ */ + final class AsciiSafeStr private[sjsonnet] (pos0: Position, str0: String) extends Str(pos0, str0) + object Str { /** Create a leaf string node — zero overhead vs the old case class. */ def apply(pos: Position, s: String): Str = new Str(pos, s) /** Create a leaf string node marked as ASCII-safe (no JSON escaping needed). */ - def asciiSafe(pos: Position, s: String): Str = { - val v = new Str(pos, s) - v._asciiSafe = true - v - } + def asciiSafe(pos: Position, s: String): Str = new AsciiSafeStr(pos, s) /** Backward-compatible extractor: `case Val.Str(pos, s) =>` still works. */ def unapply(s: Str): Option[(Position, String)] = Some((s.pos, s.str)) @@ -432,16 +435,15 @@ object Val { // Empty string elimination if (ls != null && ls.isEmpty) return right if (rs != null && rs.isEmpty) return left + val bothSafe = left.isInstanceOf[AsciiSafeStr] && right.isInstanceOf[AsciiSafeStr] // Small string eagerness: both flat and combined length <= 128 if (ls != null && rs != null && ls.length + rs.length <= 128) { - val result = new Str(pos, ls + rs) - if (left._asciiSafe && right._asciiSafe) result._asciiSafe = true - return result + val combined = ls + rs + return if (bothSafe) new AsciiSafeStr(pos, combined) else new Str(pos, combined) } // Rope node: O(1) - val node = new Str(pos, null) + val node = if (bothSafe) new AsciiSafeStr(pos, null) else new Str(pos, null) node._children = Array(left, right) - if (left._asciiSafe && right._asciiSafe) node._asciiSafe = true node } } diff --git a/sjsonnet/src/sjsonnet/stdlib/StringModule.scala b/sjsonnet/src/sjsonnet/stdlib/StringModule.scala index a8ab4304..dfb916f5 100644 --- a/sjsonnet/src/sjsonnet/stdlib/StringModule.scala +++ b/sjsonnet/src/sjsonnet/stdlib/StringModule.scala @@ -84,7 +84,7 @@ object StringModule extends AbstractFunctionModule { (x.value match { case v: Val.Str => val s = v.str - if (v._asciiSafe) s.length + if (v.isInstanceOf[Val.AsciiSafeStr]) s.length else s.codePointCount(0, s.length) case a: Val.Arr => 
a.length case o: Val.Obj => o.visibleKeyNames.length @@ -131,7 +131,7 @@ object StringModule extends AbstractFunctionModule { def evalRhs(_s: Eval, from: Eval, len: Eval, ev: EvalScope, pos: Position): Val = { val srcVal = _s.value val str = srcVal.asString - val srcAsciiSafe = srcVal.isInstanceOf[Val.Str] && srcVal.asInstanceOf[Val.Str]._asciiSafe + val srcAsciiSafe = srcVal.isInstanceOf[Val.AsciiSafeStr] val offset = from.value match { case v: Val.Num => v.asPositiveInt case _ => Error.fail("Expected a number for offset in substr, got " + from.value.prettyName) @@ -148,11 +148,8 @@ object StringModule extends AbstractFunctionModule { val safeOffset = math.min(offset, strLen) val safeLength = math.min(length, strLen - safeOffset) if (safeLength <= 0) Val.Str(pos, "") - else { - val result = Val.Str(pos, str.substring(safeOffset, safeOffset + safeLength)) - result._asciiSafe = true - result - } + else + Val.Str.asciiSafe(pos, str.substring(safeOffset, safeOffset + safeLength)) } else { val requestedEnd = offset.toLong + length.toLong if ( @@ -242,8 +239,8 @@ object StringModule extends AbstractFunctionModule { val toVal = to.value val out = srcVal.asString.replace(fromForce, toVal.asString) // Result is asciiSafe iff both src and `to` are asciiSafe (`from` is removed). 
- val srcSafe = srcVal.isInstanceOf[Val.Str] && srcVal.asInstanceOf[Val.Str]._asciiSafe - val toSafe = toVal.isInstanceOf[Val.Str] && toVal.asInstanceOf[Val.Str]._asciiSafe + val srcSafe = srcVal.isInstanceOf[Val.AsciiSafeStr] + val toSafe = toVal.isInstanceOf[Val.AsciiSafeStr] if (srcSafe && toSafe) Val.Str.asciiSafe(pos, out) else Val.Str(pos, out) } } @@ -386,8 +383,8 @@ object StringModule extends AbstractFunctionModule { right = true ) v match { - case vs: Val.Str if vs._asciiSafe => Val.Str.asciiSafe(pos, out) - case _ => Val.Str(pos, out) + case _: Val.AsciiSafeStr => Val.Str.asciiSafe(pos, out) + case _ => Val.Str(pos, out) } } } @@ -409,8 +406,8 @@ object StringModule extends AbstractFunctionModule { right = false ) v match { - case vs: Val.Str if vs._asciiSafe => Val.Str.asciiSafe(pos, out) - case _ => Val.Str(pos, out) + case _: Val.AsciiSafeStr => Val.Str.asciiSafe(pos, out) + case _ => Val.Str(pos, out) } } } @@ -432,8 +429,8 @@ object StringModule extends AbstractFunctionModule { right = true ) v match { - case vs: Val.Str if vs._asciiSafe => Val.Str.asciiSafe(pos, out) - case _ => Val.Str(pos, out) + case _: Val.AsciiSafeStr => Val.Str.asciiSafe(pos, out) + case _ => Val.Str(pos, out) } } } @@ -461,7 +458,7 @@ object StringModule extends AbstractFunctionModule { if (resultLen > Int.MaxValue) Error.fail("String is too large to join") if (count == 1) str else { - val asciiSafe = str._asciiSafe && sep._asciiSafe + val asciiSafe = str.isInstanceOf[Val.AsciiSafeStr] && sep.isInstanceOf[Val.AsciiSafeStr] val b = new java.lang.StringBuilder(resultLen.toInt) if (s.length + sepStr.length <= 64) { @@ -533,12 +530,12 @@ object StringModule extends AbstractFunctionModule { case x: Val.Str => if (added) { totalLen += sepLen - asciiSafe &&= sep._asciiSafe + asciiSafe &&= sep.isInstanceOf[Val.AsciiSafeStr] } val str = x.str totalLen += str.length if (totalLen > Int.MaxValue) Error.fail("String is too large to join") - asciiSafe &&= x._asciiSafe + asciiSafe &&= 
x.isInstanceOf[Val.AsciiSafeStr] added = true case x => Error.fail("Cannot join " + x.prettyName) } @@ -583,7 +580,7 @@ object StringModule extends AbstractFunctionModule { case x: Val.Str => totalLen += x.str.length if (totalLen > Int.MaxValue) Error.fail("String is too large to join") - asciiSafe &&= x._asciiSafe + asciiSafe &&= x.isInstanceOf[Val.AsciiSafeStr] elemCount += 1 case _ => return null } @@ -593,7 +590,7 @@ object StringModule extends AbstractFunctionModule { if (elemCount > 1) { totalLen += sepLen.toLong * (elemCount - 1) if (totalLen > Int.MaxValue) Error.fail("String is too large to join") - asciiSafe &&= sep._asciiSafe + asciiSafe &&= sep.isInstanceOf[Val.AsciiSafeStr] } val b = new java.lang.StringBuilder(totalLen.toInt) @@ -648,11 +645,11 @@ object StringModule extends AbstractFunctionModule { case x: Val.Str => if (added) { b.append(s) - asciiSafe &&= sepStr._asciiSafe + asciiSafe &&= sepStr.isInstanceOf[Val.AsciiSafeStr] } added = true b.append(x.str) - asciiSafe &&= x._asciiSafe + asciiSafe &&= x.isInstanceOf[Val.AsciiSafeStr] case x => Error.fail("Cannot join " + x.prettyName) } i += 1 @@ -864,7 +861,7 @@ object StringModule extends AbstractFunctionModule { private object Split extends Val.Builtin2("split", "str", "c") { def evalRhs(str: Eval, c: Eval, ev: EvalScope, pos: Position): Val = { val v = str.value - val safe = v.isInstanceOf[Val.Str] && v.asInstanceOf[Val.Str]._asciiSafe + val safe = v.isInstanceOf[Val.AsciiSafeStr] Val.Arr(pos, splitLimit(pos, v.asString, c.value.asString, -1, safe)) } } @@ -882,7 +879,7 @@ object StringModule extends AbstractFunctionModule { private object SplitLimit extends Val.Builtin3("splitLimit", "str", "c", "maxsplits") { def evalRhs(str: Eval, c: Eval, maxSplits: Eval, ev: EvalScope, pos: Position): Val = { val v = str.value - val safe = v.isInstanceOf[Val.Str] && v.asInstanceOf[Val.Str]._asciiSafe + val safe = v.isInstanceOf[Val.AsciiSafeStr] Val.Arr( pos, splitLimit(pos, v.asString, c.value.asString, 
maxSplits.value.asInt, safe) @@ -900,7 +897,7 @@ object StringModule extends AbstractFunctionModule { private object SplitLimitR extends Val.Builtin3("splitLimitR", "str", "c", "maxsplits") { def evalRhs(str: Eval, c: Eval, maxSplits: Eval, ev: EvalScope, pos: Position): Val = { val v = str.value - val safe = v.isInstanceOf[Val.Str] && v.asInstanceOf[Val.Str]._asciiSafe + val safe = v.isInstanceOf[Val.AsciiSafeStr] Val.Arr( pos, splitLimitR(pos, v.asString, c.value.asString, maxSplits.value.asInt, safe) @@ -1051,8 +1048,8 @@ object StringModule extends AbstractFunctionModule { val s = v.asString val out = asciiUpper(s) v match { - case vs: Val.Str if vs._asciiSafe => Val.Str.asciiSafe(pos, out) - case _ => Val.Str(pos, out) + case _: Val.AsciiSafeStr => Val.Str.asciiSafe(pos, out) + case _ => Val.Str(pos, out) } } } @@ -1070,8 +1067,8 @@ object StringModule extends AbstractFunctionModule { val s = v.asString val out = asciiLower(s) v match { - case vs: Val.Str if vs._asciiSafe => Val.Str.asciiSafe(pos, out) - case _ => Val.Str(pos, out) + case _: Val.AsciiSafeStr => Val.Str.asciiSafe(pos, out) + case _ => Val.Str(pos, out) } } } diff --git a/sjsonnet/test/resources/new_test_suite/format_asciisafe_propagation.jsonnet b/sjsonnet/test/resources/new_test_suite/format_asciisafe_propagation.jsonnet index dfcf3e8a..b91265e5 100644 --- a/sjsonnet/test/resources/new_test_suite/format_asciisafe_propagation.jsonnet +++ b/sjsonnet/test/resources/new_test_suite/format_asciisafe_propagation.jsonnet @@ -1,5 +1,5 @@ // Directional coverage for Format ASCII-safety propagation. 
-// Ensures format strings preserve correct values across paths that set Val.Str._asciiSafe: +// Ensures format strings preserve correct values across paths that produce Val.AsciiSafeStr: // - simple %(name)s fast path with ASCII / non-ASCII literals and values // - general format path with %s / %d / %c / %o / %x conversions // - mixed ASCII literals + non-ASCII string interpolations (output must be correct) diff --git a/sjsonnet/test/src/sjsonnet/FormatTests.scala b/sjsonnet/test/src/sjsonnet/FormatTests.scala index 61085be0..0019df5d 100644 --- a/sjsonnet/test/src/sjsonnet/FormatTests.scala +++ b/sjsonnet/test/src/sjsonnet/FormatTests.scala @@ -31,7 +31,7 @@ object FormatTests extends TestSuite { ) val result = fmt.evalRhs(obj, scope, pos).asInstanceOf[Val.Str] result.str ==> "hello 3" - result._asciiSafe ==> true + result.isInstanceOf[Val.AsciiSafeStr] ==> true } test("simple named format does not mark unsafe string values ascii-safe") { @@ -42,7 +42,7 @@ object FormatTests extends TestSuite { ) val result = fmt.evalRhs(obj, scope, pos).asInstanceOf[Val.Str] result.str ==> "hello \"" - result._asciiSafe ==> false + result.isInstanceOf[Val.AsciiSafeStr] ==> false } test("simple named format does not mark unsafe static literals ascii-safe") { @@ -53,7 +53,7 @@ object FormatTests extends TestSuite { ) val result = fmt.evalRhs(obj, scope, pos).asInstanceOf[Val.Str] result.str ==> "hello \"3" - result._asciiSafe ==> false + result.isInstanceOf[Val.AsciiSafeStr] ==> false } test("simple named format combines ascii-safety across multiple keys") { @@ -67,7 +67,7 @@ object FormatTests extends TestSuite { ) val result = fmt.evalRhs(obj, scope, pos).asInstanceOf[Val.Str] result.str ==> "safe \\ safe" - result._asciiSafe ==> false + result.isInstanceOf[Val.AsciiSafeStr] ==> false } } }