From c85f9dabff3f4b19e88d52a0651a8b66c3412f3a Mon Sep 17 00:00:00 2001 From: Martin Leduc <31558169+DecimalTurn@users.noreply.github.com> Date: Tue, 17 Mar 2026 18:23:59 -0400 Subject: [PATCH 1/7] test: adjust multiline patching test --- src/__tests__/patch.test.ts | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/src/__tests__/patch.test.ts b/src/__tests__/patch.test.ts index 0e5cf04..c17442f 100644 --- a/src/__tests__/patch.test.ts +++ b/src/__tests__/patch.test.ts @@ -2930,22 +2930,35 @@ describe('TOML v1.1 multiline inline tables - edit operations (newline.toml spec }); test('should edit a value in an inline table that contains a multiline string value', () => { - // tbl-2 from newline.toml: inline table whose value is a multiline string - const existing = dedent` - tbl-2 = { - k = """Hello""" - } - ` + '\n'; + // Verifies that preserveFormatting preserves the structural suffix of a multiline string: + // the line-continuation backslash and the closing indent must be preserved. + // + // Note: dedent eats `\` sequences (its raw-string cleanup regex), so these + // strings are written with explicit concatenation to control every character exactly. + // + // The TOML ` Hello \ ` encodes value ` Hello ` + // (8 spaces + "Hello " — the `\` is trimmed as a line continuation). + const existing = + 'tbl-2 = {\n' + + ' k = """\\\n' + + ' Hello \\\n' + + ' """\n' + + '}\n'; const value = parse(existing); - value['tbl-2'].k = 'Goodbye'; + // Sanity-check: line continuation trims backslash+newline+indent, leaving the trailing space. + expect(value['tbl-2'].k).toEqual('Hello '); + + value['tbl-2'].k = 'Goodbye \n'; const patched = patch(existing, value); - expect(patched).toEqual(dedent` - tbl-2 = { - k = """Goodbye""" - } - ` + '\n'); + expect(patched).toEqual( + 'tbl-2 = {\n' + + ' k = """\\\n' + + ' Goodbye \\\n' + + ' """\n' + + '}\n' + ); }); test('should preserve no-trailing-newline-before-brace format when editing', () => { From 44d456c5dab71d51df84028ee595ed1f19e9a8e4 Mon Sep 17 00:00:00 2001 From: Martin Leduc <31558169+DecimalTurn@users.noreply.github.com> Date: Tue, 17 Mar 2026 18:32:36 -0400 Subject: [PATCH 2/7] WIP --- src/generate.ts | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/src/generate.ts b/src/generate.ts index df4581e..a9a7370 100644 --- a/src/generate.ts +++ b/src/generate.ts @@ -185,9 +185,35 @@ export function generateString(value: string, existingRaw?: string): String { .replace(/"""/g, '""\\\"'); } - // Format with or without leading newline based on original + // Format with or without leading newline based on original. + // For the common case, wrap escaped content between the delimiter+newline and closing delimiter. + // Special case: if the existing body has a single content line ending with an odd number of + // trailing backslashes (a TOML line-continuation), and the new value is also single-line, + // preserve the structural suffix: the line-continuation backslash and the closing indent. if (hasLeadingNewline) { - raw = `${delimiter}${newlineChar}${escaped}${delimiter}`; + const bodyStart = delimiter.length + newlineChar.length; + const bodyEnd = existingRaw.length - delimiter.length; + const existingBody = existingRaw.slice(bodyStart, bodyEnd); + const lastNewlineIdx = existingBody.lastIndexOf(newlineChar); + + let preserved = false; + if (lastNewlineIdx >= 0 && !escaped.includes(newlineChar)) { + const closingIndent = existingBody.slice(lastNewlineIdx + newlineChar.length); + const contentArea = existingBody.slice(0, lastNewlineIdx); + + if (!contentArea.includes(newlineChar)) { + const trailingBackslashes = contentArea.match(/(\\+)$/); + if (trailingBackslashes && trailingBackslashes[1].length % 2 === 1) { + // Odd trailing backslashes = line-continuation; preserve the closing structure. + raw = `${delimiter}${newlineChar}${escaped}\\${newlineChar}${closingIndent}${delimiter}`; + preserved = true; + } + } + } + + if (!preserved) { + raw = `${delimiter}${newlineChar}${escaped}${delimiter}`; + } } else { raw = `${delimiter}${escaped}${delimiter}`; } From 10e93d12b611431d6b1145e904f5e33b702ba876 Mon Sep 17 00:00:00 2001 From: Martin Leduc <31558169+DecimalTurn@users.noreply.github.com> Date: Tue, 17 Mar 2026 21:23:45 -0400 Subject: [PATCH 3/7] test: add second test for more than one line --- src/__tests__/patch.test.ts | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/src/__tests__/patch.test.ts b/src/__tests__/patch.test.ts index c17442f..67387ce 100644 --- a/src/__tests__/patch.test.ts +++ b/src/__tests__/patch.test.ts @@ -2949,7 +2949,7 @@ describe('TOML v1.1 multiline inline tables - edit operations (newline.toml spec // Sanity-check: line continuation trims backslash+newline+indent, leaving the trailing space. expect(value['tbl-2'].k).toEqual('Hello '); - value['tbl-2'].k = 'Goodbye \n'; + value['tbl-2'].k = 'Goodbye '; const patched = patch(existing, value); expect(patched).toEqual( @@ -2961,6 +2961,33 @@ describe('TOML v1.1 multiline inline tables - edit operations (newline.toml spec ); }); + test('should edit a value in an inline table that contains a multiline string value 2', () => { + const existing = + 'tbl-2 = {\n' + + ' k = """\\\n' + + ' Hello \\\n' + + ' World.\\\n' + + ' """\n' + + '}\n'; + + const value = parse(existing); + // The `\` sequences are line continuations: they trim the backslash, + // newline and following whitespace, joining everything into one value. + expect(value['tbl-2'].k).toEqual('Hello World.'); + + value['tbl-2'].k = 'Bonjour World.'; + const patched = patch(existing, value); + + expect(patched).toEqual( + 'tbl-2 = {\n' + + ' k = """\\\n' + + ' Bonjour \\\n' + + ' World.\\\n' + + ' """\n' + + '}\n' + ); + }); + test('should preserve no-trailing-newline-before-brace format when editing', () => { // no-newline-before-brace from newline.toml: last key on same line as } const existing = dedent` From 8e059544b9c9d6fb8037248202cdd3884862edc3 Mon Sep 17 00:00:00 2001 From: Martin Leduc <31558169+DecimalTurn@users.noreply.github.com> Date: Tue, 17 Mar 2026 21:25:26 -0400 Subject: [PATCH 4/7] fix: generate string for line ending backslash --- src/generate.ts | 170 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 163 insertions(+), 7 deletions(-) diff --git a/src/generate.ts b/src/generate.ts index a9a7370..a5dda9b 100644 --- a/src/generate.ts +++ b/src/generate.ts @@ -139,6 +139,149 @@ export function generateKey(value: string[]): Key { value }; } + +/** + * Rebuilds a multiline string that uses line ending backslash (line-continuation) formatting. + * + * Given an existing raw TOML string like: + * """\ + * Hello \ + * World.\ + * """ + * + * and a new decoded value like "Bonjour World.", this function: + * 1. Parses the existing raw into per-line segments, extracting each segment's indent + * and trailing whitespace. + * 2. Splits both the original decoded value and the new value into words. + * 3. Redistributes the new words across the original line structure by mapping words + * at each segment boundary. + * 4. Reassembles with `\` between segments and `\"""` at the end. + * + * @param existingRaw - The full raw TOML string including delimiters. + * @param escaped - The new value after escaping (for basic multiline strings). + * @param decodedValue - The new decoded (unescaped) string value. + * @param delimiter - The multiline delimiter ('"""' or "'''"). + * @param newlineChar - The newline character to use ('\n' or '\r\n'). + * @returns The reconstructed raw TOML string. + */ +function rebuildLineContinuation( + existingRaw: string, + escaped: string, + decodedValue: string, + delimiter: string, + newlineChar: string +): string { + // Parse the body: everything between `"""\` and `"""` + const bodyStart = delimiter.length + 1 /* \ */ + newlineChar.length; + const bodyEnd = existingRaw.length - delimiter.length; + const body = existingRaw.slice(bodyStart, bodyEnd); + + // Split the body into raw lines + const rawLines = body.split(newlineChar); + + // The last raw line is the closing indent (whitespace before the closing """) + const closingIndent = rawLines[rawLines.length - 1]; + + // Content lines are all lines except the last one (the closing indent line). + // Each content line has the format: \ + const contentLines = rawLines.slice(0, -1); + + // Parse each content line into { indent, content, trailingWs } + interface Segment { + indent: string; + content: string; + trailingWs: string; + } + + const segments: Segment[] = contentLines.map(line => { + // Strip the trailing backslash (the line-continuation character) + let stripped = line; + if (stripped.endsWith('\\')) { + stripped = stripped.slice(0, -1); + } + + // Extract leading whitespace (indent) + const indentMatch = stripped.match(/^([\t ]*)/); + const indent = indentMatch ? indentMatch[1] : ''; + + // Extract trailing whitespace + const trailingMatch = stripped.match(/([\t ]+)$/); + const trailingWs = trailingMatch ? trailingMatch[1] : ''; + + // The content is what's between the indent and the trailing whitespace + const content = stripped.slice(indent.length, stripped.length - trailingWs.length); + + return { indent, content, trailingWs }; + }); + + // Split into "words" (non-whitespace tokens) for mapping between old and new values. + const splitWords = (s: string): string[] => { + const matches = s.match(/\S+/g); + return matches || []; + }; + + const originalWords = splitWords(segments.map(s => s.content).join('')); + const newWords = splitWords(decodedValue); + + // If there's only one segment or no words to match, use a simple single-line rebuild + if (segments.length <= 1 || originalWords.length === 0) { + let escapedInline = escaped; + if (escapedInline.endsWith('\r\n')) escapedInline = escapedInline.slice(0, -2); + else if (escapedInline.endsWith('\n')) escapedInline = escapedInline.slice(0, -1); + + const seg = segments[0] || { indent: '', trailingWs: ' ' }; + return `${delimiter}\\${newlineChar}${seg.indent}${escapedInline}\\${newlineChar}${closingIndent}${delimiter}`; + } + + // For each segment, determine which original words it contains. + // segmentWordRanges[i] = [startWordIdx, endWordIdx) + const segmentWordRanges: [number, number][] = []; + let wordIdx = 0; + for (const seg of segments) { + const segWords = splitWords(seg.content); + const start = wordIdx; + wordIdx += segWords.length; + segmentWordRanges.push([start, wordIdx]); + } + + // Redistribute the new words across segments using the same ranges. + const newSegments: string[] = []; + for (let i = 0; i < segments.length; i++) { + const [origStart, origEnd] = segmentWordRanges[i]; + const origCount = origEnd - origStart; + + let segNewWords: string[]; + if (i === segments.length - 1) { + // Last content segment gets all remaining words + segNewWords = newWords.slice(origStart); + } else { + segNewWords = newWords.slice(origStart, origStart + origCount); + } + + newSegments.push(segNewWords.join(' ')); + } + + // Remove trailing empty segments (in case the new value has fewer words) + while (newSegments.length > 1 && newSegments[newSegments.length - 1] === '') { + newSegments.pop(); + } + + // Reassemble the raw string + const rebuiltLines: string[] = []; + for (let i = 0; i < newSegments.length; i++) { + const seg = i < segments.length ? segments[i] : segments[segments.length - 1]; + // Preserve trailing whitespace from the original segment, but don't double up + // if the new content already ends with whitespace. + let trailing = seg.trailingWs; + if (newSegments[i].length > 0 && /\s$/.test(newSegments[i])) { + trailing = ''; + } + rebuiltLines.push(`${seg.indent}${newSegments[i]}${trailing}\\`); + } + + return `${delimiter}\\${newlineChar}${rebuiltLines.join(newlineChar)}${newlineChar}${closingIndent}${delimiter}`; +} + /** * Generates a new String node, preserving multiline format if existingRaw is provided. * @@ -147,7 +290,7 @@ export function generateKey(value: string[]): Key { * @returns A new String node. */ export function generateString(value: string, existingRaw?: string): String { - let raw: string; + let raw = ''; if (existingRaw && isMultilineString(existingRaw)) { // Preserve multiline format @@ -164,6 +307,10 @@ export function generateString(value: string, existingRaw?: string): String { const newlineChar = existingRaw.includes('\r\n') ? '\r\n' : '\n'; const hasLeadingNewline = existingRaw.startsWith(`${delimiter}${newlineChar}`) || ((existingRaw.startsWith("'''\n") || existingRaw.startsWith("'''\r\n")) && !isLiteral); + // Detect the """\ opening: delimiter immediately followed by a line-continuation backslash. + // This is distinct from hasLeadingNewline (delimiter immediately followed by a bare newline). + const hasLeadingLineContinuation = + !hasLeadingNewline && existingRaw.startsWith(`${delimiter}\\${newlineChar}`); let escaped: string; if (isLiteral) { @@ -185,12 +332,21 @@ export function generateString(value: string, existingRaw?: string): String { .replace(/"""/g, '""\\\"'); } - // Format with or without leading newline based on original. - // For the common case, wrap escaped content between the delimiter+newline and closing delimiter. - // Special case: if the existing body has a single content line ending with an odd number of - // trailing backslashes (a TOML line-continuation), and the new value is also single-line, - // preserve the structural suffix: the line-continuation backslash and the closing indent. - if (hasLeadingNewline) { + // Generate the replacement raw string, preserving the structural format of the existing raw. + if (hasLeadingLineContinuation) { + // Format: """\INDENT CONTENT \INDENT CONTENT \INDENT""" + // Each `\` is a line continuation: it trims the backslash, newline, + // and all following whitespace, joining content segments into a single decoded value. + // + // Strategy: + // 1. Parse the existing raw body into segments (one per continuation line). + // 2. Extract each segment's indent and trailing whitespace. + // 3. Split both the original decoded value and the new value into words. + // 4. Redistribute the new words across the same number of segments by matching + // word positions from the original, preserving per-segment whitespace. + // 5. Reassemble with `\` between segments. + raw = rebuildLineContinuation(existingRaw, escaped, value, delimiter, newlineChar); + } else if (hasLeadingNewline) { const bodyStart = delimiter.length + newlineChar.length; const bodyEnd = existingRaw.length - delimiter.length; const existingBody = existingRaw.slice(bodyStart, bodyEnd); From 35a37be4cb8dbd2b11dcb5f2e2370ec656d11c2e Mon Sep 17 00:00:00 2001 From: Martin Leduc <31558169+DecimalTurn@users.noreply.github.com> Date: Tue, 17 Mar 2026 22:04:27 -0400 Subject: [PATCH 5/7] fix: use rebuildLeadingNewlineContinuation --- src/__tests__/patch.test.ts | 33 ++++++++ src/generate.ts | 145 +++++++++++++++++++++++++++++++----- 2 files changed, 161 insertions(+), 17 deletions(-) diff --git a/src/__tests__/patch.test.ts b/src/__tests__/patch.test.ts index 67387ce..00a7e55 100644 --- a/src/__tests__/patch.test.ts +++ b/src/__tests__/patch.test.ts @@ -2988,6 +2988,39 @@ describe('TOML v1.1 multiline inline tables - edit operations (newline.toml spec ); }); + test('should edit a value in an inline table that contains a multiline string value 3', () => { + // Uses """\n (leading newline) format — NOT """\\ (leading line-continuation). + // The body contains line-continuation backslashes with blank lines and mixed indentation. + const existing = + 'tbl-2 = {\n' + + ' k = """\n' + + 'The quick brown \\\n' + + '\n' + + '\n' + + ' fox jumps over \\\n' + + ' the lazy dog."""\n' + + '}\n'; + + const value = parse(existing); + // Line-continuation trims `\`, newline(s) and following whitespace: + // "The quick brown " + "fox jumps over " + "the lazy dog." + expect(value['tbl-2'].k).toEqual('The quick brown fox jumps over the lazy dog.'); + + value['tbl-2'].k = 'The quick brown cat jumps over the lazy dog.'; + const patched = patch(existing, value); + + expect(patched).toEqual( + 'tbl-2 = {\n' + + ' k = """\n' + + 'The quick brown \\\n' + + '\n' + + '\n' + + ' cat jumps over \\\n' + + ' the lazy dog."""\n' + + '}\n' + ); + }); + test('should preserve no-trailing-newline-before-brace format when editing', () => { // no-newline-before-brace from newline.toml: last key on same line as } const existing = dedent` diff --git a/src/generate.ts b/src/generate.ts index a5dda9b..ad809d9 100644 --- a/src/generate.ts +++ b/src/generate.ts @@ -282,6 +282,122 @@ function rebuildLineContinuation( return `${delimiter}\\${newlineChar}${rebuiltLines.join(newlineChar)}${newlineChar}${closingIndent}${delimiter}`; } +/** + * Rebuilds a multiline string that uses the `"""` (leading newline) format and contains + * line-continuation backslashes within the body. + * + * Given an existing raw TOML string like: + * """ + * The quick brown \ + * + * + * fox jumps over \ + * the lazy dog.""" + * + * and a new decoded value like "The quick brown cat jumps over the lazy dog.", this function + * preserves the line structure (including blank lines) while redistributing new words across + * the same content-line positions. + */ +function rebuildLeadingNewlineContinuation( + existingRaw: string, + escaped: string, + decodedValue: string, + delimiter: string, + newlineChar: string +): string { + const bodyStart = delimiter.length + newlineChar.length; + const bodyEnd = existingRaw.length - delimiter.length; + const body = existingRaw.slice(bodyStart, bodyEnd); + + const rawLines = body.split(newlineChar); + + // Check if the last line is a closing indent (pure whitespace) vs content. + const lastLine = rawLines[rawLines.length - 1]; + const hasClosingIndent = /^[\t ]*$/.test(lastLine); + const closingIndent = hasClosingIndent ? lastLine : ''; + const bodyLines = hasClosingIndent ? rawLines.slice(0, -1) : rawLines; + + interface Segment { + indent: string; + content: string; + trailingWs: string; + hasBackslash: boolean; + isBlank: boolean; + } + + const segments: Segment[] = bodyLines.map(line => { + if (line.trim() === '') { + return { indent: '', content: '', trailingWs: '', hasBackslash: false, isBlank: true }; + } + + let stripped = line; + const hasBackslash = stripped.endsWith('\\'); + if (hasBackslash) { + stripped = stripped.slice(0, -1); + } + + const indentMatch = stripped.match(/^([\t ]*)/); + const indent = indentMatch ? indentMatch[1] : ''; + const trailingMatch = stripped.match(/([\t ]+)$/); + const trailingWs = trailingMatch ? trailingMatch[1] : ''; + const content = stripped.slice(indent.length, stripped.length - trailingWs.length); + + return { indent, content, trailingWs, hasBackslash, isBlank: false }; + }); + + const contentSegments = segments.filter(s => !s.isBlank); + + const splitWords = (s: string): string[] => s.match(/\S+/g) || []; + + const originalWords = splitWords(contentSegments.map(s => s.content).join('')); + const newWords = splitWords(decodedValue); + + // Map content segments to word ranges. + const segmentWordRanges: [number, number][] = []; + let wordIdx = 0; + for (const seg of contentSegments) { + const segWords = splitWords(seg.content); + const start = wordIdx; + wordIdx += segWords.length; + segmentWordRanges.push([start, wordIdx]); + } + + // Redistribute new words across content segments. + const newContents: string[] = []; + for (let i = 0; i < contentSegments.length; i++) { + const [origStart, origEnd] = segmentWordRanges[i]; + const origCount = origEnd - origStart; + const segNewWords = + i === contentSegments.length - 1 + ? newWords.slice(origStart) + : newWords.slice(origStart, origStart + origCount); + newContents.push(segNewWords.join(' ')); + } + + // Reassemble: walk all segments (including blanks), replacing content in non-blank ones. + let contentIdx = 0; + const rebuiltLines: string[] = []; + for (const seg of segments) { + if (seg.isBlank) { + rebuiltLines.push(''); + } else { + const newContent = newContents[contentIdx]; + let trailing = seg.trailingWs; + if (newContent.length > 0 && /\s$/.test(newContent)) { + trailing = ''; + } + const backslash = seg.hasBackslash ? '\\' : ''; + rebuiltLines.push(`${seg.indent}${newContent}${trailing}${backslash}`); + contentIdx++; + } + } + + if (hasClosingIndent) { + return `${delimiter}${newlineChar}${rebuiltLines.join(newlineChar)}${newlineChar}${closingIndent}${delimiter}`; + } + return `${delimiter}${newlineChar}${rebuiltLines.join(newlineChar)}${delimiter}`; +} + /** * Generates a new String node, preserving multiline format if existingRaw is provided. * @@ -350,24 +466,19 @@ export function generateString(value: string, existingRaw?: string): String { const bodyStart = delimiter.length + newlineChar.length; const bodyEnd = existingRaw.length - delimiter.length; const existingBody = existingRaw.slice(bodyStart, bodyEnd); - const lastNewlineIdx = existingBody.lastIndexOf(newlineChar); - - let preserved = false; - if (lastNewlineIdx >= 0 && !escaped.includes(newlineChar)) { - const closingIndent = existingBody.slice(lastNewlineIdx + newlineChar.length); - const contentArea = existingBody.slice(0, lastNewlineIdx); - - if (!contentArea.includes(newlineChar)) { - const trailingBackslashes = contentArea.match(/(\\+)$/); - if (trailingBackslashes && trailingBackslashes[1].length % 2 === 1) { - // Odd trailing backslashes = line-continuation; preserve the closing structure. - raw = `${delimiter}${newlineChar}${escaped}\\${newlineChar}${closingIndent}${delimiter}`; - preserved = true; - } - } - } - if (!preserved) { + // Detect line-continuation backslashes in the body: any line ending with an odd + // number of backslashes is a continuation line. + const bodyLines = existingBody.split(newlineChar); + const hasContinuationLines = !escaped.includes(newlineChar) && + bodyLines.some(line => { + const m = line.match(/(\\+)$/); + return m !== null && m[1].length % 2 === 1; + }); + + if (hasContinuationLines) { + raw = rebuildLeadingNewlineContinuation(existingRaw, escaped, value, delimiter, newlineChar); + } else { raw = `${delimiter}${newlineChar}${escaped}${delimiter}`; } } else { From bfd929d54523d116f9d84f00ab7b0a03314d3165 Mon Sep 17 00:00:00 2001 From: Martin Leduc <31558169+DecimalTurn@users.noreply.github.com> Date: Tue, 17 Mar 2026 22:05:15 -0400 Subject: [PATCH 6/7] fix: make sure rebuildLeadingNewlineContinuation handles alternative cases --- src/__tests__/patch.test.ts | 24 ++++ src/generate.ts | 240 ++++++++---------------------------- 2 files changed, 77 insertions(+), 187 deletions(-) diff --git a/src/__tests__/patch.test.ts b/src/__tests__/patch.test.ts index 00a7e55..4d10e7a 100644 --- a/src/__tests__/patch.test.ts +++ b/src/__tests__/patch.test.ts @@ -3021,6 +3021,30 @@ describe('TOML v1.1 multiline inline tables - edit operations (newline.toml spec ); }); + test('should edit a value in an inline table that contains a multiline string value 4', () => { + // Uses """content (no newline after delimiter) with line-continuation in the body. + const existing = + 'tbl-2 = {\n' + + ' k = """The quick brown \\\n' + + ' fox jumps over \\\n' + + ' the lazy dog."""\n' + + '}\n'; + + const value = parse(existing); + expect(value['tbl-2'].k).toEqual('The quick brown fox jumps over the lazy dog.'); + + value['tbl-2'].k = 'The quick brown cat jumps over the lazy dog.'; + const patched = patch(existing, value); + + expect(patched).toEqual( + 'tbl-2 = {\n' + + ' k = """The quick brown \\\n' + + ' cat jumps over \\\n' + + ' the lazy dog."""\n' + + '}\n' + ); + }); + test('should preserve no-trailing-newline-before-brace format when editing', () => { // no-newline-before-brace from newline.toml: last key on same line as } const existing = dedent` diff --git a/src/generate.ts b/src/generate.ts index ad809d9..d57d6e4 100644 --- a/src/generate.ts +++ b/src/generate.ts @@ -143,19 +143,18 @@ export function generateKey(value: string[]): Key { /** * Rebuilds a multiline string that uses line ending backslash (line-continuation) formatting. * - * Given an existing raw TOML string like: - * """\ - * Hello \ - * World.\ - * """ + * Handles all three opening formats: + * - `"""\content\indent"""` (leading line-continuation) + * - `"""content\..."""` (leading newline) + * - `"""content\..."""` (no leading newline) * - * and a new decoded value like "Bonjour World.", this function: - * 1. Parses the existing raw into per-line segments, extracting each segment's indent - * and trailing whitespace. - * 2. Splits both the original decoded value and the new value into words. - * 3. Redistributes the new words across the original line structure by mapping words - * at each segment boundary. - * 4. Reassembles with `\` between segments and `\"""` at the end. + * Strategy: + * 1. Detect the opening format and where the body starts. + * 2. Parse body lines into segments (indent, content, trailing whitespace, backslash flag), + * preserving blank lines. + * 3. Split both the original decoded value and the new value into words. + * 4. Redistribute new words across the same segment structure by mapping word positions. + * 5. Reassemble with the original opening format, per-line whitespace, and backslash placement. * * @param existingRaw - The full raw TOML string including delimiters. * @param escaped - The new value after escaping (for basic multiline strings). @@ -171,149 +170,31 @@ function rebuildLineContinuation( delimiter: string, newlineChar: string ): string { - // Parse the body: everything between `"""\` and `"""` - const bodyStart = delimiter.length + 1 /* \ */ + newlineChar.length; - const bodyEnd = existingRaw.length - delimiter.length; - const body = existingRaw.slice(bodyStart, bodyEnd); - - // Split the body into raw lines - const rawLines = body.split(newlineChar); - - // The last raw line is the closing indent (whitespace before the closing """) - const closingIndent = rawLines[rawLines.length - 1]; - - // Content lines are all lines except the last one (the closing indent line). - // Each content line has the format: \ - const contentLines = rawLines.slice(0, -1); - - // Parse each content line into { indent, content, trailingWs } - interface Segment { - indent: string; - content: string; - trailingWs: string; - } - - const segments: Segment[] = contentLines.map(line => { - // Strip the trailing backslash (the line-continuation character) - let stripped = line; - if (stripped.endsWith('\\')) { - stripped = stripped.slice(0, -1); - } - - // Extract leading whitespace (indent) - const indentMatch = stripped.match(/^([\t ]*)/); - const indent = indentMatch ? indentMatch[1] : ''; - - // Extract trailing whitespace - const trailingMatch = stripped.match(/([\t ]+)$/); - const trailingWs = trailingMatch ? trailingMatch[1] : ''; - - // The content is what's between the indent and the trailing whitespace - const content = stripped.slice(indent.length, stripped.length - trailingWs.length); - - return { indent, content, trailingWs }; - }); - - // Split into "words" (non-whitespace tokens) for mapping between old and new values. - const splitWords = (s: string): string[] => { - const matches = s.match(/\S+/g); - return matches || []; - }; - - const originalWords = splitWords(segments.map(s => s.content).join('')); - const newWords = splitWords(decodedValue); - - // If there's only one segment or no words to match, use a simple single-line rebuild - if (segments.length <= 1 || originalWords.length === 0) { - let escapedInline = escaped; - if (escapedInline.endsWith('\r\n')) escapedInline = escapedInline.slice(0, -2); - else if (escapedInline.endsWith('\n')) escapedInline = escapedInline.slice(0, -1); - - const seg = segments[0] || { indent: '', trailingWs: ' ' }; - return `${delimiter}\\${newlineChar}${seg.indent}${escapedInline}\\${newlineChar}${closingIndent}${delimiter}`; - } - - // For each segment, determine which original words it contains. - // segmentWordRanges[i] = [startWordIdx, endWordIdx) - const segmentWordRanges: [number, number][] = []; - let wordIdx = 0; - for (const seg of segments) { - const segWords = splitWords(seg.content); - const start = wordIdx; - wordIdx += segWords.length; - segmentWordRanges.push([start, wordIdx]); - } - - // Redistribute the new words across segments using the same ranges. - const newSegments: string[] = []; - for (let i = 0; i < segments.length; i++) { - const [origStart, origEnd] = segmentWordRanges[i]; - const origCount = origEnd - origStart; - - let segNewWords: string[]; - if (i === segments.length - 1) { - // Last content segment gets all remaining words - segNewWords = newWords.slice(origStart); - } else { - segNewWords = newWords.slice(origStart, origStart + origCount); - } - - newSegments.push(segNewWords.join(' ')); - } - - // Remove trailing empty segments (in case the new value has fewer words) - while (newSegments.length > 1 && newSegments[newSegments.length - 1] === '') { - newSegments.pop(); - } - - // Reassemble the raw string - const rebuiltLines: string[] = []; - for (let i = 0; i < newSegments.length; i++) { - const seg = i < segments.length ? segments[i] : segments[segments.length - 1]; - // Preserve trailing whitespace from the original segment, but don't double up - // if the new content already ends with whitespace. - let trailing = seg.trailingWs; - if (newSegments[i].length > 0 && /\s$/.test(newSegments[i])) { - trailing = ''; - } - rebuiltLines.push(`${seg.indent}${newSegments[i]}${trailing}\\`); + // Determine the opening format and where the body starts + let bodyStart: number; + let openingPrefix: string; + + if (existingRaw.startsWith(`${delimiter}\\${newlineChar}`)) { + // """\ format — delimiter followed by line-continuation + bodyStart = delimiter.length + 1 + newlineChar.length; + openingPrefix = `${delimiter}\\${newlineChar}`; + } else if (existingRaw.startsWith(`${delimiter}${newlineChar}`)) { + // """ format — delimiter followed by newline + bodyStart = delimiter.length + newlineChar.length; + openingPrefix = `${delimiter}${newlineChar}`; + } else { + // """content format — no newline after delimiter + bodyStart = delimiter.length; + openingPrefix = delimiter; } - return `${delimiter}\\${newlineChar}${rebuiltLines.join(newlineChar)}${newlineChar}${closingIndent}${delimiter}`; -} - -/** - * Rebuilds a multiline string that uses the `"""` (leading newline) format and contains - * line-continuation backslashes within the body. - * - * Given an existing raw TOML string like: - * """ - * The quick brown \ - * - * - * fox jumps over \ - * the lazy dog.""" - * - * and a new decoded value like "The quick brown cat jumps over the lazy dog.", this function - * preserves the line structure (including blank lines) while redistributing new words across - * the same content-line positions. - */ -function rebuildLeadingNewlineContinuation( - existingRaw: string, - escaped: string, - decodedValue: string, - delimiter: string, - newlineChar: string -): string { - const bodyStart = delimiter.length + newlineChar.length; const bodyEnd = existingRaw.length - delimiter.length; const body = existingRaw.slice(bodyStart, bodyEnd); - const rawLines = body.split(newlineChar); - // Check if the last line is a closing indent (pure whitespace) vs content. + // Determine closing format: does the closing delimiter sit on its own line? const lastLine = rawLines[rawLines.length - 1]; - const hasClosingIndent = /^[\t ]*$/.test(lastLine); + const hasClosingIndent = rawLines.length > 1 && /^[\t ]*$/.test(lastLine); const closingIndent = hasClosingIndent ? lastLine : ''; const bodyLines = hasClosingIndent ? rawLines.slice(0, -1) : rawLines; @@ -352,7 +233,7 @@ function rebuildLeadingNewlineContinuation( const originalWords = splitWords(contentSegments.map(s => s.content).join('')); const newWords = splitWords(decodedValue); - // Map content segments to word ranges. + // Map content segments to word ranges: segmentWordRanges[i] = [startWordIdx, endWordIdx) const segmentWordRanges: [number, number][] = []; let wordIdx = 0; for (const seg of contentSegments) { @@ -362,7 +243,7 @@ function rebuildLeadingNewlineContinuation( segmentWordRanges.push([start, wordIdx]); } - // Redistribute new words across content segments. + // Redistribute new words across content segments using the same ranges. const newContents: string[] = []; for (let i = 0; i < contentSegments.length; i++) { const [origStart, origEnd] = segmentWordRanges[i]; @@ -374,10 +255,17 @@ function rebuildLeadingNewlineContinuation( newContents.push(segNewWords.join(' ')); } - // Reassemble: walk all segments (including blanks), replacing content in non-blank ones. + // Trim trailing empty content entries (when new value has fewer words) + while (newContents.length > 1 && newContents[newContents.length - 1] === '') { + newContents.pop(); + } + + // Reassemble: walk segments, emitting up to the remaining content count. + const numContentToEmit = newContents.length; let contentIdx = 0; const rebuiltLines: string[] = []; for (const seg of segments) { + if (contentIdx >= numContentToEmit) break; if (seg.isBlank) { rebuiltLines.push(''); } else { @@ -393,9 +281,9 @@ function rebuildLeadingNewlineContinuation( } if (hasClosingIndent) { - return `${delimiter}${newlineChar}${rebuiltLines.join(newlineChar)}${newlineChar}${closingIndent}${delimiter}`; + return `${openingPrefix}${rebuiltLines.join(newlineChar)}${newlineChar}${closingIndent}${delimiter}`; } - return `${delimiter}${newlineChar}${rebuiltLines.join(newlineChar)}${delimiter}`; + return `${openingPrefix}${rebuiltLines.join(newlineChar)}${delimiter}`; } /** @@ -423,10 +311,6 @@ export function generateString(value: string, existingRaw?: string): String { const newlineChar = existingRaw.includes('\r\n') ? '\r\n' : '\n'; const hasLeadingNewline = existingRaw.startsWith(`${delimiter}${newlineChar}`) || ((existingRaw.startsWith("'''\n") || existingRaw.startsWith("'''\r\n")) && !isLiteral); - // Detect the """\ opening: delimiter immediately followed by a line-continuation backslash. - // This is distinct from hasLeadingNewline (delimiter immediately followed by a bare newline). - const hasLeadingLineContinuation = - !hasLeadingNewline && existingRaw.startsWith(`${delimiter}\\${newlineChar}`); let escaped: string; if (isLiteral) { @@ -448,39 +332,21 @@ export function generateString(value: string, existingRaw?: string): String { .replace(/"""/g, '""\\\"'); } + // Detect line-continuation backslashes anywhere in the multiline string body. + // A line ending with an odd number of backslashes is a continuation line. + // Line-continuation is only meaningful in basic (""") strings, not literal ('''). + const innerContent = existingRaw.slice(delimiter.length, existingRaw.length - delimiter.length); + const hasLineContinuation = !isLiteral && !escaped.includes(newlineChar) && + innerContent.split(newlineChar).some(line => { + const m = line.match(/(\\+)$/); + return m !== null && m[1].length % 2 === 1; + }); + // Generate the replacement raw string, preserving the structural format of the existing raw. - if (hasLeadingLineContinuation) { - // Format: """\INDENT CONTENT \INDENT CONTENT \INDENT""" - // Each `\` is a line continuation: it trims the backslash, newline, - // and all following whitespace, joining content segments into a single decoded value. - // - // Strategy: - // 1. Parse the existing raw body into segments (one per continuation line). - // 2. Extract each segment's indent and trailing whitespace. - // 3. Split both the original decoded value and the new value into words. - // 4. Redistribute the new words across the same number of segments by matching - // word positions from the original, preserving per-segment whitespace. - // 5. Reassemble with `\` between segments. + if (hasLineContinuation) { raw = rebuildLineContinuation(existingRaw, escaped, value, delimiter, newlineChar); } else if (hasLeadingNewline) { - const bodyStart = delimiter.length + newlineChar.length; - const bodyEnd = existingRaw.length - delimiter.length; - const existingBody = existingRaw.slice(bodyStart, bodyEnd); - - // Detect line-continuation backslashes in the body: any line ending with an odd - // number of backslashes is a continuation line. - const bodyLines = existingBody.split(newlineChar); - const hasContinuationLines = !escaped.includes(newlineChar) && - bodyLines.some(line => { - const m = line.match(/(\\+)$/); - return m !== null && m[1].length % 2 === 1; - }); - - if (hasContinuationLines) { - raw = rebuildLeadingNewlineContinuation(existingRaw, escaped, value, delimiter, newlineChar); - } else { - raw = `${delimiter}${newlineChar}${escaped}${delimiter}`; - } + raw = `${delimiter}${newlineChar}${escaped}${delimiter}`; } else { raw = `${delimiter}${escaped}${delimiter}`; } From a4354e71f7e84616ead55d4fbd0afe6e51dcbdd6 Mon Sep 17 00:00:00 2001 From: Martin Leduc <31558169+DecimalTurn@users.noreply.github.com> Date: Wed, 18 Mar 2026 00:18:06 -0400 Subject: [PATCH 7/7] fix: resolve lint errors in generate.ts and audit script --- src/generate.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/src/generate.ts b/src/generate.ts index d57d6e4..559f9eb 100644 --- a/src/generate.ts +++ b/src/generate.ts @@ -230,7 +230,6 @@ function rebuildLineContinuation( const splitWords = (s: string): string[] => s.match(/\S+/g) || []; - const originalWords = splitWords(contentSegments.map(s => s.content).join('')); const newWords = splitWords(decodedValue); // Map content segments to word ranges: segmentWordRanges[i] = [startWordIdx, endWordIdx)