From 8ea670fb5888d2c06ce4130187434a5e3e2cd230 Mon Sep 17 00:00:00 2001 From: Steven Obiajulu Date: Fri, 22 May 2026 01:33:01 -0400 Subject: [PATCH 1/2] feat(docx-core): emit rPrChange for formatted replacements --- packages/docx-core/SUPPORT.md | 2 +- .../canonical-emission-regression.test.ts | 10 +-- .../docx-core/src/primitives/text.test.ts | 83 +++++++++++++++++++ packages/docx-core/src/primitives/text.ts | 51 +++++++++++- 4 files changed, 139 insertions(+), 7 deletions(-) diff --git a/packages/docx-core/SUPPORT.md b/packages/docx-core/SUPPORT.md index e77021c..6e22fb8 100644 --- a/packages/docx-core/SUPPORT.md +++ b/packages/docx-core/SUPPORT.md @@ -12,7 +12,7 @@ The "OOXML revision element" column uses ECMA-376 element names from the tracked | Primitive / tool | Source path | OOXML revision element | Notes | | --- | --- | --- | --- | -| `text.ts` — `replaceParagraphTextRange` | `packages/docx-core/src/primitives/text.ts` | `w:ins`, `w:del`, *`w:rPrChange` (pending #173)* | Run-level text replacement; current implementation emits `w:ins`/`w:del` only. `w:rPrChange` for formatting-aware replacements is tracked as **#173**. **Verified by [120.8] (#143) regression test (locks ins/del behavior).** | +| `text.ts` — `replaceParagraphTextRange` | `packages/docx-core/src/primitives/text.ts` | `w:ins`, `w:del`, `w:rPrChange` | Run-level text replacement emits insertion/deletion wrappers, and formatting-aware replacements record the prior run properties with `w:rPrChange`. **Verified by [120.8] (#143) regression test (after #173).** | | `layout.ts` — `setParagraphSpacing` | `packages/docx-core/src/primitives/layout.ts` | `w:pPrChange` | Paragraph spacing mutations change paragraph properties, not spacer-paragraph structure. **Verified by [120.8] (#143) regression test.** | | `layout.ts` — `setTableRowHeight` | `packages/docx-core/src/primitives/layout.ts` | `w:trPrChange` | Row geometry changes belong under row-property revisions. **Verified by [120.8] (#143) regression test.** | | `layout.ts` — `setTableCellPadding` | `packages/docx-core/src/primitives/layout.ts` | `w:tcPrChange` | Cell padding changes belong under cell-property revisions. **Verified by [120.8] (#143) regression test.** | diff --git a/packages/docx-core/src/integration/canonical-emission-regression.test.ts b/packages/docx-core/src/integration/canonical-emission-regression.test.ts index 7b6ac5d..063d4e2 100644 --- a/packages/docx-core/src/integration/canonical-emission-regression.test.ts +++ b/packages/docx-core/src/integration/canonical-emission-regression.test.ts @@ -145,7 +145,7 @@ function revisionTuples(xml: string, requiredAuthor?: string): RevisionTuple[] { } describe('Canonical emission catalog', () => { - test('Table A: text.ts replaceParagraphTextRange emits tracked insertion and deletion wrappers', async ({ + test('Table A: text.ts replaceParagraphTextRange emits tracked insertion, deletion, and run-property change wrappers', async ({ given, when, then, @@ -172,11 +172,11 @@ describe('Canonical emission catalog', () => { }); }); - await then('document.xml contains tracked insertion and deletion metadata', () => { - // Current implementation emits tracked insertion/deletion containers but - // does not currently emit w:rPrChange from text replacement itself. - expectTrackedElementsWithFixedMetadata(documentXml, ['ins', 'del']); + await then('document.xml contains tracked insertion, deletion, and run-property metadata', () => { + expectTrackedElementsWithFixedMetadata(documentXml, ['ins', 'del', 'rPrChange']); expect(elementsByName(documentXml, 'b').length).toBeGreaterThan(0); + const rPrChange = elementsByName(documentXml, 'rPrChange')[0]!; + expect(rPrChange.getElementsByTagNameNS(W_NS, 'rPr')).toHaveLength(1); }); }); diff --git a/packages/docx-core/src/primitives/text.test.ts b/packages/docx-core/src/primitives/text.test.ts index 8c95870..6222456 100644 --- a/packages/docx-core/src/primitives/text.test.ts +++ b/packages/docx-core/src/primitives/text.test.ts @@ -787,6 +787,89 @@ describe('replaceParagraphTextRange tracked-change emission', () => { }); }); + test('does not emit rPrChange when explicit replacement formatting leaves run properties unchanged', async ({ + given, + when, + then, + }: AllureBddContext) => { + let p: Element; + + await given('a paragraph whose source run is already bold', () => { + const doc = makeDoc( + 'Hello', + ); + p = firstParagraph(doc); + }); + + await when('the replacement explicitly asks for the same bold formatting', () => { + replaceParagraphTextRange( + p, + 0, + 5, + [{ text: 'New', addRunProps: { bold: true } }], + createRevisionContext({ + author: 'SafeDocX AI', + date: '2026-05-03T14:15:16Z', + idState: createRevisionIdState(), + }), + ); + }); + + await then('tracked insertion and deletion are emitted without a property-change record', () => { + expect(p.getElementsByTagNameNS(W_NS, 'ins')).toHaveLength(1); + expect(p.getElementsByTagNameNS(W_NS, 'del')).toHaveLength(1); + expect(p.getElementsByTagNameNS(W_NS, 'rPrChange')).toHaveLength(0); + }); + }); + + test('emits rPrChange with the prior run properties when replacement formatting changes', async ({ + given, + when, + then, + }: AllureBddContext) => { + let p: Element; + let rPrChange: Element; + + await given('a paragraph whose source run is italic', () => { + const doc = makeDoc( + 'Hello', + ); + p = firstParagraph(doc); + }); + + await when('the replacement adds bold formatting under tracked changes', () => { + replaceParagraphTextRange( + p, + 0, + 5, + [{ text: 'New', addRunProps: { bold: true } }], + createRevisionContext({ + author: 'SafeDocX AI', + date: '2026-05-03T14:15:16Z', + idState: createRevisionIdState(), + }), + ); + rPrChange = p.getElementsByTagNameNS(W_NS, 'rPrChange').item(0) as Element; + }); + + await then('the inserted run records the previous italic rPr inside w:rPrChange', () => { + expect(p.getElementsByTagNameNS(W_NS, 'ins')).toHaveLength(1); + expect(p.getElementsByTagNameNS(W_NS, 'del')).toHaveLength(1); + expect(p.getElementsByTagNameNS(W_NS, 'rPrChange')).toHaveLength(1); + expect(rPrChange.getAttribute('w:id')).toBeTruthy(); + expect(rPrChange.getAttribute('w:author')).toBe('SafeDocX AI'); + expect(rPrChange.getAttribute('w:date')).toBe('2026-05-03T14:15:16Z'); + + const previousRPr = rPrChange.getElementsByTagNameNS(W_NS, W.rPr).item(0) as Element; + expect(previousRPr).toBeTruthy(); + expect(previousRPr.getElementsByTagNameNS(W_NS, W.i)).toHaveLength(1); + expect(previousRPr.getElementsByTagNameNS(W_NS, W.b)).toHaveLength(0); + + const insertedRun = p.getElementsByTagNameNS(W_NS, 'ins').item(0)!.getElementsByTagNameNS(W_NS, W.r).item(0)!; + expect(insertedRun.getElementsByTagNameNS(W_NS, W.b)).toHaveLength(1); + }); + }); + test('preserves per-run formatting inside tracked deletions spanning multiple runs', async ({ given, when, then }: AllureBddContext) => { let p: Element; let deletion: Element; diff --git a/packages/docx-core/src/primitives/text.ts b/packages/docx-core/src/primitives/text.ts index c9a14c6..a1a60cf 100644 --- a/packages/docx-core/src/primitives/text.ts +++ b/packages/docx-core/src/primitives/text.ts @@ -1,6 +1,7 @@ import { OOXML, W } from './namespaces.js'; import { SafeDocxError } from './errors.js'; import { + buildRPrChangeElement, createRevisionContainer, prepareElementForDeletion, type RevisionContext, @@ -126,13 +127,24 @@ function cloneRunFormattingOnly(doc: Document, sourceRun: Element): Element { if (child.nodeType !== 1) continue; const el = child as Element; if (isW(el, W.rPr)) { - r.appendChild(el.cloneNode(true)); + r.appendChild(cloneRPrWithoutChangeRecords(doc, el)); break; } } return r; } +function cloneRPrWithoutChangeRecords(doc: Document, rPr: Element): Element { + const clone = doc.createElementNS(OOXML.W_NS, `w:${W.rPr}`); + for (const child of Array.from(rPr.childNodes)) { + if (child.nodeType !== 1) continue; + const el = child as Element; + if (isW(el, 'rPrChange')) continue; + clone.appendChild(el.cloneNode(true)); + } + return clone; +} + function appendTextToRun(doc: Document, run: Element, text: string): void { // Convert \t and \n to OOXML equivalents where possible. let buf = ''; @@ -302,6 +314,36 @@ function getDirectChild(parent: Element, localName: string): Element | null { return null; } +function rPrComparableSignature(rPr: Element | null): string { + if (!rPr) return ''; + + const nodeSignature = (node: Node): string => { + if (node.nodeType === 3) return node.textContent ?? ''; + if (node.nodeType !== 1) return ''; + + const el = node as Element; + if (isW(el, 'rPrChange')) return ''; + + const attrs = Array.from(el.attributes) + .map((attr) => { + const attrNs = attr.namespaceURI ?? (attr.name.startsWith('w:') ? OOXML.W_NS : ''); + const attrName = attr.name.includes(':') ? attr.name.slice(attr.name.indexOf(':') + 1) : attr.localName; + return [attrNs, attrName, attr.value] as const; + }) + .sort(([aNs, aName], [bNs, bName]) => aNs.localeCompare(bNs) || aName.localeCompare(bName)) + .map(([ns, name, value]) => `${ns}:${name}=${value}`) + .join('|'); + const children = Array.from(el.childNodes).map(nodeSignature).join(''); + return `<${el.namespaceURI ?? ''}:${el.localName} ${attrs}>${children}`; + }; + + return Array.from(rPr.childNodes).map(nodeSignature).join(''); +} + +function getSnapshotRPr(doc: Document, sourceRPr: Element | null): Element { + return sourceRPr ? cloneRPrWithoutChangeRecords(doc, sourceRPr) : doc.createElementNS(OOXML.W_NS, `w:${W.rPr}`); +} + function ensureRPr(doc: Document, run: Element): Element { const existing = getDirectChild(run, W.rPr); if (existing) return existing; @@ -519,8 +561,15 @@ export function replaceParagraphTextRange( const replacementRuns: Element[] = []; for (const part of parts) { const tmpl = part.templateRun ?? templateRun; + const sourceRPr = getDirectChild(tmpl, W.rPr); + const sourceRPrSignature = rPrComparableSignature(sourceRPr); const newRun = cloneRunFormattingOnly(doc, tmpl); applyRunProps(doc, newRun, part.addRunProps, part.clearHighlight); + const newRPr = getDirectChild(newRun, W.rPr); + const hasExplicitFormattingMutation = !!part.addRunProps || !!part.clearHighlight; + if (ctx && hasExplicitFormattingMutation && rPrComparableSignature(newRPr) !== sourceRPrSignature) { + ensureRPr(doc, newRun).appendChild(buildRPrChangeElement(getSnapshotRPr(doc, sourceRPr), ctx)); + } appendTextToRun(doc, newRun, part.text); if (getRunVisibleLength(newRun) > 0) { replacementRuns.push(newRun); From c6f989f916268db2cfb3f2c7779af665c745c9fc Mon Sep 17 00:00:00 2001 From: Steven Obiajulu Date: Fri, 22 May 2026 02:48:22 -0400 Subject: [PATCH 2/2] fix(docx-core): canonicalize rPr signature to suppress spurious rPrChange MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Peer review of #215 surfaced two reproducible false positives in `rPrComparableSignature` that caused `replaceParagraphTextRange` to emit `w:rPrChange` for no-op formatting requests: 1. **Whitespace-sensitive comparison.** The signature concatenated text node `textContent`, so a pretty-printed source `` with inter-element whitespace did not match the whitespace-free `` produced by `cloneRPrWithoutChangeRecords` (which drops non-element children). Re-asserting an existing toggle therefore emitted a bogus `w:rPrChange`. The OOXML schema only permits element children inside `w:rPr`, so any text node there is insignificant pretty-printing — the signature now skips text nodes entirely. 2. **ST_OnOff variants compared as strings.** OOXML treats the absence of `w:val` on a toggle property as equivalent to `w:val="1"`, and the values `1`/`true`/`on` (and their falsy triple) are interchangeable. But `ensureBoolProp` always writes the explicit `w:val="1"` form, so re-asserting bold against a `` source produced a signature mismatch and a bogus `w:rPrChange`. The signature now canonicalizes toggle elements: it synthesizes a `w:val="1"` for absent attributes and normalizes `1|true|on`/`0|false|off` for the toggle set defined by ECMA-376 (`b`, `bCs`, `i`, `iCs`, `caps`, `smallCaps`, `strike`, `dstrike`, `outline`, `shadow`, `emboss`, `imprint`, `vanish`, `specVanish`, `webHidden`, `noProof`, `snapToGrid`, `rtl`, `cs`). New regression tests in `text.test.ts` lock both fixes plus two previously-uncovered shapes: `clearHighlight: true` must trigger `w:rPrChange` when the source carried a highlight, and a heterogeneous multi-run deletion pins the documented snapshot contract (the chosen template run's `rPr` becomes the prior state; per-run formatting of the removed span is preserved inside ``). Ref: #173 Co-Authored-By: Claude Opus 4.7 (1M context) --- .../docx-core/src/primitives/text.test.ts | 153 ++++++++++++++++++ packages/docx-core/src/primitives/text.ts | 45 +++++- 2 files changed, 191 insertions(+), 7 deletions(-) diff --git a/packages/docx-core/src/primitives/text.test.ts b/packages/docx-core/src/primitives/text.test.ts index 6222456..e316412 100644 --- a/packages/docx-core/src/primitives/text.test.ts +++ b/packages/docx-core/src/primitives/text.test.ts @@ -870,6 +870,159 @@ describe('replaceParagraphTextRange tracked-change emission', () => { }); }); + test('does not emit rPrChange when source rPr only differs by pretty-printing whitespace', async ({ + given, + when, + then, + }: AllureBddContext) => { + let p: Element; + + await given('a paragraph whose source rPr is pretty-printed with whitespace text nodes', () => { + const doc = makeDoc( + '\n \nHello', + ); + p = firstParagraph(doc); + }); + + await when('the replacement re-asserts the existing bold formatting', () => { + replaceParagraphTextRange( + p, + 0, + 5, + [{ text: 'New', addRunProps: { bold: true } }], + createRevisionContext({ + author: 'SafeDocX AI', + date: '2026-05-03T14:15:16Z', + idState: createRevisionIdState(), + }), + ); + }); + + await then('insignificant whitespace is ignored and no rPrChange is emitted', () => { + expect(p.getElementsByTagNameNS(W_NS, 'ins')).toHaveLength(1); + expect(p.getElementsByTagNameNS(W_NS, 'del')).toHaveLength(1); + expect(p.getElementsByTagNameNS(W_NS, 'rPrChange')).toHaveLength(0); + }); + }); + + test('does not emit rPrChange when toggle properties differ only by ST_OnOff canonical form', async ({ + given, + when, + then, + }: AllureBddContext) => { + let p: Element; + + await given('a paragraph whose source bold toggle has no explicit w:val', () => { + const doc = makeDoc('Hello'); + p = firstParagraph(doc); + }); + + await when('the replacement asks for the same bold formatting (which normalizes to w:val="1")', () => { + replaceParagraphTextRange( + p, + 0, + 5, + [{ text: 'New', addRunProps: { bold: true } }], + createRevisionContext({ + author: 'SafeDocX AI', + date: '2026-05-03T14:15:16Z', + idState: createRevisionIdState(), + }), + ); + }); + + await then('absent w:val and w:val="1" are treated as equal and no rPrChange is emitted', () => { + expect(p.getElementsByTagNameNS(W_NS, 'ins')).toHaveLength(1); + expect(p.getElementsByTagNameNS(W_NS, 'del')).toHaveLength(1); + expect(p.getElementsByTagNameNS(W_NS, 'rPrChange')).toHaveLength(0); + }); + }); + + test('emits rPrChange when clearHighlight removes a highlight from the source rPr', async ({ + given, + when, + then, + }: AllureBddContext) => { + let p: Element; + let rPrChange: Element; + + await given('a paragraph whose source run carries a yellow highlight', () => { + const doc = makeDoc( + 'Hello', + ); + p = firstParagraph(doc); + }); + + await when('the replacement clears the highlight under tracked changes', () => { + replaceParagraphTextRange( + p, + 0, + 5, + [{ text: 'New', clearHighlight: true }], + createRevisionContext({ + author: 'SafeDocX AI', + date: '2026-05-03T14:15:16Z', + idState: createRevisionIdState(), + }), + ); + rPrChange = p.getElementsByTagNameNS(W_NS, 'rPrChange').item(0) as Element; + }); + + await then('the inserted run records the previous highlight inside w:rPrChange', () => { + expect(p.getElementsByTagNameNS(W_NS, 'rPrChange')).toHaveLength(1); + const previousRPr = rPrChange.getElementsByTagNameNS(W_NS, W.rPr).item(0) as Element; + expect(previousRPr).toBeTruthy(); + expect(previousRPr.getElementsByTagNameNS(W_NS, W.highlight)).toHaveLength(1); + }); + }); + + test('multi-run deletion snapshots the chosen template run rPr in rPrChange', async ({ + given, + when, + then, + }: AllureBddContext) => { + let p: Element; + let rPrChange: Element; + + await given('a paragraph that spans an italic run followed by a bold run', () => { + const doc = makeDoc( + '' + + 'Hello ' + + 'World' + + '', + ); + p = firstParagraph(doc); + }); + + await when('a single replacement part covers the full span and requests bold', () => { + replaceParagraphTextRange( + p, + 0, + 11, + [{ text: 'New', addRunProps: { bold: true } }], + createRevisionContext({ + author: 'SafeDocX AI', + date: '2026-05-03T14:15:16Z', + idState: createRevisionIdState(), + }), + ); + rPrChange = p.getElementsByTagNameNS(W_NS, 'rPrChange').item(0) as Element; + }); + + await then('the rPrChange records the predominant-template prior rPr (italic) and the deleted runs preserve full per-run formatting', () => { + expect(p.getElementsByTagNameNS(W_NS, 'rPrChange')).toHaveLength(1); + const previousRPr = rPrChange.getElementsByTagNameNS(W_NS, W.rPr).item(0) as Element; + expect(previousRPr.getElementsByTagNameNS(W_NS, W.i)).toHaveLength(1); + expect(previousRPr.getElementsByTagNameNS(W_NS, W.b)).toHaveLength(0); + + const deletion = p.getElementsByTagNameNS(W_NS, 'del').item(0)!; + const deletedRuns = deletion.getElementsByTagNameNS(W_NS, W.r); + expect(deletedRuns).toHaveLength(2); + expect(deletedRuns.item(0)!.getElementsByTagNameNS(W_NS, W.i)).toHaveLength(1); + expect(deletedRuns.item(1)!.getElementsByTagNameNS(W_NS, W.b)).toHaveLength(1); + }); + }); + test('preserves per-run formatting inside tracked deletions spanning multiple runs', async ({ given, when, then }: AllureBddContext) => { let p: Element; let deletion: Element; diff --git a/packages/docx-core/src/primitives/text.ts b/packages/docx-core/src/primitives/text.ts index a1a60cf..e0959a2 100644 --- a/packages/docx-core/src/primitives/text.ts +++ b/packages/docx-core/src/primitives/text.ts @@ -314,22 +314,53 @@ function getDirectChild(parent: Element, localName: string): Element | null { return null; } +// OOXML on/off toggle properties (ECMA-376 ST_OnOff). Absence of w:val means +// "1", and the values "1"/"true"/"on" are equivalent (likewise for the falsy +// triple). We normalize so semantically-identical inputs hash the same. +const W_BOOL_TOGGLES = new Set([ + 'b', 'bCs', 'i', 'iCs', 'caps', 'smallCaps', 'strike', 'dstrike', + 'outline', 'shadow', 'emboss', 'imprint', 'vanish', 'specVanish', + 'webHidden', 'noProof', 'snapToGrid', 'rtl', 'cs', +]); + +function normalizedBoolValAttr(raw: string | null): string { + const s = raw === null ? '' : raw.trim().toLowerCase(); + if (s === '' || s === '1' || s === 'true' || s === 'on') return '1'; + if (s === '0' || s === 'false' || s === 'off') return '0'; + return s; +} + function rPrComparableSignature(rPr: Element | null): string { if (!rPr) return ''; const nodeSignature = (node: Node): string => { - if (node.nodeType === 3) return node.textContent ?? ''; + // Text nodes inside w:rPr are insignificant whitespace from pretty-printing; + // the schema only permits element children, so dropping them matches + // semantics and avoids false positives against re-emitted (whitespace-free) + // run-property blocks. if (node.nodeType !== 1) return ''; const el = node as Element; if (isW(el, 'rPrChange')) return ''; - const attrs = Array.from(el.attributes) - .map((attr) => { - const attrNs = attr.namespaceURI ?? (attr.name.startsWith('w:') ? OOXML.W_NS : ''); - const attrName = attr.name.includes(':') ? attr.name.slice(attr.name.indexOf(':') + 1) : attr.localName; - return [attrNs, attrName, attr.value] as const; - }) + const isWBoolToggle = + el.namespaceURI === OOXML.W_NS && W_BOOL_TOGGLES.has(el.localName ?? ''); + + const tuples = Array.from(el.attributes).map((attr) => { + const attrNs = attr.namespaceURI ?? (attr.name.startsWith('w:') ? OOXML.W_NS : ''); + const attrName = attr.name.includes(':') ? attr.name.slice(attr.name.indexOf(':') + 1) : attr.localName; + let value = attr.value; + if (isWBoolToggle && attrNs === OOXML.W_NS && attrName === 'val') { + value = normalizedBoolValAttr(value); + } + return [attrNs, attrName, value] as const; + }); + + if (isWBoolToggle && !tuples.some(([ns, name]) => ns === OOXML.W_NS && name === 'val')) { + tuples.push([OOXML.W_NS, 'val', '1']); + } + + const attrs = tuples .sort(([aNs, aName], [bNs, bName]) => aNs.localeCompare(bNs) || aName.localeCompare(bName)) .map(([ns, name, value]) => `${ns}:${name}=${value}`) .join('|');