diff --git a/packages/docx-core/src/baselines/atomizer/pipeline.field-validation.test.ts b/packages/docx-core/src/baselines/atomizer/pipeline.field-validation.test.ts
new file mode 100644
index 0000000..c8f4760
--- /dev/null
+++ b/packages/docx-core/src/baselines/atomizer/pipeline.field-validation.test.ts
@@ -0,0 +1,253 @@
+import { describe, expect } from 'vitest';
+import { testAllure, type AllureBddContext } from '../../testing/allure-test.js';
+import { validateFieldStructure } from './pipeline.js';
+
+const test = testAllure
+ .epic('Document Comparison')
+ .withLabels({ feature: 'Field Structure Validation (ECMA-376)' });
+
+const NS = 'xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"';
+
+function buildDoc(bodyXml: string): string { // Returns the fixed wrapper literals concatenated with bodyXml; every test passes the result to validateFieldStructure.
+ return (
+ `` +
+ `` +
+ `${bodyXml}`
+ );
+}
+
+const COMPLETE_FIELD =
+ `` +
+ ` NUMPAGES ` +
+ `` +
+ `3` +
+ ``;
+
+// ECMA-376 conformant field-modification pattern: a field whose instruction
+// text is changing under track changes. The fldChars remain UNWRAPPED at the
+// sibling-run level (they cannot enter <w:del>), while the changed instrText
+// fragments into <w:ins>/<w:del> wrappers. Research summary: c-rex ECMA-376
+// Part 4 fldChar topic + DeletedFieldCode placement constraint.
+const MODIFIED_FIELD_FRAGMENTED =
+ `` +
+ ` NUMPAGES ` +
+ ` PAGE ` +
+ `` +
+ `3` +
+ ``;
+
+describe('validateFieldStructure', () => { // Each case builds XML with buildDoc, runs validateFieldStructure, and asserts the boolean verdict.
+ test(
+ 'field-free document is valid',
+ async ({ given, when, then }: AllureBddContext) => {
+ let xml = '';
+ let ok = false; // starts as the opposite of the expected verdict, so the assertion fails if the 'when' step never assigns it
+
+ await given('a document with only literal text runs', () => {
+ xml = buildDoc(`hello`);
+ });
+ await when('the document is validated', () => {
+ ok = validateFieldStructure(xml);
+ });
+ await then('it passes', () => {
+ expect(ok).toBe(true);
+ });
+ },
+ );
+
+ test(
+ 'complete NUMPAGES field is valid',
+ async ({ given, when, then }: AllureBddContext) => {
+ let xml = '';
+ let ok = false;
+
+ await given('a paragraph containing a well-formed NUMPAGES complex field', () => {
+ xml = buildDoc(`${COMPLETE_FIELD}`);
+ });
+ await when('the document is validated', () => {
+ ok = validateFieldStructure(xml);
+ });
+ await then('it passes', () => {
+ expect(ok).toBe(true);
+ });
+ },
+ );
+
+ test(
+ 'orphan w:instrText outside any field is rejected',
+ async ({ given, when, then }: AllureBddContext) => {
+ let xml = '';
+ let ok = true; // rejection cases start from true for the same reason: an unassigned verdict must not look like a pass
+
+ await given('a paragraph with a bare w:instrText and no surrounding w:fldChar', () => {
+ xml = buildDoc(` PAGE `);
+ });
+ await when('the document is validated', () => {
+ ok = validateFieldStructure(xml);
+ });
+ await then('it is rejected', () => {
+ expect(ok).toBe(false);
+ });
+ },
+ );
+
+ test(
+ 'w:instrText after the separator (in the result section) is rejected',
+ async ({ given, when, then }: AllureBddContext) => {
+ let xml = '';
+ let ok = true;
+
+ await given('a field whose w:instrText is placed AFTER w:fldChar separate', () => {
+ xml = buildDoc(
+ `` +
+ `` +
+ `` +
+ ` NUMPAGES ` +
+ `` +
+ ``,
+ );
+ });
+ await when('the document is validated', () => {
+ ok = validateFieldStructure(xml);
+ });
+ await then('it is rejected', () => {
+ expect(ok).toBe(false);
+ });
+ },
+ );
+
+ test(
+ 'unbalanced begin/end counts are rejected',
+ async ({ given, when, then }: AllureBddContext) => {
+ let xml = '';
+ let ok = true;
+
+ await given('a field with two begins and only one end', () => {
+ xml = buildDoc(
+ `` +
+ `` +
+ `` +
+ `` +
+ ``,
+ );
+ });
+ await when('the document is validated', () => {
+ ok = validateFieldStructure(xml);
+ });
+ await then('it is rejected', () => {
+ expect(ok).toBe(false);
+ });
+ },
+ );
+
+ test(
+ 'ECMA-376 fragmented field modification (unwrapped fldChars + ins/del instrText) is valid',
+ async ({ given, when, then }: AllureBddContext) => {
+ let xml = '';
+ let ok = false;
+
+ await given(
+ 'a field where w:fldChar markers remain unwrapped while w:instrText/w:delInstrText fragment into /',
+ () => {
+ xml = buildDoc(`${MODIFIED_FIELD_FRAGMENTED}`);
+ },
+ );
+ await when('the document is validated', () => {
+ ok = validateFieldStructure(xml);
+ });
+ await then('it passes', () => {
+ expect(ok).toBe(true);
+ });
+ },
+ );
+
+ test(
+ 'w:delInstrText outside is rejected (ECMA-376 DeletedFieldCode)',
+ async ({ given, when, then }: AllureBddContext) => {
+ let xml = '';
+ let ok = true;
+
+ await given('a field where w:delInstrText appears in a run NOT wrapped by ', () => {
+ xml = buildDoc(
+ `` +
+ `` +
+ ` NUMPAGES ` +
+ `` +
+ `` +
+ ``,
+ );
+ });
+ await when('the document is validated', () => {
+ ok = validateFieldStructure(xml);
+ });
+ await then('it is rejected', () => {
+ expect(ok).toBe(false);
+ });
+ },
+ );
+
+ test(
+ 'w:delInstrText inside but outside any field body is rejected',
+ async ({ given, when, then }: AllureBddContext) => {
+ let xml = '';
+ let ok = true;
+
+ await given('a wrapping w:delInstrText with no enclosing field begin/separate', () => {
+ xml = buildDoc(
+ `` +
+ ` NUMPAGES ` +
+ ``,
+ );
+ });
+ await when('the document is validated', () => {
+ ok = validateFieldStructure(xml);
+ });
+ await then('it is rejected', () => {
+ expect(ok).toBe(false);
+ });
+ },
+ );
+
+ test(
+ 'w:fldChar nested inside is rejected (ECMA-376 fatal violation)',
+ async ({ given, when, then }: AllureBddContext) => {
+ let xml = '';
+ let ok = true;
+
+ await given('a wrapping a balanced begin/end field-character pair', () => {
+ xml = buildDoc(
+ `` +
+ `` +
+ `` +
+ `` +
+ `` +
+ ``,
+ );
+ });
+ await when('the document is validated', () => {
+ ok = validateFieldStructure(xml);
+ });
+ await then('it is rejected', () => {
+ expect(ok).toBe(false);
+ });
+ },
+ );
+
+ test(
+ 'w:fldChar inside is allowed (insertion of a new field is conformant)',
+ async ({ given, when, then }: AllureBddContext) => {
+ let xml = '';
+ let ok = false;
+
+ await given('an insertion wrapping a complete NUMPAGES field', () => {
+ xml = buildDoc(`${COMPLETE_FIELD}`);
+ });
+ await when('the document is validated', () => {
+ ok = validateFieldStructure(xml);
+ });
+ await then('it passes', () => {
+ expect(ok).toBe(true);
+ });
+ },
+ );
+});
diff --git a/packages/docx-core/src/baselines/atomizer/pipeline.ts b/packages/docx-core/src/baselines/atomizer/pipeline.ts
index 6e4c903..aab4e4f 100644
--- a/packages/docx-core/src/baselines/atomizer/pipeline.ts
+++ b/packages/docx-core/src/baselines/atomizer/pipeline.ts
@@ -345,18 +345,27 @@ function buildFailureSummary(
/**
* Validate field structure integrity in document XML.
*
- * Checks that fldChar begin/end are balanced and that w:instrText only
- * appears inside a proper field sequence (between begin and separate).
- * Orphaned instrText elements render as visible text in Word.
+ * Enforces three ECMA-376 Part 4 constraints on complex fields:
+ * 1. Global `w:fldChar` begin/end count balance.
+ * 2. Every `w:instrText` AND `w:delInstrText` sits inside an open field body
+ * (between `begin` and `separate`). Orphaned instruction text renders as
+ * literal text in Word.
+ * 3. `w:delInstrText` is nested inside a `<w:del>` ancestor (DeletedFieldCode
+ * schema constraint), and conversely `w:fldChar` is NEVER inside `<w:del>`
+ * (Word treats this as fatal and discards the field state machine).
+ *
+ * Called on both pre-accept/reject combined XML (with track-change wrappers)
+ * and on post-accept/reject XML (wrappers removed). Both cases must satisfy the
+ * field placement check; constraint (3) is vacuous post-accept/reject.
*/
export function validateFieldStructure(documentXml: string): boolean {
const root = parseDocumentXml(documentXml);
- // Walk the document in order, tracking field nesting
const allFldChars = findAllByTagName(root, 'w:fldChar');
const allInstrTexts = findAllByTagName(root, 'w:instrText');
+ const allDelInstrTexts = findAllByTagName(root, 'w:delInstrText');
- // Quick balance check
+ // Constraint (1): global fldChar begin/end balance.
let begins = 0;
let ends = 0;
for (const fc of allFldChars) {
@@ -366,19 +375,36 @@ export function validateFieldStructure(documentXml: string): boolean {
}
if (begins !== ends) return false;
- // Check that instrText elements are inside a field (between begin and separate).
- // Walk all elements in document order using a recursive scan.
- if (allInstrTexts.length === 0) return true; // No instrText, nothing to validate
+ if (
+ allFldChars.length === 0 &&
+ allInstrTexts.length === 0 &&
+ allDelInstrTexts.length === 0
+ ) {
+ return true;
+ }
- // Depth-first scan to check instrText placement
+ // Depth-first scan tracking field nesting (for constraint 2) and
+ // ancestor nesting (for constraint 3).
let depth = 0;
- const pastSeparatorAtDepth: number[] = []; // track separator state per depth
+ const pastSeparatorAtDepth: number[] = [];
+ let insideDelDepth = 0;
+
function scan(node: Element): boolean {
for (let child = node.firstChild; child; child = child.nextSibling) {
- if (child.nodeType !== 1) continue; // skip non-elements
+ if (child.nodeType !== 1) continue;
const el = child as Element;
+ const tag = el.tagName;
+
+ if (tag === 'w:del') {
+ insideDelDepth++;
+ const ok = scan(el);
+ insideDelDepth--;
+ if (!ok) return false;
+ continue;
+ }
- if (el.tagName === 'w:fldChar') {
+ if (tag === 'w:fldChar') {
+ if (insideDelDepth > 0) return false;
const type = el.getAttribute('w:fldCharType');
if (type === 'begin') {
depth++;
@@ -388,8 +414,10 @@ export function validateFieldStructure(documentXml: string): boolean {
} else if (type === 'end') {
if (depth > 0) depth--;
}
- } else if (el.tagName === 'w:instrText') {
- // instrText must be inside a field (depth > 0) and before the separator
+ } else if (tag === 'w:instrText') {
+ if (depth === 0 || pastSeparatorAtDepth[depth]) return false;
+ } else if (tag === 'w:delInstrText') {
+ if (insideDelDepth === 0) return false;
if (depth === 0 || pastSeparatorAtDepth[depth]) return false;
}