diff --git a/.github/workflows/lean-build.yml b/.github/workflows/lean-build.yml index 8269a40..afe9096 100644 --- a/.github/workflows/lean-build.yml +++ b/.github/workflows/lean-build.yml @@ -91,23 +91,25 @@ jobs: - name: Audit non-Spec modules are zero-sorry working-directory: verification/lean run: | - # All LeanSpike modules except Spec.lean (which intentionally holds - # exactly two sorries — the deferred specification targets). + # All LeanSpike and Tier2 modules except Spec.lean (which intentionally + # holds exactly one sorry — the deferred `inv_rt_001` target). # `find -print0 | xargs -0 grep` avoids the SC2086 word-splitting # foot-gun on an unquoted variable while still letting future modules - # under LeanSpike/ get picked up automatically. - if find LeanSpike.lean LeanSpike -name '*.lean' ! -name 'Spec.lean' -print0 \ + # under LeanSpike/ and Tier2/ get picked up automatically. + if find LeanSpike.lean LeanSpike Tier2.lean Tier2 -name '*.lean' ! -name 'Spec.lean' -print0 \ | xargs -0 grep -nwH "sorry"; then echo "ERROR: sorry found in non-Spec Lean modules. They must remain zero-sorry." exit 1 fi - - name: Audit Spec.lean has exactly 2 sorries + - name: Audit Spec.lean has exactly 1 sorry working-directory: verification/lean run: | + # `inv_field_001` is closed as of Tier 2; `inv_rt_001` remains the + # single deferred specification target. count=$(grep -cw "sorry" LeanSpike/Spec.lean || true) - if [ "$count" != "2" ]; then - echo "ERROR: Spec.lean expected exactly 2 sorries, found $count." + if [ "$count" != "1" ]; then + echo "ERROR: Spec.lean expected exactly 1 sorry, found $count." grep -nw "sorry" LeanSpike/Spec.lean || true exit 1 fi diff --git a/openspec/changes/add-ooxml-doc-subset-and-inv-field-001-proof/tasks.md b/openspec/changes/add-ooxml-doc-subset-and-inv-field-001-proof/tasks.md index 64a68b7..15cc6b1 100644 --- a/openspec/changes/add-ooxml-doc-subset-and-inv-field-001-proof/tasks.md +++ b/openspec/changes/add-ooxml-doc-subset-and-inv-field-001-proof/tasks.md @@ -8,34 +8,34 @@ Each task below is sized for a single `/codex-implement` run against its own Git ## 0. Generic walk lemmas (round-3 split) -- [ ] 0.1 (T0) Define and prove the shared walk lemmas in `verification/lean/Tier2/WalkLemmas.lean`. (a) `walkBlocks_append`: `walkBlocks r (l ++ m) = walkBlocks (walkBlocks r l) m` (fold-over-append specialized to `stepBlock`; mathlib's `List.foldl_append` likely lands this directly). (b) Context-extension lemma: `fieldContextNeutral bs → ∀ ctx, walkBlocks (.ok ctx) bs = .ok ctx`, plus the corollary that for any prior valid prefix that ends at `.ok ctx`, splicing or deleting `bs` at that position leaves every later state observation identical (uses (a)). (c) `DelInstrText → InstrText` rewrite lemma: under `fieldContextNeutral bs`, replacing any `DelInstrText` atom inside `bs` with `InstrText` yields a `bs'` that is still context-neutral AND, when later spliced into an outer document at the same position, the renamed atom passes `stepAtom` (the locally-pushed stack at that atom's position is non-empty with top bit `false`, so its top bit dominates whatever the outer context's top bit is). These three lemmas are the load-bearing generic results consumed by T4b and T5b. No `accept`/`reject`-specific reasoning here — the file is purely about the stack-valued walk over `List Block`. +- [x] 0.1 (T0) Define and prove the shared walk lemmas in `verification/lean/Tier2/WalkLemmas.lean`. (a) `walkBlocks_append`: `walkBlocks r (l ++ m) = walkBlocks (walkBlocks r l) m` (fold-over-append specialized to `stepBlock`; mathlib's `List.foldl_append` likely lands this directly). (b) Context-extension lemma: `fieldContextNeutral bs → ∀ ctx, walkBlocks (.ok ctx) bs = .ok ctx`, plus the corollary that for any prior valid prefix that ends at `.ok ctx`, splicing or deleting `bs` at that position leaves every later state observation identical (uses (a)). (c) `DelInstrText → InstrText` rewrite lemma: under `fieldContextNeutral bs`, replacing any `DelInstrText` atom inside `bs` with `InstrText` yields a `bs'` that is still context-neutral AND, when later spliced into an outer document at the same position, the renamed atom passes `stepAtom` (the locally-pushed stack at that atom's position is non-empty with top bit `false`, so its top bit dominates whatever the outer context's top bit is). These three lemmas are the load-bearing generic results consumed by T4b and T5b. No `accept`/`reject`-specific reasoning here — the file is purely about the stack-valued walk over `List Block`. ## 1. Model -- [ ] 1.1 (T1) Define `Tier2.OoxmlModel` datatypes in `verification/lean/Tier2/OoxmlModel.lean`. No proofs, just the inductive families: `Doc`, `Paragraph`, `Block` (with `Run`, `Ins`, `Del`, `MoveFrom`, `MoveTo` constructors), `Run`, `Atom` (with `Text`, `DelText`, `InstrText`, `DelInstrText`, `FldChar` constructors), `FldCharKind` (`Begin`, `Separate`, `End`). Document the shape with file:line citations to `packages/docx-core/src/baselines/atomizer/trackChangesAcceptorAst.ts`. +- [x] 1.1 (T1) Define `Tier2.OoxmlModel` datatypes in `verification/lean/Tier2/OoxmlModel.lean`. No proofs, just the inductive families: `Doc`, `Paragraph`, `Block` (with `Run`, `Ins`, `Del`, `MoveFrom`, `MoveTo` constructors), `Run`, `Atom` (with `Text`, `DelText`, `InstrText`, `DelInstrText`, `FldChar` constructors), `FldCharKind` (`Begin`, `Separate`, `End`). Document the shape with file:line citations to `packages/docx-core/src/baselines/atomizer/trackChangesAcceptorAst.ts`. ## 2. Predicates -- [ ] 2.1 (T2) Define `Tier2.FieldStructure.FieldCtx := List Bool`, `WalkResult`, `stepAtom`, `stepBlock`, `walkBlocks`, `validateFieldStructure : Doc → Bool`, `fieldContextNeutral : List Block → Prop`, and `recursivelyWellformed : Doc → Prop` in `verification/lean/Tier2/FieldStructure.lean`. The walk uses a depth-indexed stack of "separator-seen" bits exactly mirroring `pipeline.ts:374-389` (`pastSeparatorAtDepth: number[]`): `Begin` pushes `false`; `Separate` sets the top bit to `true` (no-op when stack is empty, matching `pipeline.ts:387`'s `if (depth > 0)` guard); `End` pops the stack (no-op when empty, matching `pipeline.ts:389`'s guard); `InstrText` and `DelInstrText` produce `WalkResult.invalid` when the stack is empty or its top bit is `true`. `validateFieldStructure` checks (a) global `Begin` count = global `End` count AND (b) `walkBlocks (.ok []) (paragraphs.bind body) ≠ .invalid`. `fieldContextNeutral bs := ∀ ctx, walkBlocks (.ok ctx) bs = .ok ctx`. `recursivelyWellformed d := validateFieldStructure d = true ∧ ∀ subtree ∈ allWrapperSubtrees d, fieldContextNeutral subtree`. See `design.md > validateFieldStructure, FieldCtx, and recursivelyWellformed` for the round-3 counterexamples that drove this exact shape; no proofs in this task, only definitions plus helper folds. +- [x] 2.1 (T2) Define `Tier2.FieldStructure.FieldCtx := List Bool`, `WalkResult`, `stepAtom`, `stepBlock`, `walkBlocks`, `validateFieldStructure : Doc → Bool`, `fieldContextNeutral : List Block → Prop`, and `recursivelyWellformed : Doc → Prop` in `verification/lean/Tier2/FieldStructure.lean`. The walk uses a depth-indexed stack of "separator-seen" bits exactly mirroring `pipeline.ts:374-389` (`pastSeparatorAtDepth: number[]`): `Begin` pushes `false`; `Separate` sets the top bit to `true` (no-op when stack is empty, matching `pipeline.ts:387`'s `if (depth > 0)` guard); `End` pops the stack (no-op when empty, matching `pipeline.ts:389`'s guard); `InstrText` and `DelInstrText` produce `WalkResult.invalid` when the stack is empty or its top bit is `true`. `validateFieldStructure` checks (a) global `Begin` count = global `End` count AND (b) `walkBlocks (.ok []) (paragraphs.bind body) ≠ .invalid`. `fieldContextNeutral bs := ∀ ctx, walkBlocks (.ok ctx) bs = .ok ctx`. `recursivelyWellformed d := validateFieldStructure d = true ∧ ∀ subtree ∈ allWrapperSubtrees d, fieldContextNeutral subtree`. See `design.md > validateFieldStructure, FieldCtx, and recursivelyWellformed` for the round-3 counterexamples that drove this exact shape; no proofs in this task, only definitions plus helper folds. ## 3. Operations -- [ ] 3.1 (T3a) Define `Tier2.AcceptReject.accept : Doc → Doc` in `verification/lean/Tier2/AcceptReject.lean`. Behaviors required (per `trackChangesAcceptorAst.ts:368-506`): drop `Del`/`MoveFrom` content; unwrap `Ins`/`MoveTo` (keep children); drop paragraphs whose body collapses to empty after the previous rules. No proofs. -- [ ] 3.2 (T3b) Define `Tier2.AcceptReject.reject : Doc → Doc` in the same file. Behaviors required (per `trackChangesAcceptorAst.ts:509-659`, noting that the TS engine unwraps both `w:del` AND `w:moveFrom` *before* renaming `w:delInstrText → w:instrText` over the whole tree at lines 602-616): drop `Ins`/`MoveTo`; unwrap `Del`/`MoveFrom`; then rewrite `DelText → Text` and `DelInstrText → InstrText` over the resulting tree. The rewrite is global at the operation level — the precondition that `DelInstrText` only originates inside a wrapper that `reject` will unwrap is enforced by `recursivelyWellformed` on the *input*, NOT by the bare datatype. No proofs. +- [x] 3.1 (T3a) Define `Tier2.AcceptReject.accept : Doc → Doc` in `verification/lean/Tier2/AcceptReject.lean`. Behaviors required (per `trackChangesAcceptorAst.ts:368-506`): drop `Del`/`MoveFrom` content; unwrap `Ins`/`MoveTo` (keep children); drop paragraphs whose body collapses to empty after the previous rules. No proofs. +- [x] 3.2 (T3b) Define `Tier2.AcceptReject.reject : Doc → Doc` in the same file. Behaviors required (per `trackChangesAcceptorAst.ts:509-659`, noting that the TS engine unwraps both `w:del` AND `w:moveFrom` *before* renaming `w:delInstrText → w:instrText` over the whole tree at lines 602-616): drop `Ins`/`MoveTo`; unwrap `Del`/`MoveFrom`; then rewrite `DelText → Text` and `DelInstrText → InstrText` over the resulting tree. The rewrite is global at the operation level — the precondition that `DelInstrText` only originates inside a wrapper that `reject` will unwrap is enforced by `recursivelyWellformed` on the *input*, NOT by the bare datatype. No proofs. ## 4. Preservation lemma — `accept` side -- [ ] 4.1 (T4a) State the `accept`-specific helper lemmas in `verification/lean/Tier2/InvFieldOne.lean` *consuming* T0's generic results: dropping a `fieldContextNeutral` `Del`/`MoveFrom` wrapper subtree leaves every later observation of the outer walk identical (direct application of T0(b) to the wrapper's child block list, since `stepBlock` of a wrapper is `walkBlocks` over its children); unwrapping a `fieldContextNeutral` `Ins`/`MoveTo` wrapper at the wrapper's position is a no-op for state observations outside the children's span (also T0(b)); empty-paragraph drops contribute no `FldChar` / `InstrText` / `DelInstrText` atoms (trivial). Do NOT redo the walk-over-append / context-extension reasoning here — that is T0's job. -- [ ] 4.2 (T4b) Prove the `accept` half of the preservation lemma `field_structure_preserved`: `∀ d, recursivelyWellformed d → validateFieldStructure (accept d) = true`, using 4.1 (which uses T0). +- [x] 4.1 (T4a) State the `accept`-specific helper lemmas in `verification/lean/Tier2/InvFieldOne.lean` *consuming* T0's generic results: dropping a `fieldContextNeutral` `Del`/`MoveFrom` wrapper subtree leaves every later observation of the outer walk identical (direct application of T0(b) to the wrapper's child block list, since `stepBlock` of a wrapper is `walkBlocks` over its children); unwrapping a `fieldContextNeutral` `Ins`/`MoveTo` wrapper at the wrapper's position is a no-op for state observations outside the children's span (also T0(b)); empty-paragraph drops contribute no `FldChar` / `InstrText` / `DelInstrText` atoms (trivial). Do NOT redo the walk-over-append / context-extension reasoning here — that is T0's job. +- [x] 4.2 (T4b) Prove the `accept` half of the preservation lemma `field_structure_preserved`: `∀ d, recursivelyWellformed d → validateFieldStructure (accept d) = true`, using 4.1 (which uses T0). ## 5. Preservation lemma — `reject` side -- [ ] 5.1 (T5a) State the `reject`-specific helper lemmas: dropping a `fieldContextNeutral` `Ins`/`MoveTo` wrapper subtree leaves the outer walk state at that position unchanged (symmetric to 4.1, again via T0(b)); unwrapping a `fieldContextNeutral` `Del`/`MoveFrom` likewise; the `DelInstrText → InstrText` rewrite is safe at every rewrite position (direct application of T0(c) — note the rewrite is global at the operation level after both unwraps complete, but `recursivelyWellformed` constrains the input shape so only `DelInstrText` atoms that originated inside a `Del` or `MoveFrom` wrapper ever existed); `DelText → Text` is irrelevant to the field-structure predicate. -- [ ] 5.2 (T5b) Prove the `reject` half of the preservation lemma `∀ d, recursivelyWellformed d → validateFieldStructure (reject d) = true`, using 5.1 (which uses T0); combine 4.2 and 5.2 into the final `field_structure_preserved` theorem. +- [x] 5.1 (T5a) State the `reject`-specific helper lemmas: dropping a `fieldContextNeutral` `Ins`/`MoveTo` wrapper subtree leaves the outer walk state at that position unchanged (symmetric to 4.1, again via T0(b)); unwrapping a `fieldContextNeutral` `Del`/`MoveFrom` likewise; the `DelInstrText → InstrText` rewrite is safe at every rewrite position (direct application of T0(c) — note the rewrite is global at the operation level after both unwraps complete, but `recursivelyWellformed` constrains the input shape so only `DelInstrText` atoms that originated inside a `Del` or `MoveFrom` wrapper ever existed); `DelText → Text` is irrelevant to the field-structure predicate. +- [x] 5.2 (T5b) Prove the `reject` half of the preservation lemma `∀ d, recursivelyWellformed d → validateFieldStructure (reject d) = true`, using 5.1 (which uses T0); combine 4.2 and 5.2 into the final `field_structure_preserved` theorem. ## 6. `Spec.lean` rewire, axiom, closure, bridge, docs -- [ ] 6.1 (T6) Land the closure in `verification/lean/LeanSpike/Spec.lean`: rewire `OoxmlDoc`, `acceptAllChanges`, `rejectAllChanges`, `validateFieldStructure` from `axiom` to `abbrev` / `def` over the Tier 2 types; add the new `axiom compareDocumentXml_output_recursivelyWellformed` (with the engine-specific, fixture-only docstring from `design.md`); replace the `sorry` at line 71 with the two-line proof composing the new axiom and `Tier2.InvFieldOne.field_structure_preserved`. Add one field-bearing case to `packages/docx-core/src/integration/lean-spec-bridge.test.ts` exercising the new axiom against a fixture-derived field-bearing input. Add `verification/lean/Tier2/README.md` scaffolding. Extend `verification/lean/README.md` Specification Gap section to record what Tier 2 closes vs. what remains, naming `compareDocumentXml_output_recursivelyWellformed` as the single named residual axiom. Update `verification/ROADMAP.md` Tier 2 section to reflect the "preservation + named residual axiom" framing. +- [x] 6.1 (T6) Land the closure in `verification/lean/LeanSpike/Spec.lean`: rewire `OoxmlDoc`, `acceptAllChanges`, `rejectAllChanges`, `validateFieldStructure` from `axiom` to `abbrev` / `def` over the Tier 2 types; add the new `axiom compareDocumentXml_output_recursivelyWellformed` (with the engine-specific, fixture-only docstring from `design.md`); replace the `sorry` at line 71 with the two-line proof composing the new axiom and `Tier2.InvFieldOne.field_structure_preserved`. Add one field-bearing case to `packages/docx-core/src/integration/lean-spec-bridge.test.ts` exercising the new axiom against a fixture-derived field-bearing input. Add `verification/lean/Tier2/README.md` scaffolding. Extend `verification/lean/README.md` Specification Gap section to record what Tier 2 closes vs. what remains, naming `compareDocumentXml_output_recursivelyWellformed` as the single named residual axiom. Update `verification/ROADMAP.md` Tier 2 section to reflect the "preservation + named residual axiom" framing. ## Out of scope (separate future changes) diff --git a/packages/docx-core/src/integration/lean-spec-bridge.test.ts b/packages/docx-core/src/integration/lean-spec-bridge.test.ts index ab3765a..bc9e181 100644 --- a/packages/docx-core/src/integration/lean-spec-bridge.test.ts +++ b/packages/docx-core/src/integration/lean-spec-bridge.test.ts @@ -47,6 +47,8 @@ */ import fc from 'fast-check'; +import JSZip from 'jszip'; +import { DOMParser } from '@xmldom/xmldom'; import { describe } from 'vitest'; import { compareDocuments, type ReconstructionMode } from '../index.js'; import { validateFieldStructure } from '../baselines/atomizer/pipeline.js'; @@ -674,6 +676,109 @@ async function assertRoundTripInvariant( } } +// ============================================================================= +// Field-bearing falsifiability layer for `compareDocumentXml_output_recursivelyWellformed` +// +// The new Tier 2 axiom in `verification/lean/LeanSpike/Spec.lean` asserts that +// inplace comparison output satisfies `recursivelyWellformed`: the whole +// document passes `validateFieldStructure`, AND every `w:ins` / `w:del` / +// `w:moveFrom` / `w:moveTo` wrapper subtree is `fieldContextNeutral`. +// +// The fast-check generators above are field-free and only check the *consequence* +// of the axiom (validateFieldStructure post-accept/reject). The single fixture +// case below exercises a TS-side analogue of the *precondition* itself against +// the live engine — a falsifiability layer, NOT empirical grounding for a +// universal claim. +// ============================================================================= + +async function buildFieldDocx(bodyXml: string): Promise { + const documentXml = + `` + + `` + + `${bodyXml}`; + const contentTypesXml = + `` + + `` + + `` + + `` + + `` + + ``; + const rootRelsXml = + `` + + `` + + `` + + ``; + const zip = new JSZip(); + zip.file('[Content_Types].xml', contentTypesXml); + zip.file('_rels/.rels', rootRelsXml); + zip.file('word/document.xml', documentXml); + return await zip.generateAsync({ type: 'nodebuffer' }); +} + +/** + * TS-side analogue of `Tier2.FieldStructure.fieldContextNeutral` for one wrapper + * subtree. Walks every descendant `w:fldChar` / `w:instrText` / `w:delInstrText` + * atom in document order over a depth-indexed `pastSeparatorAtDepth` stack — + * the exact model the Lean walk and `pipeline.ts:374-389` use. + * + * Pop-on-empty and separate-on-empty are treated as FAILURE (not the no-op the + * whole-document walk uses): an inner subtree that pops or flips a separator bit + * on the empty local stack would disturb an outer field context, so it is not + * context-neutral under the universal quantifier `∀ ctx`. + */ +function isFieldContextNeutral(wrapper: Element): boolean { + const descendants = wrapper.getElementsByTagName('*'); + const stack: boolean[] = []; + for (let i = 0; i < descendants.length; i++) { + const el = descendants[i]; + if (!el) continue; + const tag = el.nodeName; + if (tag === 'w:fldChar') { + const kind = el.getAttribute('w:fldCharType'); + if (kind === 'begin') { + stack.push(false); + } else if (kind === 'separate') { + if (stack.length === 0) return false; + stack[stack.length - 1] = true; + } else if (kind === 'end') { + if (stack.length === 0) return false; + stack.pop(); + } + } else if (tag === 'w:instrText' || tag === 'w:delInstrText') { + if (stack.length === 0 || stack[stack.length - 1] === true) return false; + } + } + return stack.length === 0; +} + +function assertRecursivelyWellformed( + invariant: string, + context: Record, + combinedXml: string, +): void { + if (!validateFieldStructure(combinedXml)) { + throw new Error( + `${invariant}: triage=engine-bug whole-document validateFieldStructure failed on ` + + `inplace comparison output. context=${JSON.stringify(context)}`, + ); + } + const doc = new DOMParser().parseFromString(combinedXml, 'application/xml'); + for (const tag of ['w:ins', 'w:del', 'w:moveFrom', 'w:moveTo']) { + const wrappers = doc.getElementsByTagName(tag); + for (let i = 0; i < wrappers.length; i++) { + const wrapper = wrappers[i]; + if (!wrapper) continue; + if (!isFieldContextNeutral(wrapper as unknown as Element)) { + throw new Error( + `${invariant}: triage=engine-bug wrapper subtree <${tag}>[${i}] is not ` + + `field-context-neutral — recursivelyWellformed precondition violated. ` + + `context=${JSON.stringify(context)}`, + ); + } + } + } +} + describe('Lean Spec Bridge - Inplace Reconstruction', { timeout: 60_000 }, () => { test( 'INV-FIELD-001: field structure preserved after accept-all and reject-all on inplace comparison output', @@ -792,4 +897,170 @@ describe('Lean Spec Bridge - Inplace Reconstruction', { timeout: 60_000 }, () => }); }, ); + + test( + 'INV-FIELD-001: field-bearing inplace comparison output is recursivelyWellformed (axiom falsifiability layer)', + async ({ given, when, then }: AllureBddContext) => { + await given('a field-free original and a revised document with a complete NUMPAGES field inserted', async () => {}); + + await when('the live inplace comparison output is computed', async () => {}); + + await then( + 'the combined document validates and every wrapper subtree is field-context-neutral', + async () => { + const field = + `` + + ` NUMPAGES ` + + `` + + `3` + + ``; + const original = await buildFieldDocx( + `Total pages here.`, + ); + const revised = await buildFieldDocx( + `Total pages ${field} here.`, + ); + + const result = await compareDocumentBuffers(original, revised); + const context = { fixture: 'numpages-field-insert' }; + + assertInplaceResult('INV-FIELD-001 field-bearing', context, result); + // The new axiom's precondition: recursivelyWellformed on inplace output. + assertRecursivelyWellformed('INV-FIELD-001 field-bearing', context, result.combined); + // The axiom's consequence: field structure survives accept/reject. + assertFieldInvariant('INV-FIELD-001 field-bearing', context, result.combined); + }, + ); + }, + ); + + test( + 'INV-FIELD-001: deleting a complete field produces recursivelyWellformed inplace output (delInstrText axiom coverage)', + async ({ given, when, then }: AllureBddContext) => { + await given( + 'an original document containing a complete NUMPAGES field and a revised document with the field deleted', + async () => {}, + ); + + await when('the live inplace comparison output is computed', async () => {}); + + await then( + 'the combined document validates and every wrapper subtree (including the containing w:delInstrText) is field-context-neutral', + async () => { + // safe-docx's inplace atomizer emits deleted complete fields as a single + // containing the full begin/instrText/separate/result/end run + // sequence (inPlaceModifier.ts:938). After conversion, the instrText + // becomes delInstrText. This fixture exercises the w:delInstrText atom + // case in `isFieldContextNeutral` (which the NUMPAGES insertion fixture + // never reaches). + const field = + `` + + ` NUMPAGES ` + + `` + + `3` + + ``; + const original = await buildFieldDocx( + `Total pages ${field} here.`, + ); + const revised = await buildFieldDocx( + `Total pages here.`, + ); + + const result = await compareDocumentBuffers(original, revised); + const context = { fixture: 'numpages-field-delete' }; + + assertInplaceResult('INV-FIELD-001 field-bearing delete', context, result); + assertRecursivelyWellformed('INV-FIELD-001 field-bearing delete', context, result.combined); + assertFieldInvariant('INV-FIELD-001 field-bearing delete', context, result.combined); + }, + ); + }, + ); + + test( + 'isFieldContextNeutral rejects standalone separator, end, and begin+separate fragments (regression guard)', + async ({ given, when, then }: AllureBddContext) => { + const cases: { name: string; xml: string }[] = []; + + await given( + 'three crafted wrapper XML fragments that each disturb the outer field context (standalone separate, standalone end, begin+separate)', + () => { + cases.push( + { + name: 'standalone separate', + xml: + `` + + `` + + ``, + }, + { + name: 'standalone end', + xml: + `` + + `` + + ``, + }, + { + name: 'begin without matching end', + xml: + `` + + `` + + `` + + ``, + }, + ); + }, + ); + + await when('isFieldContextNeutral is applied to each', () => {}); + + await then('every fragment is rejected as not context-neutral', () => { + for (const c of cases) { + const doc = new DOMParser().parseFromString(c.xml, 'application/xml'); + const wrapper = doc.documentElement as unknown as Element; + if (!wrapper) throw new Error(`${c.name}: failed to parse wrapper`); + if (isFieldContextNeutral(wrapper)) { + throw new Error( + `isFieldContextNeutral regression: case "${c.name}" should be non-neutral but returned true`, + ); + } + } + }); + }, + ); + + test( + 'isFieldContextNeutral accepts a wrapper containing a complete self-contained field (regression guard)', + async ({ given, when, then }: AllureBddContext) => { + let wrapper: Element | null = null; + let result = false; + + await given( + 'a wrapping a complete NUMPAGES begin/instrText/separate/result/end sequence', + () => { + const xml = + `` + + `` + + ` NUMPAGES ` + + `` + + `3` + + `` + + ``; + wrapper = new DOMParser().parseFromString(xml, 'application/xml') + .documentElement as unknown as Element; + }, + ); + + await when('isFieldContextNeutral is applied', () => { + if (!wrapper) throw new Error('wrapper failed to parse'); + result = isFieldContextNeutral(wrapper); + }); + + await then('the wrapper is accepted as context-neutral', () => { + if (!result) { + throw new Error('isFieldContextNeutral regression: complete-field wrapper should be neutral'); + } + }); + }, + ); }); diff --git a/verification/ROADMAP.md b/verification/ROADMAP.md index b09c804..dbf5df9 100644 --- a/verification/ROADMAP.md +++ b/verification/ROADMAP.md @@ -1,6 +1,6 @@ # safe-docx Verification — Roadmap -**Status (2026-05-12)**: Stages 1-6 of the Lean 4 verification spike shipped via PR #164 (merged 2026-05-11). Tiers 1, 1.5, and 1.6 are complete; Tier 2 kickoff is in progress via OpenSpec change `add-ooxml-doc-subset-and-inv-field-001-proof` (issue #201); Tier 2.5 / 3 / 3+ remain not started. +**Status (2026-05-15)**: Stages 1-6 of the Lean 4 verification spike shipped via PR #164 (merged 2026-05-11). Tiers 1, 1.5, and 1.6 are complete. Tier 2 is **in progress**: OpenSpec change `add-ooxml-doc-subset-and-inv-field-001-proof` (issue #201) lands the definitional `OoxmlDoc` subset and **closes `inv_field_001`** as "preservation lemma + single named residual axiom". `inv_rt_001` remains `sorry`'d (deferred to `add-inv-rt-001-proof`); Tier 2.5 / 3 / 3+ remain not started. ## How to use this document @@ -40,9 +40,9 @@ This tier is the tooling-validation layer. It shows that Lean 4 plus mathlib can Specification targets are stated over an uninterpreted Lean signature plus an empirical bridge against the live TS engine. Lives in `verification/lean/LeanSpike/Spec.lean` and `packages/docx-core/src/integration/lean-spec-bridge.test.ts`. -- `INV-FIELD-001` — **sorry'd**. Field-structure preservation across accept/reject of inplace comparison output. `Spec.lean`. +- `INV-FIELD-001` — shipped **sorry'd** in Tier 1.5; **closed in Tier 2** (see below). `Spec.lean`. - `INV-RT-001` — **sorry'd**. Paired round-trip text equality under normalization. `Spec.lean`. -- fast-check bridge — empirically exercises both invariants at 100 runs/property × 2 properties against the live TS engine, gated on `reconstructionModeUsed === 'inplace'`. 0 falsifications to date. +- fast-check bridge — empirically exercises both invariants at 100 runs/property against the live TS engine, gated on `reconstructionModeUsed === 'inplace'`. 0 falsifications to date. Tier 2 adds one field-bearing fixture case as a falsifiability layer for the Tier 2 residual axiom. This tier is intentionally not a proof claim about the production engine. It is a named specification surface plus a falsifiability layer over live runtime behavior. The TS evidence motivates the targets, but it does not justify universal Lean theorems while `Spec.lean` is still an uninterpreted axiom surface. @@ -70,29 +70,48 @@ Status: Stage 6 of the spike shipped on PR #164. Single workflow file at `.githu This tier is operational, not mathematical. It does not expand proof scope; it preserves the value of Tier 1 and Tier 1.5 by making toolchain regressions visible in normal PR flow. -## Tier 2 — Definitional `OoxmlDoc` model + closed proofs of `Spec.lean` invariants - -**Status: NOT STARTED.** - -The `Spec.lean` theorems are currently sorry'd over a fully uninterpreted axiom signature. Closing them requires replacing the axioms with a definitional Lean model of OOXML, or at least a tractable subset, and then proving the invariants by structural induction on that model. - -Estimated rough effort at the spike's intermittent agent-driven cadence: **4-12 months elapsed**. The dominant risk is not proof tactics; it is choosing a document model that is faithful enough to matter and narrow enough to stay provable. - -Sub-deliverables: - -- `Tier2.OoxmlModel` — definitional Lean datatypes for a minimal but faithful OOXML subset: paragraphs, runs, ins/del markers, basic field markup (`w:fldChar`, `w:instrText`), and text content. -- `Tier2.AcceptReject` — definitional Lean operations mirroring `acceptAllChanges` and `rejectAllChanges` from `packages/docx-core/src/baselines/atomizer/trackChangesAcceptorAst.ts`. -- `Tier2.FieldStructure` — definitional Lean predicate mirroring `validateFieldStructure` from `packages/docx-core/src/baselines/atomizer/pipeline.ts:352-402`. -- `Tier2.INV_FIELD_001` — closed proof replacing the `INV-FIELD-001` sorry in `Spec.lean`. -- `Tier2.INV_RT_001` — closed proof replacing the `INV-RT-001` sorry in `Spec.lean`. - -The central design question is the model boundary. Too narrow, and the proof stops saying anything about real bugs. Too broad, and the definitions become a second OOXML engine inside Lean. - -Practical unknowns before Tier 2 starts: - -- Whether fields should be modeled syntactically, semantically, or both. -- How much of run splitting and paragraph structure is necessary to state the existing invariants cleanly. -- Whether the right first target is full accept/reject semantics or a smaller normalization layer that factors the proofs. +## Tier 2 — Definitional `OoxmlDoc` model + closed proof of `inv_field_001` + +**Status: IN PROGRESS.** `inv_field_001` is closed; `inv_rt_001` deferred. + +OpenSpec change `add-ooxml-doc-subset-and-inv-field-001-proof` (issue #201) +replaces the uninterpreted document-level axioms with a definitional Lean model +of a tractable OOXML subset and **closes `inv_field_001`**. The closure is framed +as **"machine-checked preservation lemma + single named residual axiom"**, not a +discharge of the comparison engine: the proof is honest about exactly one +remaining assumption rather than hiding it in a hand-wave. + +Delivered: + +- `Tier2/OoxmlModel.lean` — definitional datatypes for a tree-structured OOXML + subset: paragraphs, runs, `ins`/`del`/`moveFrom`/`moveTo` wrappers, and + `w:fldChar` / `w:instrText` / `w:delInstrText` field atoms. +- `Tier2/FieldStructure.lean` — the stack-valued field-context walk (mirroring + `pastSeparatorAtDepth: number[]` at `pipeline.ts:374-389`), definitional + `validateFieldStructure` (`pipeline.ts:352-402`), `fieldContextNeutral`, and + the recursive precondition `recursivelyWellformed`. +- `Tier2/WalkLemmas.lean` — generic walk lemmas (`walkBlocks_append`, + context-neutral no-op, `neutral_balanced`, `delInstrText` rewrite-safety). +- `Tier2/AcceptReject.lean` — definitional `accept` / `reject` mirroring + `trackChangesAcceptorAst.ts:368-659`. +- `Tier2/InvFieldOne.lean` — **closed** preservation lemma + `field_structure_preserved` (zero `sorry`). +- `LeanSpike/Spec.lean` — `OoxmlDoc` / `acceptAllChanges` / `rejectAllChanges` / + `validateFieldStructure` rewired from `axiom` to the Tier 2 definitions; + `inv_field_001` closed by composing `field_structure_preserved` with the single + named residual axiom `compareDocumentXml_output_recursivelyWellformed`. + +The residual axiom asserts that this repo's inplace atomizer output is +`recursivelyWellformed`. Discharging it by modeling `compareDocumentXml` +definitionally is Tier 3 work. + +Still open in Tier 2 / deferred: + +- `inv_rt_001` — paragraph round-trip text equality. Deferred to the + `add-inv-rt-001-proof` successor change (`extractTextWithParagraphs` and + `normalizeText` stay axiomatic until then). +- A full field-bearing fast-check arbitrary for the bridge test — only one + fixture case ships here; the arbitrary opens as `add-field-bearing-bridge-arbitrary`. Tier 2 is the first place where the roadmap becomes specification-heavy enough to deserve an OpenSpec change on start. diff --git a/verification/lean/LeanSpike.lean b/verification/lean/LeanSpike.lean index c99a28f..c789137 100644 --- a/verification/lean/LeanSpike.lean +++ b/verification/lean/LeanSpike.lean @@ -2,3 +2,4 @@ import LeanSpike.Atom import LeanSpike.AtomsEqual import LeanSpike.Lcs import LeanSpike.Spec +import Tier2 diff --git a/verification/lean/LeanSpike/Spec.lean b/verification/lean/LeanSpike/Spec.lean index 57cb306..6330f0b 100644 --- a/verification/lean/LeanSpike/Spec.lean +++ b/verification/lean/LeanSpike/Spec.lean @@ -1,11 +1,18 @@ import Mathlib.Data.List.Basic +import Tier2.OoxmlModel +import Tier2.FieldStructure +import Tier2.AcceptReject +import Tier2.InvFieldOne namespace LeanSpike -/-- Abstract Lean document type for the `document.xml` surface threaded through the +/-- The Lean document type for the `document.xml` surface threaded through the atomizer comparison and safety-check pipeline in - `packages/docx-core/src/baselines/atomizer/pipeline.ts:352-817`. -/ -axiom OoxmlDoc : Type + `packages/docx-core/src/baselines/atomizer/pipeline.ts:352-817`. + + As of Tier 2 this is no longer an opaque `axiom`: it is the definitional + tree-structured OOXML subset `Tier2.OoxmlModel.Doc`. -/ +abbrev OoxmlDoc : Type := Tier2.OoxmlModel.Doc /-- Abstract Lean symbol for the **inplace-mode** comparison-output XML produced by `compareDocumentsAtomizer` as `newDocumentXml` in @@ -20,33 +27,88 @@ axiom OoxmlDoc : Type falls back to rebuild mode and the inplace candidate that the Stage 4 specs would have constrained is never emitted. - The rebuild-mode output (`modifyRevisedDocument` branch at `pipeline.ts:638` - vs `reconstructDocument` branch at `pipeline.ts:647`) bypasses - `evaluateSafetyChecks`, so the Stage 4 specifications below only target the - inplace surface — not arbitrary comparison output, and not the rebuild - fallback. -/ + This **remains axiomatic**: modeling `compareDocumentXml` definitionally is + Tier 3 work. The residual obligation about its output well-formedness is + captured by the named axiom `compareDocumentXml_output_recursivelyWellformed` + below. -/ axiom compareDocumentXml : OoxmlDoc → OoxmlDoc → Option OoxmlDoc -/-- Abstract Lean symbol mirroring `acceptAllChanges` in - `packages/docx-core/src/baselines/atomizer/trackChangesAcceptorAst.ts:368-506`. -/ -axiom acceptAllChanges : OoxmlDoc → OoxmlDoc +/-- `acceptAllChanges`, mirroring + `packages/docx-core/src/baselines/atomizer/trackChangesAcceptorAst.ts:368-506`. + As of Tier 2 this is the definitional `Tier2.AcceptReject.accept`. -/ +def acceptAllChanges : OoxmlDoc → OoxmlDoc := Tier2.AcceptReject.accept -/-- Abstract Lean symbol mirroring `rejectAllChanges` in - `packages/docx-core/src/baselines/atomizer/trackChangesAcceptorAst.ts:519-626`. -/ -axiom rejectAllChanges : OoxmlDoc → OoxmlDoc +/-- `rejectAllChanges`, mirroring + `packages/docx-core/src/baselines/atomizer/trackChangesAcceptorAst.ts:509-659`. + As of Tier 2 this is the definitional `Tier2.AcceptReject.reject`. -/ +def rejectAllChanges : OoxmlDoc → OoxmlDoc := Tier2.AcceptReject.reject -/-- Abstract Lean symbol mirroring `validateFieldStructure` in - `packages/docx-core/src/baselines/atomizer/pipeline.ts:352-402`. -/ -axiom validateFieldStructure : OoxmlDoc → Bool +/-- `validateFieldStructure`, mirroring + `packages/docx-core/src/baselines/atomizer/pipeline.ts:352-402`. + As of Tier 2 this is the definitional `Tier2.FieldStructure.validateFieldStructure`. -/ +def validateFieldStructure : OoxmlDoc → Bool := Tier2.FieldStructure.validateFieldStructure /-- Abstract Lean symbol mirroring `extractTextWithParagraphs` in - `packages/docx-core/src/baselines/atomizer/trackChangesAcceptorAst.ts:660-688`. -/ + `packages/docx-core/src/baselines/atomizer/trackChangesAcceptorAst.ts:660-688`. + Remains axiomatic — owned by the `inv_rt_001` successor change. -/ axiom extractTextWithParagraphs : OoxmlDoc → String /-- Abstract Lean symbol mirroring `normalizeText` in - `packages/docx-core/src/baselines/atomizer/trackChangesAcceptorAst.ts:701-711`. -/ + `packages/docx-core/src/baselines/atomizer/trackChangesAcceptorAst.ts:701-711`. + Remains axiomatic — owned by the `inv_rt_001` successor change. -/ axiom normalizeText : String → String +/-- **Residual obligation.** This repo's inplace atomizer output (`compareDocumentXml` + in inplace mode, `pipeline.ts:635-650` then the inplace path at `pipeline.ts:669`) + is recursively well-formed under the stack-valued field context — i.e. it + satisfies `Tier2.FieldStructure.recursivelyWellformed`: the whole document + passes `validateFieldStructure`, and every `w:ins` / `w:del` / `w:moveFrom` / + `w:moveTo` wrapper subtree (transitively) is `fieldContextNeutral`. + + This axiom is the single load-bearing assumption behind the `inv_field_001` + closure. Tier 3 will discharge it by modeling `compareDocumentXml` + definitionally. + + **Why `fieldContextNeutral` (the `∀ ctx` strength) holds for THIS engine.** + safe-docx's current inplace atomizer emits whole field sequences as a single + track-change wrapper (`` or `` around a complete + begin/instrText/separate/result/end run sequence), never splitting a field + across wrapper boundaries. See: + * `inPlaceModifier.ts:717` — `getAtomRuns` returns all field runs as one unit. + * `inPlaceModifier.ts:938` — deleted field replay wraps cloned field runs in a + single ``. + * `inPlaceModifier.ts:1505, 1671` — split logic explicitly skips collapsed + fields and field characters. + * `inPlaceModifier.ts:1957, 2300` — insert/move-destination handlers wrap + the whole atom-run set in a single wrapper. + * `collapsed-field-inplace.test.ts:211` — tests assert multi-run complete + field wrappers, not partial wrappers. + + **Why this is engine-specific, not spec-conformant.** Under ECMA-376 Part 4 + (DeletedFieldCode and fldChar topics), a fully conformant track-change + emitter MUST fragment fields across wrapper boundaries when a field is + modified: `w:fldChar` is strictly barred from ``, so a modified field + has its `w:fldChar begin/separate/end` markers unwrapped at the run-sibling + level while ``/`` wrap only the changed `w:instrText` / + `w:delInstrText` payloads. Such fragmented wrapper subtrees are NOT + `fieldContextNeutral` under `∀ ctx`. If/when the safe-docx engine becomes + ECMA-376-conformant for field modifications (tracked in #217), this axiom + and the per-subtree neutrality predicate will need to be replaced with a + document-level preservation property; the Lean proof in + `Tier2.InvFieldOne` will be refactored accordingly. + + Evidence as of this change is the existing field-free fast-check bridge + cases (`packages/docx-core/src/integration/lean-spec-bridge.test.ts` + explicitly excludes field-bearing inputs and only checks the *consequence* — + `validateFieldStructure` post-accept/reject — not the recursive precondition + itself) plus dedicated field-bearing fixtures added by this change as a + falsifiability layer (NUMPAGES insertion, NUMPAGES deletion). The axiom is + engine-specific to this repo's inplace atomizer, universal in `(a, b)`, and + load-bearing — NOT empirically grounded across the full ECMA-376 surface. -/ +axiom compareDocumentXml_output_recursivelyWellformed : + ∀ a b combined, compareDocumentXml a b = some combined → + Tier2.FieldStructure.recursivelyWellformed combined + /-- INV-FIELD-001: field-structure preservation across accept-all and reject-all, scoped to the successful inplace-mode comparison output `compareDocumentXml a b = some combined`. Doc pairs for which inplace mode @@ -59,16 +121,19 @@ axiom normalizeText : String → String (`evaluateSafetyChecks`), whose actual field-structure call site at `pipeline.ts:439-440` is `validateFieldStructure(acceptedXml) && validateFieldStructure(rejectedXml)`. - The check is gated on `reconstructionMode === 'inplace'` at `pipeline.ts:669`; - rebuild-mode candidates bypass it entirely. The TypeScript does not claim that - accept/reject repairs malformed arbitrary input, so the Lean statement is - intentionally scoped to a *successful* inplace comparison candidate. -/ + + As of Tier 2 this theorem is **closed** with a complete machine-checked + proof: it composes the named residual axiom + `compareDocumentXml_output_recursivelyWellformed` with the preservation + lemma `Tier2.InvFieldOne.field_structure_preserved`. -/ theorem inv_field_001 : ∀ (a b combined : OoxmlDoc), compareDocumentXml a b = some combined → validateFieldStructure (acceptAllChanges combined) = true ∧ validateFieldStructure (rejectAllChanges combined) = true := by - sorry + intro a b combined h + have hRW := compareDocumentXml_output_recursivelyWellformed a b combined h + exact Tier2.InvFieldOne.field_structure_preserved combined hRW /-- INV-RT-001: paired round-trip text equality under normalization, with accept-all recovering `b` and reject-all recovering `a`. Scoped to the @@ -83,8 +148,10 @@ theorem inv_field_001 : `packages/docx-core/src/integration/nvca-coi-regression.test.ts:77-103`, together with the text helpers at `packages/docx-core/src/baselines/atomizer/trackChangesAcceptorAst.ts:660-711`. - The statement is intentionally paired, not one-sided, because the test suite - checks both directions modulo `normalizeText`. -/ + + This theorem **remains unproved** — it is explicitly deferred to the + `add-inv-rt-001-proof` successor change, which owns `extractTextWithParagraphs` + and `normalizeText`. -/ theorem inv_rt_001 : ∀ (a b combined : OoxmlDoc), compareDocumentXml a b = some combined → diff --git a/verification/lean/README.md b/verification/lean/README.md index 205095f..fe8a87d 100644 --- a/verification/lean/README.md +++ b/verification/lean/README.md @@ -13,6 +13,8 @@ This spike now includes a Lean model of the atom-level LCS computation from `pac - `LeanSpike/AtomsEqual.lean` - `LeanSpike/Lcs.lean` - `LeanSpike/Spec.lean` +- `Tier2.lean` and `Tier2/` — the Tier 2 definitional `OoxmlDoc` subset and the + closed `inv_field_001` proof. See `Tier2/README.md`. ## Current proof scope @@ -24,14 +26,38 @@ Currently proved in this stage: - `INV-LCS-003` in `LeanSpike/Lcs.lean`: **strict index monotonicity.** The reported match pairs are pairwise strictly increasing in both original and revised indices. - `INV-LCS-004` in `LeanSpike/Lcs.lean`: **partition completeness.** Matched and deleted original indices partition `range original.length`, and matched and inserted revised indices partition `range revised.length`. -## Specification targets (sorry'd, uninterpreted signature) - -This stage also adds named specification targets over an uninterpreted Lean signature, intended for later empirical bridge/proof work. The motivating TypeScript checks and tests motivate these targets; with a fully uninterpreted `axiom` surface and two `sorry`s, the TS evidence does not justify a universal Lean claim on its own. The current Lean file keeps every document-level operation abstract via `axiom`, so `LeanSpike/Spec.lean` is a target surface for later stages rather than a closed proof artifact. - -- `INV-FIELD-001` in `LeanSpike/Spec.lean`: scope `validateFieldStructure` to the inplace-mode comparison output `compareDocumentXml a b`, then require both `acceptAllChanges` and `rejectAllChanges` to preserve field structure. This mirrors the syntactic scan in `packages/docx-core/src/baselines/atomizer/pipeline.ts:352-402`, the actual field-structure call site at `pipeline.ts:439-440` inside `evaluateSafetyChecks` (`pipeline.ts:404-440`, gated on the inplace branch at `pipeline.ts:669`), and the inplace-mode comparison-output surface assigned at `pipeline.ts:635-650`. -- `INV-RT-001` in `LeanSpike/Spec.lean`: paired round-trip text recovery, with `acceptAllChanges` matching the revised document and `rejectAllChanges` matching the original document after `normalizeText ∘ extractTextWithParagraphs`. This mirrors the helper functions in `packages/docx-core/src/baselines/atomizer/trackChangesAcceptorAst.ts:660-688` (`extractTextWithParagraphs`) and `trackChangesAcceptorAst.ts:701-711` (`normalizeText`), plus the gold-standard paired assertions in `packages/docx-core/src/integration/round-trip-inplace.test.ts:56-63` and `:87-94`, with a second paired fixture at `packages/docx-core/src/integration/nvca-coi-regression.test.ts:77-103`. - -`Spec.lean` must remain isolated from `AtomsEqual.lean` and `Lcs.lean`. The zero-sorry property of Stage 1-3 modules is preserved; `Spec.lean` is the only `sorry`-bearing file. +## Specification targets + +`LeanSpike/Spec.lean` carries two named specification targets. As of Tier 2, +`INV-FIELD-001` is **closed**; `INV-RT-001` remains a `sorry`. + +- `INV-FIELD-001` in `LeanSpike/Spec.lean`: **closed.** `validateFieldStructure`, + scoped to the inplace-mode comparison output `compareDocumentXml a b`, is + preserved by both `acceptAllChanges` and `rejectAllChanges`. This mirrors the + syntactic scan in `packages/docx-core/src/baselines/atomizer/pipeline.ts:352-402`, + the actual field-structure call site at `pipeline.ts:439-440` inside + `evaluateSafetyChecks` (`pipeline.ts:404-440`, gated on the inplace branch at + `pipeline.ts:669`), and the inplace-mode comparison-output surface assigned at + `pipeline.ts:635-650`. The closure rewires the `OoxmlDoc` / `acceptAllChanges` / + `rejectAllChanges` / `validateFieldStructure` axioms to the definitional Tier 2 + model (`Tier2/`) and composes the machine-checked preservation lemma + `Tier2.InvFieldOne.field_structure_preserved` with a single named residual + axiom — see the Specification Gap section below. +- `INV-RT-001` in `LeanSpike/Spec.lean`: **still `sorry`'d.** Paired round-trip + text recovery, with `acceptAllChanges` matching the revised document and + `rejectAllChanges` matching the original document after + `normalizeText ∘ extractTextWithParagraphs`. This mirrors the helper functions + in `packages/docx-core/src/baselines/atomizer/trackChangesAcceptorAst.ts:660-688` + (`extractTextWithParagraphs`) and `trackChangesAcceptorAst.ts:701-711` + (`normalizeText`), plus the gold-standard paired assertions in + `packages/docx-core/src/integration/round-trip-inplace.test.ts:56-63` and + `:87-94`, with a second paired fixture at + `packages/docx-core/src/integration/nvca-coi-regression.test.ts:77-103`. It is + deferred to the `add-inv-rt-001-proof` successor change. + +The zero-sorry property of every non-`Spec.lean` module (Stage 1-3 modules and +all of `Tier2/`) is preserved; `Spec.lean` is the only `sorry`-bearing file, and +it now carries exactly one `sorry` (`inv_rt_001`). For interactive auditing, inspect `#print sorries`, `#print axioms inv_field_001`, and `#print axioms inv_rt_001`. @@ -40,8 +66,9 @@ For interactive auditing, inspect `#print sorries`, `#print axioms inv_field_001 This spike still does not cover: - closed proofs of reconstruction invariants -- closed proofs of round-trip text equality -- closed proofs of field-structure / document-shape preservation +- closed proofs of round-trip text equality (`inv_rt_001` is still `sorry`'d) +- a discharged proof of `compareDocumentXml_output_recursivelyWellformed` — the + `inv_field_001` closure carries it as a named residual axiom (Tier 3 work) - extensional equivalence between the Lean model and the production TypeScript implementation ## Lean model @@ -56,6 +83,27 @@ The projection intentionally flattens `contentElement.textContent ?? ""` into a ## Specification Gap +### Tier 2 residual axiom (`inv_field_001`) + +The closed `inv_field_001` proof carries exactly one named residual axiom: +`compareDocumentXml_output_recursivelyWellformed` in `LeanSpike/Spec.lean`. It +asserts that this repo's inplace atomizer output satisfies +`Tier2.FieldStructure.recursivelyWellformed` — scoped to this repo's inplace +atomizer, NOT to OOXML comparison engines in general. Discharging it by modeling +`compareDocumentXml` definitionally is **Tier 3** work. Extensional equivalence +between the Lean `accept` / `reject` and the production TS +`acceptAllChanges` / `rejectAllChanges` is **Tier 2.5** work and is not +established here. The production engine's runtime `validateFieldStructure` check +is not made redundant by this proof. The model also deliberately narrows the TS +paragraph-removal logic (`trackChangesAcceptorAst.ts:411,456,564`) by treating +only wrapper blocks as substantive. Full detail is in `Tier2/README.md`. + +A TS-side falsifiability layer for the residual axiom — one field-bearing fixture +case checking a TS analogue of `recursivelyWellformed` against the live engine — +lives in `packages/docx-core/src/integration/lean-spec-bridge.test.ts`. + +### Tier 1 `Atom` projection gap + The Lean `Atom` is intentionally narrower than the TypeScript `ComparisonUnitAtom` in `packages/docx-core/src/core-types.ts`. The projection omits the nested `contentElement` object itself and all non-equality metadata, including: @@ -94,10 +142,14 @@ lake update lake build ``` -To confirm the Stage 1-3 proof modules remain `sorry`-free, and that `Spec.lean` -contains only the two intentional placeholders: +To confirm every non-`Spec.lean` module remains `sorry`-free, and that `Spec.lean` +contains only the single intentional placeholder (`inv_rt_001`): ```bash -rg -n "sorry" LeanSpike/AtomsEqual.lean LeanSpike/Lcs.lean # must be empty -rg -n "sorry" LeanSpike/Spec.lean # exactly 2 hits +grep -rnw "sorry" LeanSpike/AtomsEqual.lean LeanSpike/Lcs.lean Tier2/ # must be empty +grep -cw "sorry" LeanSpike/Spec.lean # exactly 1 hit ``` + +For interactive auditing, inspect `#print axioms LeanSpike.inv_field_001` — it +depends only on `propext`, `Quot.sound`, `compareDocumentXml`, and the single +residual axiom `compareDocumentXml_output_recursivelyWellformed` (no `sorryAx`). diff --git a/verification/lean/Tier2.lean b/verification/lean/Tier2.lean new file mode 100644 index 0000000..1ed4d95 --- /dev/null +++ b/verification/lean/Tier2.lean @@ -0,0 +1,5 @@ +import Tier2.OoxmlModel +import Tier2.FieldStructure +import Tier2.WalkLemmas +import Tier2.AcceptReject +import Tier2.InvFieldOne diff --git a/verification/lean/Tier2/AcceptReject.lean b/verification/lean/Tier2/AcceptReject.lean new file mode 100644 index 0000000..54cffbb --- /dev/null +++ b/verification/lean/Tier2/AcceptReject.lean @@ -0,0 +1,87 @@ +/- +Tier 2 — definitional `accept` / `reject`. + +Mirrors `acceptAllChanges` / `rejectAllChanges` in +`packages/docx-core/src/baselines/atomizer/trackChangesAcceptorAst.ts:368-659` +at the granularity the `OoxmlModel` subset exposes. + + * `accept` (`trackChangesAcceptorAst.ts:368-506`): drop `del` / `moveFrom` + subtrees entirely; unwrap `ins` / `moveTo` (keep children); drop paragraphs + whose body collapses to empty. + * `reject` (`trackChangesAcceptorAst.ts:509-659`): drop `ins` / `moveTo`; + unwrap `del` / `moveFrom`; THEN rewrite `delText → text` and + `delInstrText → instrText` globally over the result — matching the TS line + ordering at `trackChangesAcceptorAst.ts:602-616`, which performs both + unwraps before the rename pass. Locality of `delInstrText` to deleted-content + wrappers is enforced by `recursivelyWellformed` on the *input*, not by the + bare `OoxmlModel` datatype. +-/ +import Tier2.FieldStructure + +namespace Tier2.AcceptReject + +open Tier2.OoxmlModel + +/-! ### accept -/ + +/-- Accept track changes within a block sequence. `other` containers are kept; + their children are recursively accepted. -/ +def acceptBlocks : List Block → List Block + | [] => [] + | .run r :: rest => .run r :: acceptBlocks rest + | .ins bs :: rest => acceptBlocks bs ++ acceptBlocks rest + | .moveTo bs :: rest => acceptBlocks bs ++ acceptBlocks rest + | .del _ :: rest => acceptBlocks rest + | .moveFrom _ :: rest => acceptBlocks rest + | .other tag bs :: rest => .other tag (acceptBlocks bs) :: acceptBlocks rest +termination_by bs => sizeOf bs + +/-- Accept all track changes in a document, dropping paragraphs that collapse to + an empty body. -/ +def accept : Doc → Doc + | [] => [] + | p :: ps => + if (acceptBlocks p.body).isEmpty then accept ps + else ⟨p.pPr, acceptBlocks p.body⟩ :: accept ps + +/-! ### reject -/ + +/-- Reject the wrapper structure within a block sequence: drop `ins` / `moveTo`, + unwrap `del` / `moveFrom`. `other` containers are kept; their children are + recursively rejected. The `delText` / `delInstrText` rename is a separate + global pass (`renameBlocks`). -/ +def rejectBlocks : List Block → List Block + | [] => [] + | .run r :: rest => .run r :: rejectBlocks rest + | .ins _ :: rest => rejectBlocks rest + | .moveTo _ :: rest => rejectBlocks rest + | .del bs :: rest => rejectBlocks bs ++ rejectBlocks rest + | .moveFrom bs :: rest => rejectBlocks bs ++ rejectBlocks rest + | .other tag bs :: rest => .other tag (rejectBlocks bs) :: rejectBlocks rest +termination_by bs => sizeOf bs + +/-- Rewrite a single deleted-content atom back to its accepted form. -/ +def renameAtom : Atom → Atom + | .delText s => .text s + | .delInstrText s => .instrText s + | a => a + +/-- The global `delText → text` / `delInstrText → instrText` rename pass. Applied + to every run after `rejectBlocks` completes (`trackChangesAcceptorAst.ts:602-616`). + Descends through `other` containers transparently, preserving their tags. -/ +def renameBlocks : List Block → List Block + | [] => [] + | .run r :: rest => .run ⟨r.rPr, r.content.map renameAtom⟩ :: renameBlocks rest + | .ins bs :: rest => .ins (renameBlocks bs) :: renameBlocks rest + | .del bs :: rest => .del (renameBlocks bs) :: renameBlocks rest + | .moveFrom bs :: rest => .moveFrom (renameBlocks bs) :: renameBlocks rest + | .moveTo bs :: rest => .moveTo (renameBlocks bs) :: renameBlocks rest + | .other tag bs :: rest => .other tag (renameBlocks bs) :: renameBlocks rest +termination_by bs => sizeOf bs + +/-- Reject all track changes in a document: unwrap then global rename. -/ +def reject : Doc → Doc + | [] => [] + | p :: ps => ⟨p.pPr, renameBlocks (rejectBlocks p.body)⟩ :: reject ps + +end Tier2.AcceptReject diff --git a/verification/lean/Tier2/FieldStructure.lean b/verification/lean/Tier2/FieldStructure.lean new file mode 100644 index 0000000..e48c8b1 --- /dev/null +++ b/verification/lean/Tier2/FieldStructure.lean @@ -0,0 +1,162 @@ +/- +Tier 2 — field-structure predicate. + +Definitional mirror of the two checks in `validateFieldStructure` +(`packages/docx-core/src/baselines/atomizer/pipeline.ts:352-402`): + + 1. global `w:fldChar` begin/end counts are balanced; + 2. every `w:instrText` / `w:delInstrText` sits inside an open, pre-`separate` + field body. + +Check (2) walks every atom in document order carrying a *stack* of +"separator-seen" bits indexed by field depth — exactly the TS engine's +`pastSeparatorAtDepth: number[]` at `pipeline.ts:374-389`. Round 3 of peer +review (see `design.md`) showed a single mutable boolean is unsound: an inner +`End` would reset a separator bit belonging to an outer field. + +`fieldContextNeutral` is the per-subtree strengthening that survives the three +round-3 counterexample families. `recursivelyWellformed` bundles the whole-doc +check with context-neutrality of every wrapper subtree. +-/ +import Tier2.OoxmlModel + +namespace Tier2.FieldStructure + +open Tier2.OoxmlModel + +/-- The field context: a stack of `pastSeparatorAtDepth` bits, innermost field on + top. `true` means the `w:separate` for that field has already been seen. -/ +abbrev FieldCtx : Type := List Bool + +/-- Result of walking a span of atoms/blocks: either a live context, or a + permanently-failed walk. `invalid` is absorbing. -/ +inductive WalkResult + | ok (ctx : FieldCtx) + | invalid + deriving DecidableEq, Repr, Inhabited + +/-- `true` iff the walk has not failed. -/ +def WalkResult.isValid : WalkResult → Bool + | .ok _ => true + | .invalid => false + +/-- Step the field-context walk across one atom. Mirrors `pipeline.ts:374-389`: + `begin` pushes a fresh `false`; `separate` sets the top bit (no-op on the + empty stack, matching the `if (depth > 0)` guard at `pipeline.ts:387`); + `end` pops (no-op on empty, matching `pipeline.ts:389`); `instrText` / + `delInstrText` require a non-empty stack whose top bit is `false`. -/ +def stepAtom (r : WalkResult) (a : Atom) : WalkResult := + match r with + | .invalid => .invalid + | .ok ctx => + match a with + | .text _ => .ok ctx + | .delText _ => .ok ctx + | .fldChar .begin => .ok (false :: ctx) + | .fldChar .separate => + match ctx with + | [] => .ok [] + | _ :: rest => .ok (true :: rest) + | .fldChar .endf => + match ctx with + | [] => .ok [] + | _ :: rest => .ok rest + | .instrText _ => + match ctx with + | false :: _ => .ok ctx + | _ => .invalid + | .delInstrText _ => + match ctx with + | false :: _ => .ok ctx + | _ => .invalid + +/-- Step the field-context walk across a run's atoms, left to right. -/ +def stepAtoms (r : WalkResult) (as : List Atom) : WalkResult := + as.foldl stepAtom r + +/-- Walk the field context across a block sequence in document order. Wrappers + (`ins` / `del` / `moveFrom` / `moveTo`) and transparent containers (`other`) + are all transparent for the walk: the walk simply descends into their + children, because the TS `validateFieldStructure` scans every element + regardless of tag (`pipeline.ts:396`). -/ +def walkBlocks : WalkResult → List Block → WalkResult + | r, [] => r + | r, .run run :: rest => walkBlocks (stepAtoms r run.content) rest + | r, .ins bs :: rest => walkBlocks (walkBlocks r bs) rest + | r, .del bs :: rest => walkBlocks (walkBlocks r bs) rest + | r, .moveFrom bs :: rest => walkBlocks (walkBlocks r bs) rest + | r, .moveTo bs :: rest => walkBlocks (walkBlocks r bs) rest + | r, .other _ bs :: rest => walkBlocks (walkBlocks r bs) rest +termination_by _ bs => sizeOf bs + +/-- Walk a single block. Provided for parity with `design.md`'s stated API; the + primary recursive object is `walkBlocks`. -/ +def stepBlock (r : WalkResult) (b : Block) : WalkResult := + walkBlocks r [b] + +/-- Atom-level predicate: this atom is a `w:fldChar` with `w:fldCharType="begin"`. -/ +def Atom.isBegin : Atom → Bool + | .fldChar .begin => true + | _ => false + +/-- Atom-level predicate: this atom is a `w:fldChar` with `w:fldCharType="end"`. -/ +def Atom.isEnd : Atom → Bool + | .fldChar .endf => true + | _ => false + +/-- Count atoms satisfying `p` across a block sequence, descending into wrappers + and transparent containers. -/ +def countBlocks (p : Atom → Bool) : List Block → Nat + | [] => 0 + | .run run :: rest => run.content.countP p + countBlocks p rest + | .ins bs :: rest => countBlocks p bs + countBlocks p rest + | .del bs :: rest => countBlocks p bs + countBlocks p rest + | .moveFrom bs :: rest => countBlocks p bs + countBlocks p rest + | .moveTo bs :: rest => countBlocks p bs + countBlocks p rest + | .other _ bs :: rest => countBlocks p bs + countBlocks p rest +termination_by bs => sizeOf bs + +/-- Check (1): global `w:fldChar` begin/end counts are equal. -/ +def fldCharBalanced (d : Doc) : Bool := + countBlocks Atom.isBegin d.blocks == countBlocks Atom.isEnd d.blocks + +/-- `validateFieldStructure` — definitional mirror of `pipeline.ts:352-402`. -/ +def validateFieldStructure (d : Doc) : Bool := + fldCharBalanced d && (walkBlocks (.ok []) d.blocks).isValid + +/-- A wrapper subtree's block list is *field-context-neutral* iff, scanned under + **any** outer field context, it leaves that context unchanged and never + produces `invalid`. Strictly stronger than per-subtree + `validateFieldStructure`; this is the predicate the preservation lemma needs + and the property `compareDocumentXml_output_recursivelyWellformed` asserts. -/ +def fieldContextNeutral (bs : List Block) : Prop := + ∀ ctx, walkBlocks (.ok ctx) bs = .ok ctx + +/-- Every track-change wrapper child block list in a block sequence, transitively. + `other` containers are NOT track-change wrappers — they are traversed but + their own children are not emitted as a wrapper subtree (only any wrappers + nested inside them are). -/ +def wrapperSubtreesBlocks : List Block → List (List Block) + | [] => [] + | .run _ :: rest => wrapperSubtreesBlocks rest + | .ins bs :: rest => (bs :: wrapperSubtreesBlocks bs) ++ wrapperSubtreesBlocks rest + | .del bs :: rest => (bs :: wrapperSubtreesBlocks bs) ++ wrapperSubtreesBlocks rest + | .moveFrom bs :: rest => (bs :: wrapperSubtreesBlocks bs) ++ wrapperSubtreesBlocks rest + | .moveTo bs :: rest => (bs :: wrapperSubtreesBlocks bs) ++ wrapperSubtreesBlocks rest + | .other _ bs :: rest => wrapperSubtreesBlocks bs ++ wrapperSubtreesBlocks rest +termination_by bs => sizeOf bs + +/-- Every wrapper subtree of `d` (transitively). -/ +def allWrapperSubtrees (d : Doc) : List (List Block) := + wrapperSubtreesBlocks d.blocks + +/-- Every wrapper subtree of a block sequence is field-context-neutral. -/ +def allNeutral (bs : List Block) : Prop := + ∀ sub ∈ wrapperSubtreesBlocks bs, fieldContextNeutral sub + +/-- The recursive well-formedness precondition of the preservation lemma: + the whole document validates, and every wrapper subtree is context-neutral. -/ +def recursivelyWellformed (d : Doc) : Prop := + validateFieldStructure d = true ∧ allNeutral d.blocks + +end Tier2.FieldStructure diff --git a/verification/lean/Tier2/InvFieldOne.lean b/verification/lean/Tier2/InvFieldOne.lean new file mode 100644 index 0000000..d29888d --- /dev/null +++ b/verification/lean/Tier2/InvFieldOne.lean @@ -0,0 +1,423 @@ +/- +Tier 2 — the preservation lemma. + +`field_structure_preserved`: for any document satisfying `recursivelyWellformed`, +both `accept` and `reject` produce output that satisfies `validateFieldStructure`. + +This is the core machine-checked content of the `inv_field_001` closure. The +proof consumes the generic walk lemmas from `WalkLemmas.lean`: + + * `accept` drops `del` / `moveFrom` subtrees and unwraps `ins` / `moveTo`. + Unwrapping is walk-transparent unconditionally (it is just list append, L1); + dropping a `fieldContextNeutral` subtree is a walk no-op (L2). Begin/end + balance survives because a context-neutral subtree is begin/end-balanced + (`neutral_balanced`). + * `reject` is symmetric; its global `delInstrText → instrText` rename pass is + walk- and count-transparent because `stepAtom` does not distinguish the two + atoms (L3, `delInstrText_rewrite_safe`). +-/ +import Tier2.AcceptReject +import Tier2.WalkLemmas + +namespace Tier2.InvFieldOne + +open Tier2.OoxmlModel Tier2.FieldStructure Tier2.AcceptReject Tier2.WalkLemmas + +/-! ### `allNeutral` decomposition -/ + +theorem allNeutral_run {r : Run} {rest : List Block} + (h : allNeutral (.run r :: rest)) : allNeutral rest := by + unfold allNeutral at h ⊢ + simpa only [wrapperSubtreesBlocks] using h + +theorem allNeutral_ins {bs rest : List Block} (h : allNeutral (.ins bs :: rest)) : + fieldContextNeutral bs ∧ allNeutral bs ∧ allNeutral rest := by + unfold allNeutral at h + simp only [wrapperSubtreesBlocks, List.mem_append, List.mem_cons] at h + refine ⟨h bs (Or.inl (Or.inl rfl)), ?_, ?_⟩ + · intro sub hsub; exact h sub (Or.inl (Or.inr hsub)) + · intro sub hsub; exact h sub (Or.inr hsub) + +theorem allNeutral_del {bs rest : List Block} (h : allNeutral (.del bs :: rest)) : + fieldContextNeutral bs ∧ allNeutral bs ∧ allNeutral rest := by + unfold allNeutral at h + simp only [wrapperSubtreesBlocks, List.mem_append, List.mem_cons] at h + refine ⟨h bs (Or.inl (Or.inl rfl)), ?_, ?_⟩ + · intro sub hsub; exact h sub (Or.inl (Or.inr hsub)) + · intro sub hsub; exact h sub (Or.inr hsub) + +theorem allNeutral_moveFrom {bs rest : List Block} + (h : allNeutral (.moveFrom bs :: rest)) : + fieldContextNeutral bs ∧ allNeutral bs ∧ allNeutral rest := by + unfold allNeutral at h + simp only [wrapperSubtreesBlocks, List.mem_append, List.mem_cons] at h + refine ⟨h bs (Or.inl (Or.inl rfl)), ?_, ?_⟩ + · intro sub hsub; exact h sub (Or.inl (Or.inr hsub)) + · intro sub hsub; exact h sub (Or.inr hsub) + +theorem allNeutral_moveTo {bs rest : List Block} + (h : allNeutral (.moveTo bs :: rest)) : + fieldContextNeutral bs ∧ allNeutral bs ∧ allNeutral rest := by + unfold allNeutral at h + simp only [wrapperSubtreesBlocks, List.mem_append, List.mem_cons] at h + refine ⟨h bs (Or.inl (Or.inl rfl)), ?_, ?_⟩ + · intro sub hsub; exact h sub (Or.inl (Or.inr hsub)) + · intro sub hsub; exact h sub (Or.inr hsub) + +/-- `.other` is NOT a track-change wrapper: its child block list is not added + as a wrapper subtree (only wrappers nested inside it are). So `allNeutral` + decomposes into `allNeutral` on children and on rest, with no extra + `fieldContextNeutral` obligation on the container's children. -/ +theorem allNeutral_other {tag : String} {bs rest : List Block} + (h : allNeutral (.other tag bs :: rest)) : + allNeutral bs ∧ allNeutral rest := by + unfold allNeutral at h + simp only [wrapperSubtreesBlocks, List.mem_append] at h + refine ⟨?_, ?_⟩ + · intro sub hsub; exact h sub (Or.inl hsub) + · intro sub hsub; exact h sub (Or.inr hsub) + +/-! ### Operations distribute over append -/ + +theorem acceptBlocks_append (l m : List Block) : + acceptBlocks (l ++ m) = acceptBlocks l ++ acceptBlocks m := by + induction l with + | nil => simp [acceptBlocks] + | cons b l ih => cases b <;> simp [acceptBlocks, ih, List.append_assoc] + +theorem rejectBlocks_append (l m : List Block) : + rejectBlocks (l ++ m) = rejectBlocks l ++ rejectBlocks m := by + induction l with + | nil => simp [rejectBlocks] + | cons b l ih => cases b <;> simp [rejectBlocks, ih, List.append_assoc] + +theorem renameBlocks_append (l m : List Block) : + renameBlocks (l ++ m) = renameBlocks l ++ renameBlocks m := by + induction l with + | nil => simp [renameBlocks] + | cons b l ih => cases b <;> simp [renameBlocks, ih] + +/-! ### Document-level block extraction -/ + +theorem accept_blocks (d : Doc) : + (accept d).blocks = acceptBlocks d.blocks := by + simp only [Doc.blocks] + induction d with + | nil => simp [accept, acceptBlocks] + | cons p ps ih => + rw [List.flatMap_cons, acceptBlocks_append] + simp only [accept] + split + · next hb => + have hnil : acceptBlocks p.body = [] := by simpa using hb + rw [hnil, List.nil_append] + exact ih + · next hb => + rw [List.flatMap_cons, ih] + +theorem reject_blocks (d : Doc) : + (reject d).blocks = renameBlocks (rejectBlocks d.blocks) := by + simp only [Doc.blocks] + induction d with + | nil => simp [reject, rejectBlocks, renameBlocks] + | cons p ps ih => + rw [List.flatMap_cons, rejectBlocks_append, renameBlocks_append] + simp only [reject] + rw [List.flatMap_cons, ih] + +/-! ### `renameBlocks` is walk- and count-transparent (L3) -/ + +theorem stepAtom_renameAtom (r : WalkResult) (a : Atom) : + stepAtom r (renameAtom a) = stepAtom r a := by + cases a with + | text s => rfl + | delText s => cases r <;> rfl + | instrText s => rfl + | delInstrText s => exact delInstrText_rewrite_safe r s s + | fldChar k => rfl + +theorem stepAtoms_renameAtom (r : WalkResult) (as : List Atom) : + stepAtoms r (as.map renameAtom) = stepAtoms r as := by + induction as generalizing r with + | nil => rfl + | cons a as ih => + rw [List.map_cons, stepAtoms_cons, stepAtoms_cons, stepAtom_renameAtom] + exact ih _ + +theorem walkBlocks_renameBlocks (r : WalkResult) (bs : List Block) : + walkBlocks r (renameBlocks bs) = walkBlocks r bs := by + have key : ∀ (bs : List Block) (r : WalkResult), + walkBlocks r (renameBlocks bs) = walkBlocks r bs := by + intro bs + induction bs using renameBlocks.induct with + | case1 => intro r; simp only [renameBlocks] + | case2 r' rest ih => + intro r + simp only [renameBlocks, walkBlocks] + rw [stepAtoms_renameAtom] + exact ih _ + | case3 bs rest ih1 ih2 => + intro r + simp only [renameBlocks, walkBlocks] + rw [ih1] + exact ih2 _ + | case4 bs rest ih1 ih2 => + intro r + simp only [renameBlocks, walkBlocks] + rw [ih1] + exact ih2 _ + | case5 bs rest ih1 ih2 => + intro r + simp only [renameBlocks, walkBlocks] + rw [ih1] + exact ih2 _ + | case6 bs rest ih1 ih2 => + intro r + simp only [renameBlocks, walkBlocks] + rw [ih1] + exact ih2 _ + | case7 _ bs rest ih1 ih2 => + intro r + simp only [renameBlocks, walkBlocks] + rw [ih1] + exact ih2 _ + exact key bs r + +theorem countBlocks_renameBlocks (p : Atom → Bool) + (hp : ∀ a, p (renameAtom a) = p a) (bs : List Block) : + countBlocks p (renameBlocks bs) = countBlocks p bs := by + induction bs using renameBlocks.induct with + | case1 => simp [renameBlocks, countBlocks] + | case2 r rest ih => + simp only [renameBlocks, countBlocks] + rw [List.countP_map] + have hfun : p ∘ renameAtom = p := funext hp + rw [hfun, ih] + | case3 bs rest ih1 ih2 => + simp only [renameBlocks, countBlocks]; rw [ih1, ih2] + | case4 bs rest ih1 ih2 => + simp only [renameBlocks, countBlocks]; rw [ih1, ih2] + | case5 bs rest ih1 ih2 => + simp only [renameBlocks, countBlocks]; rw [ih1, ih2] + | case6 bs rest ih1 ih2 => + simp only [renameBlocks, countBlocks]; rw [ih1, ih2] + | case7 _ bs rest ih1 ih2 => + simp only [renameBlocks, countBlocks]; rw [ih1, ih2] + +theorem isBegin_renameAtom (a : Atom) : Atom.isBegin (renameAtom a) = Atom.isBegin a := by + cases a <;> rfl + +theorem isEnd_renameAtom (a : Atom) : Atom.isEnd (renameAtom a) = Atom.isEnd a := by + cases a <;> rfl + +/-! ### Walk preservation for `acceptBlocks` / `rejectBlocks` -/ + +theorem walkBlocks_acceptBlocks : + ∀ (bs : List Block), allNeutral bs → + ∀ r, walkBlocks r (acceptBlocks bs) = walkBlocks r bs := by + intro bs + induction bs using acceptBlocks.induct with + | case1 => intro _ r; simp only [acceptBlocks] + | case2 r' rest ih => + intro hn r + simp only [acceptBlocks, walkBlocks] + exact ih (allNeutral_run hn) _ + | case3 bs rest ih1 ih2 => + intro hn r + obtain ⟨_, hnbs, hnrest⟩ := allNeutral_ins hn + simp only [acceptBlocks, walkBlocks] + rw [walkBlocks_append, ih1 hnbs, ih2 hnrest] + | case4 bs rest ih1 ih2 => + intro hn r + obtain ⟨_, hnbs, hnrest⟩ := allNeutral_moveTo hn + simp only [acceptBlocks, walkBlocks] + rw [walkBlocks_append, ih1 hnbs, ih2 hnrest] + | case5 bs rest ih => + intro hn r + obtain ⟨hcn, _, hnrest⟩ := allNeutral_del hn + simp only [acceptBlocks, walkBlocks] + rw [ih hnrest, walkBlocks_neutral_ok hcn r] + | case6 bs rest ih => + intro hn r + obtain ⟨hcn, _, hnrest⟩ := allNeutral_moveFrom hn + simp only [acceptBlocks, walkBlocks] + rw [ih hnrest, walkBlocks_neutral_ok hcn r] + | case7 _ bs rest ih1 ih2 => + intro hn r + obtain ⟨hnbs, hnrest⟩ := allNeutral_other hn + simp only [acceptBlocks, walkBlocks] + rw [ih1 hnbs, ih2 hnrest] + +theorem walkBlocks_rejectBlocks : + ∀ (bs : List Block), allNeutral bs → + ∀ r, walkBlocks r (rejectBlocks bs) = walkBlocks r bs := by + intro bs + induction bs using rejectBlocks.induct with + | case1 => intro _ r; simp only [rejectBlocks] + | case2 r' rest ih => + intro hn r + simp only [rejectBlocks, walkBlocks] + exact ih (allNeutral_run hn) _ + | case3 bs rest ih => + intro hn r + obtain ⟨hcn, _, hnrest⟩ := allNeutral_ins hn + simp only [rejectBlocks, walkBlocks] + rw [ih hnrest, walkBlocks_neutral_ok hcn r] + | case4 bs rest ih => + intro hn r + obtain ⟨hcn, _, hnrest⟩ := allNeutral_moveTo hn + simp only [rejectBlocks, walkBlocks] + rw [ih hnrest, walkBlocks_neutral_ok hcn r] + | case5 bs rest ih1 ih2 => + intro hn r + obtain ⟨_, hnbs, hnrest⟩ := allNeutral_del hn + simp only [rejectBlocks, walkBlocks] + rw [walkBlocks_append, ih1 hnbs, ih2 hnrest] + | case6 bs rest ih1 ih2 => + intro hn r + obtain ⟨_, hnbs, hnrest⟩ := allNeutral_moveFrom hn + simp only [rejectBlocks, walkBlocks] + rw [walkBlocks_append, ih1 hnbs, ih2 hnrest] + | case7 _ bs rest ih1 ih2 => + intro hn r + obtain ⟨hnbs, hnrest⟩ := allNeutral_other hn + simp only [rejectBlocks, walkBlocks] + rw [ih1 hnbs, ih2 hnrest] + +/-! ### Count rebalancing for `acceptBlocks` / `rejectBlocks` -/ + +theorem countBlocks_acceptBlocks_balance : + ∀ (bs : List Block), allNeutral bs → + countBlocks Atom.isBegin (acceptBlocks bs) + countBlocks Atom.isEnd bs + = countBlocks Atom.isEnd (acceptBlocks bs) + countBlocks Atom.isBegin bs := by + intro bs + induction bs using acceptBlocks.induct with + | case1 => intro _; simp [acceptBlocks, countBlocks] + | case2 r' rest ih => + intro hn + simp only [acceptBlocks, countBlocks] + have := ih (allNeutral_run hn) + omega + | case3 bs rest ih1 ih2 => + intro hn + obtain ⟨_, hnbs, hnrest⟩ := allNeutral_ins hn + simp only [acceptBlocks, countBlocks] + rw [countBlocks_append, countBlocks_append] + have h1 := ih1 hnbs + have h2 := ih2 hnrest + omega + | case4 bs rest ih1 ih2 => + intro hn + obtain ⟨_, hnbs, hnrest⟩ := allNeutral_moveTo hn + simp only [acceptBlocks, countBlocks] + rw [countBlocks_append, countBlocks_append] + have h1 := ih1 hnbs + have h2 := ih2 hnrest + omega + | case5 bs rest ih => + intro hn + obtain ⟨hcn, _, hnrest⟩ := allNeutral_del hn + simp only [acceptBlocks, countBlocks] + have hbal := neutral_balanced hcn + have := ih hnrest + omega + | case6 bs rest ih => + intro hn + obtain ⟨hcn, _, hnrest⟩ := allNeutral_moveFrom hn + simp only [acceptBlocks, countBlocks] + have hbal := neutral_balanced hcn + have := ih hnrest + omega + | case7 _ bs rest ih1 ih2 => + intro hn + obtain ⟨hnbs, hnrest⟩ := allNeutral_other hn + simp only [acceptBlocks, countBlocks] + have h1 := ih1 hnbs + have h2 := ih2 hnrest + omega + +theorem countBlocks_rejectBlocks_balance : + ∀ (bs : List Block), allNeutral bs → + countBlocks Atom.isBegin (rejectBlocks bs) + countBlocks Atom.isEnd bs + = countBlocks Atom.isEnd (rejectBlocks bs) + countBlocks Atom.isBegin bs := by + intro bs + induction bs using rejectBlocks.induct with + | case1 => intro _; simp [rejectBlocks, countBlocks] + | case2 r' rest ih => + intro hn + simp only [rejectBlocks, countBlocks] + have := ih (allNeutral_run hn) + omega + | case3 bs rest ih => + intro hn + obtain ⟨hcn, _, hnrest⟩ := allNeutral_ins hn + simp only [rejectBlocks, countBlocks] + have hbal := neutral_balanced hcn + have := ih hnrest + omega + | case4 bs rest ih => + intro hn + obtain ⟨hcn, _, hnrest⟩ := allNeutral_moveTo hn + simp only [rejectBlocks, countBlocks] + have hbal := neutral_balanced hcn + have := ih hnrest + omega + | case5 bs rest ih1 ih2 => + intro hn + obtain ⟨_, hnbs, hnrest⟩ := allNeutral_del hn + simp only [rejectBlocks, countBlocks] + rw [countBlocks_append, countBlocks_append] + have h1 := ih1 hnbs + have h2 := ih2 hnrest + omega + | case6 bs rest ih1 ih2 => + intro hn + obtain ⟨_, hnbs, hnrest⟩ := allNeutral_moveFrom hn + simp only [rejectBlocks, countBlocks] + rw [countBlocks_append, countBlocks_append] + have h1 := ih1 hnbs + have h2 := ih2 hnrest + omega + | case7 _ bs rest ih1 ih2 => + intro hn + obtain ⟨hnbs, hnrest⟩ := allNeutral_other hn + simp only [rejectBlocks, countBlocks] + have h1 := ih1 hnbs + have h2 := ih2 hnrest + omega + +/-! ### The preservation lemma -/ + +/-- **`field_structure_preserved`.** For any document satisfying the recursive + well-formedness precondition, both `accept` and `reject` produce output that + passes `validateFieldStructure`. This is the Tier 2 result that closes + `inv_field_001` once composed with the residual axiom in `Spec.lean`. -/ +theorem field_structure_preserved (d : Doc) (h : recursivelyWellformed d) : + validateFieldStructure (accept d) = true ∧ + validateFieldStructure (reject d) = true := by + obtain ⟨hv, hn⟩ := h + rw [validateFieldStructure, Bool.and_eq_true] at hv + obtain ⟨hvbal, hvwalk⟩ := hv + rw [fldCharBalanced, beq_iff_eq] at hvbal + refine ⟨?_, ?_⟩ + · -- accept + rw [validateFieldStructure, Bool.and_eq_true] + refine ⟨?_, ?_⟩ + · rw [fldCharBalanced, accept_blocks, beq_iff_eq] + have hbal := countBlocks_acceptBlocks_balance d.blocks hn + omega + · rw [accept_blocks, walkBlocks_acceptBlocks d.blocks hn] + exact hvwalk + · -- reject + rw [validateFieldStructure, Bool.and_eq_true] + refine ⟨?_, ?_⟩ + · rw [fldCharBalanced, reject_blocks, beq_iff_eq, + countBlocks_renameBlocks _ isBegin_renameAtom, + countBlocks_renameBlocks _ isEnd_renameAtom] + have hbal := countBlocks_rejectBlocks_balance d.blocks hn + omega + · rw [reject_blocks, walkBlocks_renameBlocks, + walkBlocks_rejectBlocks d.blocks hn] + exact hvwalk + +end Tier2.InvFieldOne diff --git a/verification/lean/Tier2/OoxmlModel.lean b/verification/lean/Tier2/OoxmlModel.lean new file mode 100644 index 0000000..aacf1c9 --- /dev/null +++ b/verification/lean/Tier2/OoxmlModel.lean @@ -0,0 +1,92 @@ +/- +Tier 2 — definitional OOXML subset. + +A small, tree-structured syntactic subset of the `document.xml` surface, mirroring +OOXML's nested track-change wrappers. The `accept` / `reject` operations and the +field-structure predicate are defined against this subset so they match the +production engine structurally rather than abstractly. + +Field-line citations point at the production engine: +`packages/docx-core/src/baselines/atomizer/trackChangesAcceptorAst.ts`. + +See `openspec/changes/add-ooxml-doc-subset-and-inv-field-001-proof/design.md` +for the rejected alternatives (flat tape; full `ComparisonUnitAtom` projection). +-/ + +namespace Tier2.OoxmlModel + +/-- `w:fldChar` field-character kinds (`w:fldCharType` attribute). + `begin` / `separate` / `endf` mirror OOXML `begin` / `separate` / `end` + (`endf` because `end` is a Lean keyword). -/ +inductive FldCharKind + | begin + | separate + | endf + deriving DecidableEq, Repr, Inhabited + +/-- A logical run-level atom. `instrText` / `delInstrText` are treated as single + logical atoms even though production OOXML can fragment them across sibling + `w:r` elements — we model the post-atomization view as canonical. + + `delText` / `delInstrText` are the deleted-content counterparts the engine + rewrites back to `text` / `instrText` on `reject` + (`trackChangesAcceptorAst.ts:602-616`). -/ +inductive Atom + | text (s : String) + | delText (s : String) + | instrText (s : String) + | delInstrText (s : String) + | fldChar (k : FldCharKind) + deriving DecidableEq, Repr, Inhabited + +/-- Opaque run-properties marker. `inv_field_001` does not depend on `w:rPr` + contents, so this carries no internal structure. -/ +structure RPr where + marker : Nat := 0 + deriving DecidableEq, Repr, Inhabited + +/-- A `w:r` run: properties plus an ordered list of atoms. -/ +structure Run where + rPr : RPr := {} + content : List Atom + deriving DecidableEq, Repr, Inhabited + +/-- A block-level element inside a paragraph body. + + Track-change wrapper types (`ins` / `del` / `moveFrom` / `moveTo`) are the + only kinds the accept/reject semantics act on. `other` is a transparent + container that models every other OOXML structural element that can hold + block-level content but does NOT alter the field-context walk semantically + (`w:tbl`, `w:tr`, `w:tc`, `w:sdt`, `w:sdtContent`, `w:hyperlink`, + `w:bookmarkStart`/`End`, etc.). The walk, accept, reject, and rename + operations all descend transparently through `other`; `wrapperSubtreesBlocks` + skips `other` because it is not a track-change wrapper. -/ +inductive Block + | run (r : Run) + | ins (children : List Block) + | del (children : List Block) + | moveFrom (children : List Block) + | moveTo (children : List Block) + | other (tag : String) (children : List Block) + deriving Repr, Inhabited + +/-- Opaque paragraph-properties marker. -/ +structure PPr where + marker : Nat := 0 + deriving DecidableEq, Repr, Inhabited + +/-- A `w:p` paragraph: properties plus an ordered list of blocks. -/ +structure Paragraph where + pPr : PPr := {} + body : List Block + deriving Repr, Inhabited + +/-- The modeled `document.xml` surface: an ordered list of paragraphs. -/ +abbrev Doc := List Paragraph + +/-- The whole-document block sequence in document order: every paragraph body + concatenated. The field-structure walk runs over this. -/ +def Doc.blocks (d : Doc) : List Block := + d.flatMap Paragraph.body + +end Tier2.OoxmlModel diff --git a/verification/lean/Tier2/README.md b/verification/lean/Tier2/README.md new file mode 100644 index 0000000..5ae8278 --- /dev/null +++ b/verification/lean/Tier2/README.md @@ -0,0 +1,82 @@ +# Tier 2 — definitional `OoxmlDoc` subset and the `inv_field_001` closure + +This directory holds the Tier 2 verification work: a definitional Lean model of +a small OOXML subset, definitional `accept` / `reject` / `validateFieldStructure` +operations over it, and a **closed, machine-checked proof** of the field-structure +preservation lemma that discharges the `inv_field_001` `sorry` in +`../LeanSpike/Spec.lean`. + +## Files + +- `OoxmlModel.lean` — definitional datatypes (`Doc`, `Paragraph`, `Block`, `Run`, + `Atom`, `FldCharKind`) for a tree-structured OOXML subset. +- `FieldStructure.lean` — the stack-valued field-context walk (`FieldCtx`, + `WalkResult`, `stepAtom`, `walkBlocks`), `validateFieldStructure`, + `fieldContextNeutral`, and `recursivelyWellformed`. +- `WalkLemmas.lean` — generic, operation-agnostic walk lemmas: `walkBlocks_append` + (L1), `walkBlocks_neutral_ok` (L2 core), `neutral_balanced`, + `delInstrText_rewrite_safe` (L3). +- `AcceptReject.lean` — definitional `accept` and `reject`. +- `InvFieldOne.lean` — the preservation lemma `field_structure_preserved` + (no `sorry`). + +## What the closed proof says + +`Tier2.InvFieldOne.field_structure_preserved`: for any `Doc` `d` satisfying +`recursivelyWellformed d`, both `accept d` and `reject d` satisfy +`validateFieldStructure`. + +`recursivelyWellformed d` requires (a) the whole document passes +`validateFieldStructure`, AND (b) every `w:ins` / `w:del` / `w:moveFrom` / +`w:moveTo` wrapper subtree (transitively) is `fieldContextNeutral` — i.e. scanned +under **any** outer field context, it returns to that context and never produces +an invalid state. The field-context walk carries a depth-indexed stack of +"separator-seen" bits exactly mirroring the TS engine's `pastSeparatorAtDepth: +number[]` at `packages/docx-core/src/baselines/atomizer/pipeline.ts:374-389`. + +`inv_field_001` in `Spec.lean` is then closed by composing this lemma with the +single named residual axiom `compareDocumentXml_output_recursivelyWellformed`. + +## Residual obligations — what the proof does NOT say + +- **`compareDocumentXml_output_recursivelyWellformed` is the single named + residual axiom.** It asserts that this repo's inplace atomizer output satisfies + `recursivelyWellformed`. It is scoped to this repo's inplace atomizer — NOT to + OOXML comparison engines in general. It is **not discharged**; discharging it + by modeling `compareDocumentXml` definitionally is **Tier 3** work, and the next + step there is exactly that definitional model. +- **Extensional equivalence is not established.** The Lean `accept` / `reject` / + `validateFieldStructure` are definitional mirrors of the TS + `acceptAllChanges` / `rejectAllChanges` / `validateFieldStructure` + (`trackChangesAcceptorAst.ts:368-659`, `pipeline.ts:352-402`), but no proof + ties the Lean operations extensionally to the TS code. That is **Tier 2.5**. +- **The runtime check is not made redundant.** The production engine's runtime + `validateFieldStructure` call (`pipeline.ts:439-440`) is not removed or + weakened by this proof; the proof is a property of the Lean model. + +## Model narrowing + +The Lean `Block` covers only the four track-change wrapper types plus `Run`. +The TS paragraph-removal logic at `trackChangesAcceptorAst.ts:411,456,564` walks +all non-excluded descendants looking for `w:r` children, catching arbitrary +nested OOXML. The Lean model deliberately narrows this: non-wrapper descendants +are out of model. A broader `Block` shape is owned by Tier 2.5. + +Locality of `delInstrText` to deleted-content wrappers (`w:del` / `w:moveFrom`) +is **not** enforced by the bare `OoxmlModel` datatype — `reject`'s +`delInstrText → instrText` rename pass runs globally after both unwraps complete +(matching `trackChangesAcceptorAst.ts:602-616`). The precondition that +`delInstrText` only originates inside deleted-content wrappers is enforced by +`recursivelyWellformed` on the *input* (`fieldContextNeutral` rejects a +`delInstrText` outside an open pre-separator field). + +## CI + +`lake build` over this directory runs in `.github/workflows/lean-build.yml`, +which also audits that every `Tier2/` module is zero-`sorry`. The single +remaining `sorry` in the spike is `inv_rt_001` in `../LeanSpike/Spec.lean`. + +A TS-side falsifiability layer for `compareDocumentXml_output_recursivelyWellformed` +lives in `packages/docx-core/src/integration/lean-spec-bridge.test.ts` — one +field-bearing fixture case exercising a TS analogue of `recursivelyWellformed` +against the live engine. diff --git a/verification/lean/Tier2/WalkLemmas.lean b/verification/lean/Tier2/WalkLemmas.lean new file mode 100644 index 0000000..c79af84 --- /dev/null +++ b/verification/lean/Tier2/WalkLemmas.lean @@ -0,0 +1,283 @@ +/- +Tier 2 — generic lemmas about the stack-valued field-context walk. + +These are the shared, operation-agnostic results consumed by `InvFieldOne.lean`: + + * `walkBlocks_append` — the walk distributes over list append (L1). + * `walkBlocks_neutral_ok` — a `fieldContextNeutral` segment is a walk no-op + under any starting result (L2 core). + * `neutral_balanced` — context-neutrality implies balanced begin/end counts. + * `delInstrText_rewrite_safe` — `stepAtom` does not distinguish `instrText` + from `delInstrText`, so the `reject` rename pass is walk-transparent (L3). + +Nothing here mentions `accept` / `reject`; the file is purely about the walk. +-/ +import Tier2.FieldStructure + +namespace Tier2.WalkLemmas + +open Tier2.OoxmlModel Tier2.FieldStructure + +/-! ### `invalid` is absorbing -/ + +theorem stepAtoms_invalid (as : List Atom) : + stepAtoms .invalid as = .invalid := by + unfold stepAtoms + induction as with + | nil => rfl + | cons a as ih => + show as.foldl stepAtom (stepAtom .invalid a) = .invalid + exact ih + +theorem walkBlocks_invalid (bs : List Block) : + walkBlocks .invalid bs = .invalid := by + have key : ∀ (r : WalkResult) (bs : List Block), + r = .invalid → walkBlocks r bs = .invalid := by + intro r bs + induction r, bs using walkBlocks.induct with + | case1 r => intro h; subst h; simp only [walkBlocks] + | case2 r run rest ih => + intro h; subst h + simp only [walkBlocks] + exact ih (stepAtoms_invalid run.content) + | case3 r bs rest ih1 ih2 => + intro h; subst h + simp only [walkBlocks] + exact ih2 (ih1 rfl) + | case4 r bs rest ih1 ih2 => + intro h; subst h + simp only [walkBlocks] + exact ih2 (ih1 rfl) + | case5 r bs rest ih1 ih2 => + intro h; subst h + simp only [walkBlocks] + exact ih2 (ih1 rfl) + | case6 r bs rest ih1 ih2 => + intro h; subst h + simp only [walkBlocks] + exact ih2 (ih1 rfl) + | case7 r _ bs rest ih1 ih2 => + intro h; subst h + simp only [walkBlocks] + exact ih2 (ih1 rfl) + exact key .invalid bs rfl + +/-! ### L1 — the walk distributes over append -/ + +theorem walkBlocks_append (r : WalkResult) (l m : List Block) : + walkBlocks r (l ++ m) = walkBlocks (walkBlocks r l) m := by + induction l generalizing r with + | nil => simp only [List.nil_append, walkBlocks] + | cons b l ih => + cases b with + | run run => simp only [List.cons_append, walkBlocks]; exact ih _ + | ins bs => simp only [List.cons_append, walkBlocks]; exact ih _ + | del bs => simp only [List.cons_append, walkBlocks]; exact ih _ + | moveFrom bs => simp only [List.cons_append, walkBlocks]; exact ih _ + | moveTo bs => simp only [List.cons_append, walkBlocks]; exact ih _ + | other tag bs => simp only [List.cons_append, walkBlocks]; exact ih _ + +/-! ### L2 core — a context-neutral segment is a walk no-op -/ + +theorem walkBlocks_neutral_ok {bs : List Block} (h : fieldContextNeutral bs) : + ∀ r, walkBlocks r bs = r := by + intro r + cases r with + | invalid => exact walkBlocks_invalid bs + | ok ctx => exact h ctx + +/-! ### `countBlocks` distributes over append -/ + +theorem countBlocks_append (p : Atom → Bool) (l m : List Block) : + countBlocks p (l ++ m) = countBlocks p l + countBlocks p m := by + induction l with + | nil => simp [countBlocks] + | cons b l ih => + cases b with + | run run => simp only [List.cons_append, countBlocks]; omega + | ins bs => simp only [List.cons_append, countBlocks]; omega + | del bs => simp only [List.cons_append, countBlocks]; omega + | moveFrom bs => simp only [List.cons_append, countBlocks]; omega + | moveTo bs => simp only [List.cons_append, countBlocks]; omega + | other tag bs => simp only [List.cons_append, countBlocks]; omega + +/-! ### L3 — `stepAtom` does not distinguish `instrText` from `delInstrText` -/ + +theorem delInstrText_rewrite_safe (r : WalkResult) (s s' : String) : + stepAtom r (.instrText s) = stepAtom r (.delInstrText s') := by + cases r with + | invalid => rfl + | ok ctx => + cases ctx with + | nil => rfl + | cons b rest => cases b <;> rfl + +/-! ### Counting helpers -/ + +theorem stepAtoms_cons (r : WalkResult) (a : Atom) (as : List Atom) : + stepAtoms r (a :: as) = stepAtoms (stepAtom r a) as := by + unfold stepAtoms; rw [List.foldl_cons] + +/-! ### `neutral_balanced` — context-neutrality implies balanced begin/end counts + +The walk's `end`-on-empty no-op (`pipeline.ts:389`) means a *standalone* walk +can hide an unbalanced subtree. But a *context-neutral* subtree returns to its +starting context under **every** outer stack — in particular a stack tall enough +that no `end` ever underflows. With no underflow the stack length tracks +`begins − ends` exactly, so neutrality forces `begins = ends`. -/ + +/-- One-atom length bookkeeping under a stack tall enough to absorb an `end`. -/ +theorem stepAtom_tall (ctx : FieldCtx) (a : Atom) + (h : (if Atom.isEnd a then 1 else 0) ≤ ctx.length) : + stepAtom (.ok ctx) a = .invalid ∨ + ∃ ctx₁, stepAtom (.ok ctx) a = .ok ctx₁ ∧ + ctx₁.length + (if Atom.isEnd a then 1 else 0) + = ctx.length + (if Atom.isBegin a then 1 else 0) := by + cases a with + | text s => exact Or.inr ⟨ctx, rfl, by simp [Atom.isEnd, Atom.isBegin]⟩ + | delText s => exact Or.inr ⟨ctx, rfl, by simp [Atom.isEnd, Atom.isBegin]⟩ + | instrText s => + cases ctx with + | nil => exact Or.inl rfl + | cons b rest => + cases b with + | false => exact Or.inr ⟨false :: rest, rfl, by simp [Atom.isEnd, Atom.isBegin]⟩ + | true => exact Or.inl rfl + | delInstrText s => + cases ctx with + | nil => exact Or.inl rfl + | cons b rest => + cases b with + | false => exact Or.inr ⟨false :: rest, rfl, by simp [Atom.isEnd, Atom.isBegin]⟩ + | true => exact Or.inl rfl + | fldChar k => + cases k with + | begin => exact Or.inr ⟨false :: ctx, rfl, by simp [Atom.isEnd, Atom.isBegin]⟩ + | separate => + cases ctx with + | nil => exact Or.inr ⟨[], rfl, by simp [Atom.isEnd, Atom.isBegin]⟩ + | cons b rest => exact Or.inr ⟨true :: rest, rfl, by simp [Atom.isEnd, Atom.isBegin]⟩ + | endf => + cases ctx with + | nil => simp [Atom.isEnd] at h + | cons b rest => exact Or.inr ⟨rest, rfl, by simp [Atom.isEnd, Atom.isBegin]⟩ + +/-- Atom-level tall-stack length tracking: under a stack at least as tall as the + total `end` count, the walk either fails or shifts the stack length by + exactly `begins − ends`. -/ +theorem stepAtoms_tall (as : List Atom) (ctx : FieldCtx) + (h : as.countP Atom.isEnd ≤ ctx.length) : + stepAtoms (.ok ctx) as = .invalid ∨ + ∃ ctx', stepAtoms (.ok ctx) as = .ok ctx' ∧ + ctx'.length + as.countP Atom.isEnd = ctx.length + as.countP Atom.isBegin := by + induction as generalizing ctx with + | nil => exact Or.inr ⟨ctx, rfl, by simp⟩ + | cons a as' ih => + rw [stepAtoms_cons] + rw [List.countP_cons] at h + have hEndA : (if Atom.isEnd a then 1 else 0) ≤ ctx.length := by + have := List.countP_cons (l := as') (p := Atom.isEnd) (a := a) + omega + rcases stepAtom_tall ctx a hEndA with hinv | ⟨ctx₁, hstep, hlenA⟩ + · rw [hinv]; exact Or.inl (stepAtoms_invalid as') + · rw [hstep] + have hbound : as'.countP Atom.isEnd ≤ ctx₁.length := by omega + rcases ih ctx₁ hbound with hinv2 | ⟨ctx', hok, hlenB⟩ + · exact Or.inl hinv2 + · refine Or.inr ⟨ctx', hok, ?_⟩ + rw [List.countP_cons, List.countP_cons] + omega + +/-- Block-level tall-stack length tracking. Lifts `stepAtoms_tall` over the + nested block tree via `walkBlocks.induct`. -/ +theorem walkBlocks_tall (bs : List Block) (ctx : FieldCtx) + (h : countBlocks Atom.isEnd bs ≤ ctx.length) : + walkBlocks (.ok ctx) bs = .invalid ∨ + ∃ ctx', walkBlocks (.ok ctx) bs = .ok ctx' ∧ + ctx'.length + countBlocks Atom.isEnd bs + = ctx.length + countBlocks Atom.isBegin bs := by + have key : ∀ (r : WalkResult) (bs : List Block) (ctx : FieldCtx), + r = .ok ctx → countBlocks Atom.isEnd bs ≤ ctx.length → + walkBlocks r bs = .invalid ∨ + ∃ ctx', walkBlocks r bs = .ok ctx' ∧ + ctx'.length + countBlocks Atom.isEnd bs + = ctx.length + countBlocks Atom.isBegin bs := by + intro r bs + induction r, bs using walkBlocks.induct with + | case1 r => + intro ctx hr hb; subst hr + exact Or.inr ⟨ctx, by simp only [walkBlocks], by simp [countBlocks]⟩ + | case2 r run rest ih => + intro ctx hr hb; subst hr + simp only [walkBlocks] + simp only [countBlocks] at hb ⊢ + rcases stepAtoms_tall run.content ctx (by omega) with hinv | ⟨ctx₁, hstep, hlenA⟩ + · rw [hinv, walkBlocks_invalid]; exact Or.inl rfl + · rcases ih ctx₁ hstep (by omega) with hinv2 | ⟨ctx', hok, hlenB⟩ + · exact Or.inl hinv2 + · exact Or.inr ⟨ctx', hok, by omega⟩ + | case3 r bs rest ih1 ih2 => + intro ctx hr hb; subst hr + simp only [walkBlocks] + simp only [countBlocks] at hb ⊢ + rcases ih1 ctx rfl (by omega) with hinv | ⟨ctx₁, hok1, hlenA⟩ + · rw [hinv, walkBlocks_invalid]; exact Or.inl rfl + · rcases ih2 ctx₁ hok1 (by omega) with hinv2 | ⟨ctx', hok2, hlenB⟩ + · exact Or.inl hinv2 + · exact Or.inr ⟨ctx', hok2, by omega⟩ + | case4 r bs rest ih1 ih2 => + intro ctx hr hb; subst hr + simp only [walkBlocks] + simp only [countBlocks] at hb ⊢ + rcases ih1 ctx rfl (by omega) with hinv | ⟨ctx₁, hok1, hlenA⟩ + · rw [hinv, walkBlocks_invalid]; exact Or.inl rfl + · rcases ih2 ctx₁ hok1 (by omega) with hinv2 | ⟨ctx', hok2, hlenB⟩ + · exact Or.inl hinv2 + · exact Or.inr ⟨ctx', hok2, by omega⟩ + | case5 r bs rest ih1 ih2 => + intro ctx hr hb; subst hr + simp only [walkBlocks] + simp only [countBlocks] at hb ⊢ + rcases ih1 ctx rfl (by omega) with hinv | ⟨ctx₁, hok1, hlenA⟩ + · rw [hinv, walkBlocks_invalid]; exact Or.inl rfl + · rcases ih2 ctx₁ hok1 (by omega) with hinv2 | ⟨ctx', hok2, hlenB⟩ + · exact Or.inl hinv2 + · exact Or.inr ⟨ctx', hok2, by omega⟩ + | case6 r bs rest ih1 ih2 => + intro ctx hr hb; subst hr + simp only [walkBlocks] + simp only [countBlocks] at hb ⊢ + rcases ih1 ctx rfl (by omega) with hinv | ⟨ctx₁, hok1, hlenA⟩ + · rw [hinv, walkBlocks_invalid]; exact Or.inl rfl + · rcases ih2 ctx₁ hok1 (by omega) with hinv2 | ⟨ctx', hok2, hlenB⟩ + · exact Or.inl hinv2 + · exact Or.inr ⟨ctx', hok2, by omega⟩ + | case7 r _ bs rest ih1 ih2 => + intro ctx hr hb; subst hr + simp only [walkBlocks] + simp only [countBlocks] at hb ⊢ + rcases ih1 ctx rfl (by omega) with hinv | ⟨ctx₁, hok1, hlenA⟩ + · rw [hinv, walkBlocks_invalid]; exact Or.inl rfl + · rcases ih2 ctx₁ hok1 (by omega) with hinv2 | ⟨ctx', hok2, hlenB⟩ + · exact Or.inl hinv2 + · exact Or.inr ⟨ctx', hok2, by omega⟩ + exact key (.ok ctx) bs ctx rfl h + +/-- **L2 / counting corollary.** A field-context-neutral subtree has equal + `w:fldChar` begin and end counts. -/ +theorem neutral_balanced {bs : List Block} (h : fieldContextNeutral bs) : + countBlocks Atom.isBegin bs = countBlocks Atom.isEnd bs := by + have hlenrep : (List.replicate (countBlocks Atom.isEnd bs) false).length + = countBlocks Atom.isEnd bs := by simp + have hwalk := h (List.replicate (countBlocks Atom.isEnd bs) false) + have htall := walkBlocks_tall bs (List.replicate (countBlocks Atom.isEnd bs) false) + (by omega) + rw [hwalk] at htall + rcases htall with hinv | ⟨ctx', hok, hlen⟩ + · simp at hinv + · simp only [WalkResult.ok.injEq] at hok + subst hok + rw [hlenrep] at hlen + omega + +end Tier2.WalkLemmas diff --git a/verification/lean/lakefile.lean b/verification/lean/lakefile.lean index 833f95f..2d67648 100644 --- a/verification/lean/lakefile.lean +++ b/verification/lean/lakefile.lean @@ -8,3 +8,6 @@ require mathlib from git @[default_target] lean_lib LeanSpike + +@[default_target] +lean_lib Tier2