From 8417d7b7c6107e1752fdd65f26721af24b8b6e24 Mon Sep 17 00:00:00 2001 From: Steven Obiajulu Date: Fri, 22 May 2026 01:41:43 -0400 Subject: [PATCH 1/2] fix(docx-mcp): seed revision ids from side parts --- packages/docx-mcp/src/session/manager.test.ts | 30 +++++++++++++- packages/docx-mcp/src/session/manager.ts | 41 ++++++++++++------- packages/docx-mcp/src/tools/add_comment.ts | 2 +- packages/docx-mcp/src/tools/add_footnote.ts | 2 +- packages/docx-mcp/src/tools/apply_plan.ts | 2 +- .../docx-mcp/src/tools/clear_formatting.ts | 2 +- packages/docx-mcp/src/tools/delete_comment.ts | 2 +- .../docx-mcp/src/tools/delete_footnote.ts | 2 +- packages/docx-mcp/src/tools/format_layout.ts | 2 +- .../docx-mcp/src/tools/insert_paragraph.ts | 2 +- packages/docx-mcp/src/tools/replace_text.ts | 2 +- .../docx-mcp/src/tools/update_footnote.ts | 2 +- 12 files changed, 66 insertions(+), 25 deletions(-) diff --git a/packages/docx-mcp/src/session/manager.test.ts b/packages/docx-mcp/src/session/manager.test.ts index e5c6726..0c1d87c 100644 --- a/packages/docx-mcp/src/session/manager.test.ts +++ b/packages/docx-mcp/src/session/manager.test.ts @@ -117,11 +117,39 @@ describe('revision context helpers', () => { await fs.writeFile(filePath, new Uint8Array(buf)); const session = await mgr.createSession(buf, 'tracked.docx', filePath); - const ctx = getRevisionContextForSession(session); + const ctx = await getRevisionContextForSession(session); expect(ctx).toBeDefined(); expect(session.revisionIdState?.nextId).toBe(43); }); + + test('initializes revision ids above pre-existing side-part w:id values', async () => { + const mgr = new SessionManager({ defaultAiAuthor: 'SafeDocX' }); + const dir = await fs.mkdtemp(path.join(os.tmpdir(), 'mgr-revision-sidepart-test-')); + tmpDirs.push(dir); + const filePath = path.join(dir, 'tracked-sidepart.docx'); + const documentXml = + `` + + `` + + `` + + `Prior` + + ``; + const commentsXml = + `` + + `` + + `` + + `Side part` + + `` + + ``; + const buf = await makeDocxWithDocumentXml(documentXml, { 'word/comments.xml': commentsXml }); + await fs.writeFile(filePath, new Uint8Array(buf)); + + const session = await mgr.createSession(buf, 'tracked-sidepart.docx', filePath); + const ctx = await getRevisionContextForSession(session); + + expect(ctx).toBeDefined(); + expect(session.revisionIdState?.nextId).toBe(501); + }); }); // ── getSessionByPath ──────────────────────────────────────────────── diff --git a/packages/docx-mcp/src/session/manager.ts b/packages/docx-mcp/src/session/manager.ts index 5fb7353..1dbde4a 100644 --- a/packages/docx-mcp/src/session/manager.ts +++ b/packages/docx-mcp/src/session/manager.ts @@ -6,6 +6,8 @@ import { DocxDocument, createRevisionContext, createRevisionIdState, + parseXml, + readZipText, type NormalizationResult, type ParagraphRevision, type ReconstructionMode, @@ -93,6 +95,13 @@ export type GDocsSession = { export type Session = DocxSession | GDocsSession; const WORDPROCESSING_ML_NS = 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'; +const REVISION_ID_SEED_SIDE_PARTS = [ + 'word/comments.xml', + 'word/footnotes.xml', + 'word/endnotes.xml', + 'word/commentsExtended.xml', + 'word/people.xml', +] as const; function normalizeAiAuthor(author: string | null | undefined): string | null { if (typeof author !== 'string') return null; @@ -114,18 +123,8 @@ function getWordIdValue(element: Element): number | null { * Compute a starting `RevisionIdState` whose first allocated `w:id` is * higher than any existing revision id found in the supplied documents. * - * **Known limitation (see follow-up to #165):** this scans only the - * documents passed in. Today's caller passes `documentXml` only, so any - * pre-existing revision in side parts (`comments.xml`, `footnotesXml`, - * `endnotes.xml`) is invisible to the seed scan. For typical sessions - * with <100 AI edits this never collides in practice; for long-running - * sessions on heavily-reviewed third-party documents the AI counter - * could eventually cross a pre-existing side-part revision id. - * - * To close the gap, future work should pre-load side parts via - * `zip.readText('word/comments.xml')` etc. and pass them all here. - * That cascade requires `getRevisionContextForSession` to become async, - * which is deferred to follow-up. + * Callers should pass every available story/metadata part that can contain + * package-wide `w:id` revision attributes, not just `document.xml`. */ export function inferStartingRevisionIdState(...docs: Document[]): RevisionIdState { let maxId = 0; @@ -142,11 +141,25 @@ export function inferStartingRevisionIdState(...docs: Document[]): RevisionIdSta return createRevisionIdState(maxId + 1); } -export function getRevisionContextForSession(session: DocxSession): RevisionContext | undefined { +async function getSidePartRevisionSeedDocs(session: DocxSession): Promise { + const docs: Document[] = []; + + for (const partPath of REVISION_ID_SEED_SIDE_PARTS) { + const xml = await readZipText(session.originalBuffer, partPath); + if (xml) docs.push(parseXml(xml)); + } + + return docs; +} + +export async function getRevisionContextForSession(session: DocxSession): Promise { if (!session.aiAuthor) return undefined; if (!session.revisionIdState) { - session.revisionIdState = inferStartingRevisionIdState(session.doc.getDocumentXmlClone()); + session.revisionIdState = inferStartingRevisionIdState( + session.doc.getDocumentXmlClone(), + ...(await getSidePartRevisionSeedDocs(session)), + ); } return createRevisionContext({ diff --git a/packages/docx-mcp/src/tools/add_comment.ts b/packages/docx-mcp/src/tools/add_comment.ts index fdc2ec8..7a165f1 100644 --- a/packages/docx-mcp/src/tools/add_comment.ts +++ b/packages/docx-mcp/src/tools/add_comment.ts @@ -19,7 +19,7 @@ export async function addComment( const resolved = await resolveSessionForTool(manager, params, { toolName: 'add_comment' }); if (!resolved.ok) return resolved.response; const { session, metadata } = resolved; - const ctx = getRevisionContextForSession(session); + const ctx = await getRevisionContextForSession(session); try { // Reply mode: parent_comment_id provided diff --git a/packages/docx-mcp/src/tools/add_footnote.ts b/packages/docx-mcp/src/tools/add_footnote.ts index 62773a8..1fdfb66 100644 --- a/packages/docx-mcp/src/tools/add_footnote.ts +++ b/packages/docx-mcp/src/tools/add_footnote.ts @@ -15,7 +15,7 @@ export async function addFootnote( const resolved = await resolveSessionForTool(manager, params, { toolName: 'add_footnote' }); if (!resolved.ok) return resolved.response; const { session, metadata } = resolved; - const ctx = getRevisionContextForSession(session); + const ctx = await getRevisionContextForSession(session); if (!params.target_paragraph_id) { return err('MISSING_PARAMETER', 'target_paragraph_id is required.', 'Provide the _bk_* ID of the paragraph to anchor the footnote to.'); diff --git a/packages/docx-mcp/src/tools/apply_plan.ts b/packages/docx-mcp/src/tools/apply_plan.ts index 4ead732..84fff57 100644 --- a/packages/docx-mcp/src/tools/apply_plan.ts +++ b/packages/docx-mcp/src/tools/apply_plan.ts @@ -407,7 +407,7 @@ export async function applyPlan( const allWarnings = validations.flatMap((v) => v.warnings.map((w) => ({ step_id: v.step_id, warning: w }))); // Apply phase — execute steps sequentially - const ctx = getRevisionContextForSession(session); + const ctx = await getRevisionContextForSession(session); const result = await executeSteps(manager, manager.normalizePath(session.originalPath), steps, ctx); if (result.failed_step_id !== undefined) { diff --git a/packages/docx-mcp/src/tools/clear_formatting.ts b/packages/docx-mcp/src/tools/clear_formatting.ts index 6e3dd1a..05fbd19 100644 --- a/packages/docx-mcp/src/tools/clear_formatting.ts +++ b/packages/docx-mcp/src/tools/clear_formatting.ts @@ -71,7 +71,7 @@ export async function clearFormatting( const resolved = await resolveSessionForTool(manager, params, { toolName: 'clear_formatting' }); if (!resolved.ok) return resolved.response; const { session, metadata } = resolved; - const revisionCtx = ctx ?? getRevisionContextForSession(session); + const revisionCtx = ctx ?? await getRevisionContextForSession(session); const { nodes } = session.doc.buildDocumentView({ includeSemanticTags: false }); const pids = params.paragraph_ids ?? nodes.map((n) => n.id); diff --git a/packages/docx-mcp/src/tools/delete_comment.ts b/packages/docx-mcp/src/tools/delete_comment.ts index f68f521..ec31228 100644 --- a/packages/docx-mcp/src/tools/delete_comment.ts +++ b/packages/docx-mcp/src/tools/delete_comment.ts @@ -13,7 +13,7 @@ export async function deleteComment( const resolved = await resolveSessionForTool(manager, params, { toolName: 'delete_comment' }); if (!resolved.ok) return resolved.response; const { session, metadata } = resolved; - const ctx = getRevisionContextForSession(session); + const ctx = await getRevisionContextForSession(session); if (params.comment_id == null) { return err('MISSING_PARAMETER', 'comment_id is required.', 'Provide the comment ID to delete.'); diff --git a/packages/docx-mcp/src/tools/delete_footnote.ts b/packages/docx-mcp/src/tools/delete_footnote.ts index 0bab4a9..8e27c94 100644 --- a/packages/docx-mcp/src/tools/delete_footnote.ts +++ b/packages/docx-mcp/src/tools/delete_footnote.ts @@ -13,7 +13,7 @@ export async function deleteFootnote( const resolved = await resolveSessionForTool(manager, params, { toolName: 'delete_footnote' }); if (!resolved.ok) return resolved.response; const { session, metadata } = resolved; - const ctx = getRevisionContextForSession(session); + const ctx = await getRevisionContextForSession(session); if (params.note_id == null) { return err('MISSING_PARAMETER', 'note_id is required.', 'Provide the footnote ID to delete.'); diff --git a/packages/docx-mcp/src/tools/format_layout.ts b/packages/docx-mcp/src/tools/format_layout.ts index 4060fb3..bed5791 100644 --- a/packages/docx-mcp/src/tools/format_layout.ts +++ b/packages/docx-mcp/src/tools/format_layout.ts @@ -255,7 +255,7 @@ export async function formatLayout( const resolved = await resolveSessionForTool(manager, params, { toolName: 'format_layout' }); if (!resolved.ok) return resolved.response; const { session, metadata } = resolved; - const ctx = getRevisionContextForSession(session); + const ctx = await getRevisionContextForSession(session); const strict = params.strict ?? true; if (typeof strict !== 'boolean') { diff --git a/packages/docx-mcp/src/tools/insert_paragraph.ts b/packages/docx-mcp/src/tools/insert_paragraph.ts index 6044d51..901f967 100644 --- a/packages/docx-mcp/src/tools/insert_paragraph.ts +++ b/packages/docx-mcp/src/tools/insert_paragraph.ts @@ -127,7 +127,7 @@ export async function insertParagraph( const resolved = await resolveSessionForTool(manager, params, { toolName: 'insert_paragraph' }); if (!resolved.ok) return resolved.response; const { session, metadata } = resolved; - const revisionCtx = ctx ?? getRevisionContextForSession(session); + const revisionCtx = ctx ?? await getRevisionContextForSession(session); const positionUpper = (params.position ?? 'AFTER').toUpperCase(); if (positionUpper !== 'BEFORE' && positionUpper !== 'AFTER') { diff --git a/packages/docx-mcp/src/tools/replace_text.ts b/packages/docx-mcp/src/tools/replace_text.ts index cdf48f8..581d179 100644 --- a/packages/docx-mcp/src/tools/replace_text.ts +++ b/packages/docx-mcp/src/tools/replace_text.ts @@ -174,7 +174,7 @@ export async function replaceText( const resolved = await resolveSessionForTool(manager, params, { toolName: 'replace_text' }); if (!resolved.ok) return resolved.response; const { session, metadata } = resolved; - const revisionCtx = ctx ?? getRevisionContextForSession(session); + const revisionCtx = ctx ?? await getRevisionContextForSession(session); if (params.normalize_first) { session.doc.mergeRunsOnly(); diff --git a/packages/docx-mcp/src/tools/update_footnote.ts b/packages/docx-mcp/src/tools/update_footnote.ts index bdaa239..6ffef20 100644 --- a/packages/docx-mcp/src/tools/update_footnote.ts +++ b/packages/docx-mcp/src/tools/update_footnote.ts @@ -14,7 +14,7 @@ export async function updateFootnote( const resolved = await resolveSessionForTool(manager, params, { toolName: 'update_footnote' }); if (!resolved.ok) return resolved.response; const { session, metadata } = resolved; - const ctx = getRevisionContextForSession(session); + const ctx = await getRevisionContextForSession(session); if (params.note_id == null) { return err('MISSING_PARAMETER', 'note_id is required.', 'Provide the footnote ID to update.'); From ac999bb4c262b3369143a9bdc6db18f5bef3e3c0 Mon Sep 17 00:00:00 2001 From: Steven Obiajulu Date: Fri, 22 May 2026 02:51:40 -0400 Subject: [PATCH 2/2] fix(docx-mcp): narrow revision-id seed scan and expand part coverage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refines the side-part revision-id seed (introduced in 8417d7b for #171) to address peer-review feedback before merge. Why: 1. **Over-broad scan.** The previous `inferStartingRevisionIdState` collected every `w:id`/`id` attribute in the DOM. Non-revision IDs (e.g. ``, ``, ``) share the attribute name but live in a separate ID space, so they were spuriously inflating the starting counter. Limit the scan to known revision-bearing WordprocessingML elements (`w:ins`, `w:del`, `w:moveFrom/To`, `w:*PrChange`, `w:cellIns/Del/Merge`, `w:customXml*RangeStart/End`). 2. **Incomplete part coverage.** Revision IDs are package-wide. The prior fixed list missed headers, footers, and the glossary part — pre-existing redlines in any of those would still collide with AI-emitted IDs. Replace the fixed list with `DocxZip.load` + dynamic enumeration of `word/header*.xml` and `word/footer*.xml`, plus a static list of `comments.xml`, `footnotes.xml`, `endnotes.xml`, `glossary/document.xml`. Drop `commentsExtended.xml` and `people.xml` from the seed list — they carry `w15:paraId` / `w15:author` identifiers, not revision `w:id` values, so under the narrowed scan they would be no-ops. 3. **First-call concurrency race.** With the scan now async, two concurrent first callers could each run the seed scan and the slower one could overwrite a counter that the faster caller had already advanced via emitted revisions, causing real collisions. Add a module-level `WeakMap>` single-flight guard that mirrors the existing `baselinePromises` pattern in `SessionManager`. 4. **Parse robustness.** `parseXml` throws on malformed input. A corrupt optional side part (e.g. truncated `comments.xml`) would crash every tracked edit on the session. Wrap each per-part parse in `try/catch` and skip on failure — collision risk on an unrelated corrupt part is better than bricking the session. New regression tests: - non-revision `` alone does not seed counter - header with `` seeds `nextId` to 901 - footer with `` seeds `nextId` to 1235 - malformed `comments.xml` does not block seeding from `document.xml` - 3-way concurrent first callers resolve to a single seeded state Verification: - `npm -w @usejunior/docx-mcp run test:run -- src/session/manager.test.ts` — 36 passed - `npm -w @usejunior/docx-mcp run lint` - `npm -w @usejunior/docx-mcp run build` - `npm run lint:workspaces` — clean (one pre-existing warning in edit.test.ts) - `npm run test:run` — all workspaces green (1239 + 742 + 116 + 3 passed) Ref: #171 Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/docx-mcp/src/session/manager.test.ts | 123 ++++++++++++++++++ packages/docx-mcp/src/session/manager.ts | 120 +++++++++++++++-- 2 files changed, 230 insertions(+), 13 deletions(-) diff --git a/packages/docx-mcp/src/session/manager.test.ts b/packages/docx-mcp/src/session/manager.test.ts index 0c1d87c..9525283 100644 --- a/packages/docx-mcp/src/session/manager.test.ts +++ b/packages/docx-mcp/src/session/manager.test.ts @@ -150,6 +150,129 @@ describe('revision context helpers', () => { expect(ctx).toBeDefined(); expect(session.revisionIdState?.nextId).toBe(501); }); + + test('ignores non-revision w:id attributes (e.g., ) in side parts', async () => { + // Comment IDs and revision IDs share an attribute name but occupy + // separate ID spaces — only revision-bearing elements should seed. + const mgr = new SessionManager({ defaultAiAuthor: 'SafeDocX' }); + const dir = await fs.mkdtemp(path.join(os.tmpdir(), 'mgr-revision-nonrevid-test-')); + tmpDirs.push(dir); + const filePath = path.join(dir, 'nonrev.docx'); + const documentXml = + `` + + `` + + `Plain`; + const commentsXml = + `` + + `` + + `` + + `Comment body` + + ``; + const buf = await makeDocxWithDocumentXml(documentXml, { 'word/comments.xml': commentsXml }); + await fs.writeFile(filePath, new Uint8Array(buf)); + + const session = await mgr.createSession(buf, 'nonrev.docx', filePath); + const ctx = await getRevisionContextForSession(session); + + expect(ctx).toBeDefined(); + expect(session.revisionIdState?.nextId).toBe(1); + }); + + test('initializes revision ids above pre-existing header w:id values', async () => { + const mgr = new SessionManager({ defaultAiAuthor: 'SafeDocX' }); + const dir = await fs.mkdtemp(path.join(os.tmpdir(), 'mgr-revision-header-test-')); + tmpDirs.push(dir); + const filePath = path.join(dir, 'tracked-header.docx'); + const documentXml = + `` + + `` + + `Body`; + const headerXml = + `` + + `` + + `Header` + + ``; + const buf = await makeDocxWithDocumentXml(documentXml, { 'word/header1.xml': headerXml }); + await fs.writeFile(filePath, new Uint8Array(buf)); + + const session = await mgr.createSession(buf, 'tracked-header.docx', filePath); + const ctx = await getRevisionContextForSession(session); + + expect(ctx).toBeDefined(); + expect(session.revisionIdState?.nextId).toBe(901); + }); + + test('initializes revision ids above pre-existing footer w:id values', async () => { + const mgr = new SessionManager({ defaultAiAuthor: 'SafeDocX' }); + const dir = await fs.mkdtemp(path.join(os.tmpdir(), 'mgr-revision-footer-test-')); + tmpDirs.push(dir); + const filePath = path.join(dir, 'tracked-footer.docx'); + const documentXml = + `` + + `` + + `Body`; + const footerXml = + `` + + `` + + `Footer` + + ``; + const buf = await makeDocxWithDocumentXml(documentXml, { 'word/footer2.xml': footerXml }); + await fs.writeFile(filePath, new Uint8Array(buf)); + + const session = await mgr.createSession(buf, 'tracked-footer.docx', filePath); + const ctx = await getRevisionContextForSession(session); + + expect(ctx).toBeDefined(); + expect(session.revisionIdState?.nextId).toBe(1235); + }); + + test('skips malformed optional side parts and continues seeding from document.xml', async () => { + const mgr = new SessionManager({ defaultAiAuthor: 'SafeDocX' }); + const dir = await fs.mkdtemp(path.join(os.tmpdir(), 'mgr-revision-malformed-test-')); + tmpDirs.push(dir); + const filePath = path.join(dir, 'malformed.docx'); + const documentXml = + `` + + `` + + `Body`; + // Truncated/unterminated comments.xml — parseXml must throw, but the + // session must remain editable rather than crashing the first tool call. + const commentsXml = + ` { + const mgr = new SessionManager({ defaultAiAuthor: 'SafeDocX' }); + const dir = await fs.mkdtemp(path.join(os.tmpdir(), 'mgr-revision-race-test-')); + tmpDirs.push(dir); + const filePath = path.join(dir, 'race.docx'); + const documentXml = + `` + + `` + + `Hi`; + const buf = await makeDocxWithDocumentXml(documentXml); + await fs.writeFile(filePath, new Uint8Array(buf)); + + const session = await mgr.createSession(buf, 'race.docx', filePath); + const [a, b, c] = await Promise.all([ + getRevisionContextForSession(session), + getRevisionContextForSession(session), + getRevisionContextForSession(session), + ]); + + expect(a).toBeDefined(); + expect(b).toBeDefined(); + expect(c).toBeDefined(); + expect(session.revisionIdState?.nextId).toBe(11); + }); }); // ── getSessionByPath ──────────────────────────────────────────────── diff --git a/packages/docx-mcp/src/session/manager.ts b/packages/docx-mcp/src/session/manager.ts index 1dbde4a..f244d8b 100644 --- a/packages/docx-mcp/src/session/manager.ts +++ b/packages/docx-mcp/src/session/manager.ts @@ -4,10 +4,10 @@ import os from 'node:os'; import fs from 'node:fs/promises'; import { DocxDocument, + DocxZip, createRevisionContext, createRevisionIdState, parseXml, - readZipText, type NormalizationResult, type ParagraphRevision, type ReconstructionMode, @@ -95,14 +95,56 @@ export type GDocsSession = { export type Session = DocxSession | GDocsSession; const WORDPROCESSING_ML_NS = 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'; -const REVISION_ID_SEED_SIDE_PARTS = [ + +/** + * WordprocessingML elements that carry package-wide revision `w:id` attributes. + * Limiting the seed scan to these elements prevents non-revision IDs (e.g., + * ``, ``, ``) from + * spuriously inflating the starting revision-id counter. + */ +const REVISION_ID_ELEMENT_LOCAL_NAMES = new Set([ + 'ins', + 'del', + 'moveFrom', + 'moveTo', + 'moveFromRangeStart', + 'moveFromRangeEnd', + 'moveToRangeStart', + 'moveToRangeEnd', + 'pPrChange', + 'rPrChange', + 'tblPrChange', + 'trPrChange', + 'tcPrChange', + 'sectPrChange', + 'cellIns', + 'cellDel', + 'cellMerge', + 'customXmlInsRangeStart', + 'customXmlInsRangeEnd', + 'customXmlDelRangeStart', + 'customXmlDelRangeEnd', + 'customXmlMoveFromRangeStart', + 'customXmlMoveFromRangeEnd', + 'customXmlMoveToRangeStart', + 'customXmlMoveToRangeEnd', +]); + +/** + * Fixed package paths that can carry package-wide revision attributes. + * `commentsExtended.xml` and `people.xml` are intentionally excluded — they + * use `w15:paraId` / `w15:author` identifiers, not revision `w:id` values. + */ +const FIXED_REVISION_ID_SEED_PARTS = [ 'word/comments.xml', 'word/footnotes.xml', 'word/endnotes.xml', - 'word/commentsExtended.xml', - 'word/people.xml', + 'word/glossary/document.xml', ] as const; +/** Matches numbered header/footer parts such as `word/header1.xml`. */ +const NUMBERED_HEADER_FOOTER_RE = /^word\/(header|footer)\d*\.xml$/; + function normalizeAiAuthor(author: string | null | undefined): string | null { if (typeof author !== 'string') return null; const trimmed = author.trim(); @@ -123,14 +165,22 @@ function getWordIdValue(element: Element): number | null { * Compute a starting `RevisionIdState` whose first allocated `w:id` is * higher than any existing revision id found in the supplied documents. * + * Only `w:id` attributes on revision-bearing elements + * (`REVISION_ID_ELEMENT_LOCAL_NAMES`) are considered. Non-revision IDs such + * as `` or `` share the attribute name but + * occupy a different ID space and must not influence the counter. + * * Callers should pass every available story/metadata part that can contain - * package-wide `w:id` revision attributes, not just `document.xml`. + * package-wide revision attributes, not just `document.xml`. */ export function inferStartingRevisionIdState(...docs: Document[]): RevisionIdState { let maxId = 0; for (const doc of docs) { for (const node of Array.from(doc.getElementsByTagName('*'))) { + const localName = node.localName ?? ''; + if (!REVISION_ID_ELEMENT_LOCAL_NAMES.has(localName)) continue; + if (node.namespaceURI && node.namespaceURI !== WORDPROCESSING_ML_NS) continue; const value = getWordIdValue(node); if (value !== null && value > maxId) { maxId = value; @@ -141,25 +191,69 @@ export function inferStartingRevisionIdState(...docs: Document[]): RevisionIdSta return createRevisionIdState(maxId + 1); } -async function getSidePartRevisionSeedDocs(session: DocxSession): Promise { +async function getSidePartRevisionSeedDocs(buffer: Buffer): Promise { const docs: Document[] = []; - for (const partPath of REVISION_ID_SEED_SIDE_PARTS) { - const xml = await readZipText(session.originalBuffer, partPath); - if (xml) docs.push(parseXml(xml)); + let zip: DocxZip; + try { + zip = await DocxZip.load(buffer); + } catch { + return docs; + } + + const seedPaths = new Set(FIXED_REVISION_ID_SEED_PARTS); + for (const entry of zip.listFiles()) { + if (NUMBERED_HEADER_FOOTER_RE.test(entry)) seedPaths.add(entry); + } + + for (const partPath of seedPaths) { + if (!zip.hasFile(partPath)) continue; + let xml: string | null; + try { + xml = await zip.readTextOrNull(partPath); + } catch { + continue; + } + if (!xml) continue; + try { + docs.push(parseXml(xml)); + } catch { + // Malformed optional side part — skip so an unrelated parse failure + // does not block every tracked edit on the session. + } } return docs; } +/** + * Single-flight guard around the first revision-id seed scan per session. + * Concurrent first callers await the same in-flight scan and assign the same + * result, preventing a slower scan from clobbering a counter that a faster + * caller has already advanced via emitted revisions. + */ +const revisionIdSeedPromises = new WeakMap>(); + export async function getRevisionContextForSession(session: DocxSession): Promise { if (!session.aiAuthor) return undefined; if (!session.revisionIdState) { - session.revisionIdState = inferStartingRevisionIdState( - session.doc.getDocumentXmlClone(), - ...(await getSidePartRevisionSeedDocs(session)), - ); + let pending = revisionIdSeedPromises.get(session); + if (!pending) { + pending = (async () => { + const sideDocs = await getSidePartRevisionSeedDocs(session.originalBuffer); + return inferStartingRevisionIdState(session.doc.getDocumentXmlClone(), ...sideDocs); + })(); + revisionIdSeedPromises.set(session, pending); + } + try { + const seeded = await pending; + if (!session.revisionIdState) { + session.revisionIdState = seeded; + } + } finally { + revisionIdSeedPromises.delete(session); + } } return createRevisionContext({