diff --git a/packages/docx-mcp/src/session/manager.test.ts b/packages/docx-mcp/src/session/manager.test.ts index e5c6726..9525283 100644 --- a/packages/docx-mcp/src/session/manager.test.ts +++ b/packages/docx-mcp/src/session/manager.test.ts @@ -117,11 +117,162 @@ describe('revision context helpers', () => { await fs.writeFile(filePath, new Uint8Array(buf)); const session = await mgr.createSession(buf, 'tracked.docx', filePath); - const ctx = getRevisionContextForSession(session); + const ctx = await getRevisionContextForSession(session); expect(ctx).toBeDefined(); expect(session.revisionIdState?.nextId).toBe(43); }); + + test('initializes revision ids above pre-existing side-part w:id values', async () => { + const mgr = new SessionManager({ defaultAiAuthor: 'SafeDocX' }); + const dir = await fs.mkdtemp(path.join(os.tmpdir(), 'mgr-revision-sidepart-test-')); + tmpDirs.push(dir); + const filePath = path.join(dir, 'tracked-sidepart.docx'); + const documentXml = + `` + + `` + + `` + + `Prior` + + ``; + const commentsXml = + `` + + `` + + `` + + `Side part` + + `` + + ``; + const buf = await makeDocxWithDocumentXml(documentXml, { 'word/comments.xml': commentsXml }); + await fs.writeFile(filePath, new Uint8Array(buf)); + + const session = await mgr.createSession(buf, 'tracked-sidepart.docx', filePath); + const ctx = await getRevisionContextForSession(session); + + expect(ctx).toBeDefined(); + expect(session.revisionIdState?.nextId).toBe(501); + }); + + test('ignores non-revision w:id attributes (e.g., ) in side parts', async () => { + // Comment IDs and revision IDs share an attribute name but occupy + // separate ID spaces — only revision-bearing elements should seed. + const mgr = new SessionManager({ defaultAiAuthor: 'SafeDocX' }); + const dir = await fs.mkdtemp(path.join(os.tmpdir(), 'mgr-revision-nonrevid-test-')); + tmpDirs.push(dir); + const filePath = path.join(dir, 'nonrev.docx'); + const documentXml = + `` + + `` + + `Plain`; + const commentsXml = + `` + + `` + + `` + + `Comment body` + + ``; + const buf = await makeDocxWithDocumentXml(documentXml, { 'word/comments.xml': commentsXml }); + await fs.writeFile(filePath, new Uint8Array(buf)); + + const session = await mgr.createSession(buf, 'nonrev.docx', filePath); + const ctx = await getRevisionContextForSession(session); + + expect(ctx).toBeDefined(); + expect(session.revisionIdState?.nextId).toBe(1); + }); + + test('initializes revision ids above pre-existing header w:id values', async () => { + const mgr = new SessionManager({ defaultAiAuthor: 'SafeDocX' }); + const dir = await fs.mkdtemp(path.join(os.tmpdir(), 'mgr-revision-header-test-')); + tmpDirs.push(dir); + const filePath = path.join(dir, 'tracked-header.docx'); + const documentXml = + `` + + `` + + `Body`; + const headerXml = + `` + + `` + + `Header` + + ``; + const buf = await makeDocxWithDocumentXml(documentXml, { 'word/header1.xml': headerXml }); + await fs.writeFile(filePath, new Uint8Array(buf)); + + const session = await mgr.createSession(buf, 'tracked-header.docx', filePath); + const ctx = await getRevisionContextForSession(session); + + expect(ctx).toBeDefined(); + expect(session.revisionIdState?.nextId).toBe(901); + }); + + test('initializes revision ids above pre-existing footer w:id values', async () => { + const mgr = new SessionManager({ defaultAiAuthor: 'SafeDocX' }); + const dir = await fs.mkdtemp(path.join(os.tmpdir(), 'mgr-revision-footer-test-')); + tmpDirs.push(dir); + const filePath = path.join(dir, 'tracked-footer.docx'); + const documentXml = + `` + + `` + + `Body`; + const footerXml = + `` + + `` + + `Footer` + + ``; + const buf = await makeDocxWithDocumentXml(documentXml, { 'word/footer2.xml': footerXml }); + await fs.writeFile(filePath, new Uint8Array(buf)); + + const session = await mgr.createSession(buf, 'tracked-footer.docx', filePath); + const ctx = await getRevisionContextForSession(session); + + expect(ctx).toBeDefined(); + expect(session.revisionIdState?.nextId).toBe(1235); + }); + + test('skips malformed optional side parts and continues seeding from document.xml', async () => { + const mgr = new SessionManager({ defaultAiAuthor: 'SafeDocX' }); + const dir = await fs.mkdtemp(path.join(os.tmpdir(), 'mgr-revision-malformed-test-')); + tmpDirs.push(dir); + const filePath = path.join(dir, 'malformed.docx'); + const documentXml = + `` + + `` + + `Body`; + // Truncated/unterminated comments.xml — parseXml must throw, but the + // session must remain editable rather than crashing the first tool call. + const commentsXml = + ` { + const mgr = new SessionManager({ defaultAiAuthor: 'SafeDocX' }); + const dir = await fs.mkdtemp(path.join(os.tmpdir(), 'mgr-revision-race-test-')); + tmpDirs.push(dir); + const filePath = path.join(dir, 'race.docx'); + const documentXml = + `` + + `` + + `Hi`; + const buf = await makeDocxWithDocumentXml(documentXml); + await fs.writeFile(filePath, new Uint8Array(buf)); + + const session = await mgr.createSession(buf, 'race.docx', filePath); + const [a, b, c] = await Promise.all([ + getRevisionContextForSession(session), + getRevisionContextForSession(session), + getRevisionContextForSession(session), + ]); + + expect(a).toBeDefined(); + expect(b).toBeDefined(); + expect(c).toBeDefined(); + expect(session.revisionIdState?.nextId).toBe(11); + }); }); // ── getSessionByPath ──────────────────────────────────────────────── diff --git a/packages/docx-mcp/src/session/manager.ts b/packages/docx-mcp/src/session/manager.ts index 5fb7353..f244d8b 100644 --- a/packages/docx-mcp/src/session/manager.ts +++ b/packages/docx-mcp/src/session/manager.ts @@ -4,8 +4,10 @@ import os from 'node:os'; import fs from 'node:fs/promises'; import { DocxDocument, + DocxZip, createRevisionContext, createRevisionIdState, + parseXml, type NormalizationResult, type ParagraphRevision, type ReconstructionMode, @@ -94,6 +96,55 @@ export type Session = DocxSession | GDocsSession; const WORDPROCESSING_ML_NS = 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'; +/** + * WordprocessingML elements that carry package-wide revision `w:id` attributes. + * Limiting the seed scan to these elements prevents non-revision IDs (e.g., + * ``, ``, ``) from + * spuriously inflating the starting revision-id counter. + */ +const REVISION_ID_ELEMENT_LOCAL_NAMES = new Set([ + 'ins', + 'del', + 'moveFrom', + 'moveTo', + 'moveFromRangeStart', + 'moveFromRangeEnd', + 'moveToRangeStart', + 'moveToRangeEnd', + 'pPrChange', + 'rPrChange', + 'tblPrChange', + 'trPrChange', + 'tcPrChange', + 'sectPrChange', + 'cellIns', + 'cellDel', + 'cellMerge', + 'customXmlInsRangeStart', + 'customXmlInsRangeEnd', + 'customXmlDelRangeStart', + 'customXmlDelRangeEnd', + 'customXmlMoveFromRangeStart', + 'customXmlMoveFromRangeEnd', + 'customXmlMoveToRangeStart', + 'customXmlMoveToRangeEnd', +]); + +/** + * Fixed package paths that can carry package-wide revision attributes. + * `commentsExtended.xml` and `people.xml` are intentionally excluded — they + * use `w15:paraId` / `w15:author` identifiers, not revision `w:id` values. + */ +const FIXED_REVISION_ID_SEED_PARTS = [ + 'word/comments.xml', + 'word/footnotes.xml', + 'word/endnotes.xml', + 'word/glossary/document.xml', +] as const; + +/** Matches numbered header/footer parts such as `word/header1.xml`. */ +const NUMBERED_HEADER_FOOTER_RE = /^word\/(header|footer)\d*\.xml$/; + function normalizeAiAuthor(author: string | null | undefined): string | null { if (typeof author !== 'string') return null; const trimmed = author.trim(); @@ -114,24 +165,22 @@ function getWordIdValue(element: Element): number | null { * Compute a starting `RevisionIdState` whose first allocated `w:id` is * higher than any existing revision id found in the supplied documents. * - * **Known limitation (see follow-up to #165):** this scans only the - * documents passed in. Today's caller passes `documentXml` only, so any - * pre-existing revision in side parts (`comments.xml`, `footnotesXml`, - * `endnotes.xml`) is invisible to the seed scan. For typical sessions - * with <100 AI edits this never collides in practice; for long-running - * sessions on heavily-reviewed third-party documents the AI counter - * could eventually cross a pre-existing side-part revision id. + * Only `w:id` attributes on revision-bearing elements + * (`REVISION_ID_ELEMENT_LOCAL_NAMES`) are considered. Non-revision IDs such + * as `` or `` share the attribute name but + * occupy a different ID space and must not influence the counter. * - * To close the gap, future work should pre-load side parts via - * `zip.readText('word/comments.xml')` etc. and pass them all here. - * That cascade requires `getRevisionContextForSession` to become async, - * which is deferred to follow-up. + * Callers should pass every available story/metadata part that can contain + * package-wide revision attributes, not just `document.xml`. */ export function inferStartingRevisionIdState(...docs: Document[]): RevisionIdState { let maxId = 0; for (const doc of docs) { for (const node of Array.from(doc.getElementsByTagName('*'))) { + const localName = node.localName ?? ''; + if (!REVISION_ID_ELEMENT_LOCAL_NAMES.has(localName)) continue; + if (node.namespaceURI && node.namespaceURI !== WORDPROCESSING_ML_NS) continue; const value = getWordIdValue(node); if (value !== null && value > maxId) { maxId = value; @@ -142,11 +191,69 @@ export function inferStartingRevisionIdState(...docs: Document[]): RevisionIdSta return createRevisionIdState(maxId + 1); } -export function getRevisionContextForSession(session: DocxSession): RevisionContext | undefined { +async function getSidePartRevisionSeedDocs(buffer: Buffer): Promise { + const docs: Document[] = []; + + let zip: DocxZip; + try { + zip = await DocxZip.load(buffer); + } catch { + return docs; + } + + const seedPaths = new Set(FIXED_REVISION_ID_SEED_PARTS); + for (const entry of zip.listFiles()) { + if (NUMBERED_HEADER_FOOTER_RE.test(entry)) seedPaths.add(entry); + } + + for (const partPath of seedPaths) { + if (!zip.hasFile(partPath)) continue; + let xml: string | null; + try { + xml = await zip.readTextOrNull(partPath); + } catch { + continue; + } + if (!xml) continue; + try { + docs.push(parseXml(xml)); + } catch { + // Malformed optional side part — skip so an unrelated parse failure + // does not block every tracked edit on the session. + } + } + + return docs; +} + +/** + * Single-flight guard around the first revision-id seed scan per session. + * Concurrent first callers await the same in-flight scan and assign the same + * result, preventing a slower scan from clobbering a counter that a faster + * caller has already advanced via emitted revisions. + */ +const revisionIdSeedPromises = new WeakMap>(); + +export async function getRevisionContextForSession(session: DocxSession): Promise { if (!session.aiAuthor) return undefined; if (!session.revisionIdState) { - session.revisionIdState = inferStartingRevisionIdState(session.doc.getDocumentXmlClone()); + let pending = revisionIdSeedPromises.get(session); + if (!pending) { + pending = (async () => { + const sideDocs = await getSidePartRevisionSeedDocs(session.originalBuffer); + return inferStartingRevisionIdState(session.doc.getDocumentXmlClone(), ...sideDocs); + })(); + revisionIdSeedPromises.set(session, pending); + } + try { + const seeded = await pending; + if (!session.revisionIdState) { + session.revisionIdState = seeded; + } + } finally { + revisionIdSeedPromises.delete(session); + } } return createRevisionContext({ diff --git a/packages/docx-mcp/src/tools/add_comment.ts b/packages/docx-mcp/src/tools/add_comment.ts index fdc2ec8..7a165f1 100644 --- a/packages/docx-mcp/src/tools/add_comment.ts +++ b/packages/docx-mcp/src/tools/add_comment.ts @@ -19,7 +19,7 @@ export async function addComment( const resolved = await resolveSessionForTool(manager, params, { toolName: 'add_comment' }); if (!resolved.ok) return resolved.response; const { session, metadata } = resolved; - const ctx = getRevisionContextForSession(session); + const ctx = await getRevisionContextForSession(session); try { // Reply mode: parent_comment_id provided diff --git a/packages/docx-mcp/src/tools/add_footnote.ts b/packages/docx-mcp/src/tools/add_footnote.ts index 62773a8..1fdfb66 100644 --- a/packages/docx-mcp/src/tools/add_footnote.ts +++ b/packages/docx-mcp/src/tools/add_footnote.ts @@ -15,7 +15,7 @@ export async function addFootnote( const resolved = await resolveSessionForTool(manager, params, { toolName: 'add_footnote' }); if (!resolved.ok) return resolved.response; const { session, metadata } = resolved; - const ctx = getRevisionContextForSession(session); + const ctx = await getRevisionContextForSession(session); if (!params.target_paragraph_id) { return err('MISSING_PARAMETER', 'target_paragraph_id is required.', 'Provide the _bk_* ID of the paragraph to anchor the footnote to.'); diff --git a/packages/docx-mcp/src/tools/apply_plan.ts b/packages/docx-mcp/src/tools/apply_plan.ts index 4ead732..84fff57 100644 --- a/packages/docx-mcp/src/tools/apply_plan.ts +++ b/packages/docx-mcp/src/tools/apply_plan.ts @@ -407,7 +407,7 @@ export async function applyPlan( const allWarnings = validations.flatMap((v) => v.warnings.map((w) => ({ step_id: v.step_id, warning: w }))); // Apply phase — execute steps sequentially - const ctx = getRevisionContextForSession(session); + const ctx = await getRevisionContextForSession(session); const result = await executeSteps(manager, manager.normalizePath(session.originalPath), steps, ctx); if (result.failed_step_id !== undefined) { diff --git a/packages/docx-mcp/src/tools/clear_formatting.ts b/packages/docx-mcp/src/tools/clear_formatting.ts index 6e3dd1a..05fbd19 100644 --- a/packages/docx-mcp/src/tools/clear_formatting.ts +++ b/packages/docx-mcp/src/tools/clear_formatting.ts @@ -71,7 +71,7 @@ export async function clearFormatting( const resolved = await resolveSessionForTool(manager, params, { toolName: 'clear_formatting' }); if (!resolved.ok) return resolved.response; const { session, metadata } = resolved; - const revisionCtx = ctx ?? getRevisionContextForSession(session); + const revisionCtx = ctx ?? await getRevisionContextForSession(session); const { nodes } = session.doc.buildDocumentView({ includeSemanticTags: false }); const pids = params.paragraph_ids ?? nodes.map((n) => n.id); diff --git a/packages/docx-mcp/src/tools/delete_comment.ts b/packages/docx-mcp/src/tools/delete_comment.ts index f68f521..ec31228 100644 --- a/packages/docx-mcp/src/tools/delete_comment.ts +++ b/packages/docx-mcp/src/tools/delete_comment.ts @@ -13,7 +13,7 @@ export async function deleteComment( const resolved = await resolveSessionForTool(manager, params, { toolName: 'delete_comment' }); if (!resolved.ok) return resolved.response; const { session, metadata } = resolved; - const ctx = getRevisionContextForSession(session); + const ctx = await getRevisionContextForSession(session); if (params.comment_id == null) { return err('MISSING_PARAMETER', 'comment_id is required.', 'Provide the comment ID to delete.'); diff --git a/packages/docx-mcp/src/tools/delete_footnote.ts b/packages/docx-mcp/src/tools/delete_footnote.ts index 0bab4a9..8e27c94 100644 --- a/packages/docx-mcp/src/tools/delete_footnote.ts +++ b/packages/docx-mcp/src/tools/delete_footnote.ts @@ -13,7 +13,7 @@ export async function deleteFootnote( const resolved = await resolveSessionForTool(manager, params, { toolName: 'delete_footnote' }); if (!resolved.ok) return resolved.response; const { session, metadata } = resolved; - const ctx = getRevisionContextForSession(session); + const ctx = await getRevisionContextForSession(session); if (params.note_id == null) { return err('MISSING_PARAMETER', 'note_id is required.', 'Provide the footnote ID to delete.'); diff --git a/packages/docx-mcp/src/tools/format_layout.ts b/packages/docx-mcp/src/tools/format_layout.ts index 4060fb3..bed5791 100644 --- a/packages/docx-mcp/src/tools/format_layout.ts +++ b/packages/docx-mcp/src/tools/format_layout.ts @@ -255,7 +255,7 @@ export async function formatLayout( const resolved = await resolveSessionForTool(manager, params, { toolName: 'format_layout' }); if (!resolved.ok) return resolved.response; const { session, metadata } = resolved; - const ctx = getRevisionContextForSession(session); + const ctx = await getRevisionContextForSession(session); const strict = params.strict ?? true; if (typeof strict !== 'boolean') { diff --git a/packages/docx-mcp/src/tools/insert_paragraph.ts b/packages/docx-mcp/src/tools/insert_paragraph.ts index 6044d51..901f967 100644 --- a/packages/docx-mcp/src/tools/insert_paragraph.ts +++ b/packages/docx-mcp/src/tools/insert_paragraph.ts @@ -127,7 +127,7 @@ export async function insertParagraph( const resolved = await resolveSessionForTool(manager, params, { toolName: 'insert_paragraph' }); if (!resolved.ok) return resolved.response; const { session, metadata } = resolved; - const revisionCtx = ctx ?? getRevisionContextForSession(session); + const revisionCtx = ctx ?? await getRevisionContextForSession(session); const positionUpper = (params.position ?? 'AFTER').toUpperCase(); if (positionUpper !== 'BEFORE' && positionUpper !== 'AFTER') { diff --git a/packages/docx-mcp/src/tools/replace_text.ts b/packages/docx-mcp/src/tools/replace_text.ts index cdf48f8..581d179 100644 --- a/packages/docx-mcp/src/tools/replace_text.ts +++ b/packages/docx-mcp/src/tools/replace_text.ts @@ -174,7 +174,7 @@ export async function replaceText( const resolved = await resolveSessionForTool(manager, params, { toolName: 'replace_text' }); if (!resolved.ok) return resolved.response; const { session, metadata } = resolved; - const revisionCtx = ctx ?? getRevisionContextForSession(session); + const revisionCtx = ctx ?? await getRevisionContextForSession(session); if (params.normalize_first) { session.doc.mergeRunsOnly(); diff --git a/packages/docx-mcp/src/tools/update_footnote.ts b/packages/docx-mcp/src/tools/update_footnote.ts index bdaa239..6ffef20 100644 --- a/packages/docx-mcp/src/tools/update_footnote.ts +++ b/packages/docx-mcp/src/tools/update_footnote.ts @@ -14,7 +14,7 @@ export async function updateFootnote( const resolved = await resolveSessionForTool(manager, params, { toolName: 'update_footnote' }); if (!resolved.ok) return resolved.response; const { session, metadata } = resolved; - const ctx = getRevisionContextForSession(session); + const ctx = await getRevisionContextForSession(session); if (params.note_id == null) { return err('MISSING_PARAMETER', 'note_id is required.', 'Provide the footnote ID to update.');