Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
153 changes: 152 additions & 1 deletion packages/docx-mcp/src/session/manager.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -117,11 +117,162 @@ describe('revision context helpers', () => {
await fs.writeFile(filePath, new Uint8Array(buf));

const session = await mgr.createSession(buf, 'tracked.docx', filePath);
const ctx = getRevisionContextForSession(session);
const ctx = await getRevisionContextForSession(session);

expect(ctx).toBeDefined();
expect(session.revisionIdState?.nextId).toBe(43);
});

test('initializes revision ids above pre-existing side-part w:id values', async () => {
const mgr = new SessionManager({ defaultAiAuthor: 'SafeDocX' });
const dir = await fs.mkdtemp(path.join(os.tmpdir(), 'mgr-revision-sidepart-test-'));
tmpDirs.push(dir);
const filePath = path.join(dir, 'tracked-sidepart.docx');
const documentXml =
`<?xml version="1.0" encoding="UTF-8" standalone="yes"?>` +
`<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">` +
`<w:body>` +
`<w:p><w:ins w:id="42" w:author="Existing" w:date="2026-01-01T00:00:00Z"><w:r><w:t>Prior</w:t></w:r></w:ins></w:p>` +
`</w:body></w:document>`;
const commentsXml =
`<?xml version="1.0" encoding="UTF-8" standalone="yes"?>` +
`<w:comments xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">` +
`<w:comment w:id="0" w:author="Reviewer" w:date="2026-01-01T00:00:00Z">` +
`<w:p><w:ins w:id="500" w:author="Reviewer" w:date="2026-01-01T00:00:00Z"><w:r><w:t>Side part</w:t></w:r></w:ins></w:p>` +
`</w:comment>` +
`</w:comments>`;
const buf = await makeDocxWithDocumentXml(documentXml, { 'word/comments.xml': commentsXml });
await fs.writeFile(filePath, new Uint8Array(buf));

const session = await mgr.createSession(buf, 'tracked-sidepart.docx', filePath);
const ctx = await getRevisionContextForSession(session);

expect(ctx).toBeDefined();
expect(session.revisionIdState?.nextId).toBe(501);
});

test('ignores non-revision w:id attributes (e.g., <w:comment w:id>) in side parts', async () => {
// Comment IDs and revision IDs share an attribute name but occupy
// separate ID spaces — only revision-bearing elements should seed.
const mgr = new SessionManager({ defaultAiAuthor: 'SafeDocX' });
const dir = await fs.mkdtemp(path.join(os.tmpdir(), 'mgr-revision-nonrevid-test-'));
tmpDirs.push(dir);
const filePath = path.join(dir, 'nonrev.docx');
const documentXml =
`<?xml version="1.0" encoding="UTF-8" standalone="yes"?>` +
`<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">` +
`<w:body><w:p><w:r><w:t>Plain</w:t></w:r></w:p></w:body></w:document>`;
const commentsXml =
`<?xml version="1.0" encoding="UTF-8" standalone="yes"?>` +
`<w:comments xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">` +
`<w:comment w:id="500" w:author="Reviewer" w:date="2026-01-01T00:00:00Z">` +
`<w:p><w:r><w:t>Comment body</w:t></w:r></w:p>` +
`</w:comment></w:comments>`;
const buf = await makeDocxWithDocumentXml(documentXml, { 'word/comments.xml': commentsXml });
await fs.writeFile(filePath, new Uint8Array(buf));

const session = await mgr.createSession(buf, 'nonrev.docx', filePath);
const ctx = await getRevisionContextForSession(session);

expect(ctx).toBeDefined();
expect(session.revisionIdState?.nextId).toBe(1);
});

test('initializes revision ids above pre-existing header w:id values', async () => {
const mgr = new SessionManager({ defaultAiAuthor: 'SafeDocX' });
const dir = await fs.mkdtemp(path.join(os.tmpdir(), 'mgr-revision-header-test-'));
tmpDirs.push(dir);
const filePath = path.join(dir, 'tracked-header.docx');
const documentXml =
`<?xml version="1.0" encoding="UTF-8" standalone="yes"?>` +
`<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">` +
`<w:body><w:p><w:r><w:t>Body</w:t></w:r></w:p></w:body></w:document>`;
const headerXml =
`<?xml version="1.0" encoding="UTF-8" standalone="yes"?>` +
`<w:hdr xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">` +
`<w:p><w:ins w:id="900" w:author="Reviewer" w:date="2026-01-01T00:00:00Z"><w:r><w:t>Header</w:t></w:r></w:ins></w:p>` +
`</w:hdr>`;
const buf = await makeDocxWithDocumentXml(documentXml, { 'word/header1.xml': headerXml });
await fs.writeFile(filePath, new Uint8Array(buf));

const session = await mgr.createSession(buf, 'tracked-header.docx', filePath);
const ctx = await getRevisionContextForSession(session);

expect(ctx).toBeDefined();
expect(session.revisionIdState?.nextId).toBe(901);
});

test('initializes revision ids above pre-existing footer w:id values', async () => {
const mgr = new SessionManager({ defaultAiAuthor: 'SafeDocX' });
const dir = await fs.mkdtemp(path.join(os.tmpdir(), 'mgr-revision-footer-test-'));
tmpDirs.push(dir);
const filePath = path.join(dir, 'tracked-footer.docx');
const documentXml =
`<?xml version="1.0" encoding="UTF-8" standalone="yes"?>` +
`<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">` +
`<w:body><w:p><w:r><w:t>Body</w:t></w:r></w:p></w:body></w:document>`;
const footerXml =
`<?xml version="1.0" encoding="UTF-8" standalone="yes"?>` +
`<w:ftr xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">` +
`<w:p><w:ins w:id="1234" w:author="Reviewer" w:date="2026-01-01T00:00:00Z"><w:r><w:t>Footer</w:t></w:r></w:ins></w:p>` +
`</w:ftr>`;
const buf = await makeDocxWithDocumentXml(documentXml, { 'word/footer2.xml': footerXml });
await fs.writeFile(filePath, new Uint8Array(buf));

const session = await mgr.createSession(buf, 'tracked-footer.docx', filePath);
const ctx = await getRevisionContextForSession(session);

expect(ctx).toBeDefined();
expect(session.revisionIdState?.nextId).toBe(1235);
});

test('skips malformed optional side parts and continues seeding from document.xml', async () => {
const mgr = new SessionManager({ defaultAiAuthor: 'SafeDocX' });
const dir = await fs.mkdtemp(path.join(os.tmpdir(), 'mgr-revision-malformed-test-'));
tmpDirs.push(dir);
const filePath = path.join(dir, 'malformed.docx');
const documentXml =
`<?xml version="1.0" encoding="UTF-8" standalone="yes"?>` +
`<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">` +
`<w:body><w:p><w:ins w:id="77" w:author="Existing" w:date="2026-01-01T00:00:00Z"><w:r><w:t>Body</w:t></w:r></w:ins></w:p></w:body></w:document>`;
// Truncated/unterminated comments.xml — parseXml must throw, but the
// session must remain editable rather than crashing the first tool call.
const commentsXml =
`<w:comments xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"><w:comment`;
const buf = await makeDocxWithDocumentXml(documentXml, { 'word/comments.xml': commentsXml });
await fs.writeFile(filePath, new Uint8Array(buf));

const session = await mgr.createSession(buf, 'malformed.docx', filePath);
const ctx = await getRevisionContextForSession(session);

expect(ctx).toBeDefined();
expect(session.revisionIdState?.nextId).toBe(78);
});

test('concurrent first callers resolve to a single seeded RevisionIdState', async () => {
const mgr = new SessionManager({ defaultAiAuthor: 'SafeDocX' });
const dir = await fs.mkdtemp(path.join(os.tmpdir(), 'mgr-revision-race-test-'));
tmpDirs.push(dir);
const filePath = path.join(dir, 'race.docx');
const documentXml =
`<?xml version="1.0" encoding="UTF-8" standalone="yes"?>` +
`<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">` +
`<w:body><w:p><w:ins w:id="10" w:author="x" w:date="2026-01-01T00:00:00Z"><w:r><w:t>Hi</w:t></w:r></w:ins></w:p></w:body></w:document>`;
const buf = await makeDocxWithDocumentXml(documentXml);
await fs.writeFile(filePath, new Uint8Array(buf));

const session = await mgr.createSession(buf, 'race.docx', filePath);
const [a, b, c] = await Promise.all([
getRevisionContextForSession(session),
getRevisionContextForSession(session),
getRevisionContextForSession(session),
]);

expect(a).toBeDefined();
expect(b).toBeDefined();
expect(c).toBeDefined();
expect(session.revisionIdState?.nextId).toBe(11);
});
});

// ── getSessionByPath ────────────────────────────────────────────────
Expand Down
133 changes: 120 additions & 13 deletions packages/docx-mcp/src/session/manager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@ import os from 'node:os';
import fs from 'node:fs/promises';
import {
DocxDocument,
DocxZip,
createRevisionContext,
createRevisionIdState,
parseXml,
type NormalizationResult,
type ParagraphRevision,
type ReconstructionMode,
Expand Down Expand Up @@ -94,6 +96,55 @@ export type Session = DocxSession | GDocsSession;

const WORDPROCESSING_ML_NS = 'http://schemas.openxmlformats.org/wordprocessingml/2006/main';

/**
* WordprocessingML elements that carry package-wide revision `w:id` attributes.
* Limiting the seed scan to these elements prevents non-revision IDs (e.g.,
* `<w:comment w:id>`, `<w:footnote w:id>`, `<w:bookmarkStart w:id>`) from
* spuriously inflating the starting revision-id counter.
*/
const REVISION_ID_ELEMENT_LOCAL_NAMES = new Set<string>([
'ins',
'del',
'moveFrom',
'moveTo',
'moveFromRangeStart',
'moveFromRangeEnd',
'moveToRangeStart',
'moveToRangeEnd',
'pPrChange',
'rPrChange',
'tblPrChange',
'trPrChange',
'tcPrChange',
'sectPrChange',
'cellIns',
'cellDel',
'cellMerge',
'customXmlInsRangeStart',
'customXmlInsRangeEnd',
'customXmlDelRangeStart',
'customXmlDelRangeEnd',
'customXmlMoveFromRangeStart',
'customXmlMoveFromRangeEnd',
'customXmlMoveToRangeStart',
'customXmlMoveToRangeEnd',
]);

/**
* Fixed package paths that can carry package-wide revision attributes.
* `commentsExtended.xml` and `people.xml` are intentionally excluded — they
* use `w15:paraId` / `w15:author` identifiers, not revision `w:id` values.
*/
const FIXED_REVISION_ID_SEED_PARTS = [
'word/comments.xml',
'word/footnotes.xml',
'word/endnotes.xml',
'word/glossary/document.xml',
] as const;

/** Matches numbered header/footer parts such as `word/header1.xml`. */
const NUMBERED_HEADER_FOOTER_RE = /^word\/(header|footer)\d*\.xml$/;

function normalizeAiAuthor(author: string | null | undefined): string | null {
if (typeof author !== 'string') return null;
const trimmed = author.trim();
Expand All @@ -114,24 +165,22 @@ function getWordIdValue(element: Element): number | null {
* Compute a starting `RevisionIdState` whose first allocated `w:id` is
* higher than any existing revision id found in the supplied documents.
*
* **Known limitation (see follow-up to #165):** this scans only the
* documents passed in. Today's caller passes `documentXml` only, so any
* pre-existing revision in side parts (`comments.xml`, `footnotesXml`,
* `endnotes.xml`) is invisible to the seed scan. For typical sessions
* with <100 AI edits this never collides in practice; for long-running
* sessions on heavily-reviewed third-party documents the AI counter
* could eventually cross a pre-existing side-part revision id.
* Only `w:id` attributes on revision-bearing elements
* (`REVISION_ID_ELEMENT_LOCAL_NAMES`) are considered. Non-revision IDs such
* as `<w:comment w:id>` or `<w:footnote w:id>` share the attribute name but
* occupy a different ID space and must not influence the counter.
*
* To close the gap, future work should pre-load side parts via
* `zip.readText('word/comments.xml')` etc. and pass them all here.
* That cascade requires `getRevisionContextForSession` to become async,
* which is deferred to follow-up.
* Callers should pass every available story/metadata part that can contain
* package-wide revision attributes, not just `document.xml`.
*/
export function inferStartingRevisionIdState(...docs: Document[]): RevisionIdState {
let maxId = 0;

for (const doc of docs) {
for (const node of Array.from(doc.getElementsByTagName('*'))) {
const localName = node.localName ?? '';
if (!REVISION_ID_ELEMENT_LOCAL_NAMES.has(localName)) continue;
if (node.namespaceURI && node.namespaceURI !== WORDPROCESSING_ML_NS) continue;
const value = getWordIdValue(node);
if (value !== null && value > maxId) {
maxId = value;
Expand All @@ -142,11 +191,69 @@ export function inferStartingRevisionIdState(...docs: Document[]): RevisionIdSta
return createRevisionIdState(maxId + 1);
}

export function getRevisionContextForSession(session: DocxSession): RevisionContext | undefined {
async function getSidePartRevisionSeedDocs(buffer: Buffer): Promise<Document[]> {
const docs: Document[] = [];

let zip: DocxZip;
try {
zip = await DocxZip.load(buffer);
} catch {
return docs;
}

const seedPaths = new Set<string>(FIXED_REVISION_ID_SEED_PARTS);
for (const entry of zip.listFiles()) {
if (NUMBERED_HEADER_FOOTER_RE.test(entry)) seedPaths.add(entry);
}

for (const partPath of seedPaths) {
if (!zip.hasFile(partPath)) continue;
let xml: string | null;
try {
xml = await zip.readTextOrNull(partPath);
} catch {
continue;
}
if (!xml) continue;
try {
docs.push(parseXml(xml));
} catch {
// Malformed optional side part — skip so an unrelated parse failure
// does not block every tracked edit on the session.
}
}

return docs;
}

/**
* Single-flight guard around the first revision-id seed scan per session.
* Concurrent first callers await the same in-flight scan and assign the same
* result, preventing a slower scan from clobbering a counter that a faster
* caller has already advanced via emitted revisions.
*/
const revisionIdSeedPromises = new WeakMap<DocxSession, Promise<RevisionIdState>>();

export async function getRevisionContextForSession(session: DocxSession): Promise<RevisionContext | undefined> {
if (!session.aiAuthor) return undefined;

if (!session.revisionIdState) {
session.revisionIdState = inferStartingRevisionIdState(session.doc.getDocumentXmlClone());
let pending = revisionIdSeedPromises.get(session);
if (!pending) {
pending = (async () => {
const sideDocs = await getSidePartRevisionSeedDocs(session.originalBuffer);
return inferStartingRevisionIdState(session.doc.getDocumentXmlClone(), ...sideDocs);
})();
revisionIdSeedPromises.set(session, pending);
}
try {
const seeded = await pending;
if (!session.revisionIdState) {
session.revisionIdState = seeded;
}
} finally {
revisionIdSeedPromises.delete(session);
}
}

return createRevisionContext({
Expand Down
2 changes: 1 addition & 1 deletion packages/docx-mcp/src/tools/add_comment.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ export async function addComment(
const resolved = await resolveSessionForTool(manager, params, { toolName: 'add_comment' });
if (!resolved.ok) return resolved.response;
const { session, metadata } = resolved;
const ctx = getRevisionContextForSession(session);
const ctx = await getRevisionContextForSession(session);

try {
// Reply mode: parent_comment_id provided
Expand Down
2 changes: 1 addition & 1 deletion packages/docx-mcp/src/tools/add_footnote.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ export async function addFootnote(
const resolved = await resolveSessionForTool(manager, params, { toolName: 'add_footnote' });
if (!resolved.ok) return resolved.response;
const { session, metadata } = resolved;
const ctx = getRevisionContextForSession(session);
const ctx = await getRevisionContextForSession(session);

if (!params.target_paragraph_id) {
return err('MISSING_PARAMETER', 'target_paragraph_id is required.', 'Provide the _bk_* ID of the paragraph to anchor the footnote to.');
Expand Down
2 changes: 1 addition & 1 deletion packages/docx-mcp/src/tools/apply_plan.ts
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,7 @@ export async function applyPlan(
const allWarnings = validations.flatMap((v) => v.warnings.map((w) => ({ step_id: v.step_id, warning: w })));

// Apply phase — execute steps sequentially
const ctx = getRevisionContextForSession(session);
const ctx = await getRevisionContextForSession(session);
const result = await executeSteps(manager, manager.normalizePath(session.originalPath), steps, ctx);

if (result.failed_step_id !== undefined) {
Expand Down
2 changes: 1 addition & 1 deletion packages/docx-mcp/src/tools/clear_formatting.ts
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ export async function clearFormatting(
const resolved = await resolveSessionForTool(manager, params, { toolName: 'clear_formatting' });
if (!resolved.ok) return resolved.response;
const { session, metadata } = resolved;
const revisionCtx = ctx ?? getRevisionContextForSession(session);
const revisionCtx = ctx ?? await getRevisionContextForSession(session);

const { nodes } = session.doc.buildDocumentView({ includeSemanticTags: false });
const pids = params.paragraph_ids ?? nodes.map((n) => n.id);
Expand Down
2 changes: 1 addition & 1 deletion packages/docx-mcp/src/tools/delete_comment.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ export async function deleteComment(
const resolved = await resolveSessionForTool(manager, params, { toolName: 'delete_comment' });
if (!resolved.ok) return resolved.response;
const { session, metadata } = resolved;
const ctx = getRevisionContextForSession(session);
const ctx = await getRevisionContextForSession(session);

if (params.comment_id == null) {
return err('MISSING_PARAMETER', 'comment_id is required.', 'Provide the comment ID to delete.');
Expand Down
2 changes: 1 addition & 1 deletion packages/docx-mcp/src/tools/delete_footnote.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ export async function deleteFootnote(
const resolved = await resolveSessionForTool(manager, params, { toolName: 'delete_footnote' });
if (!resolved.ok) return resolved.response;
const { session, metadata } = resolved;
const ctx = getRevisionContextForSession(session);
const ctx = await getRevisionContextForSession(session);

if (params.note_id == null) {
return err('MISSING_PARAMETER', 'note_id is required.', 'Provide the footnote ID to delete.');
Expand Down
2 changes: 1 addition & 1 deletion packages/docx-mcp/src/tools/format_layout.ts
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ export async function formatLayout(
const resolved = await resolveSessionForTool(manager, params, { toolName: 'format_layout' });
if (!resolved.ok) return resolved.response;
const { session, metadata } = resolved;
const ctx = getRevisionContextForSession(session);
const ctx = await getRevisionContextForSession(session);

const strict = params.strict ?? true;
if (typeof strict !== 'boolean') {
Expand Down
Loading
Loading