From 68575ef02a8da2eb8ec180ad415dcea3f0664d8c Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 15 Mar 2026 20:12:32 +0000 Subject: [PATCH 1/8] feat: add incremental annotation overlay API (Issue #106) Decouple HTML conversion from annotation projection to avoid full WASM re-conversion when annotations change. New API enables: - ProjectAnnotationsOntoHtml: overlay annotations on cached HTML - AddAnnotationToHtml: add single annotation without re-conversion - RemoveAnnotationFromHtml: remove annotation by ID, preserving text - GenerateVisibilityCss: CSS-based label toggling without re-rendering - GenerateAnnotationCssString: independent CSS generation All methods available across .NET, WASM (JSExport), and npm/TS layers. https://claude.ai/code/session_01EQQ8N9xQoSSogqhsXWn3sF --- CHANGELOG.md | 10 + Docxodus.Tests/ExternalAnnotationTests.cs | 186 +++++++++++++++++ Docxodus/ExternalAnnotationProjector.cs | 234 ++++++++++++++++++++- npm/src/index.ts | 233 +++++++++++++++++++++ npm/src/types.ts | 28 +++ wasm/DocxodusWasm/DocumentConverter.cs | 238 ++++++++++++++++++++++ wasm/DocxodusWasm/JsonContext.cs | 14 ++ 7 files changed, 940 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1ccd5af..6496843 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,16 @@ All notable changes to this project will be documented in this file. 
## [Unreleased] - .NET 8 / Open XML SDK 3.x Migration +### Added +- **Incremental annotation overlay API (Issue #106)** - Decouple HTML conversion from annotation projection to avoid full WASM re-conversion + - `ProjectAnnotationsOntoHtml()` - Project a full annotation set onto already-converted HTML + - `AddAnnotationToHtml()` - Add a single annotation to existing HTML without re-converting the document + - `RemoveAnnotationFromHtml()` - Remove a single annotation by ID, unwrapping spans back to plain text + - `GenerateVisibilityCss()` - Generate CSS to hide/show annotations by label ID for instant toggling + - `GenerateAnnotationCssString()` - Generate annotation CSS separately for independent management + - All methods available in .NET, WASM (JSExport), and npm TypeScript wrapper + - CSS-based label filtering enables responsive toggle without any re-rendering + ### Fixed - **Move markup Word compatibility (Issue #96)** - Documents with move operations no longer cause Word "unreadable content" warnings - Added `SimplifyMoveMarkup` setting to convert native move markup (`w:moveFrom`/`w:moveTo`) to simple `w:del`/`w:ins` diff --git a/Docxodus.Tests/ExternalAnnotationTests.cs b/Docxodus.Tests/ExternalAnnotationTests.cs index b53a492..325f350 100644 --- a/Docxodus.Tests/ExternalAnnotationTests.cs +++ b/Docxodus.Tests/ExternalAnnotationTests.cs @@ -555,6 +555,192 @@ public void EA060_Integration_RealDocument_CreatesAndValidatesSet() } #endregion + + #region Incremental Annotation Overlay Tests (Issue #106) + + [Fact] + public void EA020_ProjectAnnotationsOntoHtml_AddsAnnotationSpans() + { + // Arrange + var doc = CreateSimpleTestDocument("Hello, world! 
This is a test document."); + var set = ExternalAnnotationManager.CreateAnnotationSet(doc, "test"); + + set.TextLabels["GREETING"] = new AnnotationLabel + { + Id = "GREETING", + Text = "Greeting", + Color = "#FFEB3B" + }; + + var annotation = ExternalAnnotationManager.CreateAnnotation( + "ann-001", "GREETING", set.Content, 0, 5); + Assert.NotNull(annotation); + set.LabelledText.Add(annotation); + + // Convert HTML once (without annotations) + var baseHtml = WmlToHtmlConverter.ConvertToHtml(doc, new WmlToHtmlConverterSettings + { + PageTitle = "Test" + }).ToString(); + + // Act - project annotations onto cached HTML + var annotatedHtml = ExternalAnnotationProjector.ProjectAnnotationsOntoHtml( + baseHtml, set); + + // Assert + Assert.Contains("data-annotation-id=\"ann-001\"", annotatedHtml); + Assert.Contains("ext-annot-highlight", annotatedHtml); + Assert.Contains("--annot-color: #FFEB3B", annotatedHtml); + } + + [Fact] + public void EA021_AddAnnotationToHtml_AddsSingleAnnotation() + { + // Arrange + var doc = CreateSimpleTestDocument("Hello, world! 
This is a test document."); + var baseHtml = WmlToHtmlConverter.ConvertToHtml(doc, new WmlToHtmlConverterSettings + { + PageTitle = "Test" + }).ToString(); + + var set = ExternalAnnotationManager.CreateAnnotationSet(doc, "test"); + var annotation = ExternalAnnotationManager.CreateAnnotation( + "ann-single", "CLAUSE", set.Content, 0, 5); + Assert.NotNull(annotation); + + var label = new AnnotationLabel + { + Id = "CLAUSE", + Text = "Clause", + Color = "#FF5722" + }; + + // Act + var result = ExternalAnnotationProjector.AddAnnotationToHtml( + baseHtml, annotation, label); + + // Assert + Assert.Contains("data-annotation-id=\"ann-single\"", result); + Assert.Contains("--annot-color: #FF5722", result); + } + + [Fact] + public void EA022_RemoveAnnotationFromHtml_RemovesAnnotationSpans() + { + // Arrange - first project an annotation + var doc = CreateSimpleTestDocument("Hello, world!"); + var set = ExternalAnnotationManager.CreateAnnotationSet(doc, "test"); + + set.TextLabels["GREETING"] = new AnnotationLabel + { + Id = "GREETING", + Text = "Greeting", + Color = "#FFEB3B" + }; + + var annotation = ExternalAnnotationManager.CreateAnnotation( + "ann-remove", "GREETING", set.Content, 0, 5); + Assert.NotNull(annotation); + set.LabelledText.Add(annotation); + + var baseHtml = WmlToHtmlConverter.ConvertToHtml(doc, new WmlToHtmlConverterSettings + { + PageTitle = "Test" + }).ToString(); + + var annotatedHtml = ExternalAnnotationProjector.ProjectAnnotationsOntoHtml( + baseHtml, set); + Assert.Contains("data-annotation-id=\"ann-remove\"", annotatedHtml); + + // Act + var result = ExternalAnnotationProjector.RemoveAnnotationFromHtml( + annotatedHtml, "ann-remove"); + + // Assert - annotation spans should be removed + Assert.DoesNotContain("data-annotation-id=\"ann-remove\"", result); + // But the text should still be there + Assert.Contains("Hello", result); + } + + [Fact] + public void EA023_GenerateVisibilityCss_HidesSpecifiedLabels() + { + // Act + var css = 
ExternalAnnotationProjector.GenerateVisibilityCss( + new[] { "DRAFT", "INTERNAL" }); + + // Assert + Assert.Contains("data-label-id=\"DRAFT\"", css); + Assert.Contains("data-label-id=\"INTERNAL\"", css); + Assert.Contains("background-color: transparent", css); + Assert.Contains("display: none", css); + } + + [Fact] + public void EA024_GenerateAnnotationCssString_GeneratesValidCss() + { + // Arrange + var labels = new Dictionary + { + ["CLAUSE"] = new AnnotationLabel + { + Id = "CLAUSE", + Text = "Clause", + Color = "#FF5722" + }, + ["TERM"] = new AnnotationLabel + { + Id = "TERM", + Text = "Term", + Color = "#2196F3" + } + }; + + // Act + var css = ExternalAnnotationProjector.GenerateAnnotationCssString(labels); + + // Assert + Assert.Contains("ext-annot-highlight", css); + Assert.Contains("ext-annot-label-CLAUSE", css); + Assert.Contains("#FF5722", css); + Assert.Contains("ext-annot-label-TERM", css); + Assert.Contains("#2196F3", css); + } + + [Fact] + public void EA025_ProjectAnnotationsOntoHtml_ThenRemove_PreservesText() + { + // Arrange + var doc = CreateSimpleTestDocument("Alpha Beta Gamma Delta"); + var set = ExternalAnnotationManager.CreateAnnotationSet(doc, "test"); + + set.TextLabels["LABEL_A"] = new AnnotationLabel { Id = "LABEL_A", Text = "A", Color = "#FF0000" }; + set.TextLabels["LABEL_B"] = new AnnotationLabel { Id = "LABEL_B", Text = "B", Color = "#00FF00" }; + + var ann1 = ExternalAnnotationManager.CreateAnnotation("ann-a", "LABEL_A", set.Content, 0, 5); + var ann2 = ExternalAnnotationManager.CreateAnnotation("ann-b", "LABEL_B", set.Content, 6, 10); + Assert.NotNull(ann1); + Assert.NotNull(ann2); + set.LabelledText.Add(ann1); + set.LabelledText.Add(ann2); + + var baseHtml = WmlToHtmlConverter.ConvertToHtml(doc, new WmlToHtmlConverterSettings + { + PageTitle = "Test" + }).ToString(); + + // Act - project both, then remove one + var annotatedHtml = ExternalAnnotationProjector.ProjectAnnotationsOntoHtml(baseHtml, set); + var afterRemove = 
ExternalAnnotationProjector.RemoveAnnotationFromHtml(annotatedHtml, "ann-a"); + + // Assert - ann-a removed, ann-b still present, all text preserved + Assert.DoesNotContain("data-annotation-id=\"ann-a\"", afterRemove); + Assert.Contains("data-annotation-id=\"ann-b\"", afterRemove); + Assert.Contains("Alpha", afterRemove); + Assert.Contains("Beta", afterRemove); + } + + #endregion } } diff --git a/Docxodus/ExternalAnnotationProjector.cs b/Docxodus/ExternalAnnotationProjector.cs index ddab613..bf9b684 100644 --- a/Docxodus/ExternalAnnotationProjector.cs +++ b/Docxodus/ExternalAnnotationProjector.cs @@ -368,10 +368,228 @@ private static XElement CreateAnnotationWrapper( #endregion - #region CSS Generation + #region Incremental Annotation API - private static void AddAnnotationCss( + /// + /// Project annotations onto an HTML string (already converted from DOCX). + /// This avoids re-converting the DOCX when only annotations change. + /// + /// HTML string (previously converted via WmlToHtmlConverter). + /// The external annotation set to project. + /// Projection settings. + /// HTML string with annotations projected. + public static string ProjectAnnotationsOntoHtml( + string html, + ExternalAnnotationSet annotationSet, + ExternalAnnotationProjectionSettings? settings = null) + { + if (string.IsNullOrEmpty(html)) throw new ArgumentNullException(nameof(html)); + if (annotationSet == null) throw new ArgumentNullException(nameof(annotationSet)); + settings ??= new ExternalAnnotationProjectionSettings(); + + var htmlDoc = XElement.Parse(html); + var result = ProjectAnnotations(htmlDoc, annotationSet, settings); + return result.ToString(); + } + + /// + /// Add a single annotation to existing HTML without re-converting the document. + /// The HTML should already be converted (with or without other annotations). + /// + /// HTML string. + /// The annotation to add. + /// Label definition for the annotation. + /// Projection settings. 
+ /// HTML string with the annotation added. + public static string AddAnnotationToHtml( + string html, + OpenContractsAnnotation annotation, + AnnotationLabel? label, + ExternalAnnotationProjectionSettings? settings = null) + { + if (string.IsNullOrEmpty(html)) throw new ArgumentNullException(nameof(html)); + if (annotation == null) throw new ArgumentNullException(nameof(annotation)); + settings ??= new ExternalAnnotationProjectionSettings(); + + var htmlDoc = XElement.Parse(html); + + // Build text map and find annotation location + var textMap = BuildTextMap(htmlDoc); + var htmlText = GetHtmlText(textMap); + var usedOffsets = new HashSet(); + + if (annotation.AnnotationJson is TextSpan span) + { + var searchText = span.Text ?? annotation.RawText; + if (!string.IsNullOrEmpty(searchText)) + { + var htmlLocation = FindTextInHtml(htmlText, searchText, usedOffsets); + if (htmlLocation != null) + { + var htmlSpan = new TextSpan + { + Id = span.Id, + Start = htmlLocation.Value.start, + End = htmlLocation.Value.end, + Text = searchText + }; + + textMap = BuildTextMap(htmlDoc); + ProjectSingleAnnotation(htmlDoc, textMap, annotation, htmlSpan, label, settings); + } + } + } + + // Add per-annotation CSS (label color class) + if (label != null) + { + AddSingleAnnotationCss(htmlDoc, annotation, label, settings); + } + + return htmlDoc.ToString(); + } + + /// + /// Remove a single annotation from HTML by annotation ID. + /// Unwraps annotation spans back to plain text. + /// + /// HTML string with annotations. + /// ID of the annotation to remove. + /// CSS class prefix used for annotations (default: "ext-annot-"). + /// HTML string with the annotation removed. 
+ public static string RemoveAnnotationFromHtml( + string html, + string annotationId, + string cssClassPrefix = "ext-annot-") + { + if (string.IsNullOrEmpty(html)) throw new ArgumentNullException(nameof(html)); + if (string.IsNullOrEmpty(annotationId)) throw new ArgumentNullException(nameof(annotationId)); + + var htmlDoc = XElement.Parse(html); + + // Find all spans with data-annotation-id matching + var annotationSpans = htmlDoc.Descendants("span") + .Where(e => (string?)e.Attribute("data-annotation-id") == annotationId) + .ToList(); + + foreach (var span in annotationSpans) + { + // Remove label child spans + var labelSpans = span.Elements("span") + .Where(e => + { + var cls = (string?)e.Attribute("class") ?? ""; + return cls.Contains($"{cssClassPrefix}label"); + }) + .ToList(); + + foreach (var labelSpan in labelSpans) + { + labelSpan.Remove(); + } + + // Replace the annotation span with its remaining content (unwrap) + var parent = span.Parent; + if (parent != null) + { + var nodes = span.Nodes().ToList(); + foreach (var node in nodes) + { + span.AddBeforeSelf(node); + } + span.Remove(); + } + } + + return htmlDoc.ToString(); + } + + /// + /// Generate CSS to hide annotations with specific label IDs. + /// This enables CSS-based label filtering without re-rendering. + /// + /// Label IDs to hide. + /// CSS class prefix (default: "ext-annot-"). + /// CSS string that hides the specified labels. 
+ public static string GenerateVisibilityCss( + IEnumerable hiddenLabelIds, + string cssClassPrefix = "ext-annot-") + { + if (hiddenLabelIds == null) throw new ArgumentNullException(nameof(hiddenLabelIds)); + + var css = new StringBuilder(); + css.AppendLine("/* Annotation Visibility Overrides */"); + + foreach (var labelId in hiddenLabelIds) + { + var safeId = labelId.Replace(" ", "-").Replace(".", "-"); + // Hide the highlight styling but keep the text visible + css.AppendLine($".{cssClassPrefix}highlight[data-label-id=\"{safeId}\"] {{"); + css.AppendLine(" background-color: transparent !important;"); + css.AppendLine(" border-bottom: none !important;"); + css.AppendLine("}"); + // Hide the label text + css.AppendLine($".{cssClassPrefix}highlight[data-label-id=\"{safeId}\"] .{cssClassPrefix}label {{"); + css.AppendLine(" display: none !important;"); + css.AppendLine("}"); + } + + return css.ToString(); + } + + /// + /// Generate annotation CSS for a set of labels. + /// Useful when you need the CSS separately from the HTML (e.g., for incremental updates). + /// + /// Label definitions. + /// Projection settings. + /// CSS string for the given labels and settings. + public static string GenerateAnnotationCssString( + Dictionary labels, + ExternalAnnotationProjectionSettings? settings = null) + { + if (labels == null) throw new ArgumentNullException(nameof(labels)); + settings ??= new ExternalAnnotationProjectionSettings(); + return BuildAnnotationCssString(labels, settings); + } + + /// + /// Add CSS for a single annotation to existing HTML. + /// Used by AddAnnotationToHtml to inject per-label color classes. + /// + private static void AddSingleAnnotationCss( XElement html, + OpenContractsAnnotation annotation, + AnnotationLabel label, + ExternalAnnotationProjectionSettings settings) + { + var prefix = settings.CssClassPrefix; + var safeId = (annotation.AnnotationLabel ?? 
"").Replace(" ", "-").Replace(".", "-"); + + var css = new StringBuilder(); + css.AppendLine(); + css.AppendLine($"/* Annotation label: {safeId} */"); + css.AppendLine($".{prefix}label-{safeId} {{"); + css.AppendLine($" --annot-color: {label.Color};"); + css.AppendLine("}"); + + var head = html.Descendants() + .FirstOrDefault(e => e.Name.LocalName.Equals("head", StringComparison.OrdinalIgnoreCase)); + + if (head != null) + { + var style = new XElement("style", + new XAttribute("type", "text/css"), + new XText(css.ToString())); + head.Add(style); + } + } + + #endregion + + #region CSS Generation + + private static string BuildAnnotationCssString( Dictionary labels, ExternalAnnotationProjectionSettings settings) { @@ -421,6 +639,16 @@ private static void AddAnnotationCss( css.AppendLine("}"); } + return css.ToString(); + } + + private static void AddAnnotationCss( + XElement html, + Dictionary labels, + ExternalAnnotationProjectionSettings settings) + { + var css = BuildAnnotationCssString(labels, settings); + // Find or create head element var head = html.Descendants() .FirstOrDefault(e => e.Name.LocalName.Equals("head", StringComparison.OrdinalIgnoreCase)); @@ -429,7 +657,7 @@ private static void AddAnnotationCss( { var style = new XElement("style", new XAttribute("type", "text/css"), - new XText(css.ToString())); + new XText(css)); head.Add(style); } } diff --git a/npm/src/index.ts b/npm/src/index.ts index 30e8ac0..13a6b2a 100644 --- a/npm/src/index.ts +++ b/npm/src/index.ts @@ -1921,3 +1921,236 @@ function convertExternalAnnotationSet(parsed: any): ExternalAnnotationSet { }; } +// ============================================================================ +// Incremental Annotation Overlay API (Issue #106) +// ============================================================================ + +/** + * Project external annotations onto already-converted HTML. + * This avoids full DOCX re-conversion when only annotations change. + * + * Workflow: + * 1. 
Convert DOCX to HTML once using `convertDocxToHtml()`
 * 2. Use this function to overlay annotations on the cached HTML
 * 3. When annotations change, call this again with the same base HTML
 *
 * @param html - HTML string (previously converted via convertDocxToHtml)
 * @param annotationSet - The external annotation set to project
 * @param projectionOptions - Projection settings (CSS prefix, label mode, etc.).
 *   When omitted, the prefix defaults to "ext-annot-" and the label mode to
 *   `AnnotationLabelMode.Above`.
 * @returns HTML string with annotations projected
 * @throws Error if projection fails, or if the converter's response does not
 *   contain an HTML string
 *
 * @example
 * ```typescript
 * // Step 1: Convert once
 * const baseHtml = await convertDocxToHtml(docxFile);
 *
 * // Step 2: Project annotations (fast, no DOCX re-conversion)
 * const annotatedHtml = await projectAnnotationsOntoHtml(baseHtml, annotationSet);
 *
 * // Step 3: When annotations change, project again on the same base HTML
 * annotationSet.labelledText.push(newAnnotation);
 * const updatedHtml = await projectAnnotationsOntoHtml(baseHtml, annotationSet);
 * ```
 */
export async function projectAnnotationsOntoHtml(
  html: string,
  annotationSet: ExternalAnnotationSet,
  projectionOptions?: ExternalAnnotationProjectionSettings
): Promise<string> {
  const exports = ensureInitialized();

  await yieldToMain();

  // The converter takes the annotation set as a JSON string.
  const annotationSetJson = JSON.stringify(annotationSet);
  const result = exports.DocumentConverter.ProjectAnnotationsOntoHtml(
    html,
    annotationSetJson,
    projectionOptions?.cssClassPrefix ?? "ext-annot-",
    projectionOptions?.labelMode ?? AnnotationLabelMode.Above
  );

  if (isErrorResponse(result)) {
    const error = parseError(result);
    throw new Error(`Failed to project annotations: ${error.error}`);
  }

  // Treat the parsed payload as untrusted: read the HTML under either casing
  // (`Html` or `html`) and refuse to return anything that is not a string.
  const parsed: unknown = JSON.parse(result);
  const payload: Record<string, unknown> =
    typeof parsed === "object" && parsed !== null
      ? (parsed as Record<string, unknown>)
      : {};
  const projectedHtml = payload["Html"] ?? payload["html"];
  if (typeof projectedHtml !== "string") {
    throw new Error(
      "Failed to project annotations: response did not contain an HTML string"
    );
  }
  return projectedHtml;
}

/**
 * Add a single annotation to existing HTML without re-converting the document.
 * This is the fastest way to add one annotation to already-rendered HTML.
+ * + * @param html - HTML string (with or without existing annotations) + * @param annotation - The annotation to add + * @param label - Label definition for the annotation (optional, for color/text) + * @param projectionOptions - Projection settings + * @returns HTML string with the annotation added + * @throws Error if operation fails + * + * @example + * ```typescript + * const annotation = createAnnotation("ann-new", "CLAUSE", set.content, 100, 150); + * const label = { id: "CLAUSE", text: "Clause", color: "#FF5722" }; + * const updatedHtml = await addAnnotationToHtml(currentHtml, annotation, label); + * ``` + */ +export async function addAnnotationToHtml( + html: string, + annotation: OpenContractsAnnotation, + label?: AnnotationLabel, + projectionOptions?: ExternalAnnotationProjectionSettings +): Promise { + const exports = ensureInitialized(); + + await yieldToMain(); + + const annotationJson = JSON.stringify(annotation); + const labelJson = label ? JSON.stringify(label) : ""; + const result = exports.DocumentConverter.AddAnnotationToHtml( + html, + annotationJson, + labelJson, + projectionOptions?.cssClassPrefix ?? "ext-annot-", + projectionOptions?.labelMode ?? AnnotationLabelMode.Above + ); + + if (isErrorResponse(result)) { + const error = parseError(result); + throw new Error(`Failed to add annotation to HTML: ${error.error}`); + } + + const parsed = JSON.parse(result); + return parsed.Html ?? parsed.html; +} + +/** + * Remove a single annotation from HTML by annotation ID. + * Unwraps annotation spans back to plain text. 
+ * + * @param html - HTML string with annotations + * @param annotationId - ID of the annotation to remove + * @param cssClassPrefix - CSS class prefix used for annotations (default: "ext-annot-") + * @returns HTML string with the annotation removed + * @throws Error if operation fails + * + * @example + * ```typescript + * const updatedHtml = await removeAnnotationFromHtml(currentHtml, "ann-001"); + * ``` + */ +export async function removeAnnotationFromHtml( + html: string, + annotationId: string, + cssClassPrefix?: string +): Promise { + const exports = ensureInitialized(); + + await yieldToMain(); + + const result = exports.DocumentConverter.RemoveAnnotationFromHtml( + html, + annotationId, + cssClassPrefix ?? "ext-annot-" + ); + + if (isErrorResponse(result)) { + const error = parseError(result); + throw new Error(`Failed to remove annotation from HTML: ${error.error}`); + } + + const parsed = JSON.parse(result); + return parsed.Html ?? parsed.html; +} + +/** + * Generate CSS to hide annotations with specific label IDs. + * Enables CSS-based label filtering without re-rendering HTML. + * + * Apply the returned CSS to your document (e.g., via a `