diff --git a/CHANGELOG.md b/CHANGELOG.md index 5183b10..ba9c5c7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,10 @@ All notable changes to this project will be documented in this file. - CSS-based label filtering enables responsive toggle without any re-rendering ### Fixed +- **Paginated rendering: text clipped at page bottom + inconsistent paragraph spacing (Issue #114)** + - Fixed `lineRule` default handling: when `w:lineRule` is absent but `w:line` is present, treat as "auto" per OOXML spec (ISO/IEC 29500). Previously the line value was ignored, causing accumulated line-height mismatches that clipped the last line on pages. + - Fixed `contextualSpacing` handling: now suppresses both `spacingAfter` (margin-bottom) AND `spacingBefore` (margin-top) for consecutive same-style paragraphs. Previously only `spacingAfter` was suppressed, leaving inconsistent inter-paragraph gaps. + - Fixed pagination engine bottom margin over-reservation: the last block's bottom margin is no longer counted against page space since it's invisible (clipped by `overflow: hidden`). This prevents premature page breaks where content would have been visible. - **Annotation projection fails on sanitized HTML (Issue #110)** - `ProjectAnnotationsOntoHtml`, `AddAnnotationToHtml`, and `RemoveAnnotationFromHtml` now handle HTML fragments with multiple root elements (e.g., DOMPurify-sanitized output) and HTML named entities (`&nbsp;`, `&ndash;`, etc.) 
- Root cause: `XElement.Parse()` requires valid XML with a single root element; sanitized HTML strips `<html>`/`<body>` wrappers leaving multiple roots - Fix: Auto-wraps multi-root HTML in a synthetic container for parsing, unwraps on serialization; replaces common HTML entities with numeric XML equivalents diff --git a/Docxodus/WmlToHtmlConverter.cs b/Docxodus/WmlToHtmlConverter.cs index 3eff9fa..634bd70 100644 --- a/Docxodus/WmlToHtmlConverter.cs +++ b/Docxodus/WmlToHtmlConverter.cs @@ -2363,7 +2363,8 @@ private static string GetUnsupportedContentTooltip(UnsupportedContentType conten private static object ConvertToHtmlTransform(WordprocessingDocument wordDoc, WmlToHtmlConverterSettings settings, XNode node, bool suppressTrailingWhiteSpace, - decimal currentMarginLeft) + decimal currentMarginLeft, + bool suppressLeadingWhiteSpace = false) { var element = node as XElement; if (element == null) return null; @@ -2489,7 +2490,7 @@ private static object ConvertToHtmlTransform(WordprocessingDocument wordDoc, // have a style separator). if (element.Name == W.p) { - return ProcessParagraph(wordDoc, settings, element, suppressTrailingWhiteSpace, currentMarginLeft); + return ProcessParagraph(wordDoc, settings, element, suppressTrailingWhiteSpace, currentMarginLeft, suppressLeadingWhiteSpace); } // Transform hyperlinks to the XHTML h:a element. @@ -4276,7 +4277,7 @@ private static object ProcessContentControl(WordprocessingDocument wordDoc, WmlT // the element (e.g., h:h2) created from the w:p element having the (first) // style separator (i.e., a w:specVanish element). private static object ProcessParagraph(WordprocessingDocument wordDoc, WmlToHtmlConverterSettings settings, - XElement element, bool suppressTrailingWhiteSpace, decimal currentMarginLeft) + XElement element, bool suppressTrailingWhiteSpace, decimal currentMarginLeft, bool suppressLeadingWhiteSpace = false) { // Ignore this paragraph if the previous paragraph has a style separator. 
// We have already transformed this one together with the previous one. @@ -4286,7 +4287,7 @@ private static object ProcessParagraph(WordprocessingDocument wordDoc, WmlToHtml var elementName = GetParagraphElementName(element, wordDoc); var isBidi = IsBidi(element); var paragraph = (XElement) ConvertParagraph(wordDoc, settings, element, elementName, - suppressTrailingWhiteSpace, currentMarginLeft, isBidi); + suppressTrailingWhiteSpace, currentMarginLeft, isBidi, suppressLeadingWhiteSpace); // The paragraph conversion might have created empty spans. // These can and should be removed because empty spans are @@ -4955,7 +4956,7 @@ private enum BorderType * - autoSpaceDE * - autoSpaceDN * - bidi - * - contextualSpacing + * - contextualSpacing (handled via GroupAndVerticallySpaceNumberedParagraphs) * - divId * - framePr * - keepLines @@ -4978,9 +4979,10 @@ private enum BorderType */ private static object ConvertParagraph(WordprocessingDocument wordDoc, WmlToHtmlConverterSettings settings, - XElement paragraph, XName elementName, bool suppressTrailingWhiteSpace, decimal currentMarginLeft, bool isBidi) + XElement paragraph, XName elementName, bool suppressTrailingWhiteSpace, decimal currentMarginLeft, bool isBidi, + bool suppressLeadingWhiteSpace = false) { - var style = DefineParagraphStyle(paragraph, elementName, suppressTrailingWhiteSpace, currentMarginLeft, isBidi); + var style = DefineParagraphStyle(paragraph, elementName, suppressTrailingWhiteSpace, currentMarginLeft, isBidi, suppressLeadingWhiteSpace); var rtl = isBidi ? new XAttribute("dir", "rtl") : new XAttribute("dir", "ltr"); var firstMark = isBidi ? 
new XEntity("#x200f") : null; @@ -5113,7 +5115,7 @@ private static List TransformElementsPrecedingTab(WordprocessingDocument } private static Dictionary DefineParagraphStyle(XElement paragraph, XName elementName, - bool suppressTrailingWhiteSpace, decimal currentMarginLeft, bool isBidi) + bool suppressTrailingWhiteSpace, decimal currentMarginLeft, bool isBidi, bool suppressLeadingWhiteSpace = false) { var style = new Dictionary(); @@ -5124,7 +5126,7 @@ private static Dictionary DefineParagraphStyle(XElement paragrap var pPr = paragraph.Element(W.pPr); if (pPr == null) return style; - CreateStyleFromSpacing(style, pPr.Element(W.spacing), elementName, suppressTrailingWhiteSpace); + CreateStyleFromSpacing(style, pPr.Element(W.spacing), elementName, suppressTrailingWhiteSpace, suppressLeadingWhiteSpace); CreateStyleFromInd(style, pPr.Element(W.ind), elementName, currentMarginLeft, isBidi); // todo need to handle @@ -5217,18 +5219,19 @@ private static void CreateStyleFromJc(Dictionary style, XElement } private static void CreateStyleFromSpacing(Dictionary style, XElement spacing, XName elementName, - bool suppressTrailingWhiteSpace) + bool suppressTrailingWhiteSpace, bool suppressLeadingWhiteSpace = false) { if (spacing == null) return; - var spacingBefore = (decimal?) spacing.Attribute(W.before); + var spacingBefore = suppressLeadingWhiteSpace ? 0 : (decimal?) spacing.Attribute(W.before); if (spacingBefore != null && elementName != Xhtml.span) style.AddIfMissing("margin-top", spacingBefore > 0m ? string.Format(NumberFormatInfo.InvariantInfo, "{0}pt", spacingBefore/20.0m) : "0"); - var lineRule = (string) spacing.Attribute(W.lineRule); + // Per OOXML spec (ISO/IEC 29500), when lineRule is absent the default is "auto" + var lineRule = (string) spacing.Attribute(W.lineRule) ?? (spacing.Attribute(W.line) != null ? 
"auto" : null); if (lineRule == "auto") { var line = (decimal) spacing.Attribute(W.line); @@ -7189,7 +7192,13 @@ private static IEnumerable GroupAndVerticallySpaceNumberedParagraphs(Wor if (g.Key == "") return g.Select(e => ConvertToHtmlTransform(wordDoc, settings, e, false, currentMarginLeft)); var last = g.Count() - 1; - return g.Select((e, i) => ConvertToHtmlTransform(wordDoc, settings, e, i != last, currentMarginLeft)); + // For contextualSpacing groups (sty: prefix), suppress both trailing whitespace + // for non-last paragraphs AND leading whitespace for non-first paragraphs. + // Word removes all inter-paragraph spacing for same-style contextualSpacing paragraphs. + var isContextualGroup = g.Key.StartsWith("sty:"); + return g.Select((e, i) => ConvertToHtmlTransform(wordDoc, settings, e, + i != last, currentMarginLeft, + suppressLeadingWhiteSpace: isContextualGroup && i != 0)); }); return (IEnumerable)newContent; } diff --git a/npm/src/pagination.ts b/npm/src/pagination.ts index 2234a5e..84382d8 100644 --- a/npm/src/pagination.ts +++ b/npm/src/pagination.ts @@ -1024,8 +1024,11 @@ export class PaginationEngine { // Margin collapsing: use the larger of the two adjacent margins effectiveMarginTop = Math.max(block.marginTopPt, prevMarginBottomPt) - prevMarginBottomPt; } - // Total height = top margin gap + content + bottom margin + footnote space - const blockSpace = effectiveMarginTop + block.heightPt + block.marginBottomPt + additionalFootnoteHeight; + // Visible height = top margin gap + content + footnote space + // Note: bottom margin is NOT included in the fit check because the last block's + // bottom margin extends beyond the content area and is clipped by overflow:hidden. + // It is still tracked in remainingHeight for correct margin collapsing with the next block. 
+ const blockSpace = effectiveMarginTop + block.heightPt + additionalFootnoteHeight; // Calculate needed height (including keepWithNext) let neededHeight = blockSpace; @@ -1033,7 +1036,7 @@ export class PaginationEngine { // For keepWithNext, include the next block with collapsed margins const collapsedMargin = Math.max(block.marginBottomPt, nextBlock.marginTopPt); neededHeight = effectiveMarginTop + block.heightPt + collapsedMargin + - nextBlock.heightPt + nextBlock.marginBottomPt + additionalFootnoteHeight; + nextBlock.heightPt + additionalFootnoteHeight; } // Effective remaining height (content area minus footnotes already on page) @@ -1055,9 +1058,9 @@ export class PaginationEngine { currentFootnoteIds.push(...newFootnoteIds); currentFootnoteHeight += additionalFootnoteHeight; } - } else if (block.heightPt + block.marginTopPt + block.marginBottomPt <= effectiveContentHeight) { + } else if (block.heightPt + block.marginTopPt <= effectiveContentHeight) { // Block doesn't fit with current allocation - try expanding footnote area - const blockSpaceWithoutFootnotes = effectiveMarginTop + block.heightPt + block.marginBottomPt; + const blockSpaceWithoutFootnotes = effectiveMarginTop + block.heightPt; // Check if block fits if we expand footnote area // We can expand footnotes up to maxFootnoteArea, leaving room for body content