Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ All notable changes to this project will be documented in this file.
- CSS-based label filtering enables responsive toggle without any re-rendering

### Fixed
- **Paginated rendering: text clipped at page bottom + inconsistent paragraph spacing (Issue #114)**
- Fixed `lineRule` default handling: when `w:lineRule` is absent but `w:line` is present, treat as "auto" per OOXML spec (ISO/IEC 29500). Previously the line value was ignored, causing accumulated line-height mismatches that clipped the last line on pages.
- Fixed `contextualSpacing` handling: now suppresses both `spacingAfter` (margin-bottom) AND `spacingBefore` (margin-top) for consecutive same-style paragraphs. Previously only `spacingAfter` was suppressed, leaving inconsistent inter-paragraph gaps.
- Fixed pagination engine bottom margin over-reservation: a block's bottom margin is no longer counted when checking whether the block fits in the remaining page space, because the trailing margin of the last block on a page extends past the content area and is clipped by `overflow: hidden`. This prevents premature page breaks for content that would have fit visibly on the page.
- **Annotation projection fails on sanitized HTML (Issue #110)** - `ProjectAnnotationsOntoHtml`, `AddAnnotationToHtml`, and `RemoveAnnotationFromHtml` now handle HTML fragments with multiple root elements (e.g., DOMPurify-sanitized output) and HTML named entities (`&nbsp;`, `&ndash;`, etc.)
- Root cause: `XElement.Parse()` requires valid XML with a single root element; sanitized HTML strips `<html>`/`<body>` wrappers leaving multiple roots
- Fix: Auto-wraps multi-root HTML in a synthetic container for parsing, unwraps on serialization; replaces common HTML entities with numeric XML equivalents
Expand Down
35 changes: 22 additions & 13 deletions Docxodus/WmlToHtmlConverter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2363,7 +2363,8 @@ private static string GetUnsupportedContentTooltip(UnsupportedContentType conten
private static object ConvertToHtmlTransform(WordprocessingDocument wordDoc,
WmlToHtmlConverterSettings settings, XNode node,
bool suppressTrailingWhiteSpace,
decimal currentMarginLeft)
decimal currentMarginLeft,
bool suppressLeadingWhiteSpace = false)
{
var element = node as XElement;
if (element == null) return null;
Expand Down Expand Up @@ -2489,7 +2490,7 @@ private static object ConvertToHtmlTransform(WordprocessingDocument wordDoc,
// have a style separator).
if (element.Name == W.p)
{
return ProcessParagraph(wordDoc, settings, element, suppressTrailingWhiteSpace, currentMarginLeft);
return ProcessParagraph(wordDoc, settings, element, suppressTrailingWhiteSpace, currentMarginLeft, suppressLeadingWhiteSpace);
}

// Transform hyperlinks to the XHTML h:a element.
Expand Down Expand Up @@ -4276,7 +4277,7 @@ private static object ProcessContentControl(WordprocessingDocument wordDoc, WmlT
// the element (e.g., h:h2) created from the w:p element having the (first)
// style separator (i.e., a w:specVanish element).
private static object ProcessParagraph(WordprocessingDocument wordDoc, WmlToHtmlConverterSettings settings,
XElement element, bool suppressTrailingWhiteSpace, decimal currentMarginLeft)
XElement element, bool suppressTrailingWhiteSpace, decimal currentMarginLeft, bool suppressLeadingWhiteSpace = false)
{
// Ignore this paragraph if the previous paragraph has a style separator.
// We have already transformed this one together with the previous one.
Expand All @@ -4286,7 +4287,7 @@ private static object ProcessParagraph(WordprocessingDocument wordDoc, WmlToHtml
var elementName = GetParagraphElementName(element, wordDoc);
var isBidi = IsBidi(element);
var paragraph = (XElement) ConvertParagraph(wordDoc, settings, element, elementName,
suppressTrailingWhiteSpace, currentMarginLeft, isBidi);
suppressTrailingWhiteSpace, currentMarginLeft, isBidi, suppressLeadingWhiteSpace);

// The paragraph conversion might have created empty spans.
// These can and should be removed because empty spans are
Expand Down Expand Up @@ -4955,7 +4956,7 @@ private enum BorderType
* - autoSpaceDE
* - autoSpaceDN
* - bidi
* - contextualSpacing
* - contextualSpacing (handled via GroupAndVerticallySpaceNumberedParagraphs)
* - divId
* - framePr
* - keepLines
Expand All @@ -4978,9 +4979,10 @@ private enum BorderType
*/

private static object ConvertParagraph(WordprocessingDocument wordDoc, WmlToHtmlConverterSettings settings,
XElement paragraph, XName elementName, bool suppressTrailingWhiteSpace, decimal currentMarginLeft, bool isBidi)
XElement paragraph, XName elementName, bool suppressTrailingWhiteSpace, decimal currentMarginLeft, bool isBidi,
bool suppressLeadingWhiteSpace = false)
{
var style = DefineParagraphStyle(paragraph, elementName, suppressTrailingWhiteSpace, currentMarginLeft, isBidi);
var style = DefineParagraphStyle(paragraph, elementName, suppressTrailingWhiteSpace, currentMarginLeft, isBidi, suppressLeadingWhiteSpace);
var rtl = isBidi ? new XAttribute("dir", "rtl") : new XAttribute("dir", "ltr");
var firstMark = isBidi ? new XEntity("#x200f") : null;

Expand Down Expand Up @@ -5113,7 +5115,7 @@ private static List<object> TransformElementsPrecedingTab(WordprocessingDocument
}

private static Dictionary<string, string> DefineParagraphStyle(XElement paragraph, XName elementName,
bool suppressTrailingWhiteSpace, decimal currentMarginLeft, bool isBidi)
bool suppressTrailingWhiteSpace, decimal currentMarginLeft, bool isBidi, bool suppressLeadingWhiteSpace = false)
{
var style = new Dictionary<string, string>();

Expand All @@ -5124,7 +5126,7 @@ private static Dictionary<string, string> DefineParagraphStyle(XElement paragrap
var pPr = paragraph.Element(W.pPr);
if (pPr == null) return style;

CreateStyleFromSpacing(style, pPr.Element(W.spacing), elementName, suppressTrailingWhiteSpace);
CreateStyleFromSpacing(style, pPr.Element(W.spacing), elementName, suppressTrailingWhiteSpace, suppressLeadingWhiteSpace);
CreateStyleFromInd(style, pPr.Element(W.ind), elementName, currentMarginLeft, isBidi);

// todo need to handle
Expand Down Expand Up @@ -5217,18 +5219,19 @@ private static void CreateStyleFromJc(Dictionary<string, string> style, XElement
}

private static void CreateStyleFromSpacing(Dictionary<string, string> style, XElement spacing, XName elementName,
bool suppressTrailingWhiteSpace)
bool suppressTrailingWhiteSpace, bool suppressLeadingWhiteSpace = false)
{
if (spacing == null) return;

var spacingBefore = (decimal?) spacing.Attribute(W.before);
var spacingBefore = suppressLeadingWhiteSpace ? 0 : (decimal?) spacing.Attribute(W.before);
if (spacingBefore != null && elementName != Xhtml.span)
style.AddIfMissing("margin-top",
spacingBefore > 0m
? string.Format(NumberFormatInfo.InvariantInfo, "{0}pt", spacingBefore/20.0m)
: "0");

var lineRule = (string) spacing.Attribute(W.lineRule);
// Per OOXML spec (ISO/IEC 29500), when lineRule is absent the default is "auto"
var lineRule = (string) spacing.Attribute(W.lineRule) ?? (spacing.Attribute(W.line) != null ? "auto" : null);
if (lineRule == "auto")
{
var line = (decimal) spacing.Attribute(W.line);
Expand Down Expand Up @@ -7189,7 +7192,13 @@ private static IEnumerable<object> GroupAndVerticallySpaceNumberedParagraphs(Wor
if (g.Key == "")
return g.Select(e => ConvertToHtmlTransform(wordDoc, settings, e, false, currentMarginLeft));
var last = g.Count() - 1;
return g.Select((e, i) => ConvertToHtmlTransform(wordDoc, settings, e, i != last, currentMarginLeft));
// For contextualSpacing groups (sty: prefix), suppress both trailing whitespace
// for non-last paragraphs AND leading whitespace for non-first paragraphs.
// Word removes all inter-paragraph spacing for same-style contextualSpacing paragraphs.
var isContextualGroup = g.Key.StartsWith("sty:");
return g.Select((e, i) => ConvertToHtmlTransform(wordDoc, settings, e,
i != last, currentMarginLeft,
suppressLeadingWhiteSpace: isContextualGroup && i != 0));
});
return (IEnumerable<object>)newContent;
}
Expand Down
13 changes: 8 additions & 5 deletions npm/src/pagination.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1024,16 +1024,19 @@ export class PaginationEngine {
// Margin collapsing: use the larger of the two adjacent margins
effectiveMarginTop = Math.max(block.marginTopPt, prevMarginBottomPt) - prevMarginBottomPt;
}
// Total height = top margin gap + content + bottom margin + footnote space
const blockSpace = effectiveMarginTop + block.heightPt + block.marginBottomPt + additionalFootnoteHeight;
// Visible height = top margin gap + content + footnote space
// Note: bottom margin is NOT included in the fit check because the last block's
// bottom margin extends beyond the content area and is clipped by overflow:hidden.
// It is still tracked in remainingHeight for correct margin collapsing with the next block.
const blockSpace = effectiveMarginTop + block.heightPt + additionalFootnoteHeight;

// Calculate needed height (including keepWithNext)
let neededHeight = blockSpace;
if (block.keepWithNext && nextBlock && !nextBlock.isPageBreak) {
// For keepWithNext, include the next block with collapsed margins
const collapsedMargin = Math.max(block.marginBottomPt, nextBlock.marginTopPt);
neededHeight = effectiveMarginTop + block.heightPt + collapsedMargin +
nextBlock.heightPt + nextBlock.marginBottomPt + additionalFootnoteHeight;
nextBlock.heightPt + additionalFootnoteHeight;
}

// Effective remaining height (content area minus footnotes already on page)
Expand All @@ -1055,9 +1058,9 @@ export class PaginationEngine {
currentFootnoteIds.push(...newFootnoteIds);
currentFootnoteHeight += additionalFootnoteHeight;
}
} else if (block.heightPt + block.marginTopPt + block.marginBottomPt <= effectiveContentHeight) {
} else if (block.heightPt + block.marginTopPt <= effectiveContentHeight) {
// Block doesn't fit with current allocation - try expanding footnote area
const blockSpaceWithoutFootnotes = effectiveMarginTop + block.heightPt + block.marginBottomPt;
const blockSpaceWithoutFootnotes = effectiveMarginTop + block.heightPt;

// Check if block fits if we expand footnote area
// We can expand footnotes up to maxFootnoteArea, leaving room for body content
Expand Down
Loading