Skip to content

Commit

Permalink
Ensure to apply default style for paragraphs, to avoid a paragraph be…
Browse files Browse the repository at this point in the history
…tween 2 list is mis-guessed
  • Loading branch information
onizet committed Oct 12, 2024
1 parent ba251d3 commit 092980d
Show file tree
Hide file tree
Showing 8 changed files with 80 additions and 17 deletions.
14 changes: 12 additions & 2 deletions src/Html2OpenXml/Expressions/BlockElementExpression.cs
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,21 @@ namespace HtmlToOpenXml.Expressions;
/// Process the parsing of block contents (like <c>p</c>, <c>span</c>, <c>heading</c>).
/// A block-level element always starts on a new line, and the browsers automatically add some space (a margin) before and after the element.
/// </summary>
class BlockElementExpression(IHtmlElement node, params OpenXmlLeafElement[]? styleProperty) : PhrasingElementExpression(node)
class BlockElementExpression: PhrasingElementExpression
{
private readonly OpenXmlLeafElement[]? defaultStyleProperties = styleProperty;
private readonly OpenXmlLeafElement[]? defaultStyleProperties;
protected readonly ParagraphProperties paraProperties = new();

/// <summary>
/// Initializes the expression with at most one default style property.
/// </summary>
/// <param name="node">The HTML element forwarded to the base expression.</param>
/// <param name="styleProperty">
/// Optional default style property. When <see langword="null"/>, no default
/// style properties are recorded and the backing field keeps its default value.
/// </param>
public BlockElementExpression(IHtmlElement node, OpenXmlLeafElement? styleProperty) : base(node)
{
    // Nothing to record: leave defaultStyleProperties unassigned (null).
    if (styleProperty is null)
    {
        return;
    }

    // Wrap the single property so it is stored the same way as the params overload.
    defaultStyleProperties = [styleProperty];
}
/// <summary>
/// Initializes the expression with any number of default style properties.
/// </summary>
/// <param name="node">The HTML element forwarded to the base expression.</param>
/// <param name="styleProperty">
/// The default style properties, stored as provided (the array may be
/// <see langword="null"/> or empty).
/// </param>
public BlockElementExpression(IHtmlElement node, params OpenXmlLeafElement[]? styleProperty) : base(node)
    => defaultStyleProperties = styleProperty;


/// <inheritdoc/>
public override IEnumerable<OpenXmlElement> Interpret (ParsingContext context)
Expand Down
3 changes: 2 additions & 1 deletion src/Html2OpenXml/Expressions/BodyExpression.cs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ namespace HtmlToOpenXml.Expressions;
/// Top parent expression, processing the <c>body</c> tag,
/// even if it is not directly specified in the provided Html.
/// </summary>
sealed class BodyExpression(IHtmlElement node) : BlockElementExpression(node)
sealed class BodyExpression(IHtmlElement node, ParagraphStyleId? defaultStyle)
: BlockElementExpression(node, defaultStyle)
{
private bool shouldRegisterTopBookmark;

Expand Down
15 changes: 6 additions & 9 deletions src/Html2OpenXml/HtmlConverter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -127,8 +127,7 @@ public async Task ParseHeader(string html, HeaderFooterValues? headerType = null
new ParallelOptions() { CancellationToken = cancellationToken },
htmlStyles.GetParagraphStyle(htmlStyles.DefaultStyles.HeaderStyle));

foreach (var p in paragraphs)
headerPart.Header.AddChild(p);
headerPart.Header.Append(paragraphs);
}

/// <summary>
Expand All @@ -152,8 +151,7 @@ public async Task ParseFooter(string html, HeaderFooterValues? footerType = null
new ParallelOptions() { CancellationToken = cancellationToken },
htmlStyles.GetParagraphStyle(htmlStyles.DefaultStyles.FooterStyle));

foreach (var p in paragraphs)
footerPart.Footer.AddChild(p);
footerPart.Footer.Append(paragraphs);
}

/// <summary>
Expand All @@ -166,7 +164,8 @@ public async Task ParseBody(string html, CancellationToken cancellationToken = d
{
bodyImageLoader ??= new ImagePrefetcher<MainDocumentPart>(mainPart, webRequester);
var paragraphs = await ParseCoreAsync(html, mainPart, bodyImageLoader,
new ParallelOptions() { CancellationToken = cancellationToken });
new ParallelOptions() { CancellationToken = cancellationToken },
htmlStyles.GetParagraphStyle(htmlStyles.DefaultStyles.Paragraph));

if (!paragraphs.Any())
return;
Expand Down Expand Up @@ -263,11 +262,9 @@ private async Task<IEnumerable<OpenXmlCompositeElement>> ParseCoreAsync(string h

Expressions.HtmlDomExpression expression;
if (hostingPart is MainDocumentPart)
expression = new Expressions.BodyExpression(htmlDocument.Body!);
else if (defaultParagraphStyleId?.Val?.HasValue == true)
expression = new Expressions.BlockElementExpression(htmlDocument.Body!, defaultParagraphStyleId);
expression = new Expressions.BodyExpression(htmlDocument.Body!, defaultParagraphStyleId);
else
expression = new Expressions.BlockElementExpression(htmlDocument.Body!);
expression = new Expressions.BlockElementExpression(htmlDocument.Body!, defaultParagraphStyleId);

var parsingContext = new ParsingContext(this, hostingPart, imageLoader);
var paragraphs = expression.Interpret(parsingContext);
Expand Down
1 change: 1 addition & 0 deletions src/Html2OpenXml/PredefinedStyles.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ internal class PredefinedStyles
public const string TableGrid = "TableGrid";
public const string Header = "Header";
public const string Footer = "Footer";
public const string Paragraph = "Normal";



Expand Down
6 changes: 6 additions & 0 deletions src/Html2OpenXml/Primitives/DefaultStyles.cs
Original file line number Diff line number Diff line change
Expand Up @@ -101,4 +101,10 @@ public class DefaultStyles
/// </summary>
/// <value>Footer</value>
public string FooterStyle { get; set; } = PredefinedStyles.Footer;

/// <summary>
/// Gets or sets the style used by default for paragraphs of the document body.
/// Initialized from <c>PredefinedStyles.Paragraph</c>.
/// </summary>
/// <value>Normal</value>
public string Paragraph { get; set; } = PredefinedStyles.Paragraph;
}
3 changes: 2 additions & 1 deletion src/Html2OpenXml/WordDocumentStyle.cs
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ internal WordDocumentStyle(MainDocumentPart mainPart)
PredefinedStyles.ListParagraph,
PredefinedStyles.Quote,
PredefinedStyles.QuoteChar,
PredefinedStyles.TableGrid
PredefinedStyles.TableGrid,
PredefinedStyles.Paragraph
];
this.mainPart = mainPart;
}
Expand Down
12 changes: 8 additions & 4 deletions test/HtmlToOpenXml.Tests/BodyTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,11 @@ public class BodyTests : HtmlConverterTestBase
{
[TestCase("landscape", ExpectedResult = true)]
[TestCase("portrait", ExpectedResult = false)]
public bool PageOrientation_ReturnsLandscapeDimension(string orientation)
public async Task<bool> PageOrientation_ReturnsLandscapeDimension(string orientation)
{
var _ = converter.Parse($@"<body style=""page-orientation:{orientation}""><body>");
await converter.ParseBody($@"<body style=""page-orientation:{orientation}""><body>");
AssertThatOpenXmlDocumentIsValid();

var sectionProperties = mainPart.Document.Body!.GetFirstChild<SectionProperties>();
Assert.That(sectionProperties, Is.Not.Null);
var pageSize = sectionProperties.GetFirstChild<PageSize>();
Expand All @@ -24,7 +26,7 @@ public bool PageOrientation_ReturnsLandscapeDimension(string orientation)

[TestCase("portrait", ExpectedResult = true)]
[TestCase("landscape", ExpectedResult = false)]
public bool PageOrientation_OverrideExistingLayout_ReturnsLandscapeDimension(string orientation)
public async Task<bool> PageOrientation_OverrideExistingLayout_ReturnsLandscapeDimension(string orientation)
{
using var generatedDocument = new MemoryStream();
using (var buffer = ResourceHelper.GetStream("Resources.DocWithLandscape.docx"))
Expand All @@ -35,7 +37,9 @@ public bool PageOrientation_OverrideExistingLayout_ReturnsLandscapeDimension(str
MainDocumentPart mainPart = package.MainDocumentPart!;
HtmlConverter converter = new(mainPart);

var _ = converter.Parse($@"<body style=""page-orientation:{orientation}""><body>");
await converter.ParseBody($@"<body style=""page-orientation:{orientation}""><body>");
AssertThatOpenXmlDocumentIsValid();

var sectionProperties = mainPart.Document.Body!.GetFirstChild<SectionProperties>();
Assert.That(sectionProperties, Is.Not.Null);
var pageSize = sectionProperties.GetFirstChild<PageSize>();
Expand Down
43 changes: 43 additions & 0 deletions test/HtmlToOpenXml.Tests/HeaderFooterTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -109,5 +109,48 @@ public async Task WithExistingHeader_Even_ReturnsAnotherHeaderPart()
});
AssertThatOpenXmlDocumentIsValid();
}

/// <summary>
/// Parses a header whose paragraph contains a nested navigation list and
/// verifies that three paragraphs are produced: the first one carries the
/// default header style, the remaining ones the default list paragraph style.
/// </summary>
[Test]
public async Task Header_ReturnsStyleParagraphs()
{
    await converter.ParseHeader(@"
<header>
<p>Placeholder
<nav>
<ul><li>Home</li><li>Contact</li></ul>
</nav>
</p>
</header>
");

    var header = mainPart.HeaderParts.FirstOrDefault()?.Header;
    Assert.That(header, Is.Not.Null);

    // Project every paragraph to its style id once, then assert on the materialized list.
    var styleIds = header.Elements<Paragraph>()
        .Select(p => p.ParagraphProperties?.ParagraphStyleId?.Val?.Value)
        .ToList();
    var defaultStyles = converter.HtmlStyles.DefaultStyles;

    Assert.That(styleIds.Count, Is.EqualTo(3));
    Assert.That(styleIds[0], Is.EqualTo(defaultStyles.HeaderStyle));
    Assert.That(styleIds.Skip(1), Has.All.EqualTo(defaultStyles.ListParagraphStyle));
}

/// <summary>
/// Parses a footer containing a link and a small print and verifies that
/// two paragraphs are produced, all of them carrying the default footer style.
/// </summary>
[Test]
public async Task Footer_ReturnsStyleParagraphs()
{
    await converter.ParseFooter(@"
<footer>
<p>
<a rel=""license"" href=""https://creativecommons.org/licenses/by/4.0/"">Copyrighted but you can use what's here as long as you credit me</a>
<small>&copy; Copyright 2058, Company Inc.</small>
</p>
</footer>
");

    var footer = mainPart.FooterParts.FirstOrDefault()?.Footer;
    Assert.That(footer, Is.Not.Null);

    // Project every paragraph to its style id once, then assert on the materialized list.
    var styleIds = footer.Elements<Paragraph>()
        .Select(p => p.ParagraphProperties?.ParagraphStyleId?.Val?.Value)
        .ToList();
    var expectedStyle = converter.HtmlStyles.DefaultStyles.FooterStyle;

    Assert.That(styleIds.Count, Is.EqualTo(2));
    Assert.That(styleIds, Has.All.EqualTo(expectedStyle));
}
}
}

0 comments on commit 092980d

Please sign in to comment.