From 07fae6da38ff655044896a9358f384d23f8e789b Mon Sep 17 00:00:00 2001 From: Olivier Nizet Date: Mon, 11 Nov 2024 15:32:23 +0100 Subject: [PATCH] Extend support of nested list for non-W3C compliant html #173 --- CHANGELOG.md | 1 + .../Expressions/Numbering/ListExpression.cs | 13 +++++- test/HtmlToOpenXml.Tests/NumberingTests.cs | 42 ++++++++++++++++++- 3 files changed, 54 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b4edec4..a6003e6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ - Support deprecrated align attribute for block #171 - Fix parsing of style attribute with a key with no value - Improve parsing of style attribute to avoid an extra call to HtmlDecode +- Extend support of nested list for non-W3C compliant html #173 ## 3.2.1 diff --git a/src/Html2OpenXml/Expressions/Numbering/ListExpression.cs b/src/Html2OpenXml/Expressions/Numbering/ListExpression.cs index 811976c..ef7fa22 100644 --- a/src/Html2OpenXml/Expressions/Numbering/ListExpression.cs +++ b/src/Html2OpenXml/Expressions/Numbering/ListExpression.cs @@ -50,9 +50,20 @@ readonly struct ListContext(string listName, int absNumId, int instanceId, int l public override IEnumerable Interpret(ParsingContext context) { - var liNodes = node.Children.Where(n => n.LocalName == "li"); + var liNodes = node.Children.Where(n => n.LocalName.Equals("li", StringComparison.OrdinalIgnoreCase)); if (!liNodes.Any()) yield break; + // W3C requires that nested list stands below a `li` element but some editors + // don't care to respect the standard. 
Let's reparent those lists
+        // Snapshot first: AppendChild() below detaches each list from node.Children mid-iteration.
+        var nestedLists = node.Children.Where(n =>
+            n.LocalName.Equals("ol", StringComparison.OrdinalIgnoreCase) ||
+            n.LocalName.Equals("ul", StringComparison.OrdinalIgnoreCase)).ToList();
+        foreach (var list in nestedLists)
+        {
+            list.PreviousElementSibling?.AppendChild(list);
+        }
+
         var listContext = context.Properties("listContext");
         var parentContext = listContext;
         var listStyle = GetListName(node, listContext.Name);
diff --git a/test/HtmlToOpenXml.Tests/NumberingTests.cs b/test/HtmlToOpenXml.Tests/NumberingTests.cs
index a7b914e..17b75de 100644
--- a/test/HtmlToOpenXml.Tests/NumberingTests.cs
+++ b/test/HtmlToOpenXml.Tests/NumberingTests.cs
@@ -514,7 +514,7 @@ public void WithRtl_ReturnsBidi(string dir, bool? expectedValue)
         }
 
         [Test]
-        public void NestedNumberList_ReturnsIncrementalIdentation()
+        public void NestedNumberList_ReturnsIncrementalIndentation()
         {
             const int maxLevel = 8;
             var sb = new System.Text.StringBuilder();
@@ -541,5 +541,45 @@ public void NestedNumberList_ReturnsIncrementalIdentation()
                     TestContext.Out.WriteLine($"{i}. {ident?.Left?.Value}");
                 }
             }
+
+        [Test(Description = "Nested list must be a children of a `li` tag but some editor are not respecting the W3C standard (issue #173)")]
+        public async Task NestedNumberList_NonCompliant_ReturnsIncrementalIndentation()
+        {
+            await converter.ParseBody(@"
<ol>
+                <li>Item1</li>
+                <li>Item2</li>
+                <ol><li>Item 2.1</li></ol>
+            </ol>
");
+
+            var absNum = mainPart.NumberingDefinitionsPart?.Numbering
+                .Elements<AbstractNum>()
+                .SingleOrDefault();
+            Assert.That(absNum, Is.Not.Null);
+
+            var inst = mainPart.NumberingDefinitionsPart?.Numbering
+                .Elements<NumberingInstance>().Where(i => i.AbstractNumId?.Val == absNum.AbstractNumberId)
+                .SingleOrDefault();
+            Assert.That(inst, Is.Not.Null);
+            Assert.That(inst.NumberID?.Value, Is.Not.Null);
+
+            var elements = mainPart.Document.Body!.ChildElements;
+            Assert.Multiple(() => {
+                Assert.That(elements, Has.Count.EqualTo(3)); // Item1, Item2 and the reparented Item 2.1
+                Assert.That(elements, Is.All.TypeOf<Paragraph>());
+                Assert.That(mainPart.NumberingDefinitionsPart?.Numbering, Is.Not.Null);
+            });
+
+            // assert paragraphs linked to numbering instance
+            Assert.Multiple(() =>
+            {
+                Assert.That(elements.Cast<Paragraph>().Select(e =>
+                    e.ParagraphProperties?.NumberingProperties?.NumberingId?.Val?.Value),
+                    Has.All.EqualTo(inst.NumberID.Value),
+                    "All paragraphs are linked to the same list instance");
+                Assert.That(elements.Take(2).Select(p => p.GetFirstChild<ParagraphProperties>()?.NumberingProperties?.NumberingLevelReference?.Val?.Value), Has.All.EqualTo(0)); // first two items sit at level 0
+                Assert.That(elements.Last().GetFirstChild<ParagraphProperties>()?.NumberingProperties?.NumberingLevelReference?.Val?.Value, Is.EqualTo(1)); // the nested item is one level deeper
+            });
+            AssertThatOpenXmlDocumentIsValid();
+        }
     }
 }
\ No newline at end of file