From 4a2d955f4d4f1f6d855756f9e4fd4a26cab8ff75 Mon Sep 17 00:00:00 2001 From: Olivier Nizet Date: Mon, 23 Sep 2024 14:36:21 +0200 Subject: [PATCH] SVG tag support (#164) * Support of `svg` tag element * Detect Xml/Svg image and read for its preferred size * Rationalise object declaration * Support of `svg` tag element * Detect Xml/Svg image and read for its preferred size * Rationalise object declaration * Fix after rebase * Minor fixup * Improve testing * Handle SVG image in an unified code --- examples/Demo/Program.cs | 7 +- .../Expressions/HtmlDomExpression.cs | 1 + .../Expressions/Image/ImageExpression.cs | 18 ++- .../Expressions/Image/SvgExpression.cs | 105 ++++++++++++++++++ src/Html2OpenXml/IO/ImageHeader.cs | 31 +++++- src/Html2OpenXml/IO/ImagePrefetcher.cs | 20 ++-- src/Html2OpenXml/Primitives/HtmlImageInfo.cs | 11 +- test/HtmlToOpenXml.Tests/BodyTests.cs | 2 +- .../ImageFormats/ImageHeaderTests.cs | 2 + test/HtmlToOpenXml.Tests/ImgTests.cs | 38 ++++--- test/HtmlToOpenXml.Tests/Resources/kiwi.svg | 30 +++++ 11 files changed, 228 insertions(+), 37 deletions(-) create mode 100644 src/Html2OpenXml/Expressions/Image/SvgExpression.cs create mode 100644 test/HtmlToOpenXml.Tests/Resources/kiwi.svg diff --git a/examples/Demo/Program.cs b/examples/Demo/Program.cs index e8b75393..47c0124b 100644 --- a/examples/Demo/Program.cs +++ b/examples/Demo/Program.cs @@ -15,7 +15,7 @@ static class Program static async Task Main(string[] args) { const string filename = "test.docx"; - string html = ResourceHelper.GetString("Resources.Document.html"); + string html = ResourceHelper.GetString("Resources.AdvancedTable.html"); if (File.Exists(filename)) File.Delete(filename); using (MemoryStream generatedDocument = new MemoryStream()) @@ -39,14 +39,9 @@ static async Task Main(string[] args) } HtmlConverter converter = new HtmlConverter(mainPart); - // HeaderPart headerPart = mainPart.AddNewPart(); - //FooterPart footerPart = mainPart.AddNewPart(); converter.RenderPreAsTable = true; Body body = mainPart.Document.Body; - await converter.ParseHeader(@" - Red Dot"); - await converter.ParseBody(html); mainPart.Document.Save(); diff --git a/src/Html2OpenXml/Expressions/HtmlDomExpression.cs b/src/Html2OpenXml/Expressions/HtmlDomExpression.cs index a60c6033..44ec6bc0 100644 --- a/src/Html2OpenXml/Expressions/HtmlDomExpression.cs +++ b/src/Html2OpenXml/Expressions/HtmlDomExpression.cs @@ -67,6 +67,7 @@ private static Dictionary> InitKnownTa { TagNames.Strong, el => new PhrasingElementExpression((IHtmlElement) el, new Bold()) }, { TagNames.Sub, el => new PhrasingElementExpression((IHtmlElement) el, new VerticalTextAlignment() { Val = VerticalPositionValues.Subscript }) }, { TagNames.Sup, el => new PhrasingElementExpression((IHtmlElement) el, new VerticalTextAlignment() { Val = VerticalPositionValues.Superscript }) }, + { TagNames.Svg, el => new SvgExpression((AngleSharp.Svg.Dom.ISvgSvgElement) el) }, { TagNames.Table, el => new TableExpression((IHtmlTableElement) el) }, { TagNames.Time, el => new PhrasingElementExpression((IHtmlElement) el) }, { TagNames.U, el => new PhrasingElementExpression((IHtmlElement) el, new Underline() { Val = UnderlineValues.Single }) }, diff --git a/src/Html2OpenXml/Expressions/Image/ImageExpression.cs b/src/Html2OpenXml/Expressions/Image/ImageExpression.cs index be9bb9c6..a4b61c30 100644 --- a/src/Html2OpenXml/Expressions/Image/ImageExpression.cs +++ b/src/Html2OpenXml/Expressions/Image/ImageExpression.cs @@ -11,8 +11,11 @@ */ using System; using System.Threading; +using AngleSharp.Dom; using AngleSharp.Html.Dom; +using AngleSharp.Svg.Dom; using DocumentFormat.OpenXml; +using DocumentFormat.OpenXml.Packaging; using DocumentFormat.OpenXml.Wordprocessing; using HtmlToOpenXml.IO; @@ -57,14 +60,24 @@ class ImageExpression(IHtmlImageElement node) : ImageExpressionBase(node) preferredSize.Height = imgNode.DisplayHeight; } - var (imageObjId, drawingObjId) = IncrementDrawingObjId(context); - HtmlImageInfo? iinfo = context.ImageLoader.Download(src, CancellationToken.None) .ConfigureAwait(false).GetAwaiter().GetResult(); if (iinfo == null) return null; + if (iinfo.TypeInfo == ImagePartType.Svg) + { + var imagePart = context.HostingPart.GetPartById(iinfo.ImagePartId); + using var stream = imagePart.GetStream(System.IO.FileMode.Open); + using var sreader = new System.IO.StreamReader(stream); + imgNode.Insert(AdjacentPosition.AfterBegin, sreader.ReadToEnd()); + + var svgNode = imgNode.FindChild(); + if (svgNode is null) return null; + return SvgExpression.CreateSvgDrawing(context, svgNode, iinfo.ImagePartId, preferredSize); + } + if (preferredSize.IsEmpty) { preferredSize = iinfo.Size; @@ -78,6 +91,7 @@ class ImageExpression(IHtmlImageElement node) : ImageExpressionBase(node) long widthInEmus = new Unit(UnitMetric.Pixel, preferredSize.Width).ValueInEmus; long heightInEmus = new Unit(UnitMetric.Pixel, preferredSize.Height).ValueInEmus; + var (imageObjId, drawingObjId) = IncrementDrawingObjId(context); var img = new Drawing( new wp.Inline( new wp.Extent() { Cx = widthInEmus, Cy = heightInEmus }, diff --git a/src/Html2OpenXml/Expressions/Image/SvgExpression.cs b/src/Html2OpenXml/Expressions/Image/SvgExpression.cs new file mode 100644 index 00000000..7bb9a7f6 --- /dev/null +++ b/src/Html2OpenXml/Expressions/Image/SvgExpression.cs @@ -0,0 +1,105 @@ +/* Copyright (C) Olivier Nizet https://github.com/onizet/html2openxml - All Rights Reserved + * + * This source is subject to the Microsoft Permissive License. + * Please see the License.txt file for more information. + * All other rights reserved. + * + * THIS CODE AND INFORMATION ARE PROVIDED "AS IS" WITHOUT WARRANTY OF ANY + * KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A + * PARTICULAR PURPOSE. + */ +using AngleSharp.Svg.Dom; +using DocumentFormat.OpenXml; +using DocumentFormat.OpenXml.Packaging; +using DocumentFormat.OpenXml.Wordprocessing; +using DocumentFormat.OpenXml.Office2019.Drawing.SVG; +using System.Text; + +using a = DocumentFormat.OpenXml.Drawing; +using pic = DocumentFormat.OpenXml.Drawing.Pictures; +using wp = DocumentFormat.OpenXml.Drawing.Wordprocessing; +using AngleSharp.Text; + +namespace HtmlToOpenXml.Expressions; + +/// +/// Process the parsing of a svg element. +/// +sealed class SvgExpression(ISvgSvgElement node) : ImageExpressionBase(node) +{ + private readonly ISvgSvgElement svgNode = node; + + + protected override Drawing? CreateDrawing(ParsingContext context) + { + var imgPart = context.MainPart.AddImagePart(ImagePartType.Svg); + using var stream = new System.IO.MemoryStream(Encoding.UTF8.GetBytes(svgNode.OuterHtml), writable: false); + imgPart.FeedData(stream); + var imagePartId = context.MainPart.GetIdOfPart(imgPart); + return CreateSvgDrawing(context, svgNode, imagePartId, Size.Empty); + } + + internal static Drawing CreateSvgDrawing(ParsingContext context, ISvgSvgElement svgNode, string imagePartId, Size preferredSize) + { + var width = Unit.Parse(svgNode.GetAttribute("width")); + var height = Unit.Parse(svgNode.GetAttribute("height")); + long widthInEmus, heightInEmus; + if (width.IsValid && height.IsValid) + { + widthInEmus = width.ValueInEmus; + heightInEmus = height.ValueInEmus; + } + else + { + widthInEmus = new Unit(UnitMetric.Pixel, preferredSize.Width).ValueInEmus; + heightInEmus = new Unit(UnitMetric.Pixel, preferredSize.Height).ValueInEmus; + } + + var (imageObjId, drawingObjId) = IncrementDrawingObjId(context); + + string? title = svgNode.QuerySelector("title")?.TextContent?.CollapseAndStrip() ?? "Picture " + imageObjId; + string? description = svgNode.QuerySelector("desc")?.TextContent?.CollapseAndStrip() ?? string.Empty; + + var img = new Drawing( + new wp.Inline( + new wp.Extent() { Cx = widthInEmus, Cy = heightInEmus }, + new wp.EffectExtent() { LeftEdge = 0L, TopEdge = 0L, RightEdge = 0L, BottomEdge = 0L }, + new wp.DocProperties() { Id = drawingObjId, Name = title, Description = description }, + new wp.NonVisualGraphicFrameDrawingProperties { + GraphicFrameLocks = new a.GraphicFrameLocks() { NoChangeAspect = true } + }, + new a.Graphic( + new a.GraphicData( + new pic.Picture( + new pic.NonVisualPictureProperties { + NonVisualDrawingProperties = new pic.NonVisualDrawingProperties() { + Id = imageObjId, Name = title + }, + NonVisualPictureDrawingProperties = new() + }, + new pic.BlipFill( + new a.Blip( + new a.BlipExtensionList( + new a.BlipExtension(new SVGBlip { Embed = imagePartId }) { + Uri = "{96DAC541-7B7A-43D3-8B79-37D633B846F1}" + }) + ) { Embed = imagePartId /* ideally, that should be a png representation of the svg */ }, + new a.Stretch( + new a.FillRectangle()) + ), + new pic.ShapeProperties( + new a.Transform2D( + new a.Offset() { X = 0L, Y = 0L }, + new a.Extents() { Cx = widthInEmus, Cy = heightInEmus }), + new a.PresetGeometry( + new a.AdjustValueList() + ) { Preset = a.ShapeTypeValues.Rectangle }) + ) + ) { Uri = "http://schemas.openxmlformats.org/drawingml/2006/picture" }) + ) { DistanceFromTop = (UInt32Value)0U, DistanceFromBottom = (UInt32Value)0U, DistanceFromLeft = (UInt32Value)0U, DistanceFromRight = (UInt32Value)0U } + ); + + return img; + } +} \ No newline at end of file diff --git a/src/Html2OpenXml/IO/ImageHeader.cs b/src/Html2OpenXml/IO/ImageHeader.cs index 762c9c90..858a923d 100755 --- a/src/Html2OpenXml/IO/ImageHeader.cs +++ b/src/Html2OpenXml/IO/ImageHeader.cs @@ -18,6 +18,7 @@ using System.IO; using System.Linq; using System.Text; +using System.Xml.XPath; namespace HtmlToOpenXml.IO; @@ -29,7 +30,7 @@ public static class ImageHeader // https://en.wikipedia.org/wiki/List_of_file_signatures #pragma warning disable CS1591 // Missing XML comment for publicly visible type or member - public enum FileType { Unrecognized, Bitmap, Gif, Png, Jpeg, Emf } + public enum FileType { Unrecognized, Bitmap, Gif, Png, Jpeg, Emf, Xml } #pragma warning restore CS1591 // Missing XML comment for publicly visible type or member private static readonly byte[] pngSignatureBytes = [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A]; @@ -41,7 +42,8 @@ public enum FileType { Unrecognized, Bitmap, Gif, Png, Jpeg, Emf } { Encoding.UTF8.GetBytes("GIF89a"), FileType.Gif }, // animated gif { pngSignatureBytes, FileType.Png }, { new byte[] { 0xff, 0xd8 }, FileType.Jpeg }, - { new byte[] { 0x1, 0, 0, 0 }, FileType.Emf } + { new byte[] { 0x1, 0, 0, 0 }, FileType.Emf }, + { Encoding.UTF8.GetBytes(" /// Represents an image and its metadata. /// -sealed class HtmlImageInfo(string source) +sealed class HtmlImageInfo(string source, string partId) { /// /// The URI identifying this cached image information. @@ -26,12 +26,17 @@ sealed class HtmlImageInfo(string source) /// /// The Unique identifier of the ImagePart in the . /// - public string? ImagePartId { get; set; } + public string ImagePartId { get; set; } = partId; /// - /// Gets or sets the size of the image + /// Gets or sets the original size of the image. /// public Size Size { get; set; } + + /// + /// Gets the content type of the image. + /// + public PartTypeInfo TypeInfo { get; set; } } /// diff --git a/test/HtmlToOpenXml.Tests/BodyTests.cs b/test/HtmlToOpenXml.Tests/BodyTests.cs index 187e2f2a..706556d1 100644 --- a/test/HtmlToOpenXml.Tests/BodyTests.cs +++ b/test/HtmlToOpenXml.Tests/BodyTests.cs @@ -72,7 +72,7 @@ public async Task WithGoBackBookmark_ShouldBeAfterAppendedOutput() Assert.That(goBackBookmark, Is.Not.Null); HtmlConverter converter = new HtmlConverter(mainPart); - await converter.ParseHtml("

Placeholder

"); + await converter.ParseBody("

Placeholder

"); Assert.That(mainPart.Document.Body!.LastChild, Is.TypeOf()); var paragrahs = mainPart.Document.Body!.Elements(); diff --git a/test/HtmlToOpenXml.Tests/ImageFormats/ImageHeaderTests.cs b/test/HtmlToOpenXml.Tests/ImageFormats/ImageHeaderTests.cs index 8f8a14f6..8dabcd50 100644 --- a/test/HtmlToOpenXml.Tests/ImageFormats/ImageHeaderTests.cs +++ b/test/HtmlToOpenXml.Tests/ImageFormats/ImageHeaderTests.cs @@ -28,6 +28,7 @@ public void GuessFormat_ReturnsImageSize((string resourceName, Size expectedSize yield return ("Resources.html2openxml.emf", new Size(100, 100)); // animated gif: yield return ("Resources.stan.gif", new Size(252, 318)); + yield return ("Resources.kiwi.svg", new Size(612, 502)); } /// @@ -53,6 +54,7 @@ public void PngSof2_ReturnsImageSize() [TestCase("Resources.html2openxml.gif", ExpectedResult = ImageHeader.FileType.Gif)] [TestCase("Resources.html2openxml.jpg", ExpectedResult = ImageHeader.FileType.Jpeg)] [TestCase("Resources.html2openxml.png", ExpectedResult = ImageHeader.FileType.Png)] + [TestCase("Resources.kiwi.svg", ExpectedResult = ImageHeader.FileType.Xml)] public ImageHeader.FileType GuessFormat_ReturnsFileType(string resourceName) { using var imageStream = ResourceHelper.GetStream(resourceName); diff --git a/test/HtmlToOpenXml.Tests/ImgTests.cs b/test/HtmlToOpenXml.Tests/ImgTests.cs index d3a62d7b..64d97ae6 100644 --- a/test/HtmlToOpenXml.Tests/ImgTests.cs +++ b/test/HtmlToOpenXml.Tests/ImgTests.cs @@ -15,12 +15,14 @@ namespace HtmlToOpenXml.Tests [TestFixture] public class ImgTests : HtmlConverterTestBase { - [Test] - public void AbsoluteUri_ReturnsDrawing_WithDownloadedData() + [TestCase("https://www.w3schools.com/tags/smiley.gif", "image/gif")] + [TestCase("https://dev.w3.org/SVG/tools/svgweb/samples/svg-files/helloworld.svg", "image/svg+xml")] + public void AbsoluteUri_ReturnsDrawing_WithDownloadedData(string imageUri, string contentType) { - var elements = converter.Parse(@"Smiley face"); + var elements = converter.Parse(@$"Smiley face"); Assert.That(elements, Has.Count.EqualTo(1)); - AssertIsImg(mainPart, elements[0]); + var (_, imagePart) = AssertIsImg(mainPart, elements[0]); + Assert.That(imagePart.ContentType, Is.EqualTo(contentType)); } [Test] @@ -58,7 +60,6 @@ public void ManualProvisioning_ReturnsDrawing_WithProvidedData() AssertIsImg(mainPart, elements[0]); } - [TestCase("Smiley face", Description = "Empty image")] [TestCase("", Description = "Unsupported protocol")] [TestCase("", Description = "Relative url without providing BaseImagerUri")] public void IgnoreImage_ShouldBeIgnored(string html) @@ -94,6 +95,17 @@ public async Task FileSystem_LocalImage_WithSpaceInName_ShouldSucceed() AssertIsImg(mainPart, elements.First()); } + [Test] + public void SvgNode_ReturnsImage() + { + var elements = converter.Parse(ResourceHelper.GetString("Resources.kiwi.svg")); + Assert.That(elements, Has.Count.EqualTo(1)); + var (drawing, imagePart) = AssertIsImg(mainPart, elements[0]); + Assert.That(drawing.Inline!.DocProperties?.Name?.Value, Is.EqualTo("Illustration of a Kiwi")); + Assert.That(drawing.Inline!.DocProperties?.Description?.Value, Is.EqualTo("Kiwi (/ˈkiːwiː/ KEE-wee)[4] are flightless birds endemic to New Zealand of the order Apterygiformes.")); + Assert.That(imagePart.ContentType, Is.EqualTo("image/svg+xml")); + } + [Test(Description = "Reading local file containing a space in the name")] public async Task RemoteImage_WithBaseUri_ShouldSucceed() { @@ -191,21 +203,21 @@ public async Task ParseIntoDocumentPart_ReturnsImageParentedToPart (Type openXml AssertThatOpenXmlDocumentIsValid(); } - private static Drawing AssertIsImg (OpenXmlPartContainer container, OpenXmlElement paragraph) + private static (Drawing, ImagePart) AssertIsImg (OpenXmlPartContainer container, OpenXmlElement paragraph) { var run = paragraph.GetFirstChild(); Assert.That(run, Is.Not.Null); - var img = run.GetFirstChild(); - Assert.That(img, Is.Not.Null); - Assert.That(img.Inline?.Graphic?.GraphicData, Is.Not.Null); - var pic = img.Inline.Graphic.GraphicData.GetFirstChild(); + var drawing = run.GetFirstChild(); + Assert.That(drawing, Is.Not.Null); + Assert.That(drawing.Inline?.Graphic?.GraphicData, Is.Not.Null); + var pic = drawing.Inline.Graphic.GraphicData.GetFirstChild(); Assert.That(pic?.BlipFill?.Blip?.Embed, Is.Not.Null); var imagePartId = pic.BlipFill.Blip.Embed.Value; Assert.That(imagePartId, Is.Not.Null); - var part = container.GetPartById(imagePartId); - Assert.That(part, Is.TypeOf(typeof(ImagePart))); - return img; + var imagePart = container.GetPartById(imagePartId); + Assert.That(imagePart, Is.TypeOf(typeof(ImagePart))); + return (drawing, (ImagePart) imagePart); } } } \ No newline at end of file diff --git a/test/HtmlToOpenXml.Tests/Resources/kiwi.svg b/test/HtmlToOpenXml.Tests/Resources/kiwi.svg new file mode 100644 index 00000000..9a5b8dd7 --- /dev/null +++ b/test/HtmlToOpenXml.Tests/Resources/kiwi.svg @@ -0,0 +1,30 @@ + + + + + + Illustration of a Kiwi + + + Kiwi (/ˈkiːwiː/ KEE-wee)[4] are flightless birds endemic to New Zealand of the order Apterygiformes. + + + +