Skip to content

Commit

Permalink
SVG tag support (#164)
Browse files Browse the repository at this point in the history
* Support of `svg` tag element

* Detect Xml/Svg image and read for its preferred size

* Rationalise object declaration

* Support of `svg` tag element

* Detect Xml/Svg image and read for its preferred size

* Rationalise object declaration

* Fix after rebase

* Minor fixup

* Improve testing

* Handle SVG images in a unified code path
  • Loading branch information
onizet authored Sep 23, 2024
1 parent 1666f5b commit 4a2d955
Show file tree
Hide file tree
Showing 11 changed files with 228 additions and 37 deletions.
7 changes: 1 addition & 6 deletions examples/Demo/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ static class Program
static async Task Main(string[] args)
{
const string filename = "test.docx";
string html = ResourceHelper.GetString("Resources.Document.html");
string html = ResourceHelper.GetString("Resources.AdvancedTable.html");
if (File.Exists(filename)) File.Delete(filename);

using (MemoryStream generatedDocument = new MemoryStream())
Expand All @@ -39,14 +39,9 @@ static async Task Main(string[] args)
}

HtmlConverter converter = new HtmlConverter(mainPart);
// HeaderPart headerPart = mainPart.AddNewPart<HeaderPart>();
//FooterPart footerPart = mainPart.AddNewPart<FooterPart>();
converter.RenderPreAsTable = true;
Body body = mainPart.Document.Body;

await converter.ParseHeader(@"<a href=""www.github.com"">
<img src=""data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUAAAAFCAYAAACNbyblAAAAHElEQVQI12P4//8/w38GIAXDIBKE0DHxgljNBAAO9TXL0Y4OHwAAAABJRU5ErkJggg=="" alt=""Red dot"" /> Red Dot</a>");

await converter.ParseBody(html);
mainPart.Document.Save();

Expand Down
1 change: 1 addition & 0 deletions src/Html2OpenXml/Expressions/HtmlDomExpression.cs
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ private static Dictionary<string, Func<IElement, HtmlDomExpression>> InitKnownTa
{ TagNames.Strong, el => new PhrasingElementExpression((IHtmlElement) el, new Bold()) },
{ TagNames.Sub, el => new PhrasingElementExpression((IHtmlElement) el, new VerticalTextAlignment() { Val = VerticalPositionValues.Subscript }) },
{ TagNames.Sup, el => new PhrasingElementExpression((IHtmlElement) el, new VerticalTextAlignment() { Val = VerticalPositionValues.Superscript }) },
{ TagNames.Svg, el => new SvgExpression((AngleSharp.Svg.Dom.ISvgSvgElement) el) },
{ TagNames.Table, el => new TableExpression((IHtmlTableElement) el) },
{ TagNames.Time, el => new PhrasingElementExpression((IHtmlElement) el) },
{ TagNames.U, el => new PhrasingElementExpression((IHtmlElement) el, new Underline() { Val = UnderlineValues.Single }) },
Expand Down
18 changes: 16 additions & 2 deletions src/Html2OpenXml/Expressions/Image/ImageExpression.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,11 @@
*/
using System;
using System.Threading;
using AngleSharp.Dom;
using AngleSharp.Html.Dom;
using AngleSharp.Svg.Dom;
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;
using HtmlToOpenXml.IO;

Expand Down Expand Up @@ -57,14 +60,24 @@ class ImageExpression(IHtmlImageElement node) : ImageExpressionBase(node)
preferredSize.Height = imgNode.DisplayHeight;
}

var (imageObjId, drawingObjId) = IncrementDrawingObjId(context);

HtmlImageInfo? iinfo = context.ImageLoader.Download(src, CancellationToken.None)
.ConfigureAwait(false).GetAwaiter().GetResult();

if (iinfo == null)
return null;

if (iinfo.TypeInfo == ImagePartType.Svg)
{
var imagePart = context.HostingPart.GetPartById(iinfo.ImagePartId);
using var stream = imagePart.GetStream(System.IO.FileMode.Open);
using var sreader = new System.IO.StreamReader(stream);
imgNode.Insert(AdjacentPosition.AfterBegin, sreader.ReadToEnd());

var svgNode = imgNode.FindChild<ISvgSvgElement>();
if (svgNode is null) return null;
return SvgExpression.CreateSvgDrawing(context, svgNode, iinfo.ImagePartId, preferredSize);
}

if (preferredSize.IsEmpty)
{
preferredSize = iinfo.Size;
Expand All @@ -78,6 +91,7 @@ class ImageExpression(IHtmlImageElement node) : ImageExpressionBase(node)
long widthInEmus = new Unit(UnitMetric.Pixel, preferredSize.Width).ValueInEmus;
long heightInEmus = new Unit(UnitMetric.Pixel, preferredSize.Height).ValueInEmus;

var (imageObjId, drawingObjId) = IncrementDrawingObjId(context);
var img = new Drawing(
new wp.Inline(
new wp.Extent() { Cx = widthInEmus, Cy = heightInEmus },
Expand Down
105 changes: 105 additions & 0 deletions src/Html2OpenXml/Expressions/Image/SvgExpression.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
/* Copyright (C) Olivier Nizet https://github.com/onizet/html2openxml - All Rights Reserved
*
* This source is subject to the Microsoft Permissive License.
* Please see the License.txt file for more information.
* All other rights reserved.
*
* THIS CODE AND INFORMATION ARE PROVIDED "AS IS" WITHOUT WARRANTY OF ANY
* KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
* PARTICULAR PURPOSE.
*/
using AngleSharp.Svg.Dom;
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;
using DocumentFormat.OpenXml.Office2019.Drawing.SVG;
using System.Text;

using a = DocumentFormat.OpenXml.Drawing;
using pic = DocumentFormat.OpenXml.Drawing.Pictures;
using wp = DocumentFormat.OpenXml.Drawing.Wordprocessing;
using AngleSharp.Text;

namespace HtmlToOpenXml.Expressions;

/// <summary>
/// Process the parsing of a <c>svg</c> element: the markup is stored as an SVG image part
/// and exposed to the document as an inline drawing.
/// </summary>
sealed class SvgExpression(ISvgSvgElement node) : ImageExpressionBase(node)
{
    /// <summary>Separators accepted between the four numbers of a <c>viewBox</c> attribute.</summary>
    private static readonly char[] viewBoxSeparators = [' ', ',', '\t', '\r', '\n'];

    private readonly ISvgSvgElement svgNode = node;


    /// <summary>
    /// Serialises the <c>svg</c> node into a new SVG image part of the main document part
    /// and builds the drawing that references it.
    /// </summary>
    protected override Drawing? CreateDrawing(ParsingContext context)
    {
        var imgPart = context.MainPart.AddImagePart(ImagePartType.Svg);
        using var stream = new System.IO.MemoryStream(Encoding.UTF8.GetBytes(svgNode.OuterHtml), writable: false);
        imgPart.FeedData(stream);
        var imagePartId = context.MainPart.GetIdOfPart(imgPart);

        // An inline <svg> has no externally imposed size: the extent is resolved
        // from the element itself inside CreateSvgDrawing.
        return CreateSvgDrawing(context, svgNode, imagePartId, Size.Empty);
    }

    /// <summary>
    /// Builds the inline drawing that displays an SVG image part.
    /// </summary>
    /// <param name="context">The parsing context used to allocate the drawing/picture identifiers.</param>
    /// <param name="svgNode">The <c>svg</c> element, read for its size, <c>title</c> and <c>desc</c>.</param>
    /// <param name="imagePartId">The relationship id of the image part holding the SVG content.</param>
    /// <param name="preferredSize">The size (in pixels) to use when the <c>svg</c> element
    /// does not carry both a valid <c>width</c> and <c>height</c> attribute.</param>
    /// <returns>The drawing element embedding the SVG.</returns>
    /// <remarks>
    /// Size resolution order: the <c>width</c>/<c>height</c> attributes of the element,
    /// then <paramref name="preferredSize"/>, then the dimensions of the <c>viewBox</c> attribute.
    /// The last step prevents emitting a 0x0 extent for markup such as
    /// <c>&lt;svg viewBox="0 0 100 50"&gt;</c>.
    /// </remarks>
    internal static Drawing CreateSvgDrawing(ParsingContext context, ISvgSvgElement svgNode, string imagePartId, Size preferredSize)
    {
        var width = Unit.Parse(svgNode.GetAttribute("width"));
        var height = Unit.Parse(svgNode.GetAttribute("height"));
        long widthInEmus, heightInEmus;
        if (width.IsValid && height.IsValid)
        {
            widthInEmus = width.ValueInEmus;
            heightInEmus = height.ValueInEmus;
        }
        else
        {
            // Heuristic: with no explicit size at all, treat the viewBox user units as pixels
            // rather than producing an empty extent.
            if (preferredSize.IsEmpty && TryGetViewBoxSize(svgNode, out int viewBoxWidth, out int viewBoxHeight))
                preferredSize = new Size(viewBoxWidth, viewBoxHeight);

            widthInEmus = new Unit(UnitMetric.Pixel, preferredSize.Width).ValueInEmus;
            heightInEmus = new Unit(UnitMetric.Pixel, preferredSize.Height).ValueInEmus;
        }

        var (imageObjId, drawingObjId) = IncrementDrawingObjId(context);

        // The optional <title> and <desc> children provide the drawing name and description.
        string title = svgNode.QuerySelector("title")?.TextContent?.CollapseAndStrip() ?? "Picture " + imageObjId;
        string description = svgNode.QuerySelector("desc")?.TextContent?.CollapseAndStrip() ?? string.Empty;

        var img = new Drawing(
            new wp.Inline(
                new wp.Extent() { Cx = widthInEmus, Cy = heightInEmus },
                new wp.EffectExtent() { LeftEdge = 0L, TopEdge = 0L, RightEdge = 0L, BottomEdge = 0L },
                new wp.DocProperties() { Id = drawingObjId, Name = title, Description = description },
                new wp.NonVisualGraphicFrameDrawingProperties {
                    GraphicFrameLocks = new a.GraphicFrameLocks() { NoChangeAspect = true }
                },
                new a.Graphic(
                    new a.GraphicData(
                        new pic.Picture(
                            new pic.NonVisualPictureProperties {
                                NonVisualDrawingProperties = new pic.NonVisualDrawingProperties() {
                                    Id = imageObjId, Name = title
                                },
                                NonVisualPictureDrawingProperties = new()
                            },
                            new pic.BlipFill(
                                new a.Blip(
                                    new a.BlipExtensionList(
                                        // the SVG payload is referenced through a blip extension
                                        new a.BlipExtension(new SVGBlip { Embed = imagePartId }) {
                                            Uri = "{96DAC541-7B7A-43D3-8B79-37D633B846F1}"
                                        })
                                ) { Embed = imagePartId /* ideally, that should be a png representation of the svg */ },
                                new a.Stretch(
                                    new a.FillRectangle())
                            ),
                            new pic.ShapeProperties(
                                new a.Transform2D(
                                    new a.Offset() { X = 0L, Y = 0L },
                                    new a.Extents() { Cx = widthInEmus, Cy = heightInEmus }),
                                new a.PresetGeometry(
                                    new a.AdjustValueList()
                                ) { Preset = a.ShapeTypeValues.Rectangle })
                        )
                    ) { Uri = "http://schemas.openxmlformats.org/drawingml/2006/picture" })
            ) { DistanceFromTop = (UInt32Value)0U, DistanceFromBottom = (UInt32Value)0U, DistanceFromLeft = (UInt32Value)0U, DistanceFromRight = (UInt32Value)0U }
        );

        return img;
    }

    /// <summary>
    /// Reads the width and height (3rd and 4th numbers) of the <c>viewBox</c> attribute.
    /// </summary>
    /// <returns><see langword="true"/> when the attribute holds four numbers
    /// and both dimensions are strictly positive.</returns>
    private static bool TryGetViewBoxSize(ISvgSvgElement svgNode, out int width, out int height)
    {
        width = 0;
        height = 0;

        var viewBox = svgNode.GetAttribute("viewBox");
        if (viewBox is null)
            return false;

        // expected form: "min-x min-y width height", separated by whitespace and/or commas
        var parts = viewBox.Split(viewBoxSeparators, System.StringSplitOptions.RemoveEmptyEntries);
        if (parts.Length != 4)
            return false;

        var style = System.Globalization.NumberStyles.Float;
        var culture = System.Globalization.CultureInfo.InvariantCulture;
        if (!double.TryParse(parts[2], style, culture, out double w)
            || !double.TryParse(parts[3], style, culture, out double h))
            return false;

        // written this way so that NaN, infinities and out-of-range values are all rejected
        if (!(w > 0 && w <= int.MaxValue && h > 0 && h <= int.MaxValue))
            return false;

        // round up so that a sub-pixel viewBox never collapses to zero
        width = (int) System.Math.Ceiling(w);
        height = (int) System.Math.Ceiling(h);
        return true;
    }
}
31 changes: 28 additions & 3 deletions src/Html2OpenXml/IO/ImageHeader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
using System.IO;
using System.Linq;
using System.Text;
using System.Xml.XPath;

namespace HtmlToOpenXml.IO;

Expand All @@ -29,7 +30,7 @@ public static class ImageHeader
// https://en.wikipedia.org/wiki/List_of_file_signatures

#pragma warning disable CS1591 // Missing XML comment for publicly visible type or member
public enum FileType { Unrecognized, Bitmap, Gif, Png, Jpeg, Emf }
public enum FileType { Unrecognized, Bitmap, Gif, Png, Jpeg, Emf, Xml }
#pragma warning restore CS1591 // Missing XML comment for publicly visible type or member

private static readonly byte[] pngSignatureBytes = [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A];
Expand All @@ -41,7 +42,8 @@ public enum FileType { Unrecognized, Bitmap, Gif, Png, Jpeg, Emf }
{ Encoding.UTF8.GetBytes("GIF89a"), FileType.Gif }, // animated gif
{ pngSignatureBytes, FileType.Png },
{ new byte[] { 0xff, 0xd8 }, FileType.Jpeg },
{ new byte[] { 0x1, 0, 0, 0 }, FileType.Emf }
{ new byte[] { 0x1, 0, 0, 0 }, FileType.Emf },
{ Encoding.UTF8.GetBytes("<?xml "), FileType.Xml }, // Xml so potentially Svg
};

private static readonly int MaxMagicBytesLength = imageFormatDecoders
Expand Down Expand Up @@ -83,6 +85,7 @@ public static Size GetDimensions(Stream stream)
case FileType.Jpeg: return DecodeJfif(reader);
case FileType.Png: return DecodePng(reader);
case FileType.Emf: return DecodeEmf(reader);
case FileType.Xml: return DecodeXml(stream);
default: return Size.Empty;
}
}
Expand Down Expand Up @@ -279,5 +282,27 @@ private static Size DecodeEmf(SequentialBinaryReader reader)

return new Size(widthInPixel, heightInPixel);
}
}

/// <summary>
/// Reads the size of an image detected as Xml, provided its root element is <c>svg</c>
/// and it declares both a valid <c>width</c> and <c>height</c> attribute.
/// </summary>
/// <param name="stream">The stream holding the xml content.</param>
/// <returns>The size in pixels, or <see cref="Size.Empty"/> when the content is not a Svg,
/// carries no usable size or cannot be parsed.</returns>
private static Size DecodeXml(Stream stream)
{
    // The content comes from arbitrary sources (downloaded resources, data URIs), so the
    // parser is configured explicitly instead of relying on the framework defaults:
    // - the internal DTD subset is still parsed, so entities declared inline keep working
    // - no resolver: an external DTD or entity is never fetched
    // - entity expansion is capped to guard against entity-expansion bombs
    var settings = new System.Xml.XmlReaderSettings {
        DtdProcessing = System.Xml.DtdProcessing.Parse,
        XmlResolver = null,
        MaxCharactersFromEntities = 1024 * 1024,
        CloseInput = false // the stream is owned by the caller
    };

    try
    {
        using var xmlReader = System.Xml.XmlReader.Create(stream, settings);

        // use local-name() to ignore any xml namespace
        var svgRoot = new XPathDocument(xmlReader)
            .CreateNavigator()
            .SelectSingleNode("/*[local-name() = 'svg']");
        if (svgRoot is null)
            return Size.Empty;

        var width = Unit.Parse(svgRoot.GetAttribute("width", string.Empty));
        var height = Unit.Parse(svgRoot.GetAttribute("height", string.Empty));
        return width.IsValid && height.IsValid
            ? new Size(width.ValueInPx, height.ValueInPx)
            : Size.Empty;
    }
    catch (SystemException)
    {
        // Deliberately best-effort: a malformed or unreadable document only means
        // that the size is unknown, it must not abort the conversion.
        return Size.Empty;
    }
}
}
20 changes: 11 additions & 9 deletions src/Html2OpenXml/IO/ImagePrefetcher.cs
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,6 @@ public ImagePrefetcher(T hostingPart, IWebRequest resourceLoader)
if (response?.Content == null)
return null;

HtmlImageInfo info = new HtmlImageInfo(src);
using (response)
{
// For requested url with no filename, we need to read the media mime type if provided
Expand All @@ -123,16 +122,19 @@ public ImagePrefetcher(T hostingPart, IWebRequest resourceLoader)
}

var ipart = hostingPart.AddImagePart(type);
Size originalSize;
using (var outputStream = ipart.GetStream(FileMode.Create))
{
response.Content.CopyTo(outputStream);

outputStream.Seek(0L, SeekOrigin.Begin);
info.Size = GetImageSize(outputStream);
originalSize = GetImageSize(outputStream);
}

info.ImagePartId = hostingPart.GetIdOfPart(ipart);
return info;
return new HtmlImageInfo(src, hostingPart.GetIdOfPart(ipart)) {
TypeInfo = type,
Size = originalSize
};
}
}

Expand All @@ -143,20 +145,20 @@ public ImagePrefetcher(T hostingPart, IWebRequest resourceLoader)
{
if (DataUri.TryCreate(src, out var dataUri))
{
Size size;
Size originalSize;
knownContentType.TryGetValue(dataUri!.Mime, out PartTypeInfo type);
var ipart = hostingPart.AddImagePart(type);
using (var outputStream = ipart.GetStream(FileMode.Create))
{
outputStream.Write(dataUri.Data, 0, dataUri.Data.Length);

outputStream.Seek(0L, SeekOrigin.Begin);
size = GetImageSize(outputStream);
originalSize = GetImageSize(outputStream);
}

return new HtmlImageInfo(src) {
ImagePartId = hostingPart.GetIdOfPart(ipart),
Size = size
return new HtmlImageInfo(src, hostingPart.GetIdOfPart(ipart)) {
TypeInfo = type,
Size = originalSize
};
}

Expand Down
11 changes: 8 additions & 3 deletions src/Html2OpenXml/Primitives/HtmlImageInfo.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ namespace HtmlToOpenXml;
/// <summary>
/// Represents an image and its metadata.
/// </summary>
sealed class HtmlImageInfo(string source)
sealed class HtmlImageInfo(string source, string partId)
{
/// <summary>
/// The URI identifying this cached image information.
Expand All @@ -26,12 +26,17 @@ sealed class HtmlImageInfo(string source)
/// <summary>
/// The Unique identifier of the ImagePart in the <see cref="MainDocumentPart"/>.
/// </summary>
public string? ImagePartId { get; set; }
public string ImagePartId { get; set; } = partId;

/// <summary>
/// Gets or sets the size of the image
/// Gets or sets the original size of the image.
/// </summary>
public Size Size { get; set; }

/// <summary>
/// Gets the content type of the image.
/// </summary>
public PartTypeInfo TypeInfo { get; set; }
}

/// <summary>
Expand Down
2 changes: 1 addition & 1 deletion test/HtmlToOpenXml.Tests/BodyTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ public async Task WithGoBackBookmark_ShouldBeAfterAppendedOutput()
Assert.That(goBackBookmark, Is.Not.Null);

HtmlConverter converter = new HtmlConverter(mainPart);
await converter.ParseHtml("<p>Placeholder</p>");
await converter.ParseBody("<p>Placeholder</p>");

Assert.That(mainPart.Document.Body!.LastChild, Is.TypeOf<SectionProperties>());
var paragrahs = mainPart.Document.Body!.Elements<Paragraph>();
Expand Down
2 changes: 2 additions & 0 deletions test/HtmlToOpenXml.Tests/ImageFormats/ImageHeaderTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ public void GuessFormat_ReturnsImageSize((string resourceName, Size expectedSize
yield return ("Resources.html2openxml.emf", new Size(100, 100));
// animated gif:
yield return ("Resources.stan.gif", new Size(252, 318));
yield return ("Resources.kiwi.svg", new Size(612, 502));
}

/// <summary>
Expand All @@ -53,6 +54,7 @@ public void PngSof2_ReturnsImageSize()
[TestCase("Resources.html2openxml.gif", ExpectedResult = ImageHeader.FileType.Gif)]
[TestCase("Resources.html2openxml.jpg", ExpectedResult = ImageHeader.FileType.Jpeg)]
[TestCase("Resources.html2openxml.png", ExpectedResult = ImageHeader.FileType.Png)]
[TestCase("Resources.kiwi.svg", ExpectedResult = ImageHeader.FileType.Xml)]
public ImageHeader.FileType GuessFormat_ReturnsFileType(string resourceName)
{
using var imageStream = ResourceHelper.GetStream(resourceName);
Expand Down
Loading

0 comments on commit 4a2d955

Please sign in to comment.