diff --git a/src/Mime-Detective/MimeTypes.cs b/src/Mime-Detective/MimeTypes.cs
index b012754..3840e06 100644
--- a/src/Mime-Detective/MimeTypes.cs
+++ b/src/Mime-Detective/MimeTypes.cs
@@ -59,7 +59,17 @@ public static class MimeTypes
public readonly static FileType MS_OFFICE = new FileType(new byte?[] { 0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1 }, "doc,ppt,xls", "application/octet-stream");
//application/xml text/xml
- public readonly static FileType XML = new FileType(new byte?[] { 0x72, 0x73, 0x69, 0x6F, 0x6E, 0x3D, 0x22, 0x31, 0x2E, 0x30, 0x22, 0x3F, 0x3E }, "xml,xul", "text/xml");
+ // Matches the tail of an XML declaration (the end of version="1.0"?>): r s i o n = " 1 . 0 " ? >
+ public readonly static FileType XML = new FileType(new byte?[] { 0x72, 0x73, 0x69, 0x6F, 0x6E, 0x3D, 0x22, 0x31, 0x2E, 0x30, 0x22, 0x3F, 0x3E }, "xml", "text/xml");
+
+ // XML declaration with no byte order mark (UTF-8 / ASCII-compatible bytes): < ? x m l (spc)
+ public readonly static FileType XML_NoBom = new FileType(new byte?[] { 0x3C, 0x3F, 0x78, 0x6D, 0x6C, 0x20, }, "xml", "application/xml");
+ // XML declaration preceded by the UTF-8 byte order mark: EF BB BF < ? x m l (spc)
+ public readonly static FileType XML_Utf8Bom = new FileType(new byte?[] { 0xEF, 0xBB, 0xBF, 0x3C, 0x3F, 0x78, 0x6D, 0x6C, 0x20, }, "xml", "application/xml");
+ // XML declaration in UCS-2 big endian: BOM bytes FE FF, then every character stored high byte first (00 xx): < ? x m l (spc)
+ public readonly static FileType XML_UCS2BE = new FileType(new byte?[] { 0xFE, 0xFF, 0x00, 0x3C, 0x00, 0x3F, 0x00, 0x78, 0x00, 0x6D, 0x00, 0x6C, 0x00, 0x20, }, "xml", "application/xml");
+ // XML declaration in UCS-2 little endian: BOM bytes FF FE, then every character stored low byte first (xx 00): < ? x m l (spc)
+ public readonly static FileType XML_UCS2LE = new FileType(new byte?[] { 0xFF, 0xFE, 0x3C, 0x00, 0x3F, 0x00, 0x78, 0x00, 0x6D, 0x00, 0x6C, 0x00, 0x20, 0x00, }, "xml", "application/xml");
//text files
public readonly static FileType TXT = new FileType(EmptyHeader, "txt", "text/plain");
@@ -214,7 +224,7 @@ EML is also used by Outlook Express and QuickMail.
public readonly static FileType ELF = new FileType(new byte?[] { 0x45, 0x6C, 0x66, 0x46, 0x69, 0x6C, 0x65, 0x00 }, "elf", "text/plain");
public static readonly FileType[] Types = new FileType[] { PDF, JPEG, ZIP, ZIP_EMPTY, RAR, RTF, PNG, GIF, DLL_EXE, MS_OFFICE,
- BMP, DLL_EXE, ZIP_7z, GZ_TGZ, TAR_ZH, TAR_ZV, OGG, ICO, XML, DWG, LIB_COFF, PST, PSD, BZ2,
+ BMP, DLL_EXE, ZIP_7z, GZ_TGZ, TAR_ZH, TAR_ZV, OGG, ICO, XML, XML_NoBom, XML_Utf8Bom, XML_UCS2BE, XML_UCS2LE, DWG, LIB_COFF, PST, PSD, BZ2,
AES, SKR, SKR_2, PKR, EML_FROM, ELF, TXT_UTF8, TXT_UTF16_BE, TXT_UTF16_LE, TXT_UTF32_BE, TXT_UTF32_LE,
Mp3ID3, Wav, Flac, MIDI,
Tiff, TiffLittleEndian, TiffBigEndian, TiffBig,
diff --git a/test/Mime-Detective.Tests/Data/Text/MindMap.NoBOM.smmx b/test/Mime-Detective.Tests/Data/Text/MindMap.NoBOM.smmx
new file mode 100644
index 0000000..303a9fb
--- /dev/null
+++ b/test/Mime-Detective.Tests/Data/Text/MindMap.NoBOM.smmx
@@ -0,0 +1,11 @@
+
+
+
+
+
+
+
+
+
+
+
diff --git a/test/Mime-Detective.Tests/Data/Text/MindMap.UCS2BE.WithBOM.smmx b/test/Mime-Detective.Tests/Data/Text/MindMap.UCS2BE.WithBOM.smmx
new file mode 100644
index 0000000..00d385b
Binary files /dev/null and b/test/Mime-Detective.Tests/Data/Text/MindMap.UCS2BE.WithBOM.smmx differ
diff --git a/test/Mime-Detective.Tests/Data/Text/MindMap.UCS2LE.WithBOM.smmx b/test/Mime-Detective.Tests/Data/Text/MindMap.UCS2LE.WithBOM.smmx
new file mode 100644
index 0000000..1e3b058
Binary files /dev/null and b/test/Mime-Detective.Tests/Data/Text/MindMap.UCS2LE.WithBOM.smmx differ
diff --git a/test/Mime-Detective.Tests/Data/Text/MindMap.WithBOM.smmx b/test/Mime-Detective.Tests/Data/Text/MindMap.WithBOM.smmx
new file mode 100644
index 0000000..a7850dc
--- /dev/null
+++ b/test/Mime-Detective.Tests/Data/Text/MindMap.WithBOM.smmx
@@ -0,0 +1,9 @@
+
+
+
+
+
+
+
+
+
diff --git a/test/Mime-Detective.Tests/Tests/Text/CommonFormats.cs b/test/Mime-Detective.Tests/Tests/Text/CommonFormats.cs
index e84e7c9..f28d229 100644
--- a/test/Mime-Detective.Tests/Tests/Text/CommonFormats.cs
+++ b/test/Mime-Detective.Tests/Tests/Text/CommonFormats.cs
@@ -24,5 +24,57 @@ public async Task IsTxt()
Assert.Equal(fileType.Extension, MimeTypes.TXT.Extension);
}
+
+ [Fact]
+ public async Task IsXml_UTF8_WithBOM()
+ {
+ // this XML file is encoded with: UTF-8
+ // this XML INCLUDES a Byte Order Mark (EF BB BF) to signal the encoding
+ var info = new FileInfo(TextPath + "MindMap.WithBOM.smmx");
+
+ var fileType = await info.GetFileTypeAsync();
+
+ Assert.Equal(MimeTypes.XML.Extension, fileType.Extension);
+ Assert.Equal("application/xml", fileType.Mime);
+ }
+
+ [Fact]
+ public async Task IsXml_UTF8_WithoutBOM()
+ {
+ // this XML file is encoded with: UTF-8
+ // this XML does NOT include a Byte Order Mark (EF BB BF) to signal the encoding
+ var info = new FileInfo(TextPath + "MindMap.NoBOM.smmx");
+
+ var fileType = await info.GetFileTypeAsync();
+
+ Assert.Equal(MimeTypes.XML.Extension, fileType.Extension);
+ Assert.Equal("application/xml", fileType.Mime);
+ }
+
+ [Fact]
+ public async Task IsXml_UCS2LE_WithBOM()
+ {
+ // this XML file is encoded with: UCS-2 Little Endian (UTF16)
+ // this XML INCLUDES a Byte Order Mark (U+FEFF, stored as bytes FF FE in little endian) to signal the encoding
+ var info = new FileInfo(TextPath + "MindMap.UCS2LE.WithBOM.smmx");
+
+ var fileType = await info.GetFileTypeAsync();
+
+ Assert.Equal(MimeTypes.XML.Extension, fileType.Extension);
+ Assert.Equal("application/xml", fileType.Mime);
+ }
+
+ [Fact]
+ public async Task IsXml_UCS2BE_WithBOM()
+ {
+ // this XML file is encoded with: UCS-2 Big Endian (UTF16)
+ // this XML INCLUDES a Byte Order Mark (U+FEFF, stored as bytes FE FF in big endian) to signal the encoding
+ var info = new FileInfo(TextPath + "MindMap.UCS2BE.WithBOM.smmx");
+
+ var fileType = await info.GetFileTypeAsync();
+
+ Assert.Equal(MimeTypes.XML.Extension, fileType.Extension);
+ Assert.Equal("application/xml", fileType.Mime);
+ }
}
}