diff --git a/src/Mime-Detective/MimeTypes.cs b/src/Mime-Detective/MimeTypes.cs index b012754..3840e06 100644 --- a/src/Mime-Detective/MimeTypes.cs +++ b/src/Mime-Detective/MimeTypes.cs @@ -59,7 +59,17 @@ public static class MimeTypes public readonly static FileType MS_OFFICE = new FileType(new byte?[] { 0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1 }, "doc,ppt,xls", "application/octet-stream"); //application/xml text/xml - public readonly static FileType XML = new FileType(new byte?[] { 0x72, 0x73, 0x69, 0x6F, 0x6E, 0x3D, 0x22, 0x31, 0x2E, 0x30, 0x22, 0x3F, 0x3E }, "xml,xul", "text/xml"); + // ASCII bytes of: r s i o n = " 1 . 0 " ? > + public readonly static FileType XML = new FileType(new byte?[] { 0x72, 0x73, 0x69, 0x6F, 0x6E, 0x3D, 0x22, 0x31, 0x2E, 0x30, 0x22, 0x3F, 0x3E }, "xml", "text/xml"); + + // XML, UTF-8 without a byte order mark: < ? x m l (spc) + public readonly static FileType XML_NoBom = new FileType(new byte?[] { 0x3C, 0x3F, 0x78, 0x6D, 0x6C, 0x20, }, "xml", "application/xml"); + // XML, UTF-8 with byte order mark EF BB BF, then: < ? x m l (spc) + public readonly static FileType XML_Utf8Bom = new FileType(new byte?[] { 0xEF, 0xBB, 0xBF, 0x3C, 0x3F, 0x78, 0x6D, 0x6C, 0x20, }, "xml", "application/xml"); + // XML, UCS-2 Big Endian: byte order mark FE FF, then < ? x m l (spc) with each character stored as 00 xx + public readonly static FileType XML_UCS2BE = new FileType(new byte?[] { 0xFE, 0xFF, 0x00, 0x3C, 0x00, 0x3F, 0x00, 0x78, 0x00, 0x6D, 0x00, 0x6C, 0x00, 0x20, }, "xml", "application/xml"); + // XML, UCS-2 Little Endian: byte order mark FF FE, then < ? x m l (spc) with each character stored as xx 00 + public readonly static FileType XML_UCS2LE = new FileType(new byte?[] { 0xFF, 0xFE, 0x3C, 0x00, 0x3F, 0x00, 0x78, 0x00, 0x6D, 0x00, 0x6C, 0x00, 0x20, 0x00, }, "xml", "application/xml"); //text files public readonly static FileType TXT = new FileType(EmptyHeader, "txt", "text/plain"); @@ -214,7 +224,7 @@ EML is also used by Outlook Express and QuickMail. 
public readonly static FileType ELF = new FileType(new byte?[] { 0x45, 0x6C, 0x66, 0x46, 0x69, 0x6C, 0x65, 0x00 }, "elf", "text/plain"); public static readonly FileType[] Types = new FileType[] { PDF, JPEG, ZIP, ZIP_EMPTY, RAR, RTF, PNG, GIF, DLL_EXE, MS_OFFICE, - BMP, DLL_EXE, ZIP_7z, GZ_TGZ, TAR_ZH, TAR_ZV, OGG, ICO, XML, DWG, LIB_COFF, PST, PSD, BZ2, + BMP, DLL_EXE, ZIP_7z, GZ_TGZ, TAR_ZH, TAR_ZV, OGG, ICO, XML, XML_NoBom, XML_Utf8Bom, XML_UCS2BE, XML_UCS2LE, /* NOTE(review): DLL_EXE is listed twice in this array (before MS_OFFICE and again after BMP). The XML_* entries are placed ahead of the TXT_* entries; presumably lookup is first-match so XML wins over plain text - confirm against the matcher. */ DWG, LIB_COFF, PST, PSD, BZ2, AES, SKR, SKR_2, PKR, EML_FROM, ELF, TXT_UTF8, TXT_UTF16_BE, TXT_UTF16_LE, TXT_UTF32_BE, TXT_UTF32_LE, Mp3ID3, Wav, Flac, MIDI, Tiff, TiffLittleEndian, TiffBigEndian, TiffBig, diff --git a/test/Mime-Detective.Tests/Data/Text/MindMap.NoBOM.smmx b/test/Mime-Detective.Tests/Data/Text/MindMap.NoBOM.smmx new file mode 100644 index 0000000..303a9fb --- /dev/null +++ b/test/Mime-Detective.Tests/Data/Text/MindMap.NoBOM.smmx @@ -0,0 +1,11 @@ + + + + + + + + + + + diff --git a/test/Mime-Detective.Tests/Data/Text/MindMap.UCS2BE.WithBOM.smmx b/test/Mime-Detective.Tests/Data/Text/MindMap.UCS2BE.WithBOM.smmx new file mode 100644 index 0000000..00d385b Binary files /dev/null and b/test/Mime-Detective.Tests/Data/Text/MindMap.UCS2BE.WithBOM.smmx differ diff --git a/test/Mime-Detective.Tests/Data/Text/MindMap.UCS2LE.WithBOM.smmx b/test/Mime-Detective.Tests/Data/Text/MindMap.UCS2LE.WithBOM.smmx new file mode 100644 index 0000000..1e3b058 Binary files /dev/null and b/test/Mime-Detective.Tests/Data/Text/MindMap.UCS2LE.WithBOM.smmx differ diff --git a/test/Mime-Detective.Tests/Data/Text/MindMap.WithBOM.smmx b/test/Mime-Detective.Tests/Data/Text/MindMap.WithBOM.smmx new file mode 100644 index 0000000..a7850dc --- /dev/null +++ b/test/Mime-Detective.Tests/Data/Text/MindMap.WithBOM.smmx @@ -0,0 +1,9 @@ + + + + + + + + + diff --git a/test/Mime-Detective.Tests/Tests/Text/CommonFormats.cs b/test/Mime-Detective.Tests/Tests/Text/CommonFormats.cs index e84e7c9..f28d229 100644 --- 
a/test/Mime-Detective.Tests/Tests/Text/CommonFormats.cs +++ b/test/Mime-Detective.Tests/Tests/Text/CommonFormats.cs @@ -24,5 +24,57 @@ public async Task IsTxt() Assert.Equal(fileType.Extension, MimeTypes.TXT.Extension); } + + [Fact] + public async Task IsXml_UTF8_WithoutBOM() + { + // this XML file is encoded with: UTF-8 + // this XML does NOT include a Byte Order Mark (EF BB BF) to signal the encoding + var info = new FileInfo(TextPath + "MindMap.NoBOM.smmx"); + + var fileType = await info.GetFileTypeAsync(); + + Assert.Equal(MimeTypes.XML.Extension, fileType.Extension); + Assert.Equal("application/xml", fileType.Mime); + } + + [Fact] + public async Task IsXml_UTF8_WithBOM() + { + // this XML file is encoded with: UTF-8 + // this XML INCLUDES a Byte Order Mark (EF BB BF) to signal the encoding + var info = new FileInfo(TextPath + "MindMap.WithBOM.smmx"); + + var fileType = await info.GetFileTypeAsync(); + + Assert.Equal(MimeTypes.XML.Extension, fileType.Extension); + Assert.Equal("application/xml", fileType.Mime); + } + + [Fact] + public async Task IsXml_UCS2LE_WithBOM() + { + // this XML file is encoded with: UCS-2 Little Endian (UTF16) + // this XML INCLUDES a Byte Order Mark (FF FE) to signal the encoding + var info = new FileInfo(TextPath + "MindMap.UCS2LE.WithBOM.smmx"); + + var fileType = await info.GetFileTypeAsync(); + + Assert.Equal(MimeTypes.XML.Extension, fileType.Extension); + Assert.Equal("application/xml", fileType.Mime); + } + + [Fact] + public async Task IsXml_UCS2BE_WithBOM() + { + // this XML file is encoded with: UCS-2 Big Endian (UTF16) + // this XML INCLUDES a Byte Order Mark (FE FF) to signal the encoding + var info = new FileInfo(TextPath + "MindMap.UCS2BE.WithBOM.smmx"); + + var fileType = await info.GetFileTypeAsync(); + + Assert.Equal(MimeTypes.XML.Extension, fileType.Extension); + Assert.Equal("application/xml", fileType.Mime); + } } }