Skip to content

Commit

Permalink
Enhanced support for XML files detection (#26)
Browse files Browse the repository at this point in the history
  • Loading branch information
sandrock authored and clarkis117 committed Nov 17, 2018
1 parent f643dee commit aa397aa
Show file tree
Hide file tree
Showing 6 changed files with 84 additions and 2 deletions.
14 changes: 12 additions & 2 deletions src/Mime-Detective/MimeTypes.cs
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,17 @@ public static class MimeTypes
public readonly static FileType MS_OFFICE = new FileType(new byte?[] { 0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1 }, "doc,ppt,xls", "application/octet-stream");

//application/xml text/xml
public readonly static FileType XML = new FileType(new byte?[] { 0x72, 0x73, 0x69, 0x6F, 0x6E, 0x3D, 0x22, 0x31, 0x2E, 0x30, 0x22, 0x3F, 0x3E }, "xml,xul", "text/xml");
// r s i o n = " 1 . 0 " ? >
public readonly static FileType XML = new FileType(new byte?[] { 0x72, 0x73, 0x69, 0x6F, 0x6E, 0x3D, 0x22, 0x31, 0x2E, 0x30, 0x22, 0x3F, 0x3E }, "xml", "text/xml");

// XML declaration signatures. Each header is an optional byte order mark (BOM)
// followed by the characters "<?xml " in the named encoding.

// UTF-8 without a byte order mark:                        <     ?     x     m     l   (spc)
public readonly static FileType XML_NoBom = new FileType(new byte?[] { 0x3C, 0x3F, 0x78, 0x6D, 0x6C, 0x20 }, "xml", "application/xml");
// UTF-8 with byte order mark (EF BB BF):                    BOM: EF BB BF       <     ?     x     m     l   (spc)
public readonly static FileType XML_Utf8Bom = new FileType(new byte?[] { 0xEF, 0xBB, 0xBF, 0x3C, 0x3F, 0x78, 0x6D, 0x6C, 0x20 }, "xml", "application/xml");
// UCS-2 / UTF-16 big endian: BOM is FE FF on disk and the high (zero) byte of each character comes first.
public readonly static FileType XML_UCS2BE = new FileType(new byte?[] { 0xFE, 0xFF, 0x00, 0x3C, 0x00, 0x3F, 0x00, 0x78, 0x00, 0x6D, 0x00, 0x6C, 0x00, 0x20 }, "xml", "application/xml");
// UCS-2 / UTF-16 little endian: BOM is FF FE on disk and the low byte of each character comes first.
public readonly static FileType XML_UCS2LE = new FileType(new byte?[] { 0xFF, 0xFE, 0x3C, 0x00, 0x3F, 0x00, 0x78, 0x00, 0x6D, 0x00, 0x6C, 0x00, 0x20, 0x00 }, "xml", "application/xml");

//text files
public readonly static FileType TXT = new FileType(EmptyHeader, "txt", "text/plain");
Expand Down Expand Up @@ -214,7 +224,7 @@ EML is also used by Outlook Express and QuickMail.
public readonly static FileType ELF = new FileType(new byte?[] { 0x45, 0x6C, 0x66, 0x46, 0x69, 0x6C, 0x65, 0x00 }, "elf", "text/plain");

public static readonly FileType[] Types = new FileType[] { PDF, JPEG, ZIP, ZIP_EMPTY, RAR, RTF, PNG, GIF, DLL_EXE, MS_OFFICE,
BMP, DLL_EXE, ZIP_7z, GZ_TGZ, TAR_ZH, TAR_ZV, OGG, ICO, XML, DWG, LIB_COFF, PST, PSD, BZ2,
BMP, DLL_EXE, ZIP_7z, GZ_TGZ, TAR_ZH, TAR_ZV, OGG, ICO, XML, XML_NoBom, XML_Utf8Bom, XML_UCS2BE, XML_UCS2LE, DWG, LIB_COFF, PST, PSD, BZ2,
AES, SKR, SKR_2, PKR, EML_FROM, ELF, TXT_UTF8, TXT_UTF16_BE, TXT_UTF16_LE, TXT_UTF32_BE, TXT_UTF32_LE,
Mp3ID3, Wav, Flac, MIDI,
Tiff, TiffLittleEndian, TiffBigEndian, TiffBig,
Expand Down
11 changes: 11 additions & 0 deletions test/Mime-Detective.Tests/Data/Text/MindMap.NoBOM.smmx
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE simplemind-mindmaps>
<simplemind-mindmaps generator="SimpleMindWin32" gen-version="1.20.2" doc-version="3">
<mindmap>
<meta>
</meta>
<topics>
</topics>
<relations/>
</mindmap>
</simplemind-mindmaps>
Binary file not shown.
Binary file not shown.
9 changes: 9 additions & 0 deletions test/Mime-Detective.Tests/Data/Text/MindMap.WithBOM.smmx
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE simplemind-mindmaps>
<simplemind-mindmaps generator="SimpleMindWin32" gen-version="1.20.2" doc-version="3">
<mindmap>
<topics>
</topics>
<relations/>
</mindmap>
</simplemind-mindmaps>
52 changes: 52 additions & 0 deletions test/Mime-Detective.Tests/Tests/Text/CommonFormats.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,57 @@ public async Task IsTxt()

Assert.Equal(fileType.Extension, MimeTypes.TXT.Extension);
}

/// <summary>
/// Checks that a UTF-8 XML fixture saved WITH a byte order mark is reported
/// with the XML extension and the "application/xml" mime type.
/// </summary>
[Fact]
public async Task IsXml_UTF8_WithBOM()
{
    // this XML file is encoded with: UTF-8
    // per its name, this fixture INCLUDES a Byte Order Mark (EF BB BF) to signal the encoding
    var info = new FileInfo(TextPath + "MindMap.WithBOM.smmx");

    var fileType = await info.GetFileTypeAsync();

    // every XML signature shares the "xml" extension, so comparing against MimeTypes.XML is sufficient
    Assert.Equal(MimeTypes.XML.Extension, fileType.Extension);
    Assert.Equal("application/xml", fileType.Mime);
}

/// <summary>
/// Checks that a UTF-8 XML fixture saved WITHOUT a byte order mark is reported
/// with the XML extension and the "application/xml" mime type.
/// </summary>
[Fact]
public async Task IsXml_UTF8_WithoutBOM()
{
    // this XML file is encoded with: UTF-8
    // per its name, this fixture does NOT include a Byte Order Mark (EF BB BF); it starts directly with "<?xml "
    var info = new FileInfo(TextPath + "MindMap.NoBOM.smmx");

    var fileType = await info.GetFileTypeAsync();

    // every XML signature shares the "xml" extension, so comparing against MimeTypes.XML is sufficient
    Assert.Equal(MimeTypes.XML.Extension, fileType.Extension);
    Assert.Equal("application/xml", fileType.Mime);
}

/// <summary>
/// Checks that the UCS-2 little endian XML fixture is reported with the XML
/// extension and the "application/xml" mime type.
/// </summary>
[Fact]
public async Task IsXml_UCS2LE_WithBOM()
{
    // Fixture (per its name): UCS-2 Little Endian (UTF-16) with a byte order mark.
    // The BOM code point U+FEFF is stored as the bytes FF FE in little endian.
    var fixturePath = TextPath + "MindMap.UCS2LE.WithBOM.smmx";
    var xmlFile = new FileInfo(fixturePath);

    var detected = await xmlFile.GetFileTypeAsync();

    Assert.Equal(MimeTypes.XML.Extension, detected.Extension);
    Assert.Equal("application/xml", detected.Mime);
}

/// <summary>
/// Checks that the UCS-2 big endian XML fixture is reported with the XML
/// extension and the "application/xml" mime type.
/// </summary>
[Fact]
public async Task IsXml_UCS2BE_WithBOM()
{
    // Fixture (per its name): UCS-2 Big Endian (UTF-16) with a byte order mark.
    // The BOM code point U+FEFF is stored as the bytes FE FF in big endian.
    var fixturePath = TextPath + "MindMap.UCS2BE.WithBOM.smmx";
    var xmlFile = new FileInfo(fixturePath);

    var detected = await xmlFile.GetFileTypeAsync();

    Assert.Equal(MimeTypes.XML.Extension, detected.Extension);
    Assert.Equal("application/xml", detected.Mime);
}
}
}

0 comments on commit aa397aa

Please sign in to comment.