forked from ofthelit/Mime-Detective
-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Mime-Detective 0.0.6-beta1 includes numerous changes and improvements: an analyzer abstraction (IFileAnalyzer) for allowing extensibility; static extension method extensibility through the static MimeAnalyzer class; various improvements and additions to the underlying file header definitions; significantly faster file header matching algorithms; seekable streams are now reset to position 0 by default for extension methods that accept streams; a secondary analyzer for MS Document Type matching the MSDoc header (aka MS_Office); more test coverage. Tries (prefix trees) and Analyzers: this release now includes 3 different file header matching implementations. ArrayBasedTrie -- fastest implementation by far -- consumes the most memory. DictionaryBasedTrie -- significantly slower than ArrayBasedTrie -- significantly faster than LinearCountingAnalyzer -- consumes significantly less memory than ArrayBasedTrie -- this is the default. LinearCountingAnalyzer -- a simple linear algorithm that iterates through a list -- significantly slower than all other implementations -- consumes the least memory. The default header matching algorithm is now the DictionaryBasedTrie, constructed from MimeType.Types, and can be manipulated via the static MimeAnalyzer.PrimaryAnalyzer property. The linear algorithm now has the same behavior as the tries: it will try to find the highest completely matching definition.
- Loading branch information
1 parent
de3faaf
commit 06bc12d
Showing
105 changed files
with
3,009 additions
and
1,292 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,187 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Text; | ||
using System.Linq; | ||
|
||
namespace MimeDetective.Analyzers | ||
{ | ||
public sealed class ArrayBasedTrie : IFileAnalyzer | ||
{ | ||
public const int NullStandInValue = 256; | ||
public const int MaxNodeSize = 257; | ||
|
||
private List<OffsetNode> Nodes = new List<OffsetNode>(10); | ||
|
||
/// <summary> | ||
/// Constructs an empty ArrayBasedTrie, <see cref="Insert(FileType)"/> to add definitions | ||
/// </summary> | ||
public ArrayBasedTrie() | ||
{ | ||
} | ||
|
||
/// <summary> | ||
/// Constructs an ArrayBasedTrie from an Enumerable of FileTypes, <see cref="Insert(FileType)"/> to add more definitions | ||
/// </summary> | ||
/// <param name="types"></param> | ||
public ArrayBasedTrie(IEnumerable<FileType> types) | ||
{ | ||
if (types is null) | ||
throw new ArgumentNullException(nameof(types)); | ||
|
||
foreach (var type in types) | ||
{ | ||
if ((object)type != null) | ||
Insert(type); | ||
} | ||
|
||
Nodes = Nodes.OrderBy(x => x.Offset).ToList(); | ||
} | ||
|
||
public FileType Search(in ReadResult readResult) | ||
{ | ||
FileType match = null; | ||
|
||
//iterate through offset nodes | ||
for (int offsetNodeIndex = 0; offsetNodeIndex < Nodes.Count; offsetNodeIndex++) | ||
{ | ||
//get offset node | ||
var offsetNode = Nodes[offsetNodeIndex]; | ||
|
||
int i = offsetNode.Offset; | ||
byte value = readResult.Array[i]; | ||
|
||
var node = offsetNode.Children[value]; | ||
|
||
if (node is null) | ||
{ | ||
node = offsetNode.Children[NullStandInValue]; | ||
|
||
if (node is null) | ||
break; | ||
} | ||
|
||
if ((object)node.Record != null) | ||
match = node.Record; | ||
|
||
i++; | ||
|
||
//iterate through the current trie | ||
for (; i < readResult.ReadLength; i++) | ||
{ | ||
value = readResult.Array[i]; | ||
|
||
var prevNode = node; | ||
node = node.Children[value]; | ||
|
||
if (node is null) | ||
{ | ||
node = prevNode.Children[NullStandInValue]; | ||
|
||
if (node is null) | ||
break; | ||
} | ||
|
||
if ((object)node.Record != null) | ||
match = node.Record; | ||
} | ||
|
||
if ((object)match != null) | ||
break; | ||
} | ||
|
||
return match; | ||
} | ||
|
||
public void Insert(FileType type) | ||
{ | ||
if (type is null) | ||
throw new ArgumentNullException(nameof(type)); | ||
|
||
OffsetNode match = null; | ||
|
||
foreach (var offsetNode in Nodes) | ||
{ | ||
if (offsetNode.Offset == type.HeaderOffset) | ||
{ | ||
match = offsetNode; | ||
break; | ||
} | ||
} | ||
|
||
if (match is null) | ||
{ | ||
match = new OffsetNode(type.HeaderOffset); | ||
Nodes.Add(match); | ||
} | ||
|
||
match.Insert(type); | ||
} | ||
|
||
private sealed class OffsetNode | ||
{ | ||
public readonly ushort Offset; | ||
public readonly Node[] Children; | ||
|
||
public OffsetNode(ushort offset) | ||
{ | ||
if (offset > (MimeTypes.MaxHeaderSize - 1)) | ||
throw new ArgumentException("Offset cannot be greater than MaxHeaderSize - 1"); | ||
|
||
Offset = offset; | ||
Children = new Node[MaxNodeSize]; | ||
} | ||
|
||
public void Insert(FileType type) | ||
{ | ||
int i = 0; | ||
byte? value = type.Header[i]; | ||
int arrayPos = value ?? NullStandInValue; | ||
|
||
var node = Children[arrayPos]; | ||
|
||
if (node is null) | ||
{ | ||
node = new Node(value); | ||
Children[arrayPos] = node; | ||
} | ||
|
||
i++; | ||
|
||
for (; i < type.Header.Length; i++) | ||
{ | ||
value = type.Header[i]; | ||
arrayPos = value ?? NullStandInValue; | ||
var prevNode = node; | ||
node = node.Children[arrayPos]; | ||
|
||
if (node is null) | ||
{ | ||
var newNode = new Node(value); | ||
|
||
if (i == type.Header.Length - 1) | ||
newNode.Record = type; | ||
|
||
node = prevNode.Children[arrayPos] = newNode; | ||
} | ||
} | ||
} | ||
} | ||
|
||
private sealed class Node | ||
{ | ||
public readonly Node[] Children; | ||
|
||
//if complete node then this not null | ||
public FileType Record; | ||
|
||
public readonly byte? Value; | ||
|
||
public Node(byte? value) | ||
{ | ||
Value = value; | ||
Children = new Node[MaxNodeSize]; | ||
Record = null; | ||
} | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Text; | ||
|
||
namespace MimeDetective.Analyzers | ||
{ | ||
public sealed class DictionaryBasedTrie : IFileAnalyzer | ||
{ | ||
private const ushort NullStandInValue = 256; | ||
|
||
//root dictionary contains the nodes with offset values | ||
private Dictionary<ushort, Node> Nodes { get; } = new Dictionary<ushort, Node>(); | ||
|
||
/// <summary> | ||
/// Constructs an empty DictionaryBasedTrie | ||
/// </summary> | ||
public DictionaryBasedTrie() | ||
{ | ||
|
||
} | ||
|
||
/// <summary> | ||
/// Constructs a DictionaryBasedTrie from an Enumerable of FileTypes | ||
/// </summary> | ||
/// <param name="types"></param> | ||
public DictionaryBasedTrie(IEnumerable<FileType> types) | ||
{ | ||
if (types is null) | ||
throw new ArgumentNullException(nameof(types)); | ||
|
||
foreach (var type in types) | ||
{ | ||
Insert(type); | ||
} | ||
} | ||
|
||
public FileType Search(in ReadResult readResult) | ||
{ | ||
FileType match = null; | ||
var enumerator = Nodes.GetEnumerator(); | ||
|
||
while (match is null && enumerator.MoveNext()) | ||
{ | ||
Node node = enumerator.Current.Value; | ||
|
||
for (int i = node.Value; i < readResult.ReadLength; i++) | ||
{ | ||
Node prevNode = node; | ||
|
||
if (!prevNode.Children.TryGetValue(readResult.Array[i], out node) | ||
&& !prevNode.Children.TryGetValue(NullStandInValue, out node)) | ||
break; | ||
|
||
if ((object)node.Record != null) | ||
match = node.Record; | ||
} | ||
|
||
if ((object)match != null) | ||
break; | ||
} | ||
|
||
return match; | ||
} | ||
|
||
public void Insert(FileType type) | ||
{ | ||
if (type is null) | ||
throw new ArgumentNullException(nameof(type)); | ||
|
||
if (!Nodes.TryGetValue(type.HeaderOffset, out var offsetNode)) | ||
{ | ||
offsetNode = new Node(type.HeaderOffset); | ||
Nodes.Add(type.HeaderOffset, offsetNode); | ||
} | ||
|
||
offsetNode.Insert(type); | ||
} | ||
|
||
private sealed class Node | ||
{ | ||
public readonly Dictionary<ushort, Node> Children = new Dictionary<ushort, Node>(); | ||
|
||
//if complete node then this not null | ||
public FileType Record; | ||
|
||
public readonly ushort Value; | ||
|
||
public Node(ushort value) | ||
{ | ||
Value = value; | ||
} | ||
|
||
public void Insert(FileType type) | ||
{ | ||
int i = 0; | ||
ushort value = type.Header[i] ?? NullStandInValue; | ||
|
||
if (!Children.TryGetValue(value, out Node node)) | ||
{ | ||
node = new Node(value); | ||
Children.Add(value, node); | ||
} | ||
|
||
i++; | ||
|
||
for (; i < type.Header.Length; i++) | ||
{ | ||
value = type.Header[i] ?? NullStandInValue; | ||
|
||
if (!node.Children.ContainsKey(value)) | ||
{ | ||
Node newNode = new Node(value); | ||
node.Children.Add(value, newNode); | ||
} | ||
|
||
node = node.Children[value]; | ||
} | ||
|
||
node.Record = type; | ||
} | ||
} | ||
} | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Text; | ||
using System.Runtime.CompilerServices; | ||
|
||
namespace MimeDetective.Analyzers | ||
{ | ||
public interface IReadOnlyFileAnalyzer | ||
{ | ||
FileType Search(in ReadResult readResult); | ||
} | ||
|
||
public interface IFileAnalyzer : IReadOnlyFileAnalyzer | ||
{ | ||
void Insert(FileType fileType); | ||
} | ||
} |
Oops, something went wrong.