Skip to content

Commit

Permalink
0.0.6-beta1 (#15)
Browse files Browse the repository at this point in the history
Mime-Detective 0.0.6-beta1 includes numerous changes and improvements.
Analyzer Abstraction (IFileAnalyzer) for allowing extensibility
Static extension method extensibility through the static MimeAnalyzer class
Various improvements and additions to the underlying file header definitions
Significantly faster file header matching algorithms
Seekable Streams are now reset to position 0 by default for extension methods that accept streams
Secondary Analyzer for MS Document Type matching the MSDoc header (aka MS_Office)
More test coverage
Tries (prefix trees) and Analyzers
This release now includes 3 different file header matching implementations:
ArrayBasedTrie
-- Fastest implementation by far
-- Consumes the most memory
DictionaryBasedTrie
-- Significantly slower than ArrayBasedTrie
-- Significantly faster than LinearCountingAnalyzer
-- Consumes significantly less memory than ArrayBasedTrie
-- This is the default
LinearCountingAnalyzer
-- A simple linear Algorithm, iterates through a list
-- Significantly slower than all other implementations
-- Consumes the least memory
The default header matching algorithm is now the DictionaryBasedTrie, constructed from MimeType.Types; it can be changed via the static MimeAnalyzer.PrimaryAnalyzer property. The linear algorithm now has the same behavior as the tries: it will try to find the highest completely matching definition.
  • Loading branch information
clarkis117 authored Mar 17, 2018
1 parent de3faaf commit 06bc12d
Show file tree
Hide file tree
Showing 105 changed files with 3,009 additions and 1,292 deletions.
7 changes: 5 additions & 2 deletions Mime-Detective.sln
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 15
VisualStudioVersion = 15.0.26430.13
VisualStudioVersion = 15.0.27130.2027
MinimumVisualStudioVersion = 10.0.40219.1
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{098944FB-C1C9-48BE-AA37-CD3C5C336A84}"
ProjectSection(SolutionItems) = preProject
Expand All @@ -22,7 +22,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "test", "test", "{A50202E7-0
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Mime-Detective", "src\Mime-Detective\Mime-Detective.csproj", "{40608F32-BF6E-4DE4-85AE-EF71C69EF18D}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Mime-Detective.Benchmarks", "test\Mime-Detective.Benchmarks\Mime-Detective.Benchmarks.csproj", "{7F622459-3B42-4393-A08D-BEB47432628A}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Mime-Detective.Benchmarks", "test\Mime-Detective.Benchmarks\Mime-Detective.Benchmarks.csproj", "{7F622459-3B42-4393-A08D-BEB47432628A}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Expand Down Expand Up @@ -51,4 +51,7 @@ Global
{40608F32-BF6E-4DE4-85AE-EF71C69EF18D} = {17C4E0DE-B863-4A81-B755-62E663D041F1}
{7F622459-3B42-4393-A08D-BEB47432628A} = {A50202E7-0386-4EB3-B09C-00EFCAE360F7}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {D55424FE-E903-489F-8709-BC2A5802AB45}
EndGlobalSection
EndGlobal
187 changes: 187 additions & 0 deletions src/Mime-Detective/Analyzers/ArrayBasedTrie.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
using System;
using System.Collections.Generic;
using System.Text;
using System.Linq;

namespace MimeDetective.Analyzers
{
/// <summary>
/// File header analyzer backed by array-indexed tries, one trie per header offset.
/// Every trie node owns a <see cref="MaxNodeSize"/>-slot child array: slots 0-255 are
/// indexed by the header byte, slot <see cref="NullStandInValue"/> holds the child for a
/// null ("any byte") header entry.
/// </summary>
public sealed class ArrayBasedTrie : IFileAnalyzer
{
    /// <summary>Child-array slot used for null (wildcard) header entries.</summary>
    public const int NullStandInValue = 256;

    /// <summary>Length of every child array: 256 byte values plus the wildcard slot.</summary>
    public const int MaxNodeSize = 257;

    //one root per distinct header offset, kept sorted by ascending offset
    private readonly List<OffsetNode> Nodes = new List<OffsetNode>(10);

    /// <summary>
    /// Constructs an empty ArrayBasedTrie, <see cref="Insert(FileType)"/> to add definitions
    /// </summary>
    public ArrayBasedTrie()
    {
    }

    /// <summary>
    /// Constructs an ArrayBasedTrie from an Enumerable of FileTypes, <see cref="Insert(FileType)"/> to add more definitions
    /// </summary>
    /// <param name="types">Definitions to insert; null elements are skipped.</param>
    /// <exception cref="ArgumentNullException"><paramref name="types"/> is null.</exception>
    public ArrayBasedTrie(IEnumerable<FileType> types)
    {
        if (types is null)
            throw new ArgumentNullException(nameof(types));

        //Insert keeps Nodes ordered by offset, so no separate sort pass is needed
        foreach (var type in types)
        {
            if ((object)type != null)
                Insert(type);
        }
    }

    /// <summary>
    /// Walks the trie of each offset (ascending) over the bytes that were read and returns
    /// the record of the deepest completed definition of the first offset that yields one.
    /// </summary>
    /// <param name="readResult">Buffer and the number of valid bytes in it.</param>
    /// <returns>The matching <see cref="FileType"/>, or null when nothing matches.</returns>
    public FileType Search(in ReadResult readResult)
    {
        FileType match = null;

        //iterate through offset nodes
        for (int offsetNodeIndex = 0; offsetNodeIndex < Nodes.Count; offsetNodeIndex++)
        {
            OffsetNode offsetNode = Nodes[offsetNodeIndex];
            Node[] children = offsetNode.Children;

            //only bytes below ReadLength are valid; an offset at or past it is skipped
            for (int i = offsetNode.Offset; i < readResult.ReadLength; i++)
            {
                //an exact byte child wins; the wildcard slot is only a fallback
                Node node = children[readResult.Array[i]] ?? children[NullStandInValue];

                //dead end for this offset only; the remaining offsets are still tried
                if (node is null)
                    break;

                if ((object)node.Record != null)
                    match = node.Record;

                children = node.Children;
            }

            if ((object)match != null)
                break;
        }

        return match;
    }

    /// <summary>
    /// Adds a definition to the trie of its header offset, creating that trie if needed.
    /// </summary>
    /// <param name="type">Definition to add.</param>
    /// <exception cref="ArgumentNullException"><paramref name="type"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// The header is null or empty, or the header offset is rejected by the offset node.
    /// </exception>
    public void Insert(FileType type)
    {
        if (type is null)
            throw new ArgumentNullException(nameof(type));

        if (type.Header is null || type.Header.Length == 0)
            throw new ArgumentException("FileType must have a non-empty Header", nameof(type));

        OffsetNode match = null;
        int position = 0;

        //find the node for this offset, or the position that keeps Nodes sorted
        for (; position < Nodes.Count; position++)
        {
            OffsetNode candidate = Nodes[position];

            if (candidate.Offset == type.HeaderOffset)
            {
                match = candidate;
                break;
            }

            if (candidate.Offset > type.HeaderOffset)
                break;
        }

        if (match is null)
        {
            match = new OffsetNode(type.HeaderOffset);
            Nodes.Insert(position, match);
        }

        match.Insert(type);
    }

    /// <summary>Root of the trie holding every definition that starts at one header offset.</summary>
    private sealed class OffsetNode
    {
        public readonly ushort Offset;
        public readonly Node[] Children;

        public OffsetNode(ushort offset)
        {
            if (offset > (MimeTypes.MaxHeaderSize - 1))
                throw new ArgumentException("Offset cannot be greater than MaxHeaderSize - 1");

            Offset = offset;
            Children = new Node[MaxNodeSize];
        }

        /// <summary>
        /// Creates any missing nodes along the header's path and marks the last node as complete.
        /// </summary>
        public void Insert(FileType type)
        {
            Node[] children = Children;
            Node node = null;

            for (int i = 0; i < type.Header.Length; i++)
            {
                byte? value = type.Header[i];
                int arrayPos = value ?? NullStandInValue;

                node = children[arrayPos];

                if (node is null)
                {
                    node = new Node(value);
                    children[arrayPos] = node;
                }

                children = node.Children;
            }

            //Mark the terminal node even when it already existed (single-byte header, or a
            //header that is a prefix of an earlier one). A record already stored for an
            //identical header is kept, so the first definition inserted still wins.
            if (node != null && node.Record is null)
                node.Record = type;
        }
    }

    /// <summary>Trie node for one header position.</summary>
    private sealed class Node
    {
        public readonly Node[] Children;

        //if complete node then this not null
        public FileType Record;

        //header entry this node was created for; null for a wildcard entry
        public readonly byte? Value;

        public Node(byte? value)
        {
            Value = value;
            Children = new Node[MaxNodeSize];
            Record = null;
        }
    }
}
}
124 changes: 124 additions & 0 deletions src/Mime-Detective/Analyzers/DictionaryBasedTrie.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
using System;
using System.Collections.Generic;
using System.Text;

namespace MimeDetective.Analyzers
{
/// <summary>
/// File header analyzer backed by dictionary-based tries, one trie per header offset.
/// Child nodes are keyed by the header byte; null ("any byte") header entries are stored
/// under the key <see cref="NullStandInValue"/>.
/// </summary>
public sealed class DictionaryBasedTrie : IFileAnalyzer
{
    //key used for null (wildcard) header entries; outside the 0-255 byte range
    private const ushort NullStandInValue = 256;

    //root dictionary contains the nodes with offset values
    private Dictionary<ushort, Node> Nodes { get; } = new Dictionary<ushort, Node>();

    /// <summary>
    /// Constructs an empty DictionaryBasedTrie
    /// </summary>
    public DictionaryBasedTrie()
    {
    }

    /// <summary>
    /// Constructs a DictionaryBasedTrie from an Enumerable of FileTypes
    /// </summary>
    /// <param name="types">Definitions to insert; null elements are skipped, as in ArrayBasedTrie.</param>
    /// <exception cref="ArgumentNullException"><paramref name="types"/> is null.</exception>
    public DictionaryBasedTrie(IEnumerable<FileType> types)
    {
        if (types is null)
            throw new ArgumentNullException(nameof(types));

        foreach (var type in types)
        {
            if ((object)type != null)
                Insert(type);
        }
    }

    /// <summary>
    /// Walks each offset's trie over the bytes that were read and returns the record of the
    /// deepest completed definition of the first offset trie that yields one.
    /// </summary>
    /// <param name="readResult">Buffer and the number of valid bytes in it.</param>
    /// <returns>The matching <see cref="FileType"/>, or null when nothing matches.</returns>
    public FileType Search(in ReadResult readResult)
    {
        FileType match = null;

        foreach (KeyValuePair<ushort, Node> entry in Nodes)
        {
            //the root node's Value is the header offset, i.e. where matching starts
            Node node = entry.Value;

            for (int i = node.Value; i < readResult.ReadLength; i++)
            {
                Node parent = node;

                //an exact byte child wins; the wildcard key is only a fallback
                if (!parent.Children.TryGetValue(readResult.Array[i], out node)
                    && !parent.Children.TryGetValue(NullStandInValue, out node))
                    break;

                if ((object)node.Record != null)
                    match = node.Record;
            }

            if ((object)match != null)
                break;
        }

        return match;
    }

    /// <summary>
    /// Adds a definition to the trie of its header offset, creating that trie if needed.
    /// A definition with the same offset and header as an earlier one replaces it.
    /// </summary>
    /// <param name="type">Definition to add.</param>
    /// <exception cref="ArgumentNullException"><paramref name="type"/> is null.</exception>
    /// <exception cref="ArgumentException">The header is null or empty.</exception>
    public void Insert(FileType type)
    {
        if (type is null)
            throw new ArgumentNullException(nameof(type));

        //validate before touching Nodes so a rejected type leaves no empty offset trie behind
        if (type.Header is null || type.Header.Length == 0)
            throw new ArgumentException("FileType must have a non-empty Header", nameof(type));

        if (!Nodes.TryGetValue(type.HeaderOffset, out var offsetNode))
        {
            offsetNode = new Node(type.HeaderOffset);
            Nodes.Add(type.HeaderOffset, offsetNode);
        }

        offsetNode.Insert(type);
    }

    /// <summary>
    /// Trie node. For a root node <see cref="Value"/> is the header offset; for every other
    /// node it is the header byte (or <see cref="NullStandInValue"/>) the node was created for.
    /// </summary>
    private sealed class Node
    {
        public readonly Dictionary<ushort, Node> Children = new Dictionary<ushort, Node>();

        //if complete node then this not null
        public FileType Record;

        public readonly ushort Value;

        public Node(ushort value)
        {
            Value = value;
        }

        /// <summary>
        /// Creates any missing nodes along the header's path below this node and stores the
        /// definition on the last node of that path.
        /// </summary>
        public void Insert(FileType type)
        {
            Node node = this;

            for (int i = 0; i < type.Header.Length; i++)
            {
                ushort value = type.Header[i] ?? NullStandInValue;

                //single lookup; the child is created only when it is missing
                if (!node.Children.TryGetValue(value, out Node child))
                {
                    child = new Node(value);
                    node.Children.Add(value, child);
                }

                node = child;
            }

            node.Record = type;
        }
    }
}
}

17 changes: 17 additions & 0 deletions src/Mime-Detective/Analyzers/IFileAnalyzer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
using System;
using System.Collections.Generic;
using System.Text;
using System.Runtime.CompilerServices;

namespace MimeDetective.Analyzers
{
/// <summary>
/// Read-only analyzer contract: exposes a single lookup that maps a <see cref="ReadResult"/>
/// to a <see cref="FileType"/>.
/// </summary>
public interface IReadOnlyFileAnalyzer
{
/// <summary>
/// Looks up the file type for the supplied read result.
/// </summary>
/// <param name="readResult">The read result to analyze; passed by read-only reference.</param>
/// <returns>
/// The <see cref="FileType"/> found for <paramref name="readResult"/>.
/// NOTE(review): the trie analyzers shown alongside this interface return null when nothing
/// matches; confirm whether that is part of the contract.
/// </returns>
FileType Search(in ReadResult readResult);
}

/// <summary>
/// Analyzer contract that adds an insertion operation on top of
/// <see cref="IReadOnlyFileAnalyzer"/>.
/// </summary>
public interface IFileAnalyzer : IReadOnlyFileAnalyzer
{
/// <summary>
/// Adds a file type definition to the analyzer.
/// </summary>
/// <param name="fileType">
/// The definition to add.
/// NOTE(review): the trie analyzers shown alongside this interface throw
/// ArgumentNullException for null; confirm whether every implementation must.
/// </param>
void Insert(FileType fileType);
}
}
Loading

0 comments on commit 06bc12d

Please sign in to comment.