Add first magic value detector.

This commit is contained in:
H. Utku Maden 2024-05-26 11:13:44 +03:00
parent 1060d7d73c
commit e6f2a74819
4 changed files with 234 additions and 0 deletions

@ -0,0 +1,58 @@
using System.Collections.Generic;
namespace ReMime.ContentResolvers.Image
{
/// <summary>
/// Magic values (leading byte signatures) for common raster image formats,
/// paired with the media type each signature identifies.
/// </summary>
public static class ImageMagicValues
{
    private static readonly MediaType Tiff = new MediaType("image/tiff", new string[] { "nif", "tif", "tiff"});
    private static readonly MediaType Jpeg = new MediaType("image/jpeg", new string[] { "jpg", "jpeg" });

    /// <summary>
    /// Every image signature known to this class. All JPEG entries share one
    /// <see cref="MediaType"/> instance, as do all TIFF entries.
    /// </summary>
    public static readonly IReadOnlyList<MagicValueMediaType> List = CreateList().AsReadOnly();

    /// <summary>
    /// Registers every entry of <see cref="List"/> with <paramref name="resolver"/>.
    /// </summary>
    /// <param name="resolver">The resolver that receives the image signatures.</param>
    public static void AddToMagicResolver(MagicContentResolver resolver)
    {
        resolver.AddMagicValues(List);
    }

    /// <summary>
    /// Builds the signature table. The JPEG entries are generated rather than
    /// written out by hand because they differ only in their fourth byte.
    /// </summary>
    private static List<MagicValueMediaType> CreateList()
    {
        List<MagicValueMediaType> entries = new List<MagicValueMediaType>()
        {
            new MagicValueMediaType(new MagicValue("BM"), new MediaType("image/bmp")),
            new MagicValueMediaType(new MagicValue("GIF8"), new MediaType("image/gif")),
            // NOTE(review): "IIN1" is, as far as I know, the NIFF signature (hence the
            // "nif" extension); it is reported as image/tiff here - confirm this is intended.
            new MagicValueMediaType(new MagicValue("IIN1"), Tiff),
            // "MM\0*": big-endian TIFF header.
            new MagicValueMediaType(new MagicValue(new byte[] { 0x4d, 0x4d, 0x00, 0x2a }), Tiff),
            // "II*\0": little-endian TIFF header.
            new MagicValueMediaType(new MagicValue(new byte[] { 0x49, 0x49, 0x2a, 0x00 }), Tiff),
            // 0x89 followed by ASCII "PNG".
            new MagicValueMediaType(new MagicValue(new byte[] { 0x89, 0x50, 0x4e, 0x47 }), new MediaType("image/png")),
        };

        // A JPEG stream starts with FF D8 FF followed by a marker byte. Magic values
        // are whole bytes (no bit masks), so each accepted marker gets its own entry.
        for (int marker = 0xe0; marker <= 0xff; marker++)
        {
            entries.Add(new MagicValueMediaType(new MagicValue(new byte[] { 0xff, 0xd8, 0xff, (byte)marker }), Jpeg));
        }

        // JPEG files without an APPn segment begin directly with a quantization
        // table (marker 0xDB). Appended last so existing indices stay unchanged.
        entries.Add(new MagicValueMediaType(new MagicValue(new byte[] { 0xff, 0xd8, 0xff, 0xdb }), Jpeg));

        return entries;
    }
}
}

@ -0,0 +1,132 @@
using System;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.IO;
namespace ReMime.ContentResolvers
{
/// <summary>
/// Associates a magic value (a leading byte signature) with the media type it identifies.
/// </summary>
/// <param name="Magic">The byte signature to look for.</param>
/// <param name="MediaType">The media type reported when the signature is found.</param>
public record MagicValueMediaType(MagicValue Magic, MediaType MediaType);
/// <summary>
/// Resolves media types by comparing the leading bytes of some content against
/// a set of registered magic values, and by file extension for the media types
/// those magic values refer to.
/// </summary>
public class MagicContentResolver : IMediaContentResolver
{
    /// <summary>Largest probe buffer that is placed on the stack.</summary>
    private const int StackBufferLimit = 256;

    private readonly List<MediaType> _mediaTypes = new List<MediaType>();
    private readonly Dictionary<string, MediaType> _extensions = new Dictionary<string, MediaType>();
    private readonly Tree _tree = new Tree();

    // Length of the longest registered magic value; this many bytes are probed from a stream.
    private int _maxBytes = 0;

    /// <summary>
    /// Creates a resolver holding the built-in image magic values plus <paramref name="values"/>.
    /// </summary>
    /// <param name="values">Additional magic values to register.</param>
    public MagicContentResolver(IEnumerable<MagicValueMediaType> values) : this()
    {
        AddMagicValues(values);
    }

    /// <summary>
    /// Creates a resolver holding only the built-in image magic values.
    /// </summary>
    public MagicContentResolver()
    {
        Image.ImageMagicValues.AddToMagicResolver(this);
    }

    /// <summary>The distinct media types this resolver can report.</summary>
    public IReadOnlyCollection<MediaType> MediaTypes => _mediaTypes.AsReadOnly();

    /// <summary>
    /// Registers a single magic value and the extensions of its media type.
    /// </summary>
    /// <param name="value">The magic value / media type pair to register.</param>
    /// <exception cref="ArgumentNullException"><paramref name="value"/> is null.</exception>
    public void AddMagicValue(MagicValueMediaType value)
    {
        if (value is null)
        {
            throw new ArgumentNullException(nameof(value));
        }

        _maxBytes = Math.Max(_maxBytes, value.Magic.Value.Length);

        // Several magic values commonly share one media type; list each type only once.
        if (!_mediaTypes.Contains(value.MediaType))
        {
            _mediaTypes.Add(value.MediaType);
        }

        _tree.Add(value);

        // A later registration wins when two media types claim the same extension.
        foreach (string extension in value.MediaType.Extensions)
        {
            _extensions[extension] = value.MediaType;
        }
    }

    /// <summary>
    /// Registers every magic value in <paramref name="values"/>.
    /// </summary>
    /// <param name="values">The magic value / media type pairs to register.</param>
    /// <exception cref="ArgumentNullException"><paramref name="values"/> is null.</exception>
    public void AddMagicValues(IEnumerable<MagicValueMediaType> values)
    {
        if (values is null)
        {
            throw new ArgumentNullException(nameof(values));
        }

        foreach (MagicValueMediaType value in values)
        {
            AddMagicValue(value);
        }
    }

    /// <summary>
    /// Reads up to the longest registered magic length from <paramref name="str"/>
    /// and resolves the media type from those bytes. The stream is not rewound;
    /// its position advances by the number of bytes read.
    /// </summary>
    /// <param name="str">The stream to probe.</param>
    /// <param name="mediaType">The matched media type, or null when nothing matched.</param>
    /// <returns>True when a registered magic value matched.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
    public bool TryResolve(Stream str, [NotNullWhen(true)] out MediaType? mediaType)
    {
        if (str is null)
        {
            throw new ArgumentNullException(nameof(str));
        }

        // Keep large probe buffers off the stack; registered magic values are caller supplied.
        Span<byte> buffer = _maxBytes <= StackBufferLimit ? stackalloc byte[_maxBytes] : new byte[_maxBytes];

        // Stream.Read may return fewer bytes than requested, so keep reading until
        // the buffer is full or the stream ends.
        int filled = 0;
        while (filled < buffer.Length)
        {
            int read = str.Read(buffer.Slice(filled));
            if (read <= 0)
            {
                break;
            }
            filled += read;
        }

        // Only match against bytes that were really read, never against padding.
        return TryResolve(buffer.Slice(0, filled), out mediaType);
    }

    /// <summary>
    /// Resolves the media type whose magic value is the longest registered prefix
    /// of <paramref name="content"/>.
    /// </summary>
    /// <param name="content">The leading bytes of the content to identify.</param>
    /// <param name="mediaType">The matched media type, or null when nothing matched.</param>
    /// <returns>True when a registered magic value matched.</returns>
    public bool TryResolve(ReadOnlySpan<byte> content, [NotNullWhen(true)] out MediaType? mediaType)
    {
        MagicValueMediaType? match = _tree[content];
        mediaType = match?.MediaType;
        return mediaType is not null;
    }

    /// <summary>
    /// Looks up a media type by one of the extensions of the registered media types.
    /// The lookup uses the dictionary's default (case-sensitive) string comparison.
    /// </summary>
    /// <param name="extension">The extension, spelled as in <c>MediaType.Extensions</c>.</param>
    /// <param name="mediaType">The matched media type, or null when nothing matched.</param>
    /// <returns>True when the extension is known.</returns>
    public bool TryResolve(string extension, [NotNullWhen(true)] out MediaType? mediaType)
    {
        return _extensions.TryGetValue(extension, out mediaType);
    }

    /// <summary>
    /// Byte-wise prefix tree: each edge is one byte of a magic value, and a node
    /// carries an entry when a magic value ends exactly there.
    /// </summary>
    private class Tree
    {
        /// <summary>The entry whose magic value ends at this node, if any.</summary>
        public MagicValueMediaType? Node { get; private set; }

        /// <summary>Subtrees keyed by the next byte; null until a child is added.</summary>
        public Dictionary<byte, Tree>? Children { get; private set; }

        /// <summary>
        /// Returns the entry with the longest magic value that is a prefix of
        /// <paramref name="bytes"/>, or null when no registered magic value is.
        /// </summary>
        public MagicValueMediaType? this[ReadOnlySpan<byte> bytes]
        {
            get
            {
                Tree current = this;
                MagicValueMediaType? best = current.Node;

                foreach (byte b in bytes)
                {
                    if (current.Children is null || !current.Children.TryGetValue(b, out Tree? next))
                    {
                        break;
                    }

                    current = next;

                    // Remember the deepest entry seen so far, so that content longer
                    // than the magic value still matches.
                    if (current.Node is not null)
                    {
                        best = current.Node;
                    }
                }

                return best;
            }
        }

        /// <summary>
        /// Inserts <paramref name="magic"/> at the path spelled by its magic bytes,
        /// replacing any entry already registered for the same bytes.
        /// </summary>
        public void Add(MagicValueMediaType magic)
        {
            ReadOnlySpan<byte> bytes = magic.Magic.Value;
            Tree current = this;

            foreach (byte b in bytes)
            {
                current.Children ??= new Dictionary<byte, Tree>();

                if (!current.Children.TryGetValue(b, out Tree? next))
                {
                    next = new Tree();
                    current.Children[b] = next;
                }

                current = next;
            }

            current.Node = magic;
        }
    }
}
}

@ -0,0 +1,41 @@
using System;
using System.Text;
namespace ReMime.ContentResolvers
{
/// <summary>
/// A magic value: the byte sequence expected at the very start of some content.
/// Equality and hashing are based on the bytes, not on the array instance.
/// </summary>
/// <param name="Value">The signature bytes.</param>
public record struct MagicValue(byte[] Value)
{
    /// <summary>
    /// Creates a magic value from the bytes of <paramref name="value"/> as produced by
    /// <see cref="BitConverter.GetBytes(int)"/>, i.e. in the byte order of the running platform.
    /// </summary>
    public MagicValue(int value) : this(BitConverter.GetBytes(value)) { }

    /// <summary>
    /// Creates a magic value from the bytes of <paramref name="value"/> as produced by
    /// <see cref="BitConverter.GetBytes(short)"/>, i.e. in the byte order of the running platform.
    /// </summary>
    public MagicValue(short value) : this(BitConverter.GetBytes(value)) { }

    /// <summary>
    /// Creates a magic value from the encoded bytes of <paramref name="value"/>.
    /// </summary>
    /// <param name="value">The signature text.</param>
    /// <param name="encoding">The encoding to use; ASCII when null.</param>
    public MagicValue(string value, Encoding? encoding = null)
        : this((encoding ?? Encoding.ASCII).GetBytes(value)) { }

    /// <summary>
    /// Creates a magic value from a copy of <paramref name="bytes"/>.
    /// </summary>
    public MagicValue(ReadOnlySpan<byte> bytes) : this(bytes.ToArray()) { }

    /// <summary>
    /// Tests whether <paramref name="haystack"/> begins with this magic value.
    /// </summary>
    /// <param name="haystack">The leading bytes of the content to test.</param>
    /// <returns>
    /// True when every byte of the magic value is present, in order, at the start of
    /// <paramref name="haystack"/>; false when it differs or is shorter than the magic value.
    /// </returns>
    public readonly bool Matches(ReadOnlySpan<byte> haystack)
    {
        // The span constructor accepts a null array (default struct) and yields an empty span.
        return haystack.StartsWith(new ReadOnlySpan<byte>(Value));
    }

    /// <summary>
    /// Compares two magic values byte by byte, so that equality agrees with
    /// the content-based <see cref="GetHashCode"/>.
    /// </summary>
    public readonly bool Equals(MagicValue other)
    {
        return new ReadOnlySpan<byte>(Value).SequenceEqual(new ReadOnlySpan<byte>(other.Value));
    }

    /// <summary>
    /// Hashes the signature bytes with the 32-bit FNV-1a algorithm.
    /// </summary>
    public override readonly int GetHashCode()
    {
        const int Prime = 0x01000193;
        const int Basis = unchecked((int)0x811c9dc5);

        int hash = Basis;

        // FNV relies on wrap-around multiplication; stay unchecked even if the
        // project is compiled with overflow checking enabled.
        unchecked
        {
            foreach (byte b in new ReadOnlySpan<byte>(Value))
            {
                hash ^= b;
                hash *= Prime;
            }
        }

        return hash;
    }
}
}

@ -3,6 +3,7 @@ using System.Collections.Generic;
using System.Collections.Immutable; using System.Collections.Immutable;
using System.IO; using System.IO;
using System.Linq; using System.Linq;
using ReMime.ContentResolvers;
using ReMime.Platform; using ReMime.Platform;
namespace ReMime namespace ReMime
@ -40,6 +41,8 @@ namespace ReMime
static MediaTypeResolver() static MediaTypeResolver()
{ {
AddResolver(new MagicContentResolver(), 9998);
if (OperatingSystem.IsWindows()) if (OperatingSystem.IsWindows())
{ {
AddResolver(new Win32MediaTypeResolver()); AddResolver(new Win32MediaTypeResolver());