Improve own database.

This commit is contained in:
H. Utku Maden 2024-08-26 21:23:16 +03:00
parent 2f964dfe99
commit ccf46a765c
6 changed files with 235 additions and 69 deletions

@ -1,58 +0,0 @@
using System.Collections.Generic;
namespace ReMime.ContentResolvers.Image
{
/// <summary>
/// Magic values for common image formats, declared in code.
/// </summary>
public static class ImageMagicValues
{
    private static readonly MediaType Tiff = new MediaType("image/tiff", new string[] { "nif", "tif", "tiff"});
    private static readonly MediaType Jpeg = new MediaType("image/jpeg", new string[] { "jpg", "jpeg" });

    /// <summary>
    /// Every magic value / media type pair known to this class, in registration order.
    /// </summary>
    public static readonly IReadOnlyList<MagicValueMediaType> List = CreateEntries().AsReadOnly();

    /// <summary>
    /// Registers all entries of <see cref="List"/> with the given resolver.
    /// </summary>
    /// <param name="resolver">The resolver that receives the magic values.</param>
    public static void AddToMagicResolver(MagicContentResolver resolver) => resolver.AddMagicValues(List);

    /// <summary>
    /// Builds the backing list for <see cref="List"/>.
    /// </summary>
    private static List<MagicValueMediaType> CreateEntries()
    {
        List<MagicValueMediaType> entries = new List<MagicValueMediaType>();

        entries.Add(new MagicValueMediaType(new MagicValue("BM"), new MediaType("image/bmp")));
        entries.Add(new MagicValueMediaType(new MagicValue("GIF8"), new MediaType("image/gif")));
        entries.Add(new MagicValueMediaType(new MagicValue("IIN1"), Tiff));
        entries.Add(new MagicValueMediaType(new MagicValue(new byte[] { 0x4d, 0x4d, 0x00, 0x2a }), Tiff));
        entries.Add(new MagicValueMediaType(new MagicValue(new byte[] { 0x49, 0x49, 0x2a, 0x00 }), Tiff));
        entries.Add(new MagicValueMediaType(new MagicValue(new byte[] { 0x89, 0x50, 0x4e, 0x47 }), new MediaType("image/png")));

        // Magic values are whole bytes rather than bit masks, so JPEG gets one
        // entry for every fourth byte from 0xe0 through 0xff.
        for (int marker = 0xe0; marker <= 0xff; marker++)
        {
            byte[] signature = new byte[] { 0xff, 0xd8, 0xff, (byte)marker };
            entries.Add(new MagicValueMediaType(new MagicValue(signature), Jpeg));
        }

        return entries;
    }
}
}

@ -2,10 +2,11 @@ using System;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.IO;
using System.Linq;
namespace ReMime.ContentResolvers
{
public record MagicValueMediaType(MagicValue Magic, MediaType MediaType);
public record MagicValueMediaType(MediaType MediaType, MagicValue[] MagicValues, string[] Extensions);
public class MagicContentResolver : IMediaContentResolver
{
@ -21,14 +22,25 @@ namespace ReMime.ContentResolvers
/// <summary>
/// Creates a resolver and registers magic values with it.
/// </summary>
public MagicContentResolver()
{
// Register the image magic values that are declared in code.
// NOTE(review): the embedded database read below also lists image/* entries - confirm
// whether this call is still required.
Image.ImageMagicValues.AddToMagicResolver(this);
List<MagicValueDatabaseEntry> entries;
// Read the database from this assembly's manifest resources. GetManifestResourceStream
// returns null when no resource has that name; the null-forgiving operator assumes the
// resource is always embedded.
using (Stream str = typeof(MagicContentResolver).Assembly.GetManifestResourceStream("ReMime.ContentResolvers.database.jsonc")!)
{
entries = MagicValueDatabaseEntry.GetEntries(str);
}
// Convert each database entry through its explicit conversion operator and register it.
AddMagicValues(entries.Select(x => (MagicValueMediaType)x));
}
public IReadOnlyCollection<MediaType> MediaTypes => _mediaTypes.AsReadOnly();
public void AddMagicValue(MagicValueMediaType value)
{
_maxBytes = Math.Max(_maxBytes, value.Magic.Value.Length);
if (value.MagicValues.Length != 0)
{
_maxBytes = Math.Max(_maxBytes, value.MagicValues.Select(x => x.Value.Length).Max());
}
_mediaTypes.Add(value.MediaType);
_tree.Add(value);
@ -83,17 +95,14 @@ namespace ReMime.ContentResolvers
{
get
{
if (bytes.Length == 0)
if (bytes.Length == 0 || Children == null)
return Node;
if (Children == null)
return null;
byte b = bytes[0];
if (!Children.TryGetValue(b, out Tree? subtree))
{
return null;
return Node;
}
return subtree[bytes.Slice(1)];
@ -124,8 +133,10 @@ namespace ReMime.ContentResolvers
/// <summary>
/// Adds a media type to the tree by handing its magic bytes to AddInternal.
/// </summary>
/// <param name="magic">The media type together with its magic value(s).</param>
public void Add(MagicValueMediaType magic)
{
// NOTE(review): this body reads both magic.Magic (a single value) and magic.MagicValues
// (an array) - confirm which of the two shapes MagicValueMediaType actually has.
ReadOnlySpan<byte> bytes = magic.Magic.Value;
AddInternal(magic, bytes);
// One AddInternal call per magic value, all carrying the same entry.
foreach (var entry in magic.MagicValues)
{
AddInternal(magic, entry.Value);
}
}
}
}

@ -1,4 +1,6 @@
using System;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.Text;
namespace ReMime.ContentResolvers
@ -6,7 +8,7 @@ namespace ReMime.ContentResolvers
/// <summary>
/// A magic value to identify file types.
/// </summary>
/// <param name="Value">The byte arary that makes up the magic value.</param>
/// <param name="Value">The byte array that makes up the magic value.</param>
public record struct MagicValue(byte[] Value)
{
public MagicValue(int value) : this(BitConverter.GetBytes(value)) { }
@ -46,5 +48,107 @@ namespace ReMime.ContentResolvers
return hash;
}
/// <summary>
/// Parses the textual magic value notation into a <see cref="MagicValue"/>.
/// </summary>
/// <remarks>
/// The text is a sequence of tokens:
/// <list type="bullet">
/// <item>pairs of hexadecimal digits, one byte per pair (e.g. <c>4d4d002a</c>);</item>
/// <item>single-quoted ASCII strings (e.g. <c>'GIF8'</c>) supporting the escapes
/// <c>\0 \a \b \f \n \r \t \v \? \\ \' \"</c>.</item>
/// </list>
/// Tokens may be mixed freely (e.g. <c>89'PNG'</c>). Outside a quoted string,
/// characters that are not hexadecimal digits (whitespace included) are skipped.
/// </remarks>
/// <param name="magic">The text to parse.</param>
/// <param name="value">The parsed magic value on success; otherwise <c>null</c>.</param>
/// <returns>
/// <c>true</c> when the text is well formed and yields at least one byte; <c>false</c>
/// for an unterminated string, an unknown or dangling escape, a non-ASCII character
/// inside a string, an incomplete or invalid hex pair, or an empty result.
/// </returns>
public static bool TryParse(ReadOnlySpan<char> magic, [NotNullWhen(true)] out MagicValue? value)
{
    value = null;
    List<byte> bytes = new List<byte>();

    for (int i = 0; i < magic.Length; i++)
    {
        char chr = magic[i];

        if (chr == '\'')
        {
            // Quoted ASCII string: consume everything up to the closing quote.
            bool closed = false;
            int j;
            for (j = i + 1; j < magic.Length; j++)
            {
                chr = magic[j];
                if (chr == '\'')
                {
                    closed = true;
                    break;
                }

                if (chr == '\\')
                {
                    // The backslash must be followed by the character it escapes.
                    if (j + 1 >= magic.Length)
                    {
                        return false;
                    }

                    // Advance onto the escaped character before reading it, so the
                    // loop increment then moves past it.
                    if (!TryUnescape(magic[++j], out chr))
                    {
                        return false;
                    }
                }

                // Magic strings are 7-bit ASCII; anything else cannot be encoded
                // as a single byte without silently changing it.
                if (chr > 0x7f)
                {
                    return false;
                }

                bytes.Add((byte)chr);
            }

            if (!closed)
            {
                // ASCII string overrun.
                return false;
            }

            i = j;
            continue;
        }

        int high = HexValue(chr);
        if (high >= 0)
        {
            // Misaligned hex string: a byte always needs two digits.
            if (i + 1 >= magic.Length)
            {
                return false;
            }

            int low = HexValue(magic[++i]);
            if (low < 0)
            {
                return false;
            }

            bytes.Add((byte)((high << 4) | low));
        }

        // Whitespace and any other character outside a token is skipped.
    }

    // No bytes to match.
    if (bytes.Count == 0)
    {
        return false;
    }

    value = new MagicValue(bytes.ToArray());
    return true;

    // Numeric value of a hexadecimal digit, or -1 when the character is not one.
    static int HexValue(char a)
    {
        if (a >= '0' && a <= '9')
        {
            return a - '0';
        }

        if (a >= 'A' && a <= 'F')
        {
            return a - 'A' + 10;
        }

        if (a >= 'a' && a <= 'f')
        {
            return a - 'a' + 10;
        }

        return -1;
    }

    // Maps the character following a backslash to the character it stands for.
    static bool TryUnescape(char code, out char result)
    {
        switch (code)
        {
            case '0': result = '\0'; return true;
            case 'a': result = '\a'; return true;
            case 'b': result = '\b'; return true;
            case 'f': result = '\f'; return true;
            case 'n': result = '\n'; return true;
            case 'r': result = '\r'; return true;
            case 't': result = '\t'; return true;
            case 'v': result = '\v'; return true;
            case '?': result = '?'; return true;
            case '\\': result = '\\'; return true;
            case '\'': result = '\''; return true;
            case '"': result = '"'; return true;
            default: result = '\0'; return false;
        }
    }
}
}
}

@ -0,0 +1,43 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text.Json;
using System.Text.Json.Serialization;
namespace ReMime.ContentResolvers
{
/// <summary>
/// One record of the magic value database: a media type name, the textual magic
/// values that identify it, and its file extensions.
/// </summary>
[JsonSerializable(typeof(MagicValueDatabaseEntry))]
public class MagicValueDatabaseEntry
{
    // Created once and reused across GetEntries calls instead of per call.
    private static readonly JsonSerializerOptions SerializerOptions = new JsonSerializerOptions()
    {
        AllowTrailingCommas = true,
        ReadCommentHandling = JsonCommentHandling.Skip
    };

    /// <summary>The media type name, e.g. <c>image/png</c>.</summary>
    [JsonPropertyName("type")]
    public string Type { get; set; } = string.Empty;

    /// <summary>Magic values in the textual notation understood by <c>MagicValue.TryParse</c>.</summary>
    [JsonPropertyName("magic")]
    public List<string> Magic { get; set; } = new List<string>();

    /// <summary>File extensions associated with the media type.</summary>
    [JsonPropertyName("extensions")]
    public List<string> Extensions { get; set; } = new List<string>();

    /// <summary>
    /// Reads a database, a JSON array of entries that may contain comments and
    /// trailing commas, from a stream.
    /// </summary>
    /// <param name="str">The stream containing the JSON document.</param>
    /// <returns>The entries in document order.</returns>
    /// <exception cref="InvalidDataException">The document is the JSON literal <c>null</c>.</exception>
    /// <exception cref="JsonException">The document is not valid JSON for this shape.</exception>
    public static List<MagicValueDatabaseEntry> GetEntries(Stream str)
    {
        return JsonSerializer.Deserialize<List<MagicValueDatabaseEntry>>(str, SerializerOptions)
            ?? throw new InvalidDataException("The magic value database does not contain a list of entries.");
    }

    /// <summary>
    /// Converts a database entry into the form used by the resolver. Magic strings
    /// that fail to parse are left out of the result.
    /// </summary>
    /// <param name="entry">The entry to convert.</param>
    /// <exception cref="ArgumentNullException"><paramref name="entry"/> is <c>null</c>.</exception>
    public static explicit operator MagicValueMediaType(MagicValueDatabaseEntry entry)
    {
        if (entry is null)
        {
            throw new ArgumentNullException(nameof(entry));
        }

        List<MagicValue> magicValues = new List<MagicValue>(entry.Magic.Count);
        foreach (string text in entry.Magic)
        {
            if (MagicValue.TryParse(text, out MagicValue? parsed))
            {
                magicValues.Add(parsed.Value);
            }
        }

        return new MagicValueMediaType(
            new MediaType(entry.Type),
            magicValues.ToArray(),
            entry.Extensions.ToArray()
        );
    }
}
}

@ -0,0 +1,62 @@
/**
* ReMime Magic Value & File Extension Database
* ---------------------------------------------
* This is a self-compiled list of magic values, file extensions and
* their MIME types. Please contribute common file formats if you come
* across them.
*
* This file is only for common file formats that do not need any extra
* detection logic. Do not add major container formats like ZIP or RIFF
* into this list.
*/
[
// #region application/*
{ "type": "application/vnd.rar", "magic": [ "'Rar!'1a07" ], "extensions": [ "rar" ] },
{ "type": "application/postscript", "magic": [ "'%!PS'"], "extensions": [ "ps", "eps", "epsf" ] },
{ "type": "application/pdf", "magic": ["'%PDF-'"], "extensions": [ "pdf" ] },
// #endregion
// #region audio/*
{ "type": "audio/mp3", "magic": [ "fffb", "fff3", "fff2", "'ID3'" ], "extensions": [ "mp3" ] },
{ "type": "audio/flac", "magic": [ "'fLaC'" ], "extensions": [ "flac" ] },
{ "type": "audio/midi", "magic": [ "'MThd'" ], "extensions": [ "mid", "midi" ] },
// #endregion
// #region font/*
{ "type": "font/woff", "magic": [ "'wOFF'" ], "extensions": [ "woff" ] },
{ "type": "font/woff2", "magic": [ "'wOF2'" ], "extensions": [ "woff2" ] },
{ "type": "font/ttf", "magic": [ "0001000000" ], "extensions": [ "ttf", "tte", "dfont" ] },
{ "type": "font/otf", "magic": [ "'OTTO'" ], "extensions": [ "otf" ]},
// #endregion
// #region image/*
{ "type": "image/bmp", "magic": [ "'BM'" ], "extensions": [ "bmp" ] },
{ "type": "image/gif", "magic": [ "'GIF8'" ], "extensions": [ "gif" ] },
{ "type": "image/tiff", "magic": [ "'IIN1'", "4d4d002a", "49492a00"], "extensions": [ "tiff", "tif", "nif" ] },
{ "type": "image/png", "magic": [ "89'PNG'" ], "extensions": [ "png" ] },
{ "type": "image/emf", "magic": [ "01000000" ], "extensions": [ "emf" ] },
{ "type": "image/wmf", "magic": [ "d7cdc69a" ], "extensions": [ "wmf" ] },
{ "type": "image/x-ico", "magic": [ "00000100" ], "extensions": [ "ico" ] },
{ "type": "image/x-qoi", "magic": [ "'qoif'" ], "extensions": [ "qoi" ]},
// The JPEG standard allows any magic value from ffd8ffe0 to ffd8ffff.
{
"type": "image/jpeg",
"magic": [
"ffd8ffe0", "ffd8ffe1", "ffd8ffe2", "ffd8ffe3",
"ffd8ffe4", "ffd8ffe5", "ffd8ffe6", "ffd8ffe7",
"ffd8ffe8", "ffd8ffe9", "ffd8ffea", "ffd8ffeb",
"ffd8ffec", "ffd8ffed", "ffd8ffee", "ffd8ffef",
"ffd8fff0", "ffd8fff1", "ffd8fff2", "ffd8fff3",
"ffd8fff4", "ffd8fff5", "ffd8fff6", "ffd8fff7",
"ffd8fff8", "ffd8fff9", "ffd8fffa", "ffd8fffb",
"ffd8fffc", "ffd8fffd", "ffd8fffe", "ffd8ffff"
],
"extensions": [ "jpeg", "jpg"]
},
// #endregion
// #region text/*
// The RTF signature contains a literal backslash. It is escaped twice: once for
// JSON and once for the magic string notation, where a backslash starts an escape.
{ "type": "text/rtf", "magic": [ "'{\\\\rtf1'" ], "extensions": [ "rtf" ]}
// #endregion
]

@ -6,4 +6,8 @@
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<EmbeddedResource Include="ContentResolvers/database.jsonc" />
</ItemGroup>
</Project>