Compare commits

..

13 Commits

Author SHA1 Message Date
428c94d0af Fix URL. 2024-08-30 19:20:19 +03:00
7fc92d1e66 Quick update. 2024-08-30 19:17:44 +03:00
6d192adce7 Add a new README for refile. 2024-08-30 17:33:45 +03:00
75fe42a68d Create the refile package as tool. 2024-08-30 17:30:46 +03:00
868bc9a3cf Add package description. 2024-08-30 17:13:54 +03:00
dd17462e9e Finish NuGet package. 2024-08-30 17:11:40 +03:00
63b85ecfad Rename "ReMime.Cli" to ReMime.ReFile. 2024-08-30 17:01:33 +03:00
bf953ceb32 Update readme and add license. 2024-08-30 15:38:14 +03:00
f9792e2bcf Add RIFF container support. 2024-08-30 14:58:09 +03:00
ccf46a765c Improve own database. 2024-08-26 21:23:16 +03:00
2f964dfe99 Documentation pass. 2024-08-25 20:29:47 +03:00
e6f2a74819 Add first magic value detector. 2024-05-26 11:13:44 +03:00
1060d7d73c Fix silly if statement. 2024-05-26 11:13:14 +03:00
24 changed files with 905 additions and 106 deletions

21
LICENSE.md Normal file
View File

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2023 ReFuel & H. Utku Maden
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@@ -12,11 +12,12 @@ Platform Caveats
* On Linux, not all `/etc/mime.types` syntax is supported.
* None of this was written with MacOS in mind. But maybe it'll work?
Refer to `ReMime.Cli` as an example of how to use the library. Refer to in line
Refer to `ReMime.ReFile` as an example of how to use the library. Refer to the
inline documentation and the default resolvers as examples when writing your
own resolver implementations.
Contributing
------------
Feel free to contribute your own file type resolvers and bug fixes. The more
file types that can be detected accurately, the better.
file types that can be detected accurately, the better. Currently the
repository is available at https://git.mixedup.dev/ReFuel/ReMime. Accepting [email patches](<mailto:sht7ntgni@mozmail.com>).

View File

@@ -1,17 +0,0 @@
<Project Sdk="Microsoft.NET.Sdk">
<ItemGroup>
<ProjectReference Include="..\ReMime\ReMime.csproj" />
</ItemGroup>
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework>
<ImplicitUsings>disable</ImplicitUsings>
<Nullable>enable</Nullable>
<SelfContained>true</SelfContained>
<PublishSingleFile>true</PublishSingleFile>
<AssemblyName>refile</AssemblyName>
</PropertyGroup>
</Project>

View File

@@ -2,18 +2,20 @@
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.IO;
using System.Linq;
using Microsoft.VisualBasic;
namespace ReMime.Cli
{
public static class Program
{
private const string USAGE = "remime [-r] file/directory/-...\n" +
"remime --help for more help.";
private const string USAGE = "refile [-r] file/directory/-...\n" +
"refile --help for more help.";
private const string HELP =
"ReMime Command Line Tool - Determine file Media Type\n" +
"\n" +
" remime [-r] file/directory/-...\n" +
" refile [-r] file/directory/-...\n" +
"\n" +
" file infer a file\n"+
" directory infer files in directory. Requires -r\n"+
@@ -47,7 +49,10 @@ namespace ReMime.Cli
[DoesNotReturn]
private static void ListTypes()
{
foreach (MediaType type in MediaTypeResolver.KnownTypes)
var list = MediaTypeResolver.KnownTypes.ToList();
list.Sort();
foreach (MediaType type in list)
{
Console.WriteLine("{0}\t{1}", type.FullTypeNoParameters, string.Join(' ', type.Extensions));
}

41
ReMime.ReFile/README.md Normal file
View File

@@ -0,0 +1,41 @@
ReFile - Simple Tool demonstrating ReMime
=========================================
```
ReMime Command Line Tool - Determine file Media Type
refile [-r] file/directory/-...
file infer a file
directory infer files in directory. Requires -r
- infer from standard input.
-r search files and folders recursively.
-a include hidden files.
-v verbose mode, use full paths.
--list list known mime types. Will ignore files.
--help show this help text.
```
ReMime - Simple Media Type Resolution
=====================================
ReMime is a very humble library that can identify the IANA media types of
files from their file extension and their content. While being fully extensible
with your own resolvers, ReMime will also refer to your operating system's
file type database when resolving files.
Platform Caveats
----------------
* On Windows, the default resolver assumes your application has read access to
the registry.
* On Linux, not all `/etc/mime.types` syntax is supported.
* None of this was written with MacOS in mind. But maybe it'll work?
Refer to `ReMime.ReFile` as an example of how to use the library. Refer to the
inline documentation and the default resolvers as examples when writing your
own resolver implementations.
Contributing
------------
Feel free to contribute your own file type resolvers and bug fixes. The more
file types that can be detected accurately, the better. Currently the
repository is available at https://git.mixedup.dev/ReFuel/ReMime. Accepting [email patches](<mailto:sht7ntgni@mozmail.com>).

View File

@@ -0,0 +1,43 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework>
<ImplicitUsings>disable</ImplicitUsings>
<Nullable>enable</Nullable>
<AssemblyName>refile</AssemblyName>
<!--NuGet-->
<GeneratePackageOnBuild>True</GeneratePackageOnBuild>
<PackageId>ReFuel.ReMime.ReFile</PackageId>
<Version>0.1.1</Version>
<Authors>H. Utku Maden</Authors>
<Company>ReFuel</Company>
<PackageReadmeFile>README.md</PackageReadmeFile>
<PackageLicenseFile>LICENSE.md</PackageLicenseFile>
<PackageIcon>images\icon.png</PackageIcon>
<PackageProjectUrl>https://refuel.mixedup.dev/docs/ReFile.html</PackageProjectUrl>
<RepositoryUrl>https://git.mixedup.dev/ReFuel/ReMime</RepositoryUrl>
<RepositoryType>git</RepositoryType>
<PackageTags>detection; detector; type; file; mime; mime-type; media; media-type; analysis; tool; refile</PackageTags>
<PackageDescription>
ReMime is a very humble library that can identify IANA media types of file
from their file extension and its content. While being fully extensible
with your own resolvers, ReMime will also refer to your operating system's
file type database when resolving files.
This is an example project: a tool that resolves the media types of the
given list of files and directories.
</PackageDescription>
<PackageType>DotnetTool</PackageType>
<PackAsTool>true</PackAsTool>
<ToolCommandName>refile</ToolCommandName>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="ReFuel.ReMime" Version="0.1.1" />
<Content Include="README.md" Pack="true" PackagePath="/" />
<Content Include="../LICENSE.md" Pack="true" PackagePath="/" />
<None Include="../remime_favicon.png" Pack="true" PackagePath="images\icon.png"/>
</ItemGroup>
</Project>

View File

@@ -3,21 +3,9 @@ using ReMime.Platform;
namespace ReMime.Tests
{
public abstract class MediaTypesByExtension<T> where T : IMediaTypeResolver, new()
[TestClass]
public class MediaTypesByExtension
{
private T CIT;
protected MediaTypesByExtension()
{
Unsafe.SkipInit(out CIT);
}
[TestInitialize]
public virtual void Initialize()
{
CIT = new T();
}
readonly (string extension, string type)[] ExampleMimeTypes = new (string, string)[] {
("png", "image/png"),
("gif", "image/gif"),
@@ -34,44 +22,10 @@ namespace ReMime.Tests
{
foreach (var(ext, type) in ExampleMimeTypes)
{
Assert.IsTrue(CIT.TryResolve(ext, out MediaType? result));
Assert.IsTrue(MediaTypeResolver.TryResolve(ext, out MediaType? result));
Assert.AreEqual(result!.FullType, type);
Assert.IsTrue(result.Extensions.Contains(ext));
}
}
}
[TestClass]
public class UnixMediaTypes : MediaTypesByExtension<UnixMediaTypeResolver>
{
[TestInitialize]
public override void Initialize()
{
if (OperatingSystem.IsLinux() || OperatingSystem.IsMacOS() || OperatingSystem.IsFreeBSD())
{
base.Initialize();
}
else
{
Assert.Inconclusive("Cannot test this in this platform.");
}
}
}
[TestClass]
public class Win32MediaTypes : MediaTypesByExtension<Win32MediaTypeResolver>
{
[TestInitialize]
public override void Initialize()
{
if (OperatingSystem.IsWindows())
{
base.Initialize();
}
else
{
Assert.Inconclusive("Cannot test this in this platform.");
}
}
}
}

View File

@@ -7,16 +7,13 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ReMime", "ReMime\ReMime.csp
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ReMime.Tests", "ReMime.Tests\ReMime.Tests.csproj", "{FEEB5BAD-3B18-4A88-A212-32EC9DA93BDE}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ReMime.Cli", "ReMime.Cli\ReMime.Cli.csproj", "{51AB44A2-D4EB-4CC8-BE4E-EF1912350629}"
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ReMime.ReFile", "ReMime.ReFile\ReMime.ReFile.csproj", "{BC2655E6-88CF-47EB-AE1C-7B74325B7FEC}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Release|Any CPU = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{05FAB3CF-78AF-4D34-97D1-C3AB24D4C59F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{05FAB3CF-78AF-4D34-97D1-C3AB24D4C59F}.Debug|Any CPU.Build.0 = Debug|Any CPU
@@ -26,9 +23,12 @@ Global
{FEEB5BAD-3B18-4A88-A212-32EC9DA93BDE}.Debug|Any CPU.Build.0 = Debug|Any CPU
{FEEB5BAD-3B18-4A88-A212-32EC9DA93BDE}.Release|Any CPU.ActiveCfg = Release|Any CPU
{FEEB5BAD-3B18-4A88-A212-32EC9DA93BDE}.Release|Any CPU.Build.0 = Release|Any CPU
{51AB44A2-D4EB-4CC8-BE4E-EF1912350629}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{51AB44A2-D4EB-4CC8-BE4E-EF1912350629}.Debug|Any CPU.Build.0 = Debug|Any CPU
{51AB44A2-D4EB-4CC8-BE4E-EF1912350629}.Release|Any CPU.ActiveCfg = Release|Any CPU
{51AB44A2-D4EB-4CC8-BE4E-EF1912350629}.Release|Any CPU.Build.0 = Release|Any CPU
{BC2655E6-88CF-47EB-AE1C-7B74325B7FEC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{BC2655E6-88CF-47EB-AE1C-7B74325B7FEC}.Debug|Any CPU.Build.0 = Debug|Any CPU
{BC2655E6-88CF-47EB-AE1C-7B74325B7FEC}.Release|Any CPU.ActiveCfg = Release|Any CPU
{BC2655E6-88CF-47EB-AE1C-7B74325B7FEC}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
EndGlobal

View File

@@ -0,0 +1,18 @@
using System.Collections.Generic;
namespace ReMime.ContentResolvers
{
/// <summary>
/// Contract for resolvers that can be extended with additional magic values.
/// </summary>
public interface IMagicValueResolver
{
    /// <summary>
    /// Register a single media type together with its magic values.
    /// </summary>
    /// <param name="value">The media type and magic values to register.</param>
    void AddMagicValue(MagicValueMediaType value);

    /// <summary>
    /// Register several media types. The default implementation forwards each
    /// element to <see cref="AddMagicValue"/> in enumeration order.
    /// </summary>
    /// <param name="values">The media types and magic values to register.</param>
    void AddMagicValues(IEnumerable<MagicValueMediaType> values)
    {
        using IEnumerator<MagicValueMediaType> cursor = values.GetEnumerator();
        while (cursor.MoveNext())
        {
            AddMagicValue(cursor.Current);
        }
    }
}
}

View File

@@ -0,0 +1,86 @@
using System;
namespace ReMime.ContentResolvers
{
/* You've heard of regular expressions, now prepare for magical expressions :sparkles:. */
/// <summary>
/// Bit pattern detecting state machine inspired by text regular expressions.
/// </summary>
/// <remarks>
/// Only part of the pattern language listed on <see cref="Pattern"/> is
/// implemented so far: hexadecimal digits and <c>?</c>. Any other token makes
/// <see cref="Match"/> throw <see cref="NotSupportedException"/> instead of
/// spinning forever.
/// </remarks>
public class MagEx
{
    /**
     * 0 1 2 3 4 5 6 7 8 9 a b c d e f    4-bit patterns to match.
     * l h                                Single bit pattern.
     * *                                  Any bit pattern.
     * ?                                  Any 4-bit pattern.
     * 'pattern'                          ASCII pattern with no terminator. Implies @.
     * @                                  Align to 8-bits.
     * %                                  Align to 4-bits.
     */
    public string Pattern { get; }

    /// <summary>
    /// Create a matcher for the given pattern string.
    /// </summary>
    /// <param name="pattern">The pattern text; it is stored as is and interpreted by <see cref="Match"/>.</param>
    public MagEx(string pattern)
    {
        Pattern = pattern;
    }

    /// <summary>
    /// Check whether the start of <paramref name="bytes"/> matches <see cref="Pattern"/>.
    /// </summary>
    /// <param name="bytes">The data to test. Matching is anchored at the first byte.</param>
    /// <returns>
    /// True when every pattern token was matched; false on the first mismatching
    /// nibble or when the data ends before the pattern does.
    /// </returns>
    /// <exception cref="NotSupportedException">The pattern contains a token that is not implemented yet.</exception>
    public bool Match(ReadOnlySpan<byte> bytes)
    {
        int pi = 0;

        for (int i = 0; i < bytes.Length && pi < Pattern.Length; i++)
        {
            byte current = bytes[i];

            // The high nibble is consumed first so that the pattern "89" matches
            // the byte 0x89, the same digit order MagicValue.TryParse uses.
            for (int shift = 4; shift >= 0 && pi < Pattern.Length; shift -= 4)
            {
                int haystack = (current >> shift) & 0xF;
                char pat = Pattern[pi];

                if (pat == '?')
                {
                    // Wildcard: any 4-bit value is accepted.
                    pi++;
                    continue;
                }

                int needle = HexValue(pat);
                if (needle < 0)
                {
                    throw new NotSupportedException(
                        $"Pattern token '{pat}' at index {pi} is not supported.");
                }

                if (haystack != needle)
                {
                    return false;
                }

                pi++;
            }
        }

        // A match requires the whole pattern to have been consumed.
        return pi == Pattern.Length;

        // Value of a hexadecimal digit, or -1 when the character is not one.
        static int HexValue(char c)
        {
            if (c >= '0' && c <= '9')
                return c - '0';
            if (c >= 'a' && c <= 'f')
                return c - 'a' + 10;
            if (c >= 'A' && c <= 'F')
                return c - 'A' + 10;
            return -1;
        }
    }
}
}

View File

@@ -0,0 +1,143 @@
using System;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.IO;
using System.Linq;
namespace ReMime.ContentResolvers
{
/// <summary>
/// Pairs a media type with the magic values that identify it.
/// </summary>
/// <param name="MediaType">The media type being described.</param>
/// <param name="MagicValues">The magic values associated with <paramref name="MediaType"/>.</param>
public record MagicValueMediaType(MediaType MediaType, MagicValue[] MagicValues)
{
/// <summary>
/// The extensions of the media type. Initialized once, from the
/// <c>MediaType</c> constructor argument, when the record is created.
/// </summary>
public IReadOnlyCollection<string> Extensions { get; } = MediaType.Extensions;
}
/// <summary>
/// Content resolver that identifies media types by the magic value found at
/// the start of the content. Magic values are kept in a byte-keyed prefix tree.
/// </summary>
public class MagicContentResolver : IMediaContentResolver, IMagicValueResolver
{
    // Largest probe buffer that is placed on the stack; bigger ones use the heap.
    private const int MaxStackBytes = 256;

    private readonly List<MediaType> _mediaTypes = new List<MediaType>();
    private readonly Dictionary<string, MediaType> _extensions = new Dictionary<string, MediaType>();
    private readonly Tree _tree = new Tree();

    // Length of the longest registered magic value, i.e. how much to read from a stream.
    private int _maxBytes = 0;

    /// <summary>
    /// Load the embedded "database.jsonc" resource and register all of its entries.
    /// </summary>
    private MagicContentResolver()
    {
        IEnumerable<MagicValueMediaType> entries;

        using (Stream str = typeof(MagicContentResolver).Assembly.GetManifestResourceStream("ReMime.ContentResolvers.database.jsonc")!)
        {
            entries = MagicValueDatabaseEntry.GetEntries(str);
        }

        AddMagicValues(entries);
    }

    /// <summary>
    /// Read-only view of every media type registered with this resolver.
    /// </summary>
    public IReadOnlyCollection<MediaType> MediaTypes => _mediaTypes.AsReadOnly();

    /// <summary>
    /// Register a media type, its magic values and its extensions.
    /// </summary>
    /// <param name="value">The media type and magic values to register.</param>
    public void AddMagicValue(MagicValueMediaType value)
    {
        foreach (MagicValue magic in value.MagicValues)
        {
            _maxBytes = Math.Max(_maxBytes, magic.Value.Length);
        }

        _mediaTypes.Add(value.MediaType);
        _tree.Add(value);

        // A later registration wins when two media types claim the same extension.
        foreach (string extension in value.MediaType.Extensions)
        {
            _extensions[extension] = value.MediaType;
        }
    }

    /// <summary>
    /// Register several media types in enumeration order.
    /// </summary>
    /// <param name="values">The media types and magic values to register.</param>
    public void AddMagicValues(IEnumerable<MagicValueMediaType> values)
    {
        foreach (MagicValueMediaType value in values)
        {
            AddMagicValue(value);
        }
    }

    /// <summary>
    /// Resolve a media type from the bytes at the current position of a stream.
    /// </summary>
    /// <param name="str">The stream to read from. It is not rewound by this method.</param>
    /// <param name="mediaType">The resolved media type, or null.</param>
    /// <returns>True if a registered magic value matched.</returns>
    public bool TryResolve(Stream str, [NotNullWhen(true)] out MediaType? mediaType)
    {
        // _maxBytes grows with user supplied magic values, so bound the stack usage.
        Span<byte> buffer = _maxBytes <= MaxStackBytes
            ? stackalloc byte[_maxBytes]
            : new byte[_maxBytes];

        // Stream.Read may deliver fewer bytes than requested; keep reading until
        // the buffer is full or the stream ends.
        int filled = 0;
        while (filled < buffer.Length)
        {
            int read = str.Read(buffer.Slice(filled));
            if (read <= 0)
                break;
            filled += read;
        }

        // Only hand over what was actually read so that zero padding of a short
        // stream cannot be mistaken for content.
        return TryResolve(buffer.Slice(0, filled), out mediaType);
    }

    /// <summary>
    /// Resolve a media type from the leading bytes of some content.
    /// </summary>
    /// <param name="content">Bytes from the start of the content.</param>
    /// <param name="mediaType">The resolved media type, or null.</param>
    /// <returns>True if a registered magic value matched.</returns>
    public bool TryResolve(ReadOnlySpan<byte> content, [NotNullWhen(true)] out MediaType? mediaType)
    {
        MagicValueMediaType? type = _tree[content];
        mediaType = type?.MediaType;
        return mediaType != null;
    }

    /// <summary>
    /// Resolve a media type from a file extension registered with this resolver.
    /// </summary>
    /// <param name="extension">The extension, in the same form it was registered with.</param>
    /// <param name="mediaType">The resolved media type, or null.</param>
    /// <returns>True if the extension is known.</returns>
    public bool TryResolve(string extension, out MediaType? mediaType)
    {
        return _extensions.TryGetValue(extension, out mediaType);
    }

    /// <summary>
    /// The shared resolver instance, preloaded with the embedded database.
    /// </summary>
    public static MagicContentResolver Instance { get; } = new MagicContentResolver();

    /// <summary>
    /// Prefix tree keyed by byte. A node holds the media type whose magic value
    /// ends at that node, if any.
    /// </summary>
    private class Tree
    {
        public MagicValueMediaType? Node { get; private set; }
        public Dictionary<byte, Tree>? Children { get; private set; }

        /// <summary>
        /// Find the entry with the longest magic value that is a prefix of
        /// <paramref name="bytes"/>, or null when none matches.
        /// </summary>
        public MagicValueMediaType? this[ReadOnlySpan<byte> bytes]
        {
            get
            {
                Tree current = this;
                MagicValueMediaType? best = Node;

                foreach (byte b in bytes)
                {
                    if (current.Children == null || !current.Children.TryGetValue(b, out Tree? next))
                        break;

                    current = next;

                    // Remember the deepest complete magic value seen so far so that a
                    // longer branch that dead-ends does not hide a shorter match.
                    if (current.Node != null)
                        best = current.Node;
                }

                return best;
            }
        }

        // Walk or create the path for the given bytes and store the entry at its end.
        private void AddInternal(MagicValueMediaType magic, ReadOnlySpan<byte> bytes)
        {
            Tree current = this;

            foreach (byte b in bytes)
            {
                current.Children ??= new Dictionary<byte, Tree>();

                if (!current.Children.TryGetValue(b, out Tree? next))
                {
                    next = new Tree();
                    current.Children[b] = next;
                }

                current = next;
            }

            current.Node = magic;
        }

        /// <summary>
        /// Insert every magic value of <paramref name="magic"/> into the tree.
        /// </summary>
        public void Add(MagicValueMediaType magic)
        {
            foreach (var entry in magic.MagicValues)
            {
                AddInternal(magic, entry.Value);
            }
        }
    }
}
}

View File

@@ -0,0 +1,154 @@
using System;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.Text;
namespace ReMime.ContentResolvers
{
/// <summary>
/// A magic value to identify file types.
/// </summary>
/// <param name="Value">The byte array that makes up the magic value.</param>
public record struct MagicValue(byte[] Value)
{
    /// <summary>Create a magic value from the bytes of an integer, in the byte order <see cref="BitConverter"/> uses on this machine.</summary>
    public MagicValue(int value) : this(BitConverter.GetBytes(value)) { }

    /// <summary>Create a magic value from the bytes of a short, in the byte order <see cref="BitConverter"/> uses on this machine.</summary>
    public MagicValue(short value) : this(BitConverter.GetBytes(value)) { }

    /// <summary>Create a magic value from encoded text. ASCII is used when no encoding is given.</summary>
    public MagicValue(string value, Encoding? encoding = null)
        : this((encoding ?? Encoding.ASCII).GetBytes(value)) { }

    /// <summary>Create a magic value from a copy of the given bytes.</summary>
    public MagicValue(ReadOnlySpan<byte> bytes) : this(bytes.ToArray()) { }

    /// <summary>
    /// Check if <paramref name="haystack"/> starts with this magic value.
    /// </summary>
    /// <param name="haystack">Bytes from the start of the content.</param>
    /// <returns>
    /// True if <paramref name="haystack"/> begins with every byte of the magic
    /// value; false otherwise, including when it is shorter than the magic value.
    /// </returns>
    public bool Matches(ReadOnlySpan<byte> haystack)
    {
        return haystack.StartsWith(new ReadOnlySpan<byte>(Value));
    }

    /// <summary>
    /// Compare two magic values byte by byte, consistent with <see cref="GetHashCode"/>.
    /// </summary>
    public bool Equals(MagicValue other)
    {
        return new ReadOnlySpan<byte>(Value).SequenceEqual(new ReadOnlySpan<byte>(other.Value));
    }

    /// <summary>
    /// Hash of the byte content.
    /// </summary>
    public override int GetHashCode()
    {
        // Uses the FNV-1a algorithm in 32-bit mode.
        const int PRIME = 0x01000193;
        const int BASIS = unchecked((int)0x811c9dc5);

        int hash = BASIS;
        foreach (byte b in new ReadOnlySpan<byte>(Value))
        {
            // Wrap-around is part of the algorithm, also in checked builds.
            hash = unchecked((hash ^ b) * PRIME);
        }

        return hash;
    }

    /// <summary>
    /// Parse the textual magic value syntax: pairs of hexadecimal digits and
    /// single quoted ASCII strings, optionally separated by whitespace,
    /// e.g. <c>89'PNG'</c>.
    /// </summary>
    /// <remarks>
    /// Inside quotes a backslash introduces one of the escapes
    /// <c>\n \r \t \a \b \f \v \0 \? \\ \' \"</c>.
    /// </remarks>
    /// <param name="magic">The text to parse.</param>
    /// <param name="value">The parsed magic value, or null on failure.</param>
    /// <returns>
    /// False if the text is malformed (unterminated string, unknown escape,
    /// dangling or invalid hex digit, unexpected character) or yields no bytes.
    /// </returns>
    public static bool TryParse(ReadOnlySpan<char> magic, [NotNullWhen(true)] out MagicValue? value)
    {
        value = null;

        List<byte> bytes = new List<byte>();
        StringBuilder builder = new StringBuilder();

        for (int i = 0; i < magic.Length; i++)
        {
            char chr = magic[i];

            if (chr == '\'')
            {
                builder.Clear();
                bool closed = false;
                int j;

                for (j = i + 1; j < magic.Length; j++)
                {
                    chr = magic[j];

                    if (chr == '\'')
                    {
                        closed = true;
                        break;
                    }

                    if (chr != '\\')
                    {
                        builder.Append(chr);
                        continue;
                    }

                    // A backslash must be followed by the character it escapes.
                    if (j + 1 >= magic.Length)
                        return false;

                    // Advance first so the escaped character is read, not the backslash.
                    if (!TryUnescape(magic[++j], out char unescaped))
                        return false;

                    builder.Append(unescaped);
                }

                if (!closed)
                {
                    // ASCII string overrun.
                    return false;
                }

                bytes.AddRange(Encoding.ASCII.GetBytes(builder.ToString()));
                i = j;
            }
            else if (AsciiToInt(chr) >= 0)
            {
                // Misaligned hex string.
                if (i + 1 >= magic.Length)
                    return false;

                int high = AsciiToInt(chr);
                int low = AsciiToInt(magic[++i]);

                // The second character of the pair has to be a hex digit too.
                if (low < 0)
                    return false;

                bytes.Add((byte)((high << 4) | low));
            }
            else if (chr == '\n' || chr == '\f' || chr == '\r' || chr == '\t' || chr == ' ')
            {
                // generic whitespace.
                continue;
            }
            else
            {
                // Anything else is not part of the syntax.
                return false;
            }
        }

        // No bytes to match.
        if (bytes.Count == 0)
            return false;

        value = new MagicValue(bytes.ToArray());
        return true;

        // Value of a hexadecimal digit, or -1 when the character is not one.
        static int AsciiToInt(char a)
        {
            if (a >= '0' && a <= '9')
                return a - '0';
            else if (a >= 'A' && a <= 'F')
                return a - 'A' + 10;
            else if (a >= 'a' && a <= 'f')
                return a - 'a' + 10;
            else
                return -1;
        }

        // Translate the character following a backslash; false for unknown escapes.
        static bool TryUnescape(char code, out char result)
        {
            switch (code)
            {
                case 'n': result = '\n'; return true;
                case 'r': result = '\r'; return true;
                case 't': result = '\t'; return true;
                case 'a': result = '\a'; return true;
                case 'b': result = '\b'; return true;
                case 'f': result = '\f'; return true;
                case 'v': result = '\v'; return true;
                case '0': result = '\0'; return true;
                case '?': result = '?'; return true;
                case '\\': result = '\\'; return true;
                case '\'': result = '\''; return true;
                case '\"': result = '\"'; return true;
                default: result = '\0'; return false;
            }
        }
    }
}
}

View File

@@ -0,0 +1,42 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text.Json;
using System.Text.Json.Serialization;
namespace ReMime.ContentResolvers
{
/// <summary>
/// One entry of a JSON magic value database: a media type name, its magic
/// values in textual form and its file extensions.
/// </summary>
public class MagicValueDatabaseEntry
{
    // Shared options: the databases are JSONC, i.e. JSON with comments and trailing commas.
    private static readonly JsonSerializerOptions s_options = new JsonSerializerOptions()
    {
        AllowTrailingCommas = true,
        ReadCommentHandling = JsonCommentHandling.Skip
    };

    /// <summary>The media type name, e.g. "image/png".</summary>
    [JsonPropertyName("type")]
    public string Type { get; set; } = string.Empty;

    /// <summary>Magic values in the syntax understood by <c>MagicValue.TryParse</c>.</summary>
    [JsonPropertyName("magic")]
    public List<string> Magic { get; set; } = new List<string>();

    /// <summary>File extensions associated with the media type.</summary>
    [JsonPropertyName("extensions")]
    public List<string> Extensions { get; set; } = new List<string>();

    /// <summary>
    /// Read a database from a stream and convert its entries.
    /// </summary>
    /// <param name="str">Stream containing a JSON array of entries.</param>
    /// <returns>The converted entries, fully materialized.</returns>
    /// <exception cref="InvalidDataException">The document is the JSON literal <c>null</c>.</exception>
    /// <exception cref="JsonException">The document is not valid JSON for this schema.</exception>
    public static IEnumerable<MagicValueMediaType> GetEntries(Stream str)
    {
        List<MagicValueDatabaseEntry>? entries =
            JsonSerializer.Deserialize<List<MagicValueDatabaseEntry>>(str, s_options);

        if (entries == null)
        {
            throw new InvalidDataException("The magic value database does not contain a list of entries.");
        }

        // Convert eagerly so the result does not re-run the conversion on every enumeration.
        return entries.Select(x => (MagicValueMediaType)x).ToList();
    }

    /// <summary>
    /// Convert a database entry. Magic value strings that fail to parse are
    /// left out of the result.
    /// </summary>
    public static explicit operator MagicValueMediaType(MagicValueDatabaseEntry entry)
    {
        List<MagicValue> magics = new List<MagicValue>();

        // "magic": null in the JSON overwrites the default empty list.
        if (entry.Magic != null)
        {
            foreach (string pattern in entry.Magic)
            {
                if (MagicValue.TryParse(pattern, out MagicValue? parsed))
                {
                    magics.Add(parsed.Value);
                }
            }
        }

        return new MagicValueMediaType(
            new MediaType(entry.Type, entry.Extensions),
            magics.ToArray()
        );
    }
}
}

View File

@@ -0,0 +1,106 @@
using System;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.IO;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
namespace ReMime.ContentResolvers
{
/// <summary>
/// Content resolver for RIFF containers. The media type is looked up from the
/// four byte form type that follows the "RIFF" tag and the chunk size.
/// </summary>
public class RiffResolver : IMediaContentResolver, IMagicValueResolver
{
    // NOTE(review): these look like private state (compare MagicContentResolver) but
    // are public; they are kept public here so existing callers keep compiling.
    public readonly List<MediaType> _mediaTypes = new List<MediaType>();
    public readonly Dictionary<string, MediaType> _extensions = new Dictionary<string, MediaType>();
    public readonly Dictionary<int, MediaType> _magicValues = new Dictionary<int, MediaType>();

    /// <summary>
    /// Read-only view of every media type registered with this resolver.
    /// </summary>
    public IReadOnlyCollection<MediaType> MediaTypes { get; }

    /// <summary>
    /// Load the embedded "riff.jsonc" resource and register all of its entries.
    /// </summary>
    private RiffResolver()
    {
        MediaTypes = _mediaTypes.AsReadOnly();

        IEnumerable<MagicValueMediaType> entries;
        using (Stream str = typeof(RiffResolver).Assembly.GetManifestResourceStream("ReMime.ContentResolvers.riff.jsonc")!)
        {
            entries = MagicValueDatabaseEntry.GetEntries(str);
        }

        AddMagicValues(entries);
    }

    /// <summary>
    /// Register several RIFF form types in enumeration order.
    /// </summary>
    /// <param name="entries">The media types and magic values to register.</param>
    public void AddMagicValues(IEnumerable<MagicValueMediaType> entries)
    {
        foreach (var entry in entries)
        {
            AddMagicValue(entry);
        }
    }

    /// <summary>
    /// Resolve a media type from the RIFF header at the current position of a stream.
    /// </summary>
    /// <param name="str">The stream to read from. It is not rewound by this method.</param>
    /// <param name="mediaType">The resolved media type, or null.</param>
    /// <returns>True if the content is a RIFF container with a known form type.</returns>
    public bool TryResolve(Stream str, [NotNullWhen(true)] out MediaType? mediaType)
    {
        Span<byte> content = stackalloc byte[Unsafe.SizeOf<RiffChunk>()];

        // Stream.Read may deliver fewer bytes than requested; keep reading until
        // the header is complete or the stream ends.
        int filled = 0;
        while (filled < content.Length)
        {
            int read = str.Read(content.Slice(filled));
            if (read <= 0)
                break;
            filled += read;
        }

        // A short read produces a short span, which the span overload rejects.
        return TryResolve(content.Slice(0, filled), out mediaType);
    }

    /// <summary>
    /// Resolve a media type from the leading bytes of some content.
    /// </summary>
    /// <param name="content">Bytes from the start of the content; at least 12 are needed.</param>
    /// <param name="mediaType">The resolved media type, or null.</param>
    /// <returns>True if the content is a RIFF container with a known form type.</returns>
    public bool TryResolve(ReadOnlySpan<byte> content, [NotNullWhen(true)] out MediaType? mediaType)
    {
        mediaType = null;

        if (content.Length < Unsafe.SizeOf<RiffChunk>())
            return false;

        ref readonly RiffChunk chunk = ref MemoryMarshal.Cast<byte, RiffChunk>(content)[0];

        if (chunk.Riff != RiffChunk.RiffValue)
            return false;

        return _magicValues.TryGetValue(chunk.FirstChunkType, out mediaType);
    }

    /// <summary>
    /// Resolve a media type from a file extension registered with this resolver.
    /// </summary>
    /// <param name="extension">The extension, in the same form it was registered with.</param>
    /// <param name="mediaType">The resolved media type, or null.</param>
    /// <returns>True if the extension is known.</returns>
    public bool TryResolve(string extension, out MediaType? mediaType)
    {
        return _extensions.TryGetValue(extension, out mediaType);
    }

    /// <summary>
    /// Add a RIFF sub-magic value to this resolver.
    /// </summary>
    /// <param name="type">
    /// The media type to register. Only magic values that are exactly four bytes
    /// long are used; others are ignored.
    /// </param>
    /// <exception cref="ArgumentException"><paramref name="type"/> has no magic values.</exception>
    public void AddMagicValue(MagicValueMediaType type)
    {
        if (type.MagicValues.Length == 0)
            throw new ArgumentException("Expected at least one magic value.", nameof(type));

        _mediaTypes.Add(type.MediaType);

        // Indexer assignment: a later registration replaces an earlier one instead of
        // throwing half way through, matching MagicContentResolver.
        foreach (string extension in type.Extensions)
        {
            _extensions[extension] = type.MediaType;
        }

        foreach (MagicValue magic in type.MagicValues)
        {
            if (magic.Value.Length != 4)
                continue;

            // Same machine byte order as the RiffChunk overlay used for lookups.
            int key = BitConverter.ToInt32(magic.Value, 0);
            _magicValues[key] = type.MediaType;
        }
    }

    /// <summary>
    /// The shared resolver instance, preloaded with the embedded database.
    /// </summary>
    public static RiffResolver Instance { get; } = new RiffResolver();

    // Overlay for the first 12 bytes of a RIFF file. Sequential layout keeps the
    // fields in declaration order, which the byte overlay depends on.
    [StructLayout(LayoutKind.Sequential, Pack = 1, Size = 12)]
    private struct RiffChunk
    {
        public int Riff;
        public int Size;
        public int FirstChunkType;

        // The ASCII bytes "RIFF" read as a little-endian 32-bit integer (0x46464952).
        public const int RiffValue = 1179011410;
    }
}
}

View File

@@ -0,0 +1,62 @@
/**
* ReMime Magic Value & File Extension Database
* ---------------------------------------------
* This is a self compiled list of magic values, file extensions and
* their mime-types. Please contribute common file formats if you come
* across them.
*
* This file is only for common file formats that do not need any extra
* detection logic. Do not add major container formats like ZIP or RIFF
* into this list.
*/
[
// #region application/*
{ "type": "application/vnd.rar", "magic": [ "'Rar!'1a07" ], "extensions": [ "rar" ] },
{ "type": "application/postscript", "magic": [ "'%!PS'"], "extensions": [ "ps", "eps", "epsf" ] },
{ "type": "application/pdf", "magic": ["'%PDF-'"], "extensions": [ "pdf" ] },
// #endregion
// #region audio/*
{ "type": "audio/mp3", "magic": [ "fffb", "fff3", "fff2", "'ID3'" ], "extensions": [ "mp3" ] },
{ "type": "audio/flac", "magic": [ "'fLaC'" ], "extensions": [ "flac" ] },
{ "type": "audio/midi", "magic": [ "'MThd'" ], "extensions": [ "mid", "midi" ] },
// #endregion
// #region font/*
{ "type": "font/woff", "magic": [ "'wOFF'" ], "extensions": [ "woff" ] },
{ "type": "font/woff2", "magic": [ "'wOF2'" ], "extensions": [ "woff2" ] },
{ "type": "font/ttf", "magic": [ "0001000000" ], "extensions": [ "ttf", "tte", "dfont" ] },
{ "type": "font/otf", "magic": [ "'OTTO'" ], "extensions": [ "otf" ]},
// #endregion
// #region image/*
{ "type": "image/bmp", "magic": [ "'BM'" ], "extensions": [ "bmp" ] },
{ "type": "image/gif", "magic": [ "'GIF8'" ], "extensions": [ "gif" ] },
{ "type": "image/tiff", "magic": [ "'IIN1'", "4d4d002a", "49492a00"], "extensions": [ "tiff", "tif", "nif" ] },
{ "type": "image/png", "magic": [ "89'PNG'" ], "extensions": [ "png" ] },
{ "type": "image/emf", "magic": [ "01000000" ], "extensions": [ "emf" ] },
{ "type": "image/wmf", "magic": [ "d7cdc69a" ], "extensions": [ "wmf" ] },
{ "type": "image/x-ico", "magic": [ "00000100" ], "extensions": [ "ico" ] },
{ "type": "image/x-qoi", "magic": [ "'qoif'" ], "extensions": [ "qoi" ]},
// The JPEG standard allows any magic value from ffd8ffe0 to ffd8ffff.
{
"type": "image/jpeg",
"magic": [
"ffd8ffe0", "ffd8ffe1", "ffd8ffe2", "ffd8ffe3",
"ffd8ffe4", "ffd8ffe5", "ffd8ffe6", "ffd8ffe7",
"ffd8ffe8", "ffd8ffe9", "ffd8ffea", "ffd8ffeb",
"ffd8ffec", "ffd8ffed", "ffd8ffee", "ffd8ffef",
"ffd8fff0", "ffd8fff1", "ffd8fff2", "ffd8fff3",
"ffd8fff4", "ffd8fff5", "ffd8fff6", "ffd8fff7",
"ffd8fff8", "ffd8fff9", "ffd8fffa", "ffd8fffb",
"ffd8fffc", "ffd8fffd", "ffd8fffe", "ffd8ffff"
],
"extensions": [ "jpeg", "jpg"]
},
// #endregion
// #region text/*
// The backslash is escaped twice: once for JSON and once for the quoted
// string syntax of magic values, so the matched bytes are {\rtf1.
{ "type": "text/rtf", "magic": [ "'{\\\\rtf1'" ], "extensions": [ "rtf" ]}
// #endregion
]

View File

@@ -0,0 +1,21 @@
/*
* ReMime RIFF file types database.
* This is a self compiled list of magic values, file extensions and
* their media-types. Please contribute common file formats if you come
* across them.
*
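 * This file is only for RIFF based formats. Please do not add magic values
 * for other file types here, as the magic values in this file are only used
 * as RIFF form types: they are compared with the four bytes that follow the
 * 'RIFF' tag and the chunk size, and must therefore be exactly 4 bytes long.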
*/
[
{ "type": "video/avi", "magic": ["'AVI '"], "extensions": ["avi"]},
{ "type": "audio/wav", "magic": ["'WAVE'"], "extensions": ["wav"] },
{ "type": "image/vnd.microsoft.icon", "magic": ["'ACON'"], "extensions": ["ani"] },
// Whilst we speak about WEBP...
// My kindest f*ck you to Google for impeding standards like JPEG-XL
// for their own benefit. WEBP itself might be a fine file format
// but I do not support it if you maliciously stop implementing other
// file formats into your browsers and applications.
{ "type": "image/webp", "magic": ["'WEBP'"], "extensions": ["webp"] }
]

View File

@@ -4,6 +4,9 @@ using System.IO;
namespace ReMime
{
/// <summary>
/// Interface for all media type resolvers that inspect content.
/// </summary>
public interface IMediaContentResolver : IMediaTypeResolver
{
/// <summary>

View File

@@ -3,6 +3,9 @@ using System.Collections.Generic;
namespace ReMime
{
/// <summary>
/// Interface for all media type resolvers.
/// </summary>
public interface IMediaTypeResolver
{
/// <summary>

View File

@@ -3,17 +3,27 @@ using System.Collections.Generic;
using System.Collections.Immutable;
using System.IO;
using System.Linq;
using ReMime.ContentResolvers;
using ReMime.Platform;
namespace ReMime
{
/// <summary>
/// Resolve media types from file names and file contents.
/// </summary>
public static class MediaTypeResolver
{
private static readonly SortedList<int, IMediaTypeResolver> s_resolvers = new SortedList<int, IMediaTypeResolver>();
private static IReadOnlyList<MediaType>? s_mediaTypes = null;
/// <summary>
/// Enumeration of currently available media type resolvers.
/// </summary>
public static IEnumerable<IMediaTypeResolver> Resolvers => s_resolvers.Values;
/// <summary>
/// Enumeration of detectable media types.
/// </summary>
public static IEnumerable<MediaType> KnownTypes
{
get
@@ -40,26 +50,32 @@ namespace ReMime
static MediaTypeResolver()
{
if (OperatingSystem.IsWindows())
{
AddResolver(new Win32MediaTypeResolver());
}
else if (OperatingSystem.IsLinux())
{
AddResolver(new UnixMediaTypeResolver());
// TODO: add freedesktop mime type database.
}
else if (OperatingSystem.IsMacOS())
{
AddResolver(new UnixMediaTypeResolver()); //?
}
AddResolver(RiffResolver.Instance, 1000);
AddResolver(MagicContentResolver.Instance, 1001);
if (Win32MediaTypeResolver.Instance != null)
AddResolver(Win32MediaTypeResolver.Instance, 1002);
if (UnixMediaTypeResolver.Instance != null)
AddResolver(UnixMediaTypeResolver.Instance, 1002);
}
/// <summary>
/// Add a media type resolver.
/// </summary>
/// <param name="resolver">The resolver instance to add.</param>
/// <param name="priority">
/// The resolver priority. Lower values take precedence. If the priority is
/// already taken, the next free higher value is used, so resolvers that ask
/// for the same priority are consulted in registration order.
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="resolver"/> is null.</exception>
/// <exception cref="ArgumentException">No free priority exists at or above <paramref name="priority"/>.</exception>
public static void AddResolver(IMediaTypeResolver resolver, int priority = 9999)
{
    if (resolver == null)
    {
        throw new ArgumentNullException(nameof(resolver));
    }

    // The sorted list needs unique keys; without this a second call with the
    // default priority would throw.
    while (s_resolvers.ContainsKey(priority))
    {
        if (priority == int.MaxValue)
        {
            throw new ArgumentException("No free resolver priority is available.", nameof(priority));
        }

        priority++;
    }

    s_resolvers.Add(priority, resolver);
}
/// <summary>
/// Try to resolve the media type from a path.
/// </summary>
/// <param name="path">The path string.</param>
/// <param name="mediaType">The result media type.</param>
/// <returns>True if there was a matching media type.</returns>
/// <exception cref="ArgumentException">Issues with <paramref name="path"/> string. See <see cref="Path.GetFileName"/>.</exception>
public static bool TryResolve(ReadOnlySpan<char> path, out MediaType mediaType)
{
path = Path.GetFileName(path);
@@ -95,11 +111,18 @@ namespace ReMime
return false;
}
/// <summary>
/// Try to resolve the media type from a stream.
/// </summary>
/// <param name="stream">The stream to inspect.</param>
/// <param name="mediaType">The result media type.</param>
/// <returns>True if the type was resolved.</returns>
/// <exception cref="ArgumentException">The <paramref name="stream"/> is unseekable.</exception>
public static bool TryResolve(Stream stream, out MediaType mediaType)
{
if (!stream.CanSeek)
{
throw new Exception("This stream is not seekable, cannot resolve unseekable streams.");
throw new ArgumentException("This stream is not seekable, cannot resolve unseekable streams.", nameof(stream));
}
foreach (IMediaTypeResolver resolver in Resolvers)
@@ -120,6 +143,12 @@ namespace ReMime
return false;
}
/// <summary>
/// Try to resolve the media type from a span.
/// </summary>
/// <param name="bytes">A span of bytes from the start of the media.</param>
/// <param name="mediaType">The result media type.</param>
/// <returns>True if the type was resolved.</returns>
public static bool TryResolve(ReadOnlySpan<byte> bytes, out MediaType mediaType)
{
foreach (IMediaTypeResolver resolver in Resolvers)
@@ -139,6 +168,17 @@ namespace ReMime
return false;
}
/// <summary>
/// Try to resolve the media type.
/// </summary>
/// <param name="path">The path string.</param>
/// <param name="bytes">A span of bytes from the start of the media.</param>
/// <param name="mediaType">The result media type.</param>
/// <returns><see cref="MediaTypeResult.None"/> if none matched.</returns>
/// <exception cref="ArgumentException">
/// Issues with <paramref name="path"/> string.
/// See <see cref="Path.GetFileName"/>
/// </exception>
public static MediaTypeResult TryResolve(ReadOnlySpan<char> path, ReadOnlySpan<byte> bytes, out MediaType mediaType)
{
if (TryResolve(bytes, out mediaType))
@@ -160,6 +200,17 @@ namespace ReMime
}
}
/// <summary>
/// Try to resolve the media type.
/// </summary>
/// <param name="path">The path string.</param>
/// <param name="stream">The stream to inspect.</param>
/// <param name="mediaType">The result media type.</param>
/// <returns><see cref="MediaTypeResult.None"/> if none matched.</returns>
/// <exception cref="ArgumentException">
/// The <paramref name="stream"/> is unseekable, or issues with <paramref name="path"/> string.
/// See <see cref="Path.GetFileName"/>
/// </exception>
public static MediaTypeResult TryResolve(ReadOnlySpan<char> path, Stream stream, out MediaType mediaType)
{
if (TryResolve(stream, out mediaType))
@@ -181,6 +232,13 @@ namespace ReMime
}
}
/// <summary>
/// Try to resolve the media type.
/// </summary>
/// <param name="fileInfo">The FileInfo object to the file.</param>
/// <param name="mediaType">The result media type.</param>
/// <param name="open">True to open the file and inspect the contents as well.</param>
/// <returns><see cref="MediaTypeResult.None"/> if none matched.</returns>
public static MediaTypeResult TryResolve(FileInfo fileInfo, out MediaType mediaType, bool open = true)
{
if (open)

View File

@@ -2,6 +2,9 @@ using System;
namespace ReMime
{
/// <summary>
/// The result of a combined media type query.
/// </summary>
[Flags]
public enum MediaTypeResult
{

View File

@@ -1,16 +1,20 @@
using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Diagnostics;
using System.IO;
namespace ReMime.Platform
{
/// <summary>
/// Media type resolver for *nix systems that have a "/etc/mime.types" file.
/// </summary>
public class UnixMediaTypeResolver : IMediaTypeResolver
{
private readonly Dictionary<string, MediaType> _extensionsMap = new Dictionary<string, MediaType>();
public IReadOnlyCollection<MediaType> MediaTypes { get; }
public UnixMediaTypeResolver()
private UnixMediaTypeResolver()
{
{
bool valid = OperatingSystem.IsLinux() || OperatingSystem.IsMacOS() || OperatingSystem.IsFreeBSD();
@@ -47,10 +51,21 @@ namespace ReMime.Platform
public bool TryResolve(string extension, out MediaType? mediaType)
{
if (_extensionsMap.TryGetValue(extension, out mediaType))
return true;
else
return false;
return _extensionsMap.TryGetValue(extension, out mediaType);
}
/// <summary>
/// The shared resolver instance, or <c>null</c> when the static constructor
/// could not create one.
/// </summary>
public static UnixMediaTypeResolver? Instance { get; } = null;
/// <summary>
/// Static constructor that attempts to create the shared <see cref="Instance"/>.
/// </summary>
/// <remarks>
/// Any exception thrown while constructing the resolver is caught here, so
/// <see cref="Instance"/> keeps its initial <c>null</c> value on failure.
/// </remarks>
static UnixMediaTypeResolver()
{
try
{
Instance = new UnixMediaTypeResolver();
}
catch (Exception ex)
{
// The failure is only reported through Debug.WriteLine, which is compiled
// out of builds without the DEBUG symbol; there the exception is dropped silently.
Debug.WriteLine(ex);
}
}
private static readonly char[] s_delimeters = new char[] { '\t', ' ' };

View File

@@ -1,16 +1,20 @@
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using Microsoft.Win32;
namespace ReMime.Platform
{
/// <summary>
/// Media type resolver for Windows systems.
/// </summary>
public class Win32MediaTypeResolver : IMediaTypeResolver
{
private readonly Dictionary<string, MediaType> _extensionsMap = new Dictionary<string, MediaType>();
public IReadOnlyCollection<MediaType> MediaTypes { get; }
public Win32MediaTypeResolver()
private Win32MediaTypeResolver()
{
if (!OperatingSystem.IsWindows())
throw new PlatformNotSupportedException();
@@ -52,14 +56,20 @@ namespace ReMime.Platform
public bool TryResolve(string extension, out MediaType? mediaType)
{
if (_extensionsMap.TryGetValue(extension, out mediaType))
return _extensionsMap.TryGetValue(extension, out mediaType);
}
/// <summary>
/// The shared resolver instance, or <c>null</c> when the static constructor
/// could not create one.
/// </summary>
public static Win32MediaTypeResolver? Instance { get; } = null;
static Win32MediaTypeResolver()
{
try
{
return true;
Instance = new Win32MediaTypeResolver();
}
else
catch (Exception ex)
{
mediaType = null;
return true;
Debug.WriteLine(ex);
}
}
}

View File

@@ -1,9 +1,36 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net6.0</TargetFramework>
<TargetFrameworks>net6.0;net8.0</TargetFrameworks>
<ImplicitUsings>disable</ImplicitUsings>
<Nullable>enable</Nullable>
<!--NuGet-->
<GeneratePackageOnBuild>True</GeneratePackageOnBuild>
<PackageId>ReFuel.ReMime</PackageId>
<Version>0.1.1</Version>
<Authors>H. Utku Maden</Authors>
<Company>ReFuel</Company>
<PackageReadmeFile>README.md</PackageReadmeFile>
<PackageLicenseFile>LICENSE.md</PackageLicenseFile>
<PackageIcon>images\icon.png</PackageIcon>
<PackageProjectUrl>https://refuel.mixedup.dev/docs/ReMime.html</PackageProjectUrl>
<RepositoryUrl>https://git.mixedup.dev/ReFuel/ReMime</RepositoryUrl>
<RepositoryType>git</RepositoryType>
<PackageTags>detection; detector; type; file; mime; mime-type; media; media-type; analysis</PackageTags>
<PackageDescription>
ReMime is a very humble library that can identify the IANA media types of
files from their file extensions and contents. While being fully extensible
with your own resolvers, ReMime will also refer to your operating system's
file type database when resolving files.
</PackageDescription>
</PropertyGroup>
<ItemGroup>
<EmbeddedResource Include="ContentResolvers/database.jsonc" />
<EmbeddedResource Include="ContentResolvers/riff.jsonc" />
<Content Include="../*.md" Pack="true" PackagePath="/" />
<None Include="../remime_favicon.png" Pack="true" PackagePath="images\icon.png"/>
</ItemGroup>
</Project>

BIN
remime_favicon.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.4 KiB