Browse Source

improve mime type detection

pull/6/merge
Siegfried Pammer 13 years ago
parent
commit
17479d34f6
  1. 52
      src/Main/Base/Project/Src/Services/MimeTypeDetection.cs
  2. 24
      src/Main/Base/Test/MimeDetectionTests.cs

52
src/Main/Base/Project/Src/Services/MimeTypeDetection.cs

@ -3,6 +3,7 @@ @@ -3,6 +3,7 @@
using System;
using System.IO;
using System.Linq;
using System.Runtime.InteropServices;
using System.Text;
@ -12,6 +13,13 @@ namespace ICSharpCode.SharpDevelop @@ -12,6 +13,13 @@ namespace ICSharpCode.SharpDevelop
{
/// <summary>
/// Buffer size in bytes (4 KiB).
/// NOTE(review): presumably the number of leading bytes sampled for detection;
/// the usage is not visible in this part of the file - confirm.
/// </summary>
const int BUFFER_SIZE = 4 * 1024;
// Known BOMs (byte order marks) that may prefix text data.
// NOTE: the fields are readonly, but the array contents are still mutable;
// callers must treat these arrays as constants and never modify them.
/// <summary>UTF-8 byte order mark (EF BB BF).</summary>
public static readonly byte[] UTF8 = new byte[] { 0xEF, 0xBB, 0xBF };
/// <summary>UTF-16 big endian byte order mark (FE FF).</summary>
public static readonly byte[] UTF16BE = new byte[] { 0xFE, 0xFF };
/// <summary>UTF-16 little endian byte order mark (FF FE).</summary>
public static readonly byte[] UTF16LE = new byte[] { 0xFF, 0xFE };
/// <summary>UTF-32 big endian byte order mark (00 00 FE FF).</summary>
public static readonly byte[] UTF32BE = new byte[] { 0x00, 0x00, 0xFE, 0xFF };
/// <summary>
/// UTF-32 little endian byte order mark (FF FE 00 00).
/// Its first two bytes are identical to <see cref="UTF16LE"/>.
/// </summary>
public static readonly byte[] UTF32LE = new byte[] { 0xFF, 0xFE, 0x00, 0x00 };
[DllImport("urlmon.dll", CharSet = CharSet.Unicode, ExactSpelling = true, SetLastError = false)]
static extern unsafe int FindMimeFromData(
IntPtr pBC,
@ -23,15 +31,45 @@ namespace ICSharpCode.SharpDevelop @@ -23,15 +31,45 @@ namespace ICSharpCode.SharpDevelop
out IntPtr ppwzMimeOut,
int dwReserved);
/// <summary>
/// Looks for one of the known byte order marks at the very start of
/// <paramref name="buffer"/> and, if one is found, returns a copy of the
/// buffer without it.
/// </summary>
/// <param name="buffer">The raw bytes to inspect. Must not be null.</param>
/// <param name="len">
/// Receives the number of BOM bytes that were stripped, or 0 when the
/// buffer does not start with a known BOM.
/// </param>
/// <returns>
/// A new array holding the bytes after the BOM, or the original
/// <paramref name="buffer"/> instance (not a copy) when no BOM is present.
/// </returns>
static byte[] DetectAndRemoveBOM(byte[] buffer, out int len)
{
    // The probing order is significant: the UTF-32 LE mark begins with the
    // UTF-16 LE mark, so it has to be tested before UTF-16 LE.
    byte[][] candidates = { UTF8, UTF32BE, UTF32LE, UTF16LE, UTF16BE };
    foreach (byte[] bom in candidates) {
        if (buffer.StartsWith(bom)) {
            len = bom.Length;
            return buffer.Skip(bom.Length).ToArray();
        }
    }
    // No known BOM: hand the input back untouched.
    len = 0;
    return buffer;
}
/// <summary>
/// Determines whether <paramref name="buffer"/> begins with the exact byte
/// sequence given in <paramref name="start"/>.
/// </summary>
/// <param name="buffer">The bytes to examine.</param>
/// <param name="start">The prefix to look for.</param>
/// <returns>
/// <c>true</c> if every byte of <paramref name="start"/> matches the byte at
/// the same index in <paramref name="buffer"/> (an empty prefix always
/// matches); <c>false</c> if the buffer is shorter than the prefix or any
/// byte differs.
/// </returns>
static bool StartsWith(this byte[] buffer, byte[] start)
{
    // A buffer shorter than the prefix can never contain it.
    if (buffer.Length < start.Length)
        return false;
    for (int index = 0; index < start.Length; index++) {
        // Bail out on the first mismatching byte.
        if (buffer[index] != start[index])
            return false;
    }
    return true;
}
static unsafe string FindMimeType(byte[] buffer, int offset, int length)
{
if (length == 0 ||
// UTF-16 Big Endian
(buffer.Length >= 2 && buffer[0] == 0xFE && buffer[1] == 0xFF) ||
// UTF-16 Little Endian
(buffer.Length >= 2 && buffer[0] == 0xFF && buffer[1] == 0xFE) ||
// UTF-32 Big Endian
(buffer.Length >= 4 && buffer[0] == 0x00 && buffer[1] == 0x00 && buffer[2] == 0xFE && buffer[3] == 0xFF))
int len;
buffer = DetectAndRemoveBOM(buffer, out len);
length -= len;
offset = (offset < len) ? 0 : offset - len;
if (length == 0)
return "text/plain";
fixed (byte *b = &buffer[offset]) {

24
src/Main/Base/Test/MimeDetectionTests.cs

@ -2,6 +2,8 @@ @@ -2,6 +2,8 @@
// This code is distributed under the GNU LGPL (for details please see \doc\license.txt)
using System;
using System.Linq;
using System.Text;
using NUnit.Framework;
namespace ICSharpCode.SharpDevelop.Tests
@ -15,15 +17,27 @@ namespace ICSharpCode.SharpDevelop.Tests @@ -15,15 +17,27 @@ namespace ICSharpCode.SharpDevelop.Tests
// always open empty files with text editor
TestMime(new byte[] {}, "text/plain");
// UTF-8
TestMime(new byte[] { 0xEF, 0xBB, 0xBF }, "text/plain");
TestMime(MimeTypeDetection.UTF8, "text/plain");
// UTF-16 Big Endian
TestMime(new byte[] { 0xFE, 0xFF }, "text/plain");
TestMime(MimeTypeDetection.UTF16BE, "text/plain");
// UTF-16 Little Endian
TestMime(new byte[] { 0xFF, 0xFE }, "text/plain");
TestMime(MimeTypeDetection.UTF16LE, "text/plain");
// UTF-32 Big Endian
TestMime(new byte[] { 0x00, 0x00, 0xFE, 0xFF }, "text/plain");
TestMime(MimeTypeDetection.UTF32BE, "text/plain");
// UTF-32 Little Endian
TestMime(new byte[] { 0xFF, 0xFE, 0x00, 0x00 }, "text/plain");
TestMime(MimeTypeDetection.UTF32LE, "text/plain");
}
/// <summary>
/// Verifies that an XML declaration is reported as "text/xml", both without
/// a byte order mark and when prefixed with each of the known BOMs.
/// </summary>
[Test]
public void TextXml()
{
    byte[] xmlBytes = Encoding.Default.GetBytes("<?xml version=\"1.0\" ?>");

    // Plain content without any BOM.
    TestMime(xmlBytes, "text/xml");

    // Same content prefixed with each known BOM, in the same order as before.
    byte[][] boms = {
        MimeTypeDetection.UTF8,
        MimeTypeDetection.UTF16BE,
        MimeTypeDetection.UTF16LE,
        MimeTypeDetection.UTF32BE,
        MimeTypeDetection.UTF32LE
    };
    foreach (byte[] bom in boms) {
        TestMime(bom.Concat(xmlBytes).ToArray(), "text/xml");
    }
}
void TestMime(byte[] bytes, string expectedMime)

Loading…
Cancel
Save