|
|
|
@ -3,9 +3,9 @@
@@ -3,9 +3,9 @@
|
|
|
|
|
|
|
|
|
|
using System; |
|
|
|
|
using System.IO; |
|
|
|
|
using System.Linq; |
|
|
|
|
using System.Runtime.InteropServices; |
|
|
|
|
using System.Text; |
|
|
|
|
using System.Xml; |
|
|
|
|
|
|
|
|
|
namespace ICSharpCode.SharpDevelop |
|
|
|
|
{ |
|
|
|
@ -13,12 +13,9 @@ namespace ICSharpCode.SharpDevelop
@@ -13,12 +13,9 @@ namespace ICSharpCode.SharpDevelop
|
|
|
|
|
{ |
|
|
|
|
const int BUFFER_SIZE = 4 * 1024; |
|
|
|
|
|
|
|
|
|
// Known BOMs
|
|
|
|
|
public static readonly byte[] UTF8 = new byte[] { 0xEF, 0xBB, 0xBF }; |
|
|
|
|
public static readonly byte[] UTF16BE = new byte[] { 0xFE, 0xFF }; |
|
|
|
|
public static readonly byte[] UTF16LE = new byte[] { 0xFF, 0xFE }; |
|
|
|
|
public static readonly byte[] UTF32BE = new byte[] { 0x00, 0x00, 0xFE, 0xFF }; |
|
|
|
|
public static readonly byte[] UTF32LE = new byte[] { 0xFF, 0xFE, 0x00, 0x00 }; |
|
|
|
|
public const string Binary = "application/octet-stream"; |
|
|
|
|
public const string Text = "text/plain"; |
|
|
|
|
public const string Xml = "text/xml"; |
|
|
|
|
|
|
|
|
|
[DllImport("urlmon.dll", CharSet = CharSet.Unicode, ExactSpelling = true, SetLastError = false)] |
|
|
|
|
static extern unsafe int FindMimeFromData( |
|
|
|
@ -31,47 +28,115 @@ namespace ICSharpCode.SharpDevelop
@@ -31,47 +28,115 @@ namespace ICSharpCode.SharpDevelop
|
|
|
|
|
out IntPtr ppwzMimeOut, |
|
|
|
|
int dwReserved); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static byte[] DetectAndRemoveBOM(byte[] buffer, out int len) |
|
|
|
|
public static string FindMimeType(Stream stream) |
|
|
|
|
{ |
|
|
|
|
len = UTF8.Length; |
|
|
|
|
if (buffer.StartsWith(UTF8)) |
|
|
|
|
return buffer.Skip(UTF8.Length).ToArray(); |
|
|
|
|
len = UTF32BE.Length; |
|
|
|
|
if (buffer.StartsWith(UTF32BE)) |
|
|
|
|
return buffer.Skip(UTF32BE.Length).ToArray(); |
|
|
|
|
len = UTF32LE.Length; |
|
|
|
|
if (buffer.StartsWith(UTF32LE)) |
|
|
|
|
return buffer.Skip(UTF32LE.Length).ToArray(); |
|
|
|
|
len = UTF16LE.Length; |
|
|
|
|
if (buffer.StartsWith(UTF16LE)) |
|
|
|
|
return buffer.Skip(UTF16LE.Length).ToArray(); |
|
|
|
|
len = UTF16BE.Length; |
|
|
|
|
if (buffer.StartsWith(UTF16BE)) |
|
|
|
|
return buffer.Skip(UTF16BE.Length).ToArray(); |
|
|
|
|
len = 0; |
|
|
|
|
return buffer; |
|
|
|
|
StreamReader reader; |
|
|
|
|
if (stream.Length >= 2) { |
|
|
|
|
int firstByte = stream.ReadByte(); |
|
|
|
|
int secondByte = stream.ReadByte(); |
|
|
|
|
switch ((firstByte << 8) | secondByte) { |
|
|
|
|
case 0xfffe: // UTF-16 LE BOM / UTF-32 LE BOM
|
|
|
|
|
case 0xfeff: // UTF-16 BE BOM
|
|
|
|
|
stream.Position -= 2; |
|
|
|
|
reader = new StreamReader(stream, detectEncodingFromByteOrderMarks: true); |
|
|
|
|
break; |
|
|
|
|
case 0xefbb: // start of UTF-8 BOM
|
|
|
|
|
if (stream.ReadByte() == 0xbf) { |
|
|
|
|
reader = new StreamReader(stream, Encoding.UTF8); |
|
|
|
|
break; |
|
|
|
|
} else { |
|
|
|
|
return Binary; |
|
|
|
|
} |
|
|
|
|
default: |
|
|
|
|
if (IsUTF8(stream, (byte)firstByte, (byte)secondByte)) { |
|
|
|
|
stream.Position = 0; |
|
|
|
|
reader = new StreamReader(stream, Encoding.UTF8); |
|
|
|
|
break; |
|
|
|
|
} else { |
|
|
|
|
byte[] buffer = new byte[BUFFER_SIZE]; |
|
|
|
|
int length = stream.Read(buffer, 0, BUFFER_SIZE); |
|
|
|
|
return FindMimeType(buffer, 0, length); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} else { |
|
|
|
|
return Text; |
|
|
|
|
} |
|
|
|
|
// Now we got a StreamReader with the correct encoding
|
|
|
|
|
// Check for XML now
|
|
|
|
|
try { |
|
|
|
|
XmlTextReader xmlReader = new XmlTextReader(reader); |
|
|
|
|
xmlReader.XmlResolver = null; |
|
|
|
|
xmlReader.MoveToContent(); |
|
|
|
|
return Xml; |
|
|
|
|
} catch (XmlException) { |
|
|
|
|
return Text; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static bool StartsWith(this byte[] buffer, byte[] start) |
|
|
|
|
static bool IsUTF8(Stream fs, byte firstByte, byte secondByte) |
|
|
|
|
{ |
|
|
|
|
if (buffer.Length < start.Length) |
|
|
|
|
return false; |
|
|
|
|
int i = 0; |
|
|
|
|
while (i < start.Length && buffer[i] == start[i]) |
|
|
|
|
i++; |
|
|
|
|
return i >= start.Length; |
|
|
|
|
int max = (int)Math.Min(fs.Length, 500000); // look at max. 500 KB
|
|
|
|
|
const int ASCII = 0; |
|
|
|
|
const int Error = 1; |
|
|
|
|
const int UTF8 = 2; |
|
|
|
|
const int UTF8Sequence = 3; |
|
|
|
|
int state = ASCII; |
|
|
|
|
int sequenceLength = 0; |
|
|
|
|
byte b; |
|
|
|
|
for (int i = 0; i < max; i++) { |
|
|
|
|
if (i == 0) { |
|
|
|
|
b = firstByte; |
|
|
|
|
} else if (i == 1) { |
|
|
|
|
b = secondByte; |
|
|
|
|
} else { |
|
|
|
|
b = (byte)fs.ReadByte(); |
|
|
|
|
} |
|
|
|
|
if (b < 0x80) { |
|
|
|
|
// normal ASCII character
|
|
|
|
|
if (state == UTF8Sequence) { |
|
|
|
|
state = Error; |
|
|
|
|
break; |
|
|
|
|
} |
|
|
|
|
} else if (b < 0xc0) { |
|
|
|
|
// 10xxxxxx : continues UTF8 byte sequence
|
|
|
|
|
if (state == UTF8Sequence) { |
|
|
|
|
--sequenceLength; |
|
|
|
|
if (sequenceLength < 0) { |
|
|
|
|
state = Error; |
|
|
|
|
break; |
|
|
|
|
} else if (sequenceLength == 0) { |
|
|
|
|
state = UTF8; |
|
|
|
|
} |
|
|
|
|
} else { |
|
|
|
|
state = Error; |
|
|
|
|
break; |
|
|
|
|
} |
|
|
|
|
} else if (b >= 0xc2 && b < 0xf5) { |
|
|
|
|
// beginning of byte sequence
|
|
|
|
|
if (state == UTF8 || state == ASCII) { |
|
|
|
|
state = UTF8Sequence; |
|
|
|
|
if (b < 0xe0) { |
|
|
|
|
sequenceLength = 1; // one more byte following
|
|
|
|
|
} else if (b < 0xf0) { |
|
|
|
|
sequenceLength = 2; // two more bytes following
|
|
|
|
|
} else { |
|
|
|
|
sequenceLength = 3; // three more bytes following
|
|
|
|
|
} |
|
|
|
|
} else { |
|
|
|
|
state = Error; |
|
|
|
|
break; |
|
|
|
|
} |
|
|
|
|
} else { |
|
|
|
|
// 0xc0, 0xc1, 0xf5 to 0xff are invalid in UTF-8 (see RFC 3629)
|
|
|
|
|
state = Error; |
|
|
|
|
break; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
return state != Error; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static unsafe string FindMimeType(byte[] buffer, int offset, int length) |
|
|
|
|
{ |
|
|
|
|
int len; |
|
|
|
|
buffer = DetectAndRemoveBOM(buffer, out len); |
|
|
|
|
length -= len; |
|
|
|
|
offset = (offset < len) ? 0 : offset - len; |
|
|
|
|
if (length == 0) |
|
|
|
|
return "text/plain"; |
|
|
|
|
|
|
|
|
|
fixed (byte *b = &buffer[offset]) { |
|
|
|
|
const int FMFD_ENABLEMIMESNIFFING = 0x00000002; |
|
|
|
|
IntPtr mimeout; |
|
|
|
@ -89,16 +154,8 @@ namespace ICSharpCode.SharpDevelop
@@ -89,16 +154,8 @@ namespace ICSharpCode.SharpDevelop
|
|
|
|
|
{ |
|
|
|
|
if (buffer == null) |
|
|
|
|
throw new ArgumentNullException("buffer"); |
|
|
|
|
return FindMimeType(buffer, 0, buffer.Length); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
public static string FindMimeType(Stream stream) |
|
|
|
|
{ |
|
|
|
|
if (stream == null) |
|
|
|
|
throw new ArgumentNullException("stream"); |
|
|
|
|
byte[] buffer = new byte[BUFFER_SIZE]; |
|
|
|
|
stream.Position = 0; |
|
|
|
|
return FindMimeType(buffer, 0, stream.Read(buffer, 0, buffer.Length)); |
|
|
|
|
using (MemoryStream stream = new MemoryStream(buffer)) |
|
|
|
|
return FindMimeType(stream); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|