mirror of https://github.com/icsharpcode/ILSpy.git
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
740 lines
23 KiB
740 lines
23 KiB
// Copyright (c) AlphaSierraPapa for the SharpDevelop Team (for details please see \doc\copyright.txt) |
|
// This code is distributed under the GNU LGPL (for details please see \doc\license.txt) |
|
|
|
using System; |
|
using System.Collections.Generic; |
|
using System.Globalization; |
|
using System.Linq; |
|
using System.Text; |
|
|
|
namespace ICSharpCode.AvalonEdit.Xml |
|
{ |
|
class TagReader: TokenReader |
|
{ |
|
AXmlParser parser; |
|
TrackedSegmentCollection trackedSegments; |
|
string input; |
|
|
|
public TagReader(AXmlParser parser, string input): base(input) |
|
{ |
|
this.parser = parser; |
|
this.trackedSegments = parser.TrackedSegments; |
|
this.input = input; |
|
} |
|
|
|
bool TryReadFromCacheOrNew<T>(out T res) where T: AXmlObject, new() |
|
{ |
|
return TryReadFromCacheOrNew(out res, t => true); |
|
} |
|
|
|
bool TryReadFromCacheOrNew<T>(out T res, Predicate<T> condition) where T: AXmlObject, new() |
|
{ |
|
T cached = trackedSegments.GetCachedObject<T>(this.CurrentLocation, 0, condition); |
|
if (cached != null) { |
|
Skip(cached.Length); |
|
AXmlParser.Assert(cached.Length > 0, "cached elements must not have zero length"); |
|
res = cached; |
|
return true; |
|
} else { |
|
res = new T(); |
|
return false; |
|
} |
|
} |
|
|
|
void OnParsed(AXmlObject obj) |
|
{ |
|
AXmlParser.Log("Parsed {0}", obj); |
|
trackedSegments.AddParsedObject(obj, this.MaxTouchedLocation > this.CurrentLocation ? (int?)this.MaxTouchedLocation : null); |
|
} |
|
|
|
/// <summary> |
|
/// Read all tags in the document in a flat sequence. |
|
/// It also includes the text between tags and possibly some properly nested Elements from cache. |
|
/// </summary> |
|
public List<AXmlObject> ReadAllTags() |
|
{ |
|
List<AXmlObject> stream = new List<AXmlObject>(); |
|
|
|
while(true) { |
|
if (IsEndOfFile()) { |
|
break; |
|
} else if (TryPeek('<')) { |
|
AXmlElement elem; |
|
if (TryReadFromCacheOrNew(out elem, e => e.IsProperlyNested)) { |
|
stream.Add(elem); |
|
} else { |
|
stream.Add(ReadTag()); |
|
} |
|
} else { |
|
stream.AddRange(ReadText(TextType.CharacterData)); |
|
} |
|
} |
|
|
|
return stream; |
|
} |
|
|
|
/// <summary> |
|
/// Context: "<" |
|
/// </summary> |
|
AXmlTag ReadTag() |
|
{ |
|
AssertHasMoreData(); |
|
|
|
AXmlTag tag; |
|
if (TryReadFromCacheOrNew(out tag)) return tag; |
|
|
|
tag.StartOffset = this.CurrentLocation; |
|
|
|
// Read the opening bracket |
|
// It identifies the type of tag and parsing behavior for the rest of it |
|
tag.OpeningBracket = ReadOpeningBracket(); |
|
|
|
if (tag.IsUnknownBang && !TryPeekWhiteSpace()) |
|
OnSyntaxError(tag, tag.StartOffset, this.CurrentLocation, "Unknown tag"); |
|
|
|
if (tag.IsStartOrEmptyTag || tag.IsEndTag || tag.IsProcessingInstruction) { |
|
// Read the name |
|
string name; |
|
if (TryReadName(out name)) { |
|
if (!IsValidName(name)) { |
|
OnSyntaxError(tag, this.CurrentLocation - name.Length, this.CurrentLocation, "The name '{0}' is invalid", name); |
|
} |
|
} else { |
|
OnSyntaxError(tag, "Element name expected"); |
|
} |
|
tag.Name = name; |
|
} else { |
|
tag.Name = string.Empty; |
|
} |
|
|
|
bool isXmlDeclr = tag.StartOffset == 0 && tag.Name == "xml"; |
|
|
|
if (tag.IsStartOrEmptyTag || tag.IsEndTag || isXmlDeclr) { |
|
// Read attributes for the tag |
|
while(true) { |
|
// Chech for all forbiden 'name' charcters first - see ReadName |
|
if (IsEndOfFile()) break; |
|
if (TryPeekWhiteSpace()) { |
|
tag.AddChildren(ReadText(TextType.WhiteSpace)); |
|
continue; // End of file might be next |
|
} |
|
if (TryPeek('<')) break; |
|
string endBr; |
|
int endBrStart = this.CurrentLocation; // Just peek |
|
if (TryReadClosingBracket(out endBr)) { // End tag |
|
GoBack(endBrStart); |
|
break; |
|
} |
|
|
|
// We have "=\'\"" or name - read attribute |
|
AXmlAttribute attr = ReadAttribulte(); |
|
tag.AddChild(attr); |
|
if (tag.IsEndTag) |
|
OnSyntaxError(tag, attr.StartOffset, attr.EndOffset, "Attribute not allowed in end tag."); |
|
} |
|
} else if (tag.IsDocumentType) { |
|
tag.AddChildren(ReadContentOfDTD()); |
|
} else { |
|
int start = this.CurrentLocation; |
|
IEnumerable<AXmlObject> text; |
|
if (tag.IsComment) { |
|
text = ReadText(TextType.Comment); |
|
} else if (tag.IsCData) { |
|
text = ReadText(TextType.CData); |
|
} else if (tag.IsProcessingInstruction) { |
|
text = ReadText(TextType.ProcessingInstruction); |
|
} else if (tag.IsUnknownBang) { |
|
text = ReadText(TextType.UnknownBang); |
|
} else { |
|
throw new InternalException(string.Format(CultureInfo.InvariantCulture, "Unknown opening bracket '{0}'", tag.OpeningBracket)); |
|
} |
|
// Enumerate |
|
text = text.ToList(); |
|
// Backtrack at complete start |
|
if (IsEndOfFile() || (tag.IsUnknownBang && TryPeek('<'))) { |
|
GoBack(start); |
|
} else { |
|
tag.AddChildren(text); |
|
} |
|
} |
|
|
|
// Read closing bracket |
|
string bracket; |
|
TryReadClosingBracket(out bracket); |
|
tag.ClosingBracket = bracket; |
|
|
|
// Error check |
|
int brStart = this.CurrentLocation - (tag.ClosingBracket ?? string.Empty).Length; |
|
int brEnd = this.CurrentLocation; |
|
if (tag.Name == null) { |
|
// One error was reported already |
|
} else if (tag.IsStartOrEmptyTag) { |
|
if (tag.ClosingBracket != ">" && tag.ClosingBracket != "/>") OnSyntaxError(tag, brStart, brEnd, "'>' or '/>' expected"); |
|
} else if (tag.IsEndTag) { |
|
if (tag.ClosingBracket != ">") OnSyntaxError(tag, brStart, brEnd, "'>' expected"); |
|
} else if (tag.IsComment) { |
|
if (tag.ClosingBracket != "-->") OnSyntaxError(tag, brStart, brEnd, "'-->' expected"); |
|
} else if (tag.IsCData) { |
|
if (tag.ClosingBracket != "]]>") OnSyntaxError(tag, brStart, brEnd, "']]>' expected"); |
|
} else if (tag.IsProcessingInstruction) { |
|
if (tag.ClosingBracket != "?>") OnSyntaxError(tag, brStart, brEnd, "'?>' expected"); |
|
} else if (tag.IsUnknownBang) { |
|
if (tag.ClosingBracket != ">") OnSyntaxError(tag, brStart, brEnd, "'>' expected"); |
|
} else if (tag.IsDocumentType) { |
|
if (tag.ClosingBracket != ">") OnSyntaxError(tag, brStart, brEnd, "'>' expected"); |
|
} else { |
|
throw new InternalException(string.Format(CultureInfo.InvariantCulture, "Unknown opening bracket '{0}'", tag.OpeningBracket)); |
|
} |
|
|
|
// Attribute name may not apper multiple times |
|
var duplicates = tag.Children.OfType<AXmlAttribute>().GroupBy(attr => attr.Name).SelectMany(g => g.Skip(1)); |
|
foreach(AXmlAttribute attr in duplicates) { |
|
OnSyntaxError(tag, attr.StartOffset, attr.EndOffset, "Attribute with name '{0}' already exists", attr.Name); |
|
} |
|
|
|
tag.EndOffset = this.CurrentLocation; |
|
|
|
OnParsed(tag); |
|
return tag; |
|
} |
|
|
|
/// <summary> |
|
/// Reads any of the know opening brackets. (only full bracket) |
|
/// Context: "<" |
|
/// </summary> |
|
string ReadOpeningBracket() |
|
{ |
|
// We are using a lot of string literals so that the memory instances are shared |
|
//int start = this.CurrentLocation; |
|
if (TryRead('<')) { |
|
if (TryRead('/')) { |
|
return "</"; |
|
} else if (TryRead('?')) { |
|
return "<?"; |
|
} else if (TryRead('!')) { |
|
if (TryRead("--")) { |
|
return "<!--"; |
|
} else if (TryRead("[CDATA[")) { |
|
return "<![CDATA["; |
|
} else { |
|
foreach(string dtdName in AXmlTag.DtdNames) { |
|
// the dtdName includes "<!" |
|
if (TryRead(dtdName.Remove(0, 2))) return dtdName; |
|
} |
|
return "<!"; |
|
} |
|
} else { |
|
return "<"; |
|
} |
|
} else { |
|
throw new InternalException("'<' expected"); |
|
} |
|
} |
|
|
|
/// <summary> |
|
/// Reads any of the know closing brackets. (only full bracket) |
|
/// Context: any |
|
/// </summary> |
|
bool TryReadClosingBracket(out string bracket) |
|
{ |
|
// We are using a lot of string literals so that the memory instances are shared |
|
if (TryRead('>')) { |
|
bracket = ">"; |
|
} else if (TryRead("/>")) { |
|
bracket = "/>"; |
|
} else if (TryRead("?>")) { |
|
bracket = "?>"; |
|
} else if (TryRead("-->")) { |
|
bracket = "-->"; |
|
} else if (TryRead("]]>")) { |
|
bracket = "]]>"; |
|
} else { |
|
bracket = string.Empty; |
|
return false; |
|
} |
|
return true; |
|
} |
|
|
|
IEnumerable<AXmlObject> ReadContentOfDTD() |
|
{ |
|
int start = this.CurrentLocation; |
|
while(true) { |
|
if (IsEndOfFile()) break; // End of file |
|
TryMoveToNonWhiteSpace(); // Skip whitespace |
|
if (TryRead('\'')) TryMoveTo('\''); // Skip single quoted string TODO: Bug |
|
if (TryRead('\"')) TryMoveTo('\"'); // Skip single quoted string |
|
if (TryRead('[')) { // Start of nested infoset |
|
// Reading infoset |
|
while(true) { |
|
if (IsEndOfFile()) break; |
|
TryMoveToAnyOf('<', ']'); |
|
if (TryPeek('<')) { |
|
if (start != this.CurrentLocation) { // Two following tags |
|
yield return MakeText(start, this.CurrentLocation); |
|
} |
|
yield return ReadTag(); |
|
start = this.CurrentLocation; |
|
} |
|
if (TryPeek(']')) break; |
|
} |
|
} |
|
TryRead(']'); // End of nested infoset |
|
if (TryPeek('>')) break; // Proper closing |
|
if (TryPeek('<')) break; // Malformed XML |
|
TryMoveNext(); // Skip anything else |
|
} |
|
if (start != this.CurrentLocation) { |
|
yield return MakeText(start, this.CurrentLocation); |
|
} |
|
} |
|
|
|
/// <summary> |
|
/// Context: name or "=\'\"" |
|
/// </summary> |
|
AXmlAttribute ReadAttribulte() |
|
{ |
|
AssertHasMoreData(); |
|
|
|
AXmlAttribute attr; |
|
if (TryReadFromCacheOrNew(out attr)) return attr; |
|
|
|
attr.StartOffset = this.CurrentLocation; |
|
|
|
// Read name |
|
string name; |
|
if (TryReadName(out name)) { |
|
if (!IsValidName(name)) { |
|
OnSyntaxError(attr, this.CurrentLocation - name.Length, this.CurrentLocation, "The name '{0}' is invalid", name); |
|
} |
|
} else { |
|
OnSyntaxError(attr, "Attribute name expected"); |
|
} |
|
attr.Name = name; |
|
|
|
// Read equals sign and surrounding whitespace |
|
int checkpoint = this.CurrentLocation; |
|
TryMoveToNonWhiteSpace(); |
|
if (TryRead('=')) { |
|
int chk2 = this.CurrentLocation; |
|
TryMoveToNonWhiteSpace(); |
|
if (!TryPeek('"') && !TryPeek('\'')) { |
|
// Do not read whitespace if quote does not follow |
|
GoBack(chk2); |
|
} |
|
attr.EqualsSign = GetText(checkpoint, this.CurrentLocation); |
|
} else { |
|
GoBack(checkpoint); |
|
OnSyntaxError(attr, "'=' expected"); |
|
attr.EqualsSign = string.Empty; |
|
} |
|
|
|
// Read attribute value |
|
int start = this.CurrentLocation; |
|
char quoteChar = TryPeek('"') ? '"' : '\''; |
|
bool startsWithQuote; |
|
if (TryRead(quoteChar)) { |
|
startsWithQuote = true; |
|
int valueStart = this.CurrentLocation; |
|
TryMoveToAnyOf(quoteChar, '<'); |
|
if (TryRead(quoteChar)) { |
|
if (!TryPeekAnyOf(' ', '\t', '\n', '\r', '/', '>', '?')) { |
|
if (TryPeekPrevious('=', 2) || (TryPeekPrevious('=', 3) && TryPeekPrevious(' ', 2))) { |
|
// This actually most likely means that we are in the next attribute value |
|
GoBack(valueStart); |
|
ReadAttributeValue(quoteChar); |
|
if (TryRead(quoteChar)) { |
|
OnSyntaxError(attr, "White space or end of tag expected"); |
|
} else { |
|
OnSyntaxError(attr, "Quote {0} expected (or add whitespace after the following one)", quoteChar); |
|
} |
|
} else { |
|
OnSyntaxError(attr, "White space or end of tag expected"); |
|
} |
|
} |
|
} else { |
|
// '<' or end of file |
|
GoBack(valueStart); |
|
ReadAttributeValue(quoteChar); |
|
OnSyntaxError(attr, "Quote {0} expected", quoteChar); |
|
} |
|
} else { |
|
startsWithQuote = false; |
|
int valueStart = this.CurrentLocation; |
|
ReadAttributeValue(null); |
|
TryRead('\"'); |
|
TryRead('\''); |
|
if (valueStart == this.CurrentLocation) { |
|
OnSyntaxError(attr, "Attribute value expected"); |
|
} else { |
|
OnSyntaxError(attr, valueStart, this.CurrentLocation, "Attribute value must be quoted"); |
|
} |
|
} |
|
attr.QuotedValue = GetText(start, this.CurrentLocation); |
|
attr.Value = Unquote(attr.QuotedValue); |
|
attr.Value = Dereference(attr, attr.Value, startsWithQuote ? start + 1 : start); |
|
|
|
attr.EndOffset = this.CurrentLocation; |
|
|
|
OnParsed(attr); |
|
return attr; |
|
} |
|
|
|
/// <summary> |
|
/// Read everything up to quote (excluding), opening/closing tag or attribute signature |
|
/// </summary> |
|
void ReadAttributeValue(char? quote) |
|
{ |
|
while(true) { |
|
if (IsEndOfFile()) return; |
|
// What is next? |
|
int start = this.CurrentLocation; |
|
TryMoveToNonWhiteSpace(); // Read white space (if any) |
|
if (quote.HasValue) { |
|
if (TryPeek(quote.Value)) return; |
|
} else { |
|
if (TryPeek('"') || TryPeek('\'')) return; |
|
} |
|
// Opening/closing tag |
|
string endBr; |
|
if (TryPeek('<') || TryReadClosingBracket(out endBr)) { |
|
GoBack(start); |
|
return; |
|
} |
|
// Try reading attribute signature |
|
string name; |
|
if (TryReadName(out name)) { |
|
int nameEnd = this.CurrentLocation; |
|
if (TryMoveToNonWhiteSpace() && TryRead("=") && |
|
TryMoveToNonWhiteSpace() && TryPeekAnyOf('"', '\'')) |
|
{ |
|
// Start of attribute. Great |
|
GoBack(start); |
|
return; // Done |
|
} else { |
|
// Just some gargabe - make it part of the value |
|
GoBack(nameEnd); |
|
continue; // Read more |
|
} |
|
} |
|
TryMoveNext(); // Accept everyting else |
|
} |
|
} |
|
|
|
AXmlText MakeText(int start, int end) |
|
{ |
|
AXmlParser.DebugAssert(end > start, "Empty text"); |
|
|
|
AXmlText text = new AXmlText() { |
|
StartOffset = start, |
|
EndOffset = end, |
|
EscapedValue = GetText(start, end), |
|
Type = TextType.Other |
|
}; |
|
|
|
OnParsed(text); |
|
return text; |
|
} |
|
|
|
const int maxEntityLength = 16; // The longest build-in one is 10 ("") |
|
const int maxTextFragmentSize = 64; |
|
const int lookAheadLength = (3 * maxTextFragmentSize) / 2; // More so that we do not get small "what was inserted" fragments |
|
|
|
/// <summary> |
|
/// Reads text and optionaly separates it into fragments. |
|
/// It can also return empty set for no appropriate text input. |
|
/// Make sure you enumerate it only once |
|
/// </summary> |
|
IEnumerable<AXmlObject> ReadText(TextType type) |
|
{ |
|
bool lookahead = false; |
|
while(true) { |
|
AXmlText text; |
|
if (TryReadFromCacheOrNew(out text, t => t.Type == type)) { |
|
// Cached text found |
|
yield return text; |
|
continue; // Read next fragment; the method can handle "no text left" |
|
} |
|
text.Type = type; |
|
|
|
// Limit the reading to just a few characters |
|
// (the first character not to be read) |
|
int fragmentEnd = Math.Min(this.CurrentLocation + maxTextFragmentSize, this.InputLength); |
|
|
|
// Look if some futher text has been already processed and align so that |
|
// we hit that chache point. It is expensive so it is off for the first run |
|
if (lookahead) { |
|
// Note: Must fit entity |
|
AXmlObject nextFragment = trackedSegments.GetCachedObject<AXmlText>(this.CurrentLocation + maxEntityLength, lookAheadLength - maxEntityLength, t => t.Type == type); |
|
if (nextFragment != null) { |
|
fragmentEnd = Math.Min(nextFragment.StartOffset, this.InputLength); |
|
AXmlParser.Log("Parsing only text ({0}-{1}) because later text was already processed", this.CurrentLocation, fragmentEnd); |
|
} |
|
} |
|
lookahead = true; |
|
|
|
text.StartOffset = this.CurrentLocation; |
|
int start = this.CurrentLocation; |
|
|
|
// Whitespace would be skipped anyway by any operation |
|
TryMoveToNonWhiteSpace(fragmentEnd); |
|
int wsEnd = this.CurrentLocation; |
|
|
|
// Try move to the terminator given by the context |
|
if (type == TextType.WhiteSpace) { |
|
TryMoveToNonWhiteSpace(fragmentEnd); |
|
} else if (type == TextType.CharacterData) { |
|
while(true) { |
|
if (!TryMoveToAnyOf(new char[] {'<', ']'}, fragmentEnd)) break; // End of fragment |
|
if (TryPeek('<')) break; |
|
if (TryPeek(']')) { |
|
if (TryPeek("]]>")) { |
|
OnSyntaxError(text, this.CurrentLocation, this.CurrentLocation + 3, "']]>' is not allowed in text"); |
|
} |
|
TryMoveNext(); |
|
continue; |
|
} |
|
throw new Exception("Infinite loop"); |
|
} |
|
} else if (type == TextType.Comment) { |
|
// Do not report too many errors |
|
bool errorReported = false; |
|
while(true) { |
|
if (!TryMoveTo('-', fragmentEnd)) break; // End of fragment |
|
if (TryPeek("-->")) break; |
|
if (TryPeek("--") && !errorReported) { |
|
OnSyntaxError(text, this.CurrentLocation, this.CurrentLocation + 2, "'--' is not allowed in comment"); |
|
errorReported = true; |
|
} |
|
TryMoveNext(); |
|
} |
|
} else if (type == TextType.CData) { |
|
while(true) { |
|
// We can not use use TryMoveTo("]]>", fragmentEnd) because it may incorectly accept "]" at the end of fragment |
|
if (!TryMoveTo(']', fragmentEnd)) break; // End of fragment |
|
if (TryPeek("]]>")) break; |
|
TryMoveNext(); |
|
} |
|
} else if (type == TextType.ProcessingInstruction) { |
|
while(true) { |
|
if (!TryMoveTo('?', fragmentEnd)) break; // End of fragment |
|
if (TryPeek("?>")) break; |
|
TryMoveNext(); |
|
} |
|
} else if (type == TextType.UnknownBang) { |
|
TryMoveToAnyOf(new char[] {'<', '>'}, fragmentEnd); |
|
} else { |
|
throw new Exception("Uknown type " + type); |
|
} |
|
|
|
text.ContainsOnlyWhitespace = (wsEnd == this.CurrentLocation); |
|
|
|
// Terminal found or real end was reached; |
|
bool finished = this.CurrentLocation < fragmentEnd || IsEndOfFile(); |
|
|
|
if (!finished) { |
|
// We have to continue reading more text fragments |
|
|
|
// If there is entity reference, make sure the next segment starts with it to prevent framentation |
|
int entitySearchStart = Math.Max(start + 1 /* data for us */, this.CurrentLocation - maxEntityLength); |
|
int entitySearchLength = this.CurrentLocation - entitySearchStart; |
|
if (entitySearchLength > 0) { |
|
// Note that LastIndexOf works backward |
|
int entityIndex = input.LastIndexOf('&', this.CurrentLocation - 1, entitySearchLength); |
|
if (entityIndex != -1) { |
|
GoBack(entityIndex); |
|
} |
|
} |
|
} |
|
|
|
text.EscapedValue = GetText(start, this.CurrentLocation); |
|
if (type == TextType.CharacterData) { |
|
// Normalize end of line first |
|
text.Value = Dereference(text, NormalizeEndOfLine(text.EscapedValue), start); |
|
} else { |
|
text.Value = text.EscapedValue; |
|
} |
|
text.EndOffset = this.CurrentLocation; |
|
|
|
if (text.EscapedValue.Length > 0) { |
|
OnParsed(text); |
|
yield return text; |
|
} |
|
|
|
if (finished) { |
|
yield break; |
|
} |
|
} |
|
} |
|
|
|
#region Helper methods |
|
|
|
void OnSyntaxError(AXmlObject obj, string message, params object[] args) |
|
{ |
|
OnSyntaxError(obj, this.CurrentLocation, this.CurrentLocation + 1, message, args); |
|
} |
|
|
|
public static void OnSyntaxError(AXmlObject obj, int start, int end, string message, params object[] args) |
|
{ |
|
if (end <= start) end = start + 1; |
|
string formattedMessage = string.Format(CultureInfo.InvariantCulture, message, args); |
|
AXmlParser.Log("Syntax error ({0}-{1}): {2}", start, end, formattedMessage); |
|
obj.AddSyntaxError(new SyntaxError() { |
|
Object = obj, |
|
StartOffset = start, |
|
EndOffset = end, |
|
Message = formattedMessage, |
|
}); |
|
} |
|
|
|
static bool IsValidName(string name) |
|
{ |
|
try { |
|
System.Xml.XmlConvert.VerifyName(name); |
|
return true; |
|
} catch (System.Xml.XmlException) { |
|
return false; |
|
} |
|
} |
|
|
|
/// <summary> Remove quoting from the given string </summary> |
|
static string Unquote(string quoted) |
|
{ |
|
if (string.IsNullOrEmpty(quoted)) return string.Empty; |
|
char first = quoted[0]; |
|
if (quoted.Length == 1) return (first == '"' || first == '\'') ? string.Empty : quoted; |
|
char last = quoted[quoted.Length - 1]; |
|
if (first == '"' || first == '\'') { |
|
if (first == last) { |
|
// Remove both quotes |
|
return quoted.Substring(1, quoted.Length - 2); |
|
} else { |
|
// Remove first quote |
|
return quoted.Remove(0, 1); |
|
} |
|
} else { |
|
if (last == '"' || last == '\'') { |
|
// Remove last quote |
|
return quoted.Substring(0, quoted.Length - 1); |
|
} else { |
|
// Keep whole string |
|
return quoted; |
|
} |
|
} |
|
} |
|
|
|
static string NormalizeEndOfLine(string text) |
|
{ |
|
return text.Replace("\r\n", "\n").Replace("\r", "\n"); |
|
} |
|
|
|
string Dereference(AXmlObject owner, string text, int textLocation) |
|
{ |
|
StringBuilder sb = null; // The dereferenced text so far (all up to 'curr') |
|
int curr = 0; |
|
while(true) { |
|
// Reached end of input |
|
if (curr == text.Length) { |
|
if (sb != null) { |
|
return sb.ToString(); |
|
} else { |
|
return text; |
|
} |
|
} |
|
|
|
// Try to find reference |
|
int start = text.IndexOf('&', curr); |
|
|
|
// No more references found |
|
if (start == -1) { |
|
if (sb != null) { |
|
sb.Append(text, curr, text.Length - curr); // Add rest |
|
return sb.ToString(); |
|
} else { |
|
return text; |
|
} |
|
} |
|
|
|
// Append text before the enitiy reference |
|
if (sb == null) sb = new StringBuilder(text.Length); |
|
sb.Append(text, curr, start - curr); |
|
curr = start; |
|
|
|
// Process the entity |
|
int errorLoc = textLocation + sb.Length; |
|
|
|
// Find entity name |
|
int end = text.IndexOfAny(new char[] {'&', ';'}, start + 1, Math.Min(maxEntityLength, text.Length - (start + 1))); |
|
if (end == -1 || text[end] == '&') { |
|
// Not found |
|
OnSyntaxError(owner, errorLoc, errorLoc + 1, "Entity reference must be terminated with ';'"); |
|
// Keep '&' |
|
sb.Append('&'); |
|
curr++; |
|
continue; // Restart and next character location |
|
} |
|
string name = text.Substring(start + 1, end - (start + 1)); |
|
|
|
// Resolve the name |
|
string replacement; |
|
if (name.Length == 0) { |
|
replacement = null; |
|
OnSyntaxError(owner, errorLoc + 1, errorLoc + 1, "Entity name expected"); |
|
} else if (name == "amp") { |
|
replacement = "&"; |
|
} else if (name == "lt") { |
|
replacement = "<"; |
|
} else if (name == "gt") { |
|
replacement = ">"; |
|
} else if (name == "apos") { |
|
replacement = "'"; |
|
} else if (name == "quot") { |
|
replacement = "\""; |
|
} else if (name.Length > 0 && name[0] == '#') { |
|
int num; |
|
if (name.Length > 1 && name[1] == 'x') { |
|
if (!int.TryParse(name.Substring(2), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture.NumberFormat, out num)) { |
|
num = -1; |
|
OnSyntaxError(owner, errorLoc + 3, errorLoc + 1 + name.Length, "Hexadecimal code of unicode character expected"); |
|
} |
|
} else { |
|
if (!int.TryParse(name.Substring(1), NumberStyles.None, CultureInfo.InvariantCulture.NumberFormat, out num)) { |
|
num = -1; |
|
OnSyntaxError(owner, errorLoc + 2, errorLoc + 1 + name.Length, "Numeric code of unicode character expected"); |
|
} |
|
} |
|
if (num != -1) { |
|
try { |
|
replacement = char.ConvertFromUtf32(num); |
|
} catch (ArgumentOutOfRangeException) { |
|
replacement = null; |
|
OnSyntaxError(owner, errorLoc + 2, errorLoc + 1 + name.Length, "Invalid unicode character U+{0:X} ({0})", num); |
|
} |
|
} else { |
|
replacement = null; |
|
} |
|
} else if (!IsValidName(name)) { |
|
replacement = null; |
|
OnSyntaxError(owner, errorLoc + 1, errorLoc + 1, "Invalid entity name"); |
|
} else { |
|
replacement = null; |
|
if (parser.UnknownEntityReferenceIsError) { |
|
OnSyntaxError(owner, errorLoc, errorLoc + 1 + name.Length + 1, "Unknown entity reference '{0}'", name); |
|
} |
|
} |
|
|
|
// Append the replacement to output |
|
if (replacement != null) { |
|
sb.Append(replacement); |
|
} else { |
|
sb.Append('&'); |
|
sb.Append(name); |
|
sb.Append(';'); |
|
} |
|
curr = end + 1; |
|
continue; |
|
} |
|
} |
|
|
|
#endregion |
|
} |
|
}
|
|
|