|
|
@ -59,7 +59,7 @@ namespace ICSharpCode.AvalonEdit.XmlParser |
|
|
|
/// <![CDATA[
|
|
|
|
/// <![CDATA[
|
|
|
|
/// Start tag: "<" Name? (RawText+ RawAttribute)* RawText* (">" | "/>")
|
|
|
|
/// Start tag: "<" Name? (RawText+ RawAttribute)* RawText* (">" | "/>")
|
|
|
|
/// End tag: "</" Name? (RawText+ RawAttribute)* RawText* ">"
|
|
|
|
/// End tag: "</" Name? (RawText+ RawAttribute)* RawText* ">"
|
|
|
|
/// P.instr.: "<?" Name? (RawText+ RawAttribute)* RawText* "?>"
|
|
|
|
/// P.instr.: "<?" Name? (RawText)* "?>"
|
|
|
|
/// Comment: "<!--" (RawText)* "-->"
|
|
|
|
/// Comment: "<!--" (RawText)* "-->"
|
|
|
|
/// CData: "<![CDATA[" (RawText)* "]]" ">"
|
|
|
|
/// CData: "<![CDATA[" (RawText)* "]]" ">"
|
|
|
|
/// DTD: "<!DOCTYPE" (RawText+ RawTag)* RawText* ">" (DOCTYPE or other DTD names)
|
|
|
|
/// DTD: "<!DOCTYPE" (RawText+ RawTag)* RawText* ">" (DOCTYPE or other DTD names)
|
|
|
@ -74,6 +74,17 @@ namespace ICSharpCode.AvalonEdit.XmlParser |
|
|
|
///
|
|
|
|
///
|
|
|
|
/// Note that there can always be multiple consecutive RawText nodes.
|
|
|
|
/// Note that there can always be multiple consecutive RawText nodes.
|
|
|
|
/// This is to ensure that individual texts are not too long.
|
|
|
|
/// This is to ensure that individual texts are not too long.
|
|
|
|
|
|
|
|
///
|
|
|
|
|
|
|
|
/// XML Spec: http://www.w3.org/TR/xml/
|
|
|
|
|
|
|
|
/// XML EBNF: http://www.jelks.nu/XML/xmlebnf.html
|
|
|
|
|
|
|
|
///
|
|
|
|
|
|
|
|
/// Internals:
|
|
|
|
|
|
|
|
///
|
|
|
|
|
|
|
|
/// "Try" methods can silently fail by returning false.
|
|
|
|
|
|
|
|
/// MoveTo methods do not move if they are already at the given target
|
|
|
|
|
|
|
|
/// If methods return some object, it must be non-empty. It is up to the caller to ensure
|
|
|
|
|
|
|
|
/// the context is appropriate for reading.
|
|
|
|
|
|
|
|
///
|
|
|
|
/// </remarks>
|
|
|
|
/// </remarks>
|
|
|
|
public class XmlParser |
|
|
|
public class XmlParser |
|
|
|
{ |
|
|
|
{ |
|
|
@ -163,6 +174,10 @@ namespace ICSharpCode.AvalonEdit.XmlParser |
|
|
|
|
|
|
|
|
|
|
|
void OnParsed(RawObject obj) |
|
|
|
void OnParsed(RawObject obj) |
|
|
|
{ |
|
|
|
{ |
|
|
|
|
|
|
|
if (obj.Length == 0 && !(obj is RawDocument)) { |
|
|
|
|
|
|
|
throw new Exception(string.Format("Could not parse {0}. It has zero length.", obj)); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
// TODO: Record touched memory
|
|
|
|
parsedItems.Add(obj); |
|
|
|
parsedItems.Add(obj); |
|
|
|
System.Diagnostics.Debug.WriteLine("XML Parser: Parsed " + obj.ToString()); |
|
|
|
System.Diagnostics.Debug.WriteLine("XML Parser: Parsed " + obj.ToString()); |
|
|
|
} |
|
|
|
} |
|
|
@ -222,15 +237,6 @@ namespace ICSharpCode.AvalonEdit.XmlParser |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// The methods start with 'try' to make it clear they can silently fail.
|
|
|
|
|
|
|
|
// Read methods without 'try' have to succeed or throw an exception.
|
|
|
|
|
|
|
|
//
|
|
|
|
|
|
|
|
// For example:
|
|
|
|
|
|
|
|
// while(true) TryMoveNext(); is obviously infinite loop
|
|
|
|
|
|
|
|
// whereas
|
|
|
|
|
|
|
|
// while(true) MoveNext(); should eventually throw an exception (if MoveNext existed)
|
|
|
|
|
|
|
|
//
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
bool TryMoveNext() |
|
|
|
bool TryMoveNext() |
|
|
|
{ |
|
|
|
{ |
|
|
|
if (currentLocation == readingEnd) return false; |
|
|
|
if (currentLocation == readingEnd) return false; |
|
|
@ -330,7 +336,7 @@ namespace ICSharpCode.AvalonEdit.XmlParser |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
static char[] WhiteSpaceChars = new char[] {' ', '\n', '\r', '\t'}; |
|
|
|
static char[] WhiteSpaceChars = new char[] {' ', '\n', '\r', '\t'}; |
|
|
|
static char[] WhiteSpaceAndReservedChars = new char[] {' ', '\n', '\r', '\t', '<', '=', '>', '/', '?'}; |
|
|
|
static char[] WhiteSpaceAndReservedChars = new char[] {' ', '\n', '\r', '\t', '=', '\'', '"', '<', '>', '/', '?'}; |
|
|
|
|
|
|
|
|
|
|
|
bool TryPeekWhiteSpace() |
|
|
|
bool TryPeekWhiteSpace() |
|
|
|
{ |
|
|
|
{ |
|
|
@ -339,20 +345,41 @@ namespace ICSharpCode.AvalonEdit.XmlParser |
|
|
|
return WhiteSpaceChars.Contains(input[currentLocation]); |
|
|
|
return WhiteSpaceChars.Contains(input[currentLocation]); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
string ReadName() |
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
|
|
|
|
/// Read a name token.
|
|
|
|
|
|
|
|
/// The following characters are not allowed:
|
|
|
|
|
|
|
|
/// "" End of file
|
|
|
|
|
|
|
|
/// " \n\r\t" Whitespace
|
|
|
|
|
|
|
|
/// "=\'\"" Attribute value
|
|
|
|
|
|
|
|
/// "<" Opening Tag
|
|
|
|
|
|
|
|
/// ">/?" Closing Tag
|
|
|
|
|
|
|
|
/// </summary>
|
|
|
|
|
|
|
|
bool TryReadName(out string res) |
|
|
|
{ |
|
|
|
{ |
|
|
|
AssertHasMoreData(); |
|
|
|
AssertHasMoreData(); |
|
|
|
|
|
|
|
|
|
|
|
int start = currentLocation; |
|
|
|
int start = currentLocation; |
|
|
|
TryMoveToAnyOf(WhiteSpaceAndReservedChars.ToArray()); |
|
|
|
TryMoveToAnyOf(WhiteSpaceAndReservedChars.ToArray()); |
|
|
|
return GetText(start, currentLocation); |
|
|
|
if (start == currentLocation) { |
|
|
|
|
|
|
|
res = null; |
|
|
|
|
|
|
|
return false; |
|
|
|
|
|
|
|
} else { |
|
|
|
|
|
|
|
res = GetText(start, currentLocation); |
|
|
|
|
|
|
|
// TODO: Check that it is valid XML name
|
|
|
|
|
|
|
|
return true; |
|
|
|
|
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
|
|
|
|
/// Context: any
|
|
|
|
|
|
|
|
/// </summary>
|
|
|
|
RawDocument ReadDocument() |
|
|
|
RawDocument ReadDocument() |
|
|
|
{ |
|
|
|
{ |
|
|
|
RawDocument doc; |
|
|
|
RawDocument doc; |
|
|
|
if (TryReadFromCacheOrNew(out doc)) return doc; |
|
|
|
if (TryReadFromCacheOrNew(out doc)) return doc; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// TODO: Errors in document structure
|
|
|
|
doc.StartOffset = currentLocation; |
|
|
|
doc.StartOffset = currentLocation; |
|
|
|
while(true) { |
|
|
|
while(true) { |
|
|
|
if (IsEndOfFile()) { |
|
|
|
if (IsEndOfFile()) { |
|
|
@ -369,6 +396,9 @@ namespace ICSharpCode.AvalonEdit.XmlParser |
|
|
|
return doc; |
|
|
|
return doc; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
|
|
|
|
/// Context: "<"
|
|
|
|
|
|
|
|
/// </summary>
|
|
|
|
RawObject ReadElementOrTag() |
|
|
|
RawObject ReadElementOrTag() |
|
|
|
{ |
|
|
|
{ |
|
|
|
AssertHasMoreData(); |
|
|
|
AssertHasMoreData(); |
|
|
@ -382,6 +412,9 @@ namespace ICSharpCode.AvalonEdit.XmlParser |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
|
|
|
|
/// Context: "<"
|
|
|
|
|
|
|
|
/// </summary>
|
|
|
|
RawElement ReadElement() |
|
|
|
RawElement ReadElement() |
|
|
|
{ |
|
|
|
{ |
|
|
|
AssertHasMoreData(); |
|
|
|
AssertHasMoreData(); |
|
|
@ -393,7 +426,7 @@ namespace ICSharpCode.AvalonEdit.XmlParser |
|
|
|
// Read start tag
|
|
|
|
// Read start tag
|
|
|
|
element.AddChild(ReadTag()); |
|
|
|
element.AddChild(ReadTag()); |
|
|
|
Debug.Assert(element.StartTag.IsStartTag); |
|
|
|
Debug.Assert(element.StartTag.IsStartTag); |
|
|
|
// Read content and end tag
|
|
|
|
// Read content and end tag (only if properly closed)
|
|
|
|
if (element.StartTag.ClosingBracket == ">") { |
|
|
|
if (element.StartTag.ClosingBracket == ">") { |
|
|
|
while(true) { |
|
|
|
while(true) { |
|
|
|
if (IsEndOfFile()) { |
|
|
|
if (IsEndOfFile()) { |
|
|
@ -408,11 +441,18 @@ namespace ICSharpCode.AvalonEdit.XmlParser |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
element.EndOffset = currentLocation; |
|
|
|
element.EndOffset = currentLocation; |
|
|
|
|
|
|
|
// TODO: Closing tag matches
|
|
|
|
|
|
|
|
// TODO: Heuristic on closing
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// TODO: ERROR - attribute name may not appear multiple times
|
|
|
|
|
|
|
|
|
|
|
|
OnParsed(element); |
|
|
|
OnParsed(element); |
|
|
|
return element; |
|
|
|
return element; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
|
|
|
|
/// Context: "<"
|
|
|
|
|
|
|
|
/// </summary>
|
|
|
|
RawTag ReadTag() |
|
|
|
RawTag ReadTag() |
|
|
|
{ |
|
|
|
{ |
|
|
|
AssertHasMoreData(); |
|
|
|
AssertHasMoreData(); |
|
|
@ -426,27 +466,25 @@ namespace ICSharpCode.AvalonEdit.XmlParser |
|
|
|
// It identifies the type of tag and parsing behavior for the rest of it
|
|
|
|
// It identifies the type of tag and parsing behavior for the rest of it
|
|
|
|
tag.OpeningBracket = ReadOpeningBracket(); |
|
|
|
tag.OpeningBracket = ReadOpeningBracket(); |
|
|
|
|
|
|
|
|
|
|
|
if (tag.IsStartTag || tag.IsEndTag || tag.IsProcessingInstruction) { |
|
|
|
if (tag.IsStartTag || tag.IsEndTag) { |
|
|
|
// Read the name
|
|
|
|
// Read the name
|
|
|
|
if (HasMoreData()) { |
|
|
|
string name; |
|
|
|
tag.Name = ReadName(); |
|
|
|
if (TryReadName(out name)) tag.Name = name; |
|
|
|
} |
|
|
|
// TODO: Error - bad name
|
|
|
|
|
|
|
|
// TODO: Error - no name?
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// TODO: Error - = or " or ' not expected
|
|
|
|
|
|
|
|
|
|
|
|
// Read attributes for the tag
|
|
|
|
// Read attributes for the tag
|
|
|
|
while(true) { |
|
|
|
while(true) { |
|
|
|
if (TryPeekWhiteSpace()) { |
|
|
|
// Check for all forbidden 'name' characters first - see ReadName
|
|
|
|
tag.AddChildren(ReadText(RawTextType.WhiteSpace)); |
|
|
|
if (IsEndOfFile()) break; |
|
|
|
} |
|
|
|
if (TryPeekWhiteSpace()) tag.AddChildren(ReadText(RawTextType.WhiteSpace)); |
|
|
|
string bracket; |
|
|
|
|
|
|
|
if (TryReadClosingBracket(out bracket)) { |
|
|
|
|
|
|
|
tag.ClosingBracket = bracket; |
|
|
|
|
|
|
|
break; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
if (TryPeek('<')) break; |
|
|
|
if (TryPeek('<')) break; |
|
|
|
if (HasMoreData()) { |
|
|
|
if (TryPeek('>') || TryPeek('/') || TryPeek('?')) break; // End tag
|
|
|
|
tag.AddChild(ReadAttribulte()); |
|
|
|
|
|
|
|
continue; |
|
|
|
// We have "=\'\"" or name - read attribute
|
|
|
|
} |
|
|
|
tag.AddChild(ReadAttribulte()); |
|
|
|
break; // End of file
|
|
|
|
|
|
|
|
} |
|
|
|
} |
|
|
|
} else if (tag.IsComment) { |
|
|
|
} else if (tag.IsComment) { |
|
|
|
// TODO: Backtrack if file end reached
|
|
|
|
// TODO: Backtrack if file end reached
|
|
|
@ -454,22 +492,27 @@ namespace ICSharpCode.AvalonEdit.XmlParser |
|
|
|
} else if (tag.IsCData) { |
|
|
|
} else if (tag.IsCData) { |
|
|
|
// TODO: Backtrack if file end reached
|
|
|
|
// TODO: Backtrack if file end reached
|
|
|
|
tag.AddChildren(ReadText(RawTextType.CData)); |
|
|
|
tag.AddChildren(ReadText(RawTextType.CData)); |
|
|
|
|
|
|
|
} else if (tag.IsProcessingInstruction) { |
|
|
|
|
|
|
|
string name; |
|
|
|
|
|
|
|
if (TryReadName(out name)) tag.Name = name; |
|
|
|
|
|
|
|
// TODO: Error - bad name
|
|
|
|
|
|
|
|
// TODO: Error - no name?
|
|
|
|
|
|
|
|
// TODO: Backtrack if file end reached
|
|
|
|
|
|
|
|
tag.AddChildren(ReadText(RawTextType.ProcessingInstruction)); |
|
|
|
|
|
|
|
} else if (tag.IsUnknownBang) { |
|
|
|
|
|
|
|
// TODO: Backtrack if '<' (or end of file)
|
|
|
|
|
|
|
|
tag.AddChildren(ReadText(RawTextType.UnknownBang)); |
|
|
|
} else if (tag.IsDocumentType) { |
|
|
|
} else if (tag.IsDocumentType) { |
|
|
|
tag.AddChildren(ReadContentOfDTD()); |
|
|
|
tag.AddChildren(ReadContentOfDTD()); |
|
|
|
} else if (tag.IsUnknownBang) { |
|
|
|
|
|
|
|
if (HasMoreData()) { |
|
|
|
|
|
|
|
int start = currentLocation; |
|
|
|
|
|
|
|
TryMoveToAnyOf('<', '>'); |
|
|
|
|
|
|
|
tag.AddChild(MakeText(start, currentLocation)); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} else { |
|
|
|
} else { |
|
|
|
throw new Exception(string.Format("Unknown opening bracket '{0}'", tag.OpeningBracket)); |
|
|
|
throw new Exception(string.Format("Unknown opening bracket '{0}'", tag.OpeningBracket)); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
if (tag.ClosingBracket == null) { |
|
|
|
// Read closing bracket
|
|
|
|
string bracket; |
|
|
|
string bracket; |
|
|
|
if (TryReadClosingBracket(out bracket)) tag.ClosingBracket = bracket; |
|
|
|
if (TryReadClosingBracket(out bracket)) tag.ClosingBracket = bracket; |
|
|
|
} |
|
|
|
// TODO: else ERROR - Missing closing bracket
|
|
|
|
|
|
|
|
// TODO: check correct closing bracket (special case if end of file)
|
|
|
|
|
|
|
|
|
|
|
|
tag.EndOffset = currentLocation; |
|
|
|
tag.EndOffset = currentLocation; |
|
|
|
|
|
|
|
|
|
|
@ -478,8 +521,8 @@ namespace ICSharpCode.AvalonEdit.XmlParser |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// <summary>
|
|
|
|
/// Reads any of the known opening brackets
|
|
|
|
/// Reads any of the known opening brackets. (only full bracket)
|
|
|
|
/// Also accepts them if they are incomplete; one character is sufficient
|
|
|
|
/// Context: "<"
|
|
|
|
/// </summary>
|
|
|
|
/// </summary>
|
|
|
|
string ReadOpeningBracket() |
|
|
|
string ReadOpeningBracket() |
|
|
|
{ |
|
|
|
{ |
|
|
@ -500,6 +543,7 @@ namespace ICSharpCode.AvalonEdit.XmlParser |
|
|
|
// the dtdName includes "<!"
|
|
|
|
// the dtdName includes "<!"
|
|
|
|
if (TryRead(dtdName.Remove(0, 2))) return dtdName; |
|
|
|
if (TryRead(dtdName.Remove(0, 2))) return dtdName; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
// TODO: Error - unknown bang tag
|
|
|
|
return "<!"; |
|
|
|
return "<!"; |
|
|
|
} |
|
|
|
} |
|
|
|
} else { |
|
|
|
} else { |
|
|
@ -511,31 +555,24 @@ namespace ICSharpCode.AvalonEdit.XmlParser |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
/// <summary>
|
|
|
|
/// Reads any of the known closing brackets
|
|
|
|
/// Reads any of the known closing brackets. (only full bracket)
|
|
|
|
/// Also accepts them if they are incomplete; one character is sufficient
|
|
|
|
/// Context: any
|
|
|
|
/// </summary>
|
|
|
|
/// </summary>
|
|
|
|
bool TryReadClosingBracket(out string bracket) |
|
|
|
bool TryReadClosingBracket(out string bracket) |
|
|
|
{ |
|
|
|
{ |
|
|
|
|
|
|
|
// TODO: Touched memory
|
|
|
|
// We are using a lot of string literals so that the memory instances are shared
|
|
|
|
// We are using a lot of string literals so that the memory instances are shared
|
|
|
|
int start = currentLocation; |
|
|
|
int start = currentLocation; |
|
|
|
if (TryRead('>')) { |
|
|
|
if (TryRead('>')) { |
|
|
|
bracket = ">"; |
|
|
|
bracket = ">"; |
|
|
|
} else if (TryRead('/')) { |
|
|
|
} else if (TryRead("/>")) { |
|
|
|
if (TryRead('>')) { |
|
|
|
bracket = "/>"; |
|
|
|
bracket = "/>"; |
|
|
|
} else if (TryRead("?>")) { |
|
|
|
} else { |
|
|
|
bracket = "?>"; |
|
|
|
bracket = "/"; |
|
|
|
} else if (TryRead("-->")) { |
|
|
|
} |
|
|
|
bracket = "-->"; |
|
|
|
} else if (TryRead('?')) { |
|
|
|
} else if (TryRead("]]>")) { |
|
|
|
if (TryRead('>')) { |
|
|
|
bracket = "]]>"; |
|
|
|
bracket = "?>"; |
|
|
|
|
|
|
|
} else { |
|
|
|
|
|
|
|
bracket = "?"; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} else if (TryReadPartOf("-->")) { |
|
|
|
|
|
|
|
bracket = GetText(start, currentLocation); |
|
|
|
|
|
|
|
} else if (TryReadPartOf("]]>")) { |
|
|
|
|
|
|
|
bracket = GetText(start, currentLocation); |
|
|
|
|
|
|
|
} else { |
|
|
|
} else { |
|
|
|
bracket = null; |
|
|
|
bracket = null; |
|
|
|
return false; |
|
|
|
return false; |
|
|
@ -557,7 +594,9 @@ namespace ICSharpCode.AvalonEdit.XmlParser |
|
|
|
if (IsEndOfFile()) break; |
|
|
|
if (IsEndOfFile()) break; |
|
|
|
TryMoveToAnyOf('<', ']'); |
|
|
|
TryMoveToAnyOf('<', ']'); |
|
|
|
if (TryPeek('<')) { |
|
|
|
if (TryPeek('<')) { |
|
|
|
yield return MakeText(start, currentLocation); |
|
|
|
if (start != currentLocation) { // Two following tags
|
|
|
|
|
|
|
|
yield return MakeText(start, currentLocation); |
|
|
|
|
|
|
|
} |
|
|
|
yield return ReadTag(); |
|
|
|
yield return ReadTag(); |
|
|
|
start = currentLocation; |
|
|
|
start = currentLocation; |
|
|
|
} |
|
|
|
} |
|
|
@ -574,6 +613,9 @@ namespace ICSharpCode.AvalonEdit.XmlParser |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
|
|
|
|
/// Context: name or "=\'\""
|
|
|
|
|
|
|
|
/// </summary>
|
|
|
|
RawAttribute ReadAttribulte() |
|
|
|
RawAttribute ReadAttribulte() |
|
|
|
{ |
|
|
|
{ |
|
|
|
AssertHasMoreData(); |
|
|
|
AssertHasMoreData(); |
|
|
@ -582,26 +624,45 @@ namespace ICSharpCode.AvalonEdit.XmlParser |
|
|
|
if (TryReadFromCacheOrNew(out attr)) return attr; |
|
|
|
if (TryReadFromCacheOrNew(out attr)) return attr; |
|
|
|
|
|
|
|
|
|
|
|
attr.StartOffset = currentLocation; |
|
|
|
attr.StartOffset = currentLocation; |
|
|
|
if (HasMoreData()) attr.Name = ReadName(); |
|
|
|
|
|
|
|
|
|
|
|
// Read name
|
|
|
|
|
|
|
|
string name; |
|
|
|
|
|
|
|
if (TryReadName(out name)) attr.Name = name; |
|
|
|
|
|
|
|
// TODO: else ERROR - attribute name expected
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Read equals sign and surrounding whitespace
|
|
|
|
int checkpoint = currentLocation; |
|
|
|
int checkpoint = currentLocation; |
|
|
|
TryMoveToNonWhiteSpace(); |
|
|
|
TryMoveToNonWhiteSpace(); |
|
|
|
if (TryRead('=')) { |
|
|
|
if (TryRead('=')) { |
|
|
|
TryMoveToNonWhiteSpace(); |
|
|
|
TryMoveToNonWhiteSpace(); |
|
|
|
attr.EqualsSign += GetText(checkpoint, currentLocation); |
|
|
|
attr.EqualsSign = GetText(checkpoint, currentLocation); |
|
|
|
// Read attribute value
|
|
|
|
|
|
|
|
int start = currentLocation; |
|
|
|
|
|
|
|
if (TryRead('"')) { |
|
|
|
|
|
|
|
TryMoveToAnyOf('"', '<'); |
|
|
|
|
|
|
|
TryRead('"'); |
|
|
|
|
|
|
|
attr.Value = GetText(start, currentLocation); |
|
|
|
|
|
|
|
} else if (TryRead('\'')) { |
|
|
|
|
|
|
|
TryMoveToAnyOf('\'', '<'); |
|
|
|
|
|
|
|
TryRead('\''); |
|
|
|
|
|
|
|
attr.Value = GetText(start, currentLocation); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} else { |
|
|
|
} else { |
|
|
|
|
|
|
|
// TODO: Track touched memory
|
|
|
|
currentLocation = checkpoint; |
|
|
|
currentLocation = checkpoint; |
|
|
|
|
|
|
|
// TODO: ERROR - Equals expected
|
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Read attribute value
|
|
|
|
|
|
|
|
int start = currentLocation; |
|
|
|
|
|
|
|
if (TryRead('"')) { |
|
|
|
|
|
|
|
TryMoveToAnyOf('"', '<'); |
|
|
|
|
|
|
|
TryRead('"'); |
|
|
|
|
|
|
|
// TODO: Some backtracking?
|
|
|
|
|
|
|
|
// TODO: ERROR - Attribute value not closed
|
|
|
|
|
|
|
|
attr.Value = GetText(start, currentLocation); |
|
|
|
|
|
|
|
} else if (TryRead('\'')) { |
|
|
|
|
|
|
|
TryMoveToAnyOf('\'', '<'); |
|
|
|
|
|
|
|
TryRead('\''); |
|
|
|
|
|
|
|
// TODO: Some backtracking?
|
|
|
|
|
|
|
|
// TODO: ERROR - Attribute value not closed
|
|
|
|
|
|
|
|
attr.Value = GetText(start, currentLocation); |
|
|
|
|
|
|
|
} else { |
|
|
|
|
|
|
|
// TODO: ERROR - Attribute value expected
|
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// TODO: Heuristic for missing " or '
|
|
|
|
|
|
|
|
// TODO: Normalize attribute values
|
|
|
|
|
|
|
|
|
|
|
|
attr.EndOffset = currentLocation; |
|
|
|
attr.EndOffset = currentLocation; |
|
|
|
|
|
|
|
|
|
|
|
OnParsed(attr); |
|
|
|
OnParsed(attr); |
|
|
@ -614,11 +675,14 @@ namespace ICSharpCode.AvalonEdit.XmlParser |
|
|
|
StartOffset = start, |
|
|
|
StartOffset = start, |
|
|
|
EndOffset = end, |
|
|
|
EndOffset = end, |
|
|
|
Value = GetText(start, end), |
|
|
|
Value = GetText(start, end), |
|
|
|
|
|
|
|
Type = RawTextType.Other |
|
|
|
}; |
|
|
|
}; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
OnParsed(text); |
|
|
|
return text; |
|
|
|
return text; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
const int maxEntityLenght = 12; // 6 for built-in ones
|
|
|
|
const int maxEntityLenght = 12; // The longest built-in one is 10 ("&#1114111;")
|
|
|
|
const int maxTextFragmentSize = 8; |
|
|
|
const int maxTextFragmentSize = 8; |
|
|
|
const int lookAheadLenght = (3 * maxTextFragmentSize) / 2; |
|
|
|
const int lookAheadLenght = (3 * maxTextFragmentSize) / 2; |
|
|
|
const int backtrackLenght = 4; // 2: get back over "]]" 1: so that we have some data 1: safety
|
|
|
|
const int backtrackLenght = 4; // 2: get back over "]]" 1: so that we have some data 1: safety
|
|
|
@ -629,6 +693,8 @@ namespace ICSharpCode.AvalonEdit.XmlParser |
|
|
|
/// </summary>
|
|
|
|
/// </summary>
|
|
|
|
IEnumerable<RawObject> ReadText(RawTextType type) |
|
|
|
IEnumerable<RawObject> ReadText(RawTextType type) |
|
|
|
{ |
|
|
|
{ |
|
|
|
|
|
|
|
// TODO: Rewrite
|
|
|
|
|
|
|
|
|
|
|
|
bool lookahead = false; |
|
|
|
bool lookahead = false; |
|
|
|
while(true) { |
|
|
|
while(true) { |
|
|
|
RawText text; |
|
|
|
RawText text; |
|
|
@ -664,13 +730,17 @@ namespace ICSharpCode.AvalonEdit.XmlParser |
|
|
|
if (type == RawTextType.WhiteSpace) { |
|
|
|
if (type == RawTextType.WhiteSpace) { |
|
|
|
TryMoveToNonWhiteSpace(); |
|
|
|
TryMoveToNonWhiteSpace(); |
|
|
|
} else if (type == RawTextType.CharacterData) { |
|
|
|
} else if (type == RawTextType.CharacterData) { |
|
|
|
|
|
|
|
// TODO: "]]>" is error
|
|
|
|
TryMoveTo('<'); |
|
|
|
TryMoveTo('<'); |
|
|
|
} else if (type == RawTextType.Comment) { |
|
|
|
} else if (type == RawTextType.Comment) { |
|
|
|
TryMoveTo("--"); |
|
|
|
// TODO: "--" is error
|
|
|
|
} else if (type == RawTextType.DocumentTypeDefinition) { |
|
|
|
TryMoveTo("-->"); |
|
|
|
TryMoveTo('>'); |
|
|
|
|
|
|
|
} else if (type == RawTextType.CData) { |
|
|
|
} else if (type == RawTextType.CData) { |
|
|
|
TryMoveTo("]]>"); |
|
|
|
TryMoveTo("]]>"); |
|
|
|
|
|
|
|
} else if (type == RawTextType.ProcessingInstruction) { |
|
|
|
|
|
|
|
TryMoveTo("?>"); |
|
|
|
|
|
|
|
} else if (type == RawTextType.UnknownBang) { |
|
|
|
|
|
|
|
TryMoveToAnyOf('<', '>'); |
|
|
|
} else { |
|
|
|
} else { |
|
|
|
throw new Exception("Uknown type " + type); |
|
|
|
throw new Exception("Uknown type " + type); |
|
|
|
} |
|
|
|
} |
|
|
@ -689,7 +759,8 @@ namespace ICSharpCode.AvalonEdit.XmlParser |
|
|
|
|
|
|
|
|
|
|
|
// If there is entity reference, make sure the next segment starts with it to prevent fragmentation
|
|
|
|
// If there is entity reference, make sure the next segment starts with it to prevent fragmentation
|
|
|
|
int entitySearchStart = Math.Max(start + 1 /* data for us */, backtrack - maxEntityLenght); |
|
|
|
int entitySearchStart = Math.Max(start + 1 /* data for us */, backtrack - maxEntityLenght); |
|
|
|
int entityIndex = input.LastIndexOf('&', entitySearchStart, backtrack - entitySearchStart); |
|
|
|
// Note that LastIndexOf works backward
|
|
|
|
|
|
|
|
int entityIndex = input.LastIndexOf('&', backtrack, backtrack - entitySearchStart); |
|
|
|
if (entityIndex != -1) { |
|
|
|
if (entityIndex != -1) { |
|
|
|
backtrack = entityIndex; |
|
|
|
backtrack = entityIndex; |
|
|
|
} |
|
|
|
} |
|
|
|