Browse Source

XML Parser: Added documentation. Support for comments, processing instructions and CData.

git-svn-id: svn://svn.sharpdevelop.net/sharpdevelop/trunk@4594 1ccf3a8d-04fe-1044-b7c0-cef0b8235c61
shortcuts
David Srbecký 16 years ago
parent
commit
86ab937261
  1. 101
      src/Libraries/AvalonEdit/ICSharpCode.AvalonEdit/XmlParser/RawObjects.cs
  2. 431
      src/Libraries/AvalonEdit/ICSharpCode.AvalonEdit/XmlParser/XmlParser.cs

101
src/Libraries/AvalonEdit/ICSharpCode.AvalonEdit/XmlParser/RawObjects.cs

@ -26,8 +26,7 @@ namespace ICSharpCode.AvalonEdit.XmlParser @@ -26,8 +26,7 @@ namespace ICSharpCode.AvalonEdit.XmlParser
}
/// <summary>
/// The base class for all XML objects. The objects store the precise text
/// representation so that generated text will precisely match the original.
/// Abstract base class for all types
/// </summary>
public abstract class RawObject: TextSegment
{
@ -105,23 +104,29 @@ namespace ICSharpCode.AvalonEdit.XmlParser @@ -105,23 +104,29 @@ namespace ICSharpCode.AvalonEdit.XmlParser
System.Diagnostics.Debug.WriteLine("XML Linq: " + format, args);
}
protected XName EncodeXName(string name, string ns)
protected XName EncodeXName(string name)
{
string namesapce = string.Empty;
int colonIndex = name.IndexOf(':');
if (colonIndex != -1) {
namesapce = name.Substring(0, colonIndex);
name = name.Substring(colonIndex + 1);
}
if (string.IsNullOrEmpty(name)) name = "_";
name = XmlConvert.EncodeLocalName(name);
if (ns == null) ns = string.Empty;
ns = XmlConvert.EncodeLocalName(ns);
return XName.Get(name, ns);
namesapce = XmlConvert.EncodeLocalName(namesapce);
return XName.Get(name, namesapce);
}
}
/// <summary>
/// Abstract base class for all types that can contain child nodes
/// </summary>
public abstract class RawContainer: RawObject
{
/// <summary>
/// Children of the node. Can be Elements, Attributes, etc...
/// Please do not modify directly!
/// Children of the node. It is read-only.
/// Note that it has a CollectionChanged event.
/// </summary>
public ChildrenCollection<RawObject> Children { get; private set; }
@ -152,19 +157,22 @@ namespace ICSharpCode.AvalonEdit.XmlParser @@ -152,19 +157,22 @@ namespace ICSharpCode.AvalonEdit.XmlParser
);
}
// The following should be the only methods that are ever
// used to modify the children collection
// Only these four methods should be used to modify the collection
public void AddChild(RawObject item)
internal void AddChild(RawObject item)
{
item.Parent = this;
this.Children.InsertItems(this.Children.Count, new RawObject[] {item}.ToList());
this.InsertChildren(this.Children.Count, new RawObject[] {item}.ToList());
}
internal void AddChildren(IList<RawObject> items)
{
this.InsertChildren(this.Children.Count, items);
}
/// <summary>
/// Insert children, set parent for them and notify the document
/// </summary>
protected virtual void Insert(int index, IList<RawObject> items)
void InsertChildren(int index, IList<RawObject> items)
{
if (items.Count == 1) {
LogDom("Inserting {0} at index {1}", items[0], index);
@ -187,7 +195,7 @@ namespace ICSharpCode.AvalonEdit.XmlParser @@ -187,7 +195,7 @@ namespace ICSharpCode.AvalonEdit.XmlParser
/// <summary>
/// Remove children, set parent to null for them and notify the document
/// </summary>
protected virtual void RemoveAt(int index, int count)
void RemoveChildrenAt(int index, int count)
{
List<RawObject> removed = new List<RawObject>(count);
for(int i = 0; i < count; i++) {
@ -234,7 +242,7 @@ namespace ICSharpCode.AvalonEdit.XmlParser @@ -234,7 +242,7 @@ namespace ICSharpCode.AvalonEdit.XmlParser
for(int j = i; j < srcList.Count; j++) {
itemsToAdd.Add(srcList[j]);
}
Insert(i, itemsToAdd);
InsertChildren(i, itemsToAdd);
i++; continue;
}
RawObject srcItem = srcList[i];
@ -259,7 +267,7 @@ namespace ICSharpCode.AvalonEdit.XmlParser @@ -259,7 +267,7 @@ namespace ICSharpCode.AvalonEdit.XmlParser
for(int j = i; j < srcItemIndex; j++) {
itemsToAdd.Add(srcList[j]);
}
Insert(i, itemsToAdd);
InsertChildren(i, itemsToAdd);
i = srcItemIndex;
goto continue2;
}
@ -268,7 +276,7 @@ namespace ICSharpCode.AvalonEdit.XmlParser @@ -268,7 +276,7 @@ namespace ICSharpCode.AvalonEdit.XmlParser
for(int dstItemIndex = i; dstItemIndex < dstList.Count; dstItemIndex++) {
RawObject dst = dstList[dstItemIndex];
if (srcItem.StartOffset == dst.StartOffset && srcItem.GetType() == dst.GetType()) {
RemoveAt(i, dstItemIndex - i);
RemoveChildrenAt(i, dstItemIndex - i);
goto continue2;
}
}
@ -279,12 +287,12 @@ namespace ICSharpCode.AvalonEdit.XmlParser @@ -279,12 +287,12 @@ namespace ICSharpCode.AvalonEdit.XmlParser
}
// Remove fluff in the hope that an element/attribute update will occur next
if (!(dstItem is RawElement) && !(dstItem is RawAttribute)) {
RemoveAt(i, 1);
RemoveChildrenAt(i, 1);
continue;
}
// Otherwise just add the item
{
Insert(i, new RawObject[] {srcList[i]}.ToList());
InsertChildren(i, new RawObject[] {srcList[i]}.ToList());
i++; continue;
}
// Continue for inner loops
@ -292,11 +300,14 @@ namespace ICSharpCode.AvalonEdit.XmlParser @@ -292,11 +300,14 @@ namespace ICSharpCode.AvalonEdit.XmlParser
}
// Remove extra items
if (dstList.Count > srcList.Count) {
RemoveAt(srcList.Count, dstList.Count - srcList.Count);
RemoveChildrenAt(srcList.Count, dstList.Count - srcList.Count);
}
}
}
/// <summary>
/// The root object of the XML document
/// </summary>
public class RawDocument: RawContainer
{
public event EventHandler<RawObjectEventArgs> ObjectAttached;
@ -346,12 +357,22 @@ namespace ICSharpCode.AvalonEdit.XmlParser @@ -346,12 +357,22 @@ namespace ICSharpCode.AvalonEdit.XmlParser
}
}
/// <summary>
/// Represents any markup starting with "&lt;" and (hopefully) ending with ">"
/// </summary>
public class RawTag: RawContainer
{
public string OpeningBracket { get; set; } // "<" or "</"
public string Namesapce { get; set; }
public string OpeningBracket { get; set; }
public string Name { get; set; }
public string ClosingBracket { get; set; } // ">" or "/>" for well formed
public string ClosingBracket { get; set; }
// Exactly one of the following will be true.
// The kind of tag is derived solely from OpeningBracket. The prefix checks are
// ordinal because the brackets are fixed markup, not culture-dependent text.
/// <summary> True if the opening bracket is exactly "&lt;" </summary>
public bool IsStartTag { get { return OpeningBracket == "<"; } }
/// <summary> True if the opening bracket is exactly "&lt;/" </summary>
public bool IsEndTag { get { return OpeningBracket == "</"; } }
/// <summary> True if the opening bracket is exactly "&lt;?" </summary>
public bool IsProcessingInstruction { get { return OpeningBracket == "<?"; } }
/// <summary> True if the opening bracket starts with "&lt;!" and the tag is neither a document type nor CData </summary>
public bool IsComment { get { return OpeningBracket.StartsWith("<!", System.StringComparison.Ordinal) && !IsDocumentType && !IsCData; } }
/// <summary> True if the opening bracket starts with "&lt;!D" (a possibly incomplete "&lt;!DOCTYPE") </summary>
public bool IsDocumentType { get { return OpeningBracket.StartsWith("<!D", System.StringComparison.Ordinal); } }
/// <summary> True if the opening bracket starts with "&lt;![" (a possibly incomplete "&lt;![CDATA[") </summary>
public bool IsCData { get { return OpeningBracket.StartsWith("<![", System.StringComparison.Ordinal); } }
public override void UpdateDataFrom(RawObject source)
{
@ -359,12 +380,10 @@ namespace ICSharpCode.AvalonEdit.XmlParser @@ -359,12 +380,10 @@ namespace ICSharpCode.AvalonEdit.XmlParser
base.UpdateDataFrom(source);
RawTag src = (RawTag)source;
if (this.OpeningBracket != src.OpeningBracket ||
this.Namesapce != src.Namesapce ||
this.Name != src.Name ||
this.ClosingBracket != src.ClosingBracket)
{
this.OpeningBracket = src.OpeningBracket;
this.Namesapce = src.Namesapce;
this.Name = src.Name;
this.ClosingBracket = src.ClosingBracket;
OnLocalDataChanged();
@ -377,10 +396,17 @@ namespace ICSharpCode.AvalonEdit.XmlParser @@ -377,10 +396,17 @@ namespace ICSharpCode.AvalonEdit.XmlParser
}
}
/// <summary>
/// Logical grouping of other nodes together. The first child is always the start tag.
/// </summary>
public class RawElement: RawContainer
{
/// <summary>
/// StartTag of an element. It is always the first child and its identity does not change.
/// </summary>
public RawTag StartTag {
get {
if (this.Children.Count == 0) return null;
return (RawTag)this.Children[0];
}
}
@ -400,7 +426,7 @@ namespace ICSharpCode.AvalonEdit.XmlParser @@ -400,7 +426,7 @@ namespace ICSharpCode.AvalonEdit.XmlParser
{
if (xElem == null) {
LogLinq("Creating XElement '{0}'", this.StartTag.Name);
xElem = new XElement(EncodeXName(this.StartTag.Name, this.StartTag.Namesapce));
xElem = new XElement(EncodeXName(this.StartTag.Name));
xElem.AddAnnotation(this);
UpdateXElement(true);
UpdateXElementAttributes(true);
@ -416,7 +442,7 @@ namespace ICSharpCode.AvalonEdit.XmlParser @@ -416,7 +442,7 @@ namespace ICSharpCode.AvalonEdit.XmlParser
{
if (!firstUpdate) LogLinq("Updating XElement '{0}'", this.StartTag.Name);
xElem.Name = EncodeXName(this.StartTag.Name, this.StartTag.Namesapce);
xElem.Name = EncodeXName(this.StartTag.Name);
}
internal void UpdateXElementAttributes(bool firstUpdate)
@ -456,9 +482,11 @@ namespace ICSharpCode.AvalonEdit.XmlParser @@ -456,9 +482,11 @@ namespace ICSharpCode.AvalonEdit.XmlParser
}
}
/// <summary>
/// Name-value pair in a tag
/// </summary>
public class RawAttribute: RawObject
{
public string Namesapce { get; set; }
public string Name { get; set; }
public string EqualsSign { get; set; }
public string Value { get; set; }
@ -468,12 +496,10 @@ namespace ICSharpCode.AvalonEdit.XmlParser @@ -468,12 +496,10 @@ namespace ICSharpCode.AvalonEdit.XmlParser
if (this.ReadCallID == source.ReadCallID) return;
base.UpdateDataFrom(source);
RawAttribute src = (RawAttribute)source;
if (this.Namesapce != src.Namesapce ||
this.Name != src.Name ||
if (this.Name != src.Name ||
this.EqualsSign != src.EqualsSign ||
this.Value != src.Value)
{
this.Namesapce = src.Namesapce;
this.Name = src.Name;
this.EqualsSign = src.EqualsSign;
this.Value = src.Value;
@ -487,7 +513,7 @@ namespace ICSharpCode.AvalonEdit.XmlParser @@ -487,7 +513,7 @@ namespace ICSharpCode.AvalonEdit.XmlParser
{
if (xAttr == null) {
LogLinq("Creating XAttribute '{0}={1}'", this.Name, this.Value);
xAttr = new XAttribute(EncodeXName(this.Name, this.Namesapce), string.Empty);
xAttr = new XAttribute(EncodeXName(this.Name), string.Empty);
xAttr.AddAnnotation(this);
bool deleted = false;
UpdateXAttribute(true, ref deleted);
@ -500,7 +526,7 @@ namespace ICSharpCode.AvalonEdit.XmlParser @@ -500,7 +526,7 @@ namespace ICSharpCode.AvalonEdit.XmlParser
{
if (!firstUpdate) LogLinq("Updating XAttribute '{0}={1}'", this.Name, this.Value);
if (xAttr.Name == EncodeXName(this.Name, this.Namesapce)) {
if (xAttr.Name == EncodeXName(this.Name)) {
xAttr.Value = this.Value ?? string.Empty;
} else {
XElement xParent = xAttr.Parent;
@ -517,6 +543,9 @@ namespace ICSharpCode.AvalonEdit.XmlParser @@ -517,6 +543,9 @@ namespace ICSharpCode.AvalonEdit.XmlParser
}
}
/// <summary>
/// Whitespace or character data
/// </summary>
public class RawText: RawObject
{
public string Value { get; set; }

431
src/Libraries/AvalonEdit/ICSharpCode.AvalonEdit/XmlParser/XmlParser.cs

@ -13,11 +13,67 @@ using System.Xml.Linq; @@ -13,11 +13,67 @@ using System.Xml.Linq;
using ICSharpCode.AvalonEdit.Document;
// Missing XML comment
#pragma warning disable 1591
namespace ICSharpCode.AvalonEdit.XmlParser
{
/// <summary>
/// Creates object tree from XML document.
/// </summary>
/// <remarks>
/// The created tree fully describes the document and thus the original XML file can be
/// exactly reproduced.
///
/// Any further parses will reparse only the changed parts and the existing tree will
/// be updated with the changes. The user can add event handlers to be notified of
/// the changes. The parser tries to minimize the number of changes to the tree.
/// (for example, it will add a single child at the start of the collection rather than
/// clearing the collection and adding new children)
///
/// The object tree consists of following types:
/// RawObject - Abstract base class for all types
/// RawContainer - Abstract base class for all types that can contain child nodes
/// RawDocument - The root object of the XML document
/// RawElement - Logical grouping of other nodes together. The first child is always the start tag.
/// RawTag - Represents any markup starting with "&lt;" and (hopefully) ending with ">"
/// RawAttribute - Name-value pair in a tag
/// RawText - Whitespace or character data
///
/// For example, see the following XML and the produced object tree:
/// <![CDATA[
/// <!-- My favourite quote -->
/// <quote author="Albert Einstein">
/// Make everything as simple as possible, but not simpler.
/// </quote>
///
/// RawDocument
/// RawTag "<!--" "-->"
/// RawText " My favourite quote "
/// RawElement
/// RawTag "<" "quote" ">"
/// RawText " "
/// RawAttribute 'author="Albert Einstein"'
/// RawText "\n Make everything as simple as possible, but not simpler.\n"
/// RawTag "</" "quote" ">"
/// ]]>
///
/// The precise content of RawTag depends on what it represents:
/// <![CDATA[
/// Start tag: "<" Name? (RawText+ RawAttribute)* RawText* (">" | "/>")
/// End tag: "</" Name? (RawText+ RawAttribute)* RawText* ">"
/// P.instr.: "<?" Name? (RawText+ RawAttribute)* RawText* "?>"
/// Comment: "<!" partof("--")? (RawText)* "-->" (Name is always null)
/// DTD: "<!" partof("DOCTYPE") (RawText)* ">" (Name is always null)
/// CData: "<!" partof("[CDATA[") (RawText)* "]]" ">" (Name is always null)
/// ]]>
///
/// The type of tag can be identified by the opening bracket.
/// There are helper properties in the RawTag class to identify the type; exactly
/// one of the properties will be true.
///
/// The closing bracket may be missing or may be different for malformed XML.
///
/// Note that there can always be multiple consecutive RawText nodes.
/// This is to ensure that individual texts are not too long.
/// </remarks>
public class XmlParser
{
RawDocument userDocument = new RawDocument();
@ -26,6 +82,9 @@ namespace ICSharpCode.AvalonEdit.XmlParser @@ -26,6 +82,9 @@ namespace ICSharpCode.AvalonEdit.XmlParser
TextSegmentCollection<RawObject> parsedItems = new TextSegmentCollection<RawObject>();
List<DocumentChangeEventArgs> changesSinceLastParse = new List<DocumentChangeEventArgs>();
/// <summary>
/// Create new parser, but do not parse the text yet.
/// </summary>
public XmlParser(TextDocument textDocument)
{
this.userLinqDocument = userDocument.GetXDocument();
@ -35,6 +94,9 @@ namespace ICSharpCode.AvalonEdit.XmlParser @@ -35,6 +94,9 @@ namespace ICSharpCode.AvalonEdit.XmlParser
};
}
/// <summary>
/// Incrementally parse the document
/// </summary>
public RawDocument Parse()
{
currentLocation = 0;
@ -99,6 +161,30 @@ namespace ICSharpCode.AvalonEdit.XmlParser @@ -99,6 +161,30 @@ namespace ICSharpCode.AvalonEdit.XmlParser
return currentLocation < input.Length;
}
/// <summary>
/// Throws if the current location is exactly at the end of the input.
/// </summary>
void AssertHasMoreData()
{
    bool atEndOfInput = currentLocation == input.Length;
    if (!atEndOfInput) return;
    throw new Exception("Unexpected end of files");
}
// The methods start with 'try' to make it clear they can silently fail.
// Read methods without 'try' have to succeed or throw an exception.
//
// For example:
// while(true) TryMoveNext(); is obviously an infinite loop
// whereas
// while(true) MoveNext(); should eventually throw an exception (if MoveNext existed)
//
/// <summary>
/// Advances the current location by one character.
/// Returns false, without moving, when already at the end of the input.
/// </summary>
bool TryMoveNext()
{
    bool canAdvance = currentLocation != input.Length;
    if (canAdvance) {
        currentLocation++;
    }
    return canAdvance;
}
bool TryRead(char c)
{
if (currentLocation == input.Length) return false;
@ -121,6 +207,18 @@ namespace ICSharpCode.AvalonEdit.XmlParser @@ -121,6 +207,18 @@ namespace ICSharpCode.AvalonEdit.XmlParser
}
}
/// <summary>
/// Reads as long a prefix of <paramref name="text"/> as is present at the current location.
/// Returns true if at least one character was read.
/// </summary>
bool TryReadPartOf(string text)
{
    // Nothing is read unless the very first character is present
    if (!TryPeek(text[0])) {
        return false;
    }
    // Consume characters one by one; stop at the first mismatch or failed read
    int position = 0;
    while (position < text.Length && TryRead(text[position])) {
        position++;
    }
    return true;
}
bool TryPeek(char c)
{
if (currentLocation == input.Length) return false;
@ -135,7 +233,16 @@ namespace ICSharpCode.AvalonEdit.XmlParser @@ -135,7 +233,16 @@ namespace ICSharpCode.AvalonEdit.XmlParser
return input.Substring(currentLocation, text.Length) == text;
}
bool TryMoveTo(params char[] c)
/// <summary>
/// Moves the current location forward until it points at <paramref name="c"/>.
/// The character itself is not consumed.
/// Returns false if the end of the input is reached first; the current location
/// is then left at the end of the input.
/// </summary>
bool TryMoveTo(char c)
{
    while (currentLocation != input.Length) {
        if (input[currentLocation] == c) {
            return true;
        }
        currentLocation++;
    }
    return false;
}
bool TryMoveToAnyOf(params char[] c)
{
while(true) {
if (currentLocation == input.Length) return false;
@ -154,32 +261,21 @@ namespace ICSharpCode.AvalonEdit.XmlParser @@ -154,32 +261,21 @@ namespace ICSharpCode.AvalonEdit.XmlParser
}
static char[] WhiteSpaceChars = new char[] {' ', '\n', '\r', '\t'};
static char[] WhiteSpaceAndReservedChars = new char[] {' ', '\n', '\r', '\t', '<', '=', '>', '/', ':', '?'};
bool? IsWhiteSpace()
{
if (currentLocation == input.Length) {
return null;
} else {
return WhiteSpaceChars.Contains(input[currentLocation]);
}
}
static char[] WhiteSpaceAndReservedChars = new char[] {' ', '\n', '\r', '\t', '<', '=', '>', '/', '?'};
bool? IsWhiteSpaceOrReserved()
bool TryPeekWhiteSpace()
{
if (currentLocation == input.Length) {
return null;
} else {
return WhiteSpaceAndReservedChars.Contains(input[currentLocation]);
}
if (currentLocation == input.Length) return false;
return WhiteSpaceChars.Contains(input[currentLocation]);
}
string ReadName()
{
Debug.Assert(HasMoreData());
AssertHasMoreData();
int start = currentLocation;
TryMoveTo(WhiteSpaceAndReservedChars.ToArray());
TryMoveToAnyOf(WhiteSpaceAndReservedChars.ToArray());
return GetText(start, currentLocation);
}
@ -195,7 +291,7 @@ namespace ICSharpCode.AvalonEdit.XmlParser @@ -195,7 +291,7 @@ namespace ICSharpCode.AvalonEdit.XmlParser
if (IsEndOfFile()) {
break;
} else if (TryPeek('<')) {
doc.AddChild(ReadElement());
doc.AddChild(ReadElementOrTag());
} else {
doc.AddChild(ReadCharacterData());
}
@ -207,9 +303,22 @@ namespace ICSharpCode.AvalonEdit.XmlParser @@ -207,9 +303,22 @@ namespace ICSharpCode.AvalonEdit.XmlParser
return doc;
}
/// <summary>
/// Reads the markup at the current location.
/// Markup opening with "&lt;!", "&lt;/" or "&lt;?" is read as a bare tag;
/// any other markup opening with "&lt;" is read as an element.
/// Throws if the end of the input was reached or the current character is not "&lt;".
/// </summary>
RawObject ReadElementOrTag()
{
    AssertHasMoreData();

    bool isBareTag = TryPeek("<!") || TryPeek("</") || TryPeek("<?");
    if (isBareTag) {
        return ReadTag();
    }
    if (!TryPeek('<')) {
        throw new Exception("'<' expected");
    }
    return ReadElement();
}
RawElement ReadElement()
{
Debug.Assert(HasMoreData() && TryPeek('<'));
AssertHasMoreData();
RawElement element = ReadFromCache<RawElement>(currentLocation);
if (element != null) return element;
@ -219,27 +328,21 @@ namespace ICSharpCode.AvalonEdit.XmlParser @@ -219,27 +328,21 @@ namespace ICSharpCode.AvalonEdit.XmlParser
element.StartOffset = currentLocation;
// Read start tag
element.AddChild(ReadTag());
// Read content
if (element.StartTag.ClosingBracket == ">" &&
element.StartTag.OpeningBracket != "<?" &&
element.StartTag.OpeningBracket != "<!" &&
element.StartTag.OpeningBracket != "<!--" )
{
Debug.Assert(element.StartTag.IsStartTag);
// Read content and end tag
if (element.StartTag.ClosingBracket == ">") {
while(true) {
if (IsEndOfFile()) {
break;
} else if (TryPeek('<')) {
if (TryPeek("</")) break;
element.AddChild(ReadElement());
RawObject content = ReadElementOrTag();
if (content is RawTag && ((RawTag)content).IsEndTag) break;
element.AddChild(content);
} else {
element.AddChild(ReadCharacterData());
}
}
}
// Read end tag
if (TryPeek("</")) {
element.AddChild(ReadTag());
}
element.EndOffset = currentLocation;
LogParsed(element);
@ -247,9 +350,17 @@ namespace ICSharpCode.AvalonEdit.XmlParser @@ -247,9 +350,17 @@ namespace ICSharpCode.AvalonEdit.XmlParser
return element;
}
// Start tag: "<" Name? (RawText+ RawAttribute)* RawText* (">" | "/>")
// End tag: "</" Name? (RawText+ RawAttribute)* RawText* ">"
// P.instr.: "<?" Name? (RawText+ RawAttribute)* RawText* "?>"
// Comment: "<!" partof("--")? (RawText)* "-->" (Name is always null)
// CData: "<!" partof("[CDATA[") (RawText)* "]]" ">" (Name is always null)
// DTD: "<!" partof("DOCTYPE") (RawText)* ">" (Name is always null)
RawTag ReadTag()
{
Debug.Assert(HasMoreData() && TryPeek('<'));
AssertHasMoreData();
RawTag tag = ReadFromCache<RawTag>(currentLocation);
if (tag != null) return tag;
@ -257,52 +368,52 @@ namespace ICSharpCode.AvalonEdit.XmlParser @@ -257,52 +368,52 @@ namespace ICSharpCode.AvalonEdit.XmlParser
tag = new RawTag();
tag.StartOffset = currentLocation;
if (TryRead('<')) {
tag.OpeningBracket = "<";
if (TryRead('/')) {
tag.OpeningBracket += "/";
} else if (TryRead('?')) {
tag.OpeningBracket += "?";
} else if (TryRead("!--")) {
tag.OpeningBracket += "!--";
} else if (TryRead('!')) {
tag.OpeningBracket += "!";
}
}
if (HasMoreData()) {
tag.Name = ReadName();
if (TryRead(':')) {
tag.Namesapce = tag.Name;
// Read the opening bracket
// It identifies the type of tag and parsing behavior for the rest of it
tag.OpeningBracket = ReadOpeningBracket();
// Read the name
if (tag.IsStartTag || tag.IsEndTag || tag.IsProcessingInstruction) {
if (HasMoreData()) {
tag.Name = ReadName();
}
}
// Read attributes
while(true) {
if (IsWhiteSpace() == true) {
tag.AddChild(ReadWhiteSpace());
}
if (TryRead('>')) {
tag.ClosingBracket = ">";
break;
} else if (TryRead('/')) {
tag.ClosingBracket = "/";
if (TryRead('>')) {
tag.ClosingBracket += ">";
if (tag.IsStartTag || tag.IsEndTag || tag.IsProcessingInstruction) {
// Read attributes for the tag
while(true) {
if (TryPeekWhiteSpace()) {
tag.AddChild(ReadWhiteSpace());
}
break;
} else if (TryRead('?')) {
tag.ClosingBracket = "?";
if (TryRead('>')) {
tag.ClosingBracket += ">";
string bracket;
if (TryReadClosingBracket(out bracket)) {
tag.ClosingBracket = bracket;
break;
}
break;
}
if (TryPeek('<')) break;
if (HasMoreData()) {
tag.AddChild(ReadAttribulte());
continue;
if (TryPeek('<')) break;
if (HasMoreData()) {
tag.AddChild(ReadAttribulte());
continue;
}
break; // End of file
}
} else {
// Simple tag types
if (tag.IsComment) {
// TODO: Be strict only if the opening bracket is complete
tag.AddChildren(ReadTextUntil("-->").ToList());
} else if (tag.IsCData) {
// TODO: Be strict only if the opening bracket is complete
tag.AddChildren(ReadTextUntil("]]>").ToList());
} else if (tag.IsDocumentType) {
// TODO: Nested definition
tag.AddChildren(ReadTextUntil(">").ToList());
}
string bracket;
if (TryReadClosingBracket(out bracket)) {
tag.ClosingBracket = bracket;
}
break;
}
tag.EndOffset = currentLocation;
@ -311,28 +422,77 @@ namespace ICSharpCode.AvalonEdit.XmlParser @@ -311,28 +422,77 @@ namespace ICSharpCode.AvalonEdit.XmlParser
return tag;
}
RawText ReadWhiteSpace()
/// <summary>
/// Reads any of the known opening brackets
/// Also accepts them if they are incomplete; one character is sufficient
/// </summary>
string ReadOpeningBracket()
{
Debug.Assert(HasMoreData() && IsWhiteSpace() == true);
RawText ws = ReadFromCache<RawText>(currentLocation);
if (ws != null) return ws;
ws = new RawText();
ws.StartOffset = currentLocation;
// We are using a lot of string literals so that the memory instances are shared
int start = currentLocation;
while(IsWhiteSpace() == true) currentLocation++;
ws.Value = GetText(start, currentLocation);
ws.EndOffset = currentLocation;
parsedItems.Add(ws);
return ws;
if (TryRead('<')) {
if (TryRead('/')) {
return "</";
} else if (TryRead('!')) {
if (TryRead('-')) {
if (TryRead('-')) {
return "<!--";
} else {
return "<!-";
}
} else if (TryReadPartOf("[CDATA[")) {
return GetText(start, currentLocation);
} else if (TryReadPartOf("DOCTYPE")) {
return GetText(start, currentLocation);
} else {
return "<!";
}
} else if (TryRead('?')) {
return "<?";
} else {
return "<";
}
} else {
throw new Exception("'<' expected");
}
}
/// <summary>
/// Reads any of the known closing brackets: "&gt;", "/&gt;", "?&gt;", "--&gt;" or "]]&gt;".
/// Also accepts them if they are incomplete; one character is sufficient.
/// Returns false, with <paramref name="bracket"/> set to null, if no bracket
/// starts at the current location.
/// </summary>
bool TryReadClosingBracket(out string bracket)
{
    int bracketStart = currentLocation;

    // String literals are used wherever possible so that the memory instances are shared
    if (TryRead('>')) {
        bracket = ">";
        return true;
    }
    if (TryRead('/')) {
        bracket = TryRead('>') ? "/>" : "/";
        return true;
    }
    if (TryRead('?')) {
        bracket = TryRead('>') ? "?>" : "?";
        return true;
    }
    // Possibly incomplete comment or CData terminator; take exactly what was consumed
    if (TryReadPartOf("-->") || TryReadPartOf("]]>")) {
        bracket = GetText(bracketStart, currentLocation);
        return true;
    }

    bracket = null;
    return false;
}
RawAttribute ReadAttribulte()
{
Debug.Assert(HasMoreData());
AssertHasMoreData();
RawAttribute attr = ReadFromCache<RawAttribute>(currentLocation);
if (attr != null) return attr;
@ -340,33 +500,23 @@ namespace ICSharpCode.AvalonEdit.XmlParser @@ -340,33 +500,23 @@ namespace ICSharpCode.AvalonEdit.XmlParser
attr = new RawAttribute();
attr.StartOffset = currentLocation;
if (HasMoreData()) {
attr.Name = ReadName();
if (TryRead(':')) {
attr.Namesapce = attr.Name;
attr.Name = ReadName();
}
}
if (HasMoreData()) attr.Name = ReadName();
int checkpoint = currentLocation;
attr.EqualsSign = string.Empty;
if (IsWhiteSpace() == true) attr.EqualsSign += ReadWhiteSpace().Value;
if (TryPeekWhiteSpace()) attr.EqualsSign += ReadWhiteSpace().Value;
if (TryRead('=')) {
attr.EqualsSign += "=";
if (IsWhiteSpace() == true) attr.EqualsSign += ReadWhiteSpace().Value;
if (IsWhiteSpaceOrReserved() == false) {
// Read attribute value
int start = currentLocation;
if (TryRead('"')) {
TryMoveTo('"', '<');
TryRead('"');
attr.Value = GetText(start, currentLocation);
} else if (TryRead('\'')) {
TryMoveTo('\'', '<');
TryRead('\'');
attr.Value = GetText(start, currentLocation);
} else {
attr.Value = ReadName();
}
if (TryPeekWhiteSpace()) attr.EqualsSign += ReadWhiteSpace().Value;
// Read attribute value
int start = currentLocation;
if (TryRead('"')) {
TryMoveToAnyOf('"', '<');
TryRead('"');
attr.Value = GetText(start, currentLocation);
} else if (TryRead('\'')) {
TryMoveToAnyOf('\'', '<');
TryRead('\'');
attr.Value = GetText(start, currentLocation);
}
} else {
attr.EqualsSign = null;
@ -378,6 +528,27 @@ namespace ICSharpCode.AvalonEdit.XmlParser @@ -378,6 +528,27 @@ namespace ICSharpCode.AvalonEdit.XmlParser
return attr;
}
/// <summary>
/// Reads the run of whitespace at the current location into a RawText node.
/// If the cache supplies a RawText for the current location, that node is returned instead.
/// Throws if the end of the input was reached.
/// </summary>
RawText ReadWhiteSpace()
{
    AssertHasMoreData();

    RawText cached = ReadFromCache<RawText>(currentLocation);
    if (cached != null) {
        return cached;
    }

    int begin = currentLocation;
    RawText whiteSpace = new RawText();
    whiteSpace.StartOffset = begin;

    // Skip over every consecutive whitespace character
    while (TryPeekWhiteSpace()) {
        TryMoveNext();
    }

    whiteSpace.Value = GetText(begin, currentLocation);
    whiteSpace.EndOffset = currentLocation;
    Debug.Assert(whiteSpace.Value.Length > 0);

    parsedItems.Add(whiteSpace);
    return whiteSpace;
}
RawText ReadCharacterData()
{
Debug.Assert(HasMoreData());
@ -393,8 +564,36 @@ namespace ICSharpCode.AvalonEdit.XmlParser @@ -393,8 +564,36 @@ namespace ICSharpCode.AvalonEdit.XmlParser
charData.Value = GetText(start, currentLocation);
charData.EndOffset = currentLocation;
Debug.Assert(charData.Value.Length > 0);
parsedItems.Add(charData);
return charData;
}
/// <summary>
/// Reads the text in front of <paramref name="closingText"/> as RawText nodes.
/// If the closing text never occurs, everything up to the end of the input is read.
/// The closing text itself is not consumed.
/// </summary>
/// <param name="closingText">Non-empty terminator, for example "--&gt;" for comments</param>
/// <returns>
/// Zero, one or two nodes: an optional node taken from the cache followed by an
/// optional freshly parsed node. Empty text nodes are never produced, so an empty
/// comment such as "&lt;!----&gt;" or a tag cut off by the end of the input yields no fresh node.
/// </returns>
IEnumerable<RawObject> ReadTextUntil(string closingText)
{
    RawText charData = ReadFromCache<RawText>(currentLocation);
    // TODO: How many return? Ensure the output is same as before
    // NOTE(review): presumably ReadFromCache moves currentLocation past the cached node - confirm
    if (charData != null) yield return charData;

    int start = currentLocation;
    while(true) {
        if (!TryMoveTo(closingText[0])) break; // End of file
        if (TryPeek(closingText)) break; // Match
        TryMoveNext(); // False alarm: only the first character matched
    }

    // Nothing was consumed (terminator right here, or end of input):
    // do not create and register a zero-length text node
    if (currentLocation == start) yield break;

    charData = new RawText();
    charData.StartOffset = start;
    charData.Value = GetText(start, currentLocation);
    charData.EndOffset = currentLocation;
    Debug.Assert(charData.Value.Length > 0);

    parsedItems.Add(charData);
    yield return charData;
}
}
}

Loading…
Cancel
Save