XML Parser: Added documentation. Support for comments, processing instructions and CData.

git-svn-id: svn://svn.sharpdevelop.net/sharpdevelop/trunk@4594 1ccf3a8d-04fe-1044-b7c0-cef0b8235c61
16 years ago · 86ab937261
2 changed files with 380 additions and 152 deletions
--- a/src/Libraries/AvalonEdit/ICSharpCode.AvalonEdit/XmlParser/RawObjects.cs
+++ b/src/Libraries/AvalonEdit/ICSharpCode.AvalonEdit/XmlParser/RawObjects.cs
@ -26,8 +26,7 @@ namespace ICSharpCode.AvalonEdit.XmlParser
				@@ -26,8 +26,7 @@ namespace ICSharpCode.AvalonEdit.XmlParser
 	}
 	
 	/// <summary>
-	/// The base class for all XML objects.  The objects store the precise text 
-	/// representation so that generated text will preciesly match original.
+	/// Abstract base class for all types
 	/// </summary>
 	public abstract class RawObject: TextSegment
 	{
@ -105,23 +104,29 @@ namespace ICSharpCode.AvalonEdit.XmlParser
				@@ -105,23 +104,29 @@ namespace ICSharpCode.AvalonEdit.XmlParser
 			System.Diagnostics.Debug.WriteLine("XML Linq: " + format, args);
 		}
 		
-		protected XName EncodeXName(string name, string ns)
+		protected XName EncodeXName(string name)
 		{
+			string namesapce = string.Empty;
+			int colonIndex = name.IndexOf(':');
+			if (colonIndex != -1) {
+				namesapce = name.Substring(0, colonIndex);
+				name = name.Substring(colonIndex + 1);
+			}
 			if (string.IsNullOrEmpty(name)) name = "_";
 			name = XmlConvert.EncodeLocalName(name);
-			
-			if (ns == null) ns = string.Empty;
-			ns = XmlConvert.EncodeLocalName(ns);
-			
-			return XName.Get(name, ns);
+			namesapce = XmlConvert.EncodeLocalName(namesapce);
+			return XName.Get(name, namesapce);
 		}
 	}
 	
+	/// <summary>
+	/// Abstract base class for all types that can contain child nodes
+	/// </summary>
 	public abstract class RawContainer: RawObject
 	{
 		/// <summary>
-		/// Children of the node.  Can be Elements, Attributes, etc...
-		/// Please do not modify directly!
+		/// Children of the node.  It is read-only.
+		/// Note that it has a CollectionChanged event.
 		/// </summary>
 		public ChildrenCollection<RawObject> Children { get; private set; }
 		
@ -152,19 +157,22 @@ namespace ICSharpCode.AvalonEdit.XmlParser
				@@ -152,19 +157,22 @@ namespace ICSharpCode.AvalonEdit.XmlParser
 			);
 		}
 		
-		// The following should be the only methods that are ever
-		// used to modify the children collection
+		// Only these four methods should be used to modify the collection
 		
-		public void AddChild(RawObject item)
+		internal void AddChild(RawObject item)
 		{
-			item.Parent = this;
-			this.Children.InsertItems(this.Children.Count, new RawObject[] {item}.ToList());
+			this.InsertChildren(this.Children.Count, new RawObject[] {item}.ToList());
+		}
+		
+		internal void AddChildren(IList<RawObject> items)
+		{
+			this.InsertChildren(this.Children.Count, items);
 		}
 		
 		/// <summary>
 		/// Insert children, set parent for them and notify the document
 		/// </summary>
-		protected virtual void Insert(int index, IList<RawObject> items)
+		void InsertChildren(int index, IList<RawObject> items)
 		{
 			if (items.Count == 1) {
 				LogDom("Inserting {0} at index {1}", items[0], index);
@ -187,7 +195,7 @@ namespace ICSharpCode.AvalonEdit.XmlParser
				@@ -187,7 +195,7 @@ namespace ICSharpCode.AvalonEdit.XmlParser
 		/// <summary>
 		/// Remove children, set parent to null for them and notify the document
 		/// </summary>
-		protected virtual void RemoveAt(int index, int count)
+		void RemoveChildrenAt(int index, int count)
 		{
 			List<RawObject> removed = new List<RawObject>(count);
 			for(int i = 0; i < count; i++) {
@ -234,7 +242,7 @@ namespace ICSharpCode.AvalonEdit.XmlParser
				@@ -234,7 +242,7 @@ namespace ICSharpCode.AvalonEdit.XmlParser
 					for(int j = i; j < srcList.Count; j++) {
 						itemsToAdd.Add(srcList[j]);
 					}
-					Insert(i, itemsToAdd);
+					InsertChildren(i, itemsToAdd);
 					i++; continue;
 				}
 				RawObject srcItem = srcList[i];
@ -259,7 +267,7 @@ namespace ICSharpCode.AvalonEdit.XmlParser
				@@ -259,7 +267,7 @@ namespace ICSharpCode.AvalonEdit.XmlParser
 						for(int j = i; j < srcItemIndex; j++) {
 							itemsToAdd.Add(srcList[j]);
 						}
-						Insert(i, itemsToAdd);
+						InsertChildren(i, itemsToAdd);
 						i = srcItemIndex;
 						goto continue2;
 					}
@ -268,7 +276,7 @@ namespace ICSharpCode.AvalonEdit.XmlParser
				@@ -268,7 +276,7 @@ namespace ICSharpCode.AvalonEdit.XmlParser
 				for(int dstItemIndex = i; dstItemIndex < dstList.Count; dstItemIndex++) {
 					RawObject dst = dstList[dstItemIndex];
 					if (srcItem.StartOffset == dst.StartOffset && srcItem.GetType() == dst.GetType()) {
-						RemoveAt(i, dstItemIndex - i);
+						RemoveChildrenAt(i, dstItemIndex - i);
 						goto continue2;
 					}
 				}
@ -279,12 +287,12 @@ namespace ICSharpCode.AvalonEdit.XmlParser
				@@ -279,12 +287,12 @@ namespace ICSharpCode.AvalonEdit.XmlParser
 				}
 				// Remove fluf in hope that element/attribute update will occur next
 				if (!(dstItem is RawElement) && !(dstItem is RawAttribute)) {
-					RemoveAt(i, 1);
+					RemoveChildrenAt(i, 1);
 					continue;
 				}
 				// Otherwise just add the item
 				{
-					Insert(i, new RawObject[] {srcList[i]}.ToList());
+					InsertChildren(i, new RawObject[] {srcList[i]}.ToList());
 					i++; continue;
 				}
 				// Continue for inner loops
@ -292,11 +300,14 @@ namespace ICSharpCode.AvalonEdit.XmlParser
				@@ -292,11 +300,14 @@ namespace ICSharpCode.AvalonEdit.XmlParser
 			}
 			// Remove extra items
 			if (dstList.Count > srcList.Count) {
-				RemoveAt(srcList.Count, dstList.Count - srcList.Count);
+				RemoveChildrenAt(srcList.Count, dstList.Count - srcList.Count);
 			}
 		}
 	}
 	
+	/// <summary>
+	/// The root object of the XML document
+	/// </summary>
 	public class RawDocument: RawContainer
 	{
 		public event EventHandler<RawObjectEventArgs> ObjectAttached;
@ -346,12 +357,22 @@ namespace ICSharpCode.AvalonEdit.XmlParser
				@@ -346,12 +357,22 @@ namespace ICSharpCode.AvalonEdit.XmlParser
 		}
 	}
 	
+	/// <summary>
+	/// Represents any markup starting with "&lt;" and (hopefully) ending with ">"
+	/// </summary>
 	public class RawTag: RawContainer
 	{
-		public string OpeningBracket { get; set; } // "<" or "</"
-		public string Namesapce { get; set; }
+		public string OpeningBracket { get; set; }
 		public string Name { get; set; }
-		public string ClosingBracket { get; set; } // ">" or "/>" for well formed
+		public string ClosingBracket { get; set; }
+		
+		// Exactly one of the following will be true
+		public bool IsStartTag              { get { return OpeningBracket == "<"; } }
+		public bool IsEndTag                { get { return OpeningBracket == "</"; } }
+		public bool IsProcessingInstruction { get { return OpeningBracket == "<?"; } }
+		public bool IsComment               { get { return OpeningBracket.StartsWith("<!") && !IsDocumentType && !IsCData; } }
+		public bool IsDocumentType          { get { return OpeningBracket.StartsWith("<!D"); } }
+		public bool IsCData                 { get { return OpeningBracket.StartsWith("<!["); } }
 		
 		public override void UpdateDataFrom(RawObject source)
 		{
@ -359,12 +380,10 @@ namespace ICSharpCode.AvalonEdit.XmlParser
				@@ -359,12 +380,10 @@ namespace ICSharpCode.AvalonEdit.XmlParser
 			base.UpdateDataFrom(source);
 			RawTag src = (RawTag)source;
 			if (this.OpeningBracket != src.OpeningBracket ||
-			    this.Namesapce != src.Namesapce ||
 				this.Name != src.Name ||
 				this.ClosingBracket != src.ClosingBracket)
 			{
 				this.OpeningBracket = src.OpeningBracket;
-				this.Namesapce = src.Namesapce;
 				this.Name = src.Name;
 				this.ClosingBracket = src.ClosingBracket;
 				OnLocalDataChanged();
@ -377,10 +396,17 @@ namespace ICSharpCode.AvalonEdit.XmlParser
				@@ -377,10 +396,17 @@ namespace ICSharpCode.AvalonEdit.XmlParser
 		}
 	}
 	
+	/// <summary>
+	/// Logical grouping of other nodes together.  The first child is always the start tag.
+	/// </summary>
 	public class RawElement: RawContainer
 	{
+		/// <summary>
+		/// StartTag of an element.  It is always the first child and its identity does not change.
+		/// </summary>
 		public RawTag StartTag {
 			get {
+				if (this.Children.Count == 0) return null;
 				return (RawTag)this.Children[0];
 			}
 		}
@ -400,7 +426,7 @@ namespace ICSharpCode.AvalonEdit.XmlParser
				@@ -400,7 +426,7 @@ namespace ICSharpCode.AvalonEdit.XmlParser
 		{
 			if (xElem == null) {
 				LogLinq("Creating XElement '{0}'", this.StartTag.Name);
-				xElem = new XElement(EncodeXName(this.StartTag.Name, this.StartTag.Namesapce));
+				xElem = new XElement(EncodeXName(this.StartTag.Name));
 				xElem.AddAnnotation(this);
 				UpdateXElement(true);
 				UpdateXElementAttributes(true);
@ -416,7 +442,7 @@ namespace ICSharpCode.AvalonEdit.XmlParser
				@@ -416,7 +442,7 @@ namespace ICSharpCode.AvalonEdit.XmlParser
 		{
 			if (!firstUpdate) LogLinq("Updating XElement '{0}'", this.StartTag.Name);
 			
-			xElem.Name = EncodeXName(this.StartTag.Name, this.StartTag.Namesapce);
+			xElem.Name = EncodeXName(this.StartTag.Name);
 		}
 		
 		internal void UpdateXElementAttributes(bool firstUpdate)
@ -456,9 +482,11 @@ namespace ICSharpCode.AvalonEdit.XmlParser
				@@ -456,9 +482,11 @@ namespace ICSharpCode.AvalonEdit.XmlParser
 		}
 	}
 	
+	/// <summary>
+	/// Name-value pair in a tag
+	/// </summary>
 	public class RawAttribute: RawObject
 	{
-		public string Namesapce { get; set; }
 		public string Name { get; set; }
 		public string EqualsSign { get; set; }
 		public string Value { get; set; }
@ -468,12 +496,10 @@ namespace ICSharpCode.AvalonEdit.XmlParser
				@@ -468,12 +496,10 @@ namespace ICSharpCode.AvalonEdit.XmlParser
 			if (this.ReadCallID == source.ReadCallID) return;
 			base.UpdateDataFrom(source);
 			RawAttribute src = (RawAttribute)source;
-			if (this.Namesapce != src.Namesapce ||
-				this.Name != src.Name ||
+			if (this.Name != src.Name ||
 				this.EqualsSign != src.EqualsSign ||
 				this.Value != src.Value)
 			{
-				this.Namesapce = src.Namesapce;
 				this.Name = src.Name;
 				this.EqualsSign = src.EqualsSign;
 				this.Value = src.Value;
@ -487,7 +513,7 @@ namespace ICSharpCode.AvalonEdit.XmlParser
				@@ -487,7 +513,7 @@ namespace ICSharpCode.AvalonEdit.XmlParser
 		{
 			if (xAttr == null) {
 				LogLinq("Creating XAttribute '{0}={1}'", this.Name, this.Value);
-				xAttr = new XAttribute(EncodeXName(this.Name, this.Namesapce), string.Empty);
+				xAttr = new XAttribute(EncodeXName(this.Name), string.Empty);
 				xAttr.AddAnnotation(this);
 				bool deleted = false;
 				UpdateXAttribute(true, ref deleted);
@ -500,7 +526,7 @@ namespace ICSharpCode.AvalonEdit.XmlParser
				@@ -500,7 +526,7 @@ namespace ICSharpCode.AvalonEdit.XmlParser
 		{
 			if (!firstUpdate) LogLinq("Updating XAttribute '{0}={1}'", this.Name, this.Value);
 			
-			if (xAttr.Name == EncodeXName(this.Name, this.Namesapce)) {
+			if (xAttr.Name == EncodeXName(this.Name)) {
 				xAttr.Value = this.Value ?? string.Empty;
 			} else {
 				XElement xParent = xAttr.Parent;
@ -517,6 +543,9 @@ namespace ICSharpCode.AvalonEdit.XmlParser
				@@ -517,6 +543,9 @@ namespace ICSharpCode.AvalonEdit.XmlParser
 		}
 	}
 	
+	/// <summary>
+	/// Whitespace or character data
+	/// </summary>
 	public class RawText: RawObject
 	{
 		public string Value { get; set; }
--- a/src/Libraries/AvalonEdit/ICSharpCode.AvalonEdit/XmlParser/XmlParser.cs
+++ b/src/Libraries/AvalonEdit/ICSharpCode.AvalonEdit/XmlParser/XmlParser.cs
@ -13,11 +13,67 @@ using System.Xml.Linq;
				@@ -13,11 +13,67 @@ using System.Xml.Linq;

 using ICSharpCode.AvalonEdit.Document;

-// Missing XML comment
-#pragma warning disable 1591
-
 namespace ICSharpCode.AvalonEdit.XmlParser
 {
+	/// <summary>
+	/// Creates object tree from XML document.
+	/// </summary>
+	/// <remarks>
+	/// The created tree fully describes the document and thus the original XML file can be
+	/// exactly reproduced.
+	/// 
+	/// Any further parses will reparse only the changed parts and the existing tree will
+	/// be updated with the changes.  The user can add event handlers to be notified of
+	/// the changes.  The parser tries to minimize the number of changes to the tree.
+	/// (for example, it will add a single child at the start of collection rather than
+	/// clearing the collection and adding new children)
+	/// 
+	/// The object tree consists of following types:
+	///   RawObject - Abstract base class for all types
+	///     RawContainer - Abstract base class for all types that can contain child nodes
+	///       RawDocument - The root object of the XML document
+	///       RawElement - Logical grouping of other nodes together.  The first child is always the start tag.
+	///       RawTag - Represents any markup starting with "&lt;" and (hopefully) ending with ">"
+	///     RawAttribute - Name-value pair in a tag
+	///     RawText - Whitespace or character data
+	/// 
+	/// For example, see the following XML and the produced object tree:
+	/// <![CDATA[
+	///   <!-- My favourite quote -->
+	///   <quote author="Albert Einstein">
+	///     Make everything as simple as possible, but not simpler.
+	///   </quote>
+	/// 
+	///   RawDocument
+	///     RawTag "<!--" "-->"
+	///       RawText " My favourite quote "
+	///     RawElement
+	///       RawTag "<" "quote" ">"
+	///         RawText " "
+	///         RawAttribute 'author="Albert Einstein"'
+	///       RawText "\n  Make everything as simple as possible, but not simpler.\n"
+	///       RawTag "</" "quote" ">"
+	/// ]]>
+	/// 
+	/// The precise content of RawTag depends on what it represents:
+	/// <![CDATA[
+	///   Start tag:  "<"  Name? (RawText+ RawAttribute)* RawText* (">" | "/>")
+	///   End tag:    "</" Name? (RawText+ RawAttribute)* RawText* ">"
+	///   P.instr.:   "<?" Name? (RawText+ RawAttribute)* RawText* "?>"
+	///   Comment:    "<!" partof("--")?     (RawText)* "-->"     (Name is always null)
+	///   DTD:        "<!" partof("DOCTYPE") (RawText)* ">"       (Name is always null)
+	///   CData:      "<!" partof("[CDATA[") (RawText)* "]]" ">"  (Name is always null)
+	/// ]]>
+	/// 
+	/// The type of tag can be identified by the opening bracket.
+	/// There are helper properties in the RawTag class to identify the type; exactly
+	/// one of the properties will be true.
+	/// 
+	/// The closing bracket may be missing or may be different for malformed XML.
+	/// 
+	/// Note that there can always be multiple consecutive RawText nodes.
+	/// This is to ensure that individual texts are not too long.
+	/// </remarks>
 	public class XmlParser
 	{
 		RawDocument userDocument = new RawDocument();
@ -26,6 +82,9 @@ namespace ICSharpCode.AvalonEdit.XmlParser
				@@ -26,6 +82,9 @@ namespace ICSharpCode.AvalonEdit.XmlParser
 		TextSegmentCollection<RawObject> parsedItems = new TextSegmentCollection<RawObject>();
 		List<DocumentChangeEventArgs> changesSinceLastParse = new List<DocumentChangeEventArgs>();
 		
+		/// <summary>
+		/// Create new parser, but do not parse the text yet.
+		/// </summary>
 		public XmlParser(TextDocument textDocument)
 		{
 			this.userLinqDocument = userDocument.GetXDocument();
@ -35,6 +94,9 @@ namespace ICSharpCode.AvalonEdit.XmlParser
				@@ -35,6 +94,9 @@ namespace ICSharpCode.AvalonEdit.XmlParser
 			};
 		}
 		
+		/// <summary>
+		/// Incrementally parse the document
+		/// </summary>
 		public RawDocument Parse()
 		{
 			currentLocation = 0;
@ -99,6 +161,30 @@ namespace ICSharpCode.AvalonEdit.XmlParser
				@@ -99,6 +161,30 @@ namespace ICSharpCode.AvalonEdit.XmlParser
 			return currentLocation < input.Length;
 		}
 		
+		void AssertHasMoreData()
+		{
+			if (currentLocation == input.Length) {
+				throw new Exception("Unexpected end of files");
+			}
+		}
+		
+		// The methods start with 'try' to make it clear they can silently fail.
+		// Read methods without 'try' have to succeed or throw an exception.
+		//
+		// For example:
+		//   while(true) TryMoveNext();   is obviously an infinite loop
+		// whereas
+		//   while(true) MoveNext();   should eventually throw an exception (if MoveNext existed)
+		//
+		
+		bool TryMoveNext()
+		{
+			if (currentLocation == input.Length) return false;
+			
+			currentLocation++;
+			return true;
+		}
+		
 		bool TryRead(char c)
 		{
 			if (currentLocation == input.Length) return false;
@ -121,6 +207,18 @@ namespace ICSharpCode.AvalonEdit.XmlParser
				@@ -121,6 +207,18 @@ namespace ICSharpCode.AvalonEdit.XmlParser
 			}
 		}
 		
+		/// <summary> Returns true if at least one character was read </summary>
+		bool TryReadPartOf(string text)
+		{
+			if (TryPeek(text[0])) {
+				// Keep reading until character differs or we have end of file
+				foreach(char c in text) if (!TryRead(c)) break;
+				return true;
+			} else {
+				return false;
+			}
+		}
+		
 		bool TryPeek(char c)
 		{
 			if (currentLocation == input.Length) return false;
@ -135,7 +233,16 @@ namespace ICSharpCode.AvalonEdit.XmlParser
				@@ -135,7 +233,16 @@ namespace ICSharpCode.AvalonEdit.XmlParser
 			return input.Substring(currentLocation, text.Length) == text;
 		}
 		
-		bool TryMoveTo(params char[] c)
+		bool TryMoveTo(char c)
+		{
+			while(true) {
+				if (currentLocation == input.Length) return false;
+				if (input[currentLocation] == c) return true;
+				currentLocation++;
+			}
+		}
+		
+		bool TryMoveToAnyOf(params char[] c)
 		{
 			while(true) {
 				if (currentLocation == input.Length) return false;
@ -154,32 +261,21 @@ namespace ICSharpCode.AvalonEdit.XmlParser
				@@ -154,32 +261,21 @@ namespace ICSharpCode.AvalonEdit.XmlParser
 		}
 		
 		static char[] WhiteSpaceChars = new char[] {' ', '\n', '\r', '\t'};
-		static char[] WhiteSpaceAndReservedChars = new char[] {' ', '\n', '\r', '\t', '<', '=', '>', '/', ':', '?'};
-		
-		bool? IsWhiteSpace()
-		{
-			if (currentLocation == input.Length) {
-				return null;
-			} else {
-				return WhiteSpaceChars.Contains(input[currentLocation]);
-			}
-		}
+		static char[] WhiteSpaceAndReservedChars = new char[] {' ', '\n', '\r', '\t', '<', '=', '>', '/', '?'};
 		
-		bool? IsWhiteSpaceOrReserved()
+		bool TryPeekWhiteSpace()
 		{
-			if (currentLocation == input.Length) {
-				return null;
-			} else {
-				return WhiteSpaceAndReservedChars.Contains(input[currentLocation]);
-			}
+			if (currentLocation == input.Length) return false;
+			
+			return WhiteSpaceChars.Contains(input[currentLocation]);
 		}
 		
 		string ReadName()
 		{
-			Debug.Assert(HasMoreData());
+			AssertHasMoreData();
 			
 			int start = currentLocation;
-			TryMoveTo(WhiteSpaceAndReservedChars.ToArray());
+			TryMoveToAnyOf(WhiteSpaceAndReservedChars.ToArray());
 			return GetText(start, currentLocation);
 		}
 		
@ -195,7 +291,7 @@ namespace ICSharpCode.AvalonEdit.XmlParser
				@@ -195,7 +291,7 @@ namespace ICSharpCode.AvalonEdit.XmlParser
 				if (IsEndOfFile()) {
 					break;
 				} else if (TryPeek('<')) {
-					doc.AddChild(ReadElement());
+					doc.AddChild(ReadElementOrTag());
 				} else {
 					doc.AddChild(ReadCharacterData());
 				}
@ -207,9 +303,22 @@ namespace ICSharpCode.AvalonEdit.XmlParser
				@@ -207,9 +303,22 @@ namespace ICSharpCode.AvalonEdit.XmlParser
 			return doc;
 		}
 		
+		RawObject ReadElementOrTag()
+		{
+			AssertHasMoreData();
+			
+			if (TryPeek("<!") || TryPeek("</") || TryPeek("<?")) {
+				return ReadTag();
+			} else if (TryPeek('<')) {
+				return ReadElement();
+			} else {
+				throw new Exception("'<' expected");
+			}
+		}
+		
 		RawElement ReadElement()
 		{
-			Debug.Assert(HasMoreData() && TryPeek('<'));
+			AssertHasMoreData();
 			
 			RawElement element = ReadFromCache<RawElement>(currentLocation);
 			if (element != null) return element;
@ -219,27 +328,21 @@ namespace ICSharpCode.AvalonEdit.XmlParser
				@@ -219,27 +328,21 @@ namespace ICSharpCode.AvalonEdit.XmlParser
 			element.StartOffset = currentLocation;
 			// Read start tag
 			element.AddChild(ReadTag());
-			// Read content
-			if (element.StartTag.ClosingBracket == ">" &&
-			    element.StartTag.OpeningBracket != "<?" &&
-			    element.StartTag.OpeningBracket != "<!" &&
-			    element.StartTag.OpeningBracket != "<!--" )
-			{
+			Debug.Assert(element.StartTag.IsStartTag);
+			// Read content and end tag
+			if (element.StartTag.ClosingBracket == ">") {
 				while(true) {
 					if (IsEndOfFile()) {
 						break;
 					} else if (TryPeek('<')) {
-						if (TryPeek("</")) break;
-						element.AddChild(ReadElement());
+						RawObject content = ReadElementOrTag();
+						if (content is RawTag && ((RawTag)content).IsEndTag) break;
+						element.AddChild(content);
 					} else {
 						element.AddChild(ReadCharacterData());
 					}
 				}
 			}
-			// Read end tag
-			if (TryPeek("</")) {
-				element.AddChild(ReadTag());
-			}
 			element.EndOffset = currentLocation;
 			
 			LogParsed(element);
@ -247,9 +350,17 @@ namespace ICSharpCode.AvalonEdit.XmlParser
				@@ -247,9 +350,17 @@ namespace ICSharpCode.AvalonEdit.XmlParser
 			return element;
 		}
 		
+		
+		//   Start tag:  "<"  Name? (RawText+ RawAttribute)* RawText* (">" | "/>")
+		//   End tag:    "</" Name? (RawText+ RawAttribute)* RawText* ">"
+		//   P.instr.:   "<?" Name? (RawText+ RawAttribute)* RawText* "?>"
+		//   Comment:    "<!" partof("--")?     (RawText)* "-->"     (Name is always null)
+		//   CData:      "<!" partof("[CDATA[") (RawText)* "]]" ">"  (Name is always null)
+		//   DTD:        "<!" partof("DOCTYPE") (RawText)* ">"       (Name is always null)
+		
 		RawTag ReadTag()
 		{
-			Debug.Assert(HasMoreData() && TryPeek('<'));
+			AssertHasMoreData();
 			
 			RawTag tag = ReadFromCache<RawTag>(currentLocation);
 			if (tag != null) return tag;
@ -257,52 +368,52 @@ namespace ICSharpCode.AvalonEdit.XmlParser
				@@ -257,52 +368,52 @@ namespace ICSharpCode.AvalonEdit.XmlParser
 			tag = new RawTag();
 			
 			tag.StartOffset = currentLocation;
-			if (TryRead('<')) {
-				tag.OpeningBracket = "<";
-				if (TryRead('/')) {
-					tag.OpeningBracket += "/";
-				} else if (TryRead('?')) {
-					tag.OpeningBracket += "?";
-				} else if (TryRead("!--")) {
-					tag.OpeningBracket += "!--";
-				} else if (TryRead('!')) {
-					tag.OpeningBracket += "!";
-				}
-			}
-			if (HasMoreData()) {
-				tag.Name = ReadName();
-				if (TryRead(':')) {
-					tag.Namesapce = tag.Name;
+			
+			// Read the opening bracket
+			// It identifies the type of tag and parsing behavior for the rest of it
+			tag.OpeningBracket = ReadOpeningBracket();
+			
+			// Read the name
+			if (tag.IsStartTag || tag.IsEndTag || tag.IsProcessingInstruction) {
+				if (HasMoreData()) {
 					tag.Name = ReadName();
 				}
 			}
-			// Read attributes
-			while(true) {
-				if (IsWhiteSpace() == true) {
-					tag.AddChild(ReadWhiteSpace());
-				}
-				if (TryRead('>')) {
-					tag.ClosingBracket = ">";
-					break;
-				} else 	if (TryRead('/')) {
-					tag.ClosingBracket = "/";
-					if (TryRead('>')) {
-						tag.ClosingBracket += ">";
+			
+			if (tag.IsStartTag || tag.IsEndTag || tag.IsProcessingInstruction) {
+				// Read attributes for the tag
+				while(true) {
+					if (TryPeekWhiteSpace()) {
+						tag.AddChild(ReadWhiteSpace());
 					}
-					break;
-				} else 	if (TryRead('?')) {
-					tag.ClosingBracket = "?";
-					if (TryRead('>')) {
-						tag.ClosingBracket += ">";
+					string bracket;
+					if (TryReadClosingBracket(out bracket)) {
+						tag.ClosingBracket = bracket;
+						break;
 					}
-					break;
-				} 
-				if (TryPeek('<')) break;
-				if (HasMoreData()) {
-					tag.AddChild(ReadAttribulte());
-					continue;
+					if (TryPeek('<')) break;
+					if (HasMoreData()) {
+						tag.AddChild(ReadAttribulte());
+						continue;
+					}
+					break; // End of file
+				}
+			} else {
+				// Simple tag types
+				if (tag.IsComment) {
+					// TODO: Be strict only if the opening bracket is complete
+					tag.AddChildren(ReadTextUntil("-->").ToList());
+				} else if (tag.IsCData) {
+					// TODO: Be strict only if the opening bracket is complete
+					tag.AddChildren(ReadTextUntil("]]>").ToList());
+				} else if (tag.IsDocumentType) {
+					// TODO: Nested definition
+					tag.AddChildren(ReadTextUntil(">").ToList());
+				}
+				string bracket;
+				if (TryReadClosingBracket(out bracket)) {
+					tag.ClosingBracket = bracket;
 				}
-				break;
 			}
 			tag.EndOffset = currentLocation;
 			
@ -311,28 +422,77 @@ namespace ICSharpCode.AvalonEdit.XmlParser
				@@ -311,28 +422,77 @@ namespace ICSharpCode.AvalonEdit.XmlParser
 			return tag;
 		}
 		
-		RawText ReadWhiteSpace()
+		/// <summary>
+		/// Reads any of the known opening brackets
+		/// Also accepts them if they are incomplete; one character is sufficient
+		/// </summary>
+		string ReadOpeningBracket()
 		{
-			Debug.Assert(HasMoreData() && IsWhiteSpace() == true);
-			
-			RawText ws = ReadFromCache<RawText>(currentLocation);
-			if (ws != null) return ws;
-			
-			ws = new RawText();
-			
-			ws.StartOffset = currentLocation;
+			// We are using a lot of string literals so that the memory instances are shared
 			int start = currentLocation;
-			while(IsWhiteSpace() == true) currentLocation++;
-			ws.Value = GetText(start, currentLocation);
-			ws.EndOffset = currentLocation;
-			
-			parsedItems.Add(ws);
-			return ws;
+			if (TryRead('<')) {
+				if (TryRead('/')) {
+					return "</";
+				} else if (TryRead('!')) {
+					if (TryRead('-')) {
+						if (TryRead('-')) {
+							return "<!--";
+						} else {
+							return "<!-";
+						}
+					} else if (TryReadPartOf("[CDATA[")) {
+						return GetText(start, currentLocation);
+					} else if (TryReadPartOf("DOCTYPE")) {
+						return GetText(start, currentLocation);
+					} else {
+						return "<!";
+					}
+				} else if (TryRead('?')) {
+					return "<?";
+				} else {
+					return "<";
+				}
+			} else {
+				throw new Exception("'<' expected");
+			}
+		}
+		
+		/// <summary>
+		/// Reads any of the known closing brackets
+		/// Also accepts them if they are incomplete; one character is sufficient
+		/// </summary>
+		bool TryReadClosingBracket(out string bracket)
+		{
+			// We are using a lot of string literals so that the memory instances are shared
+			int start = currentLocation;
+			if (TryRead('>')) {
+				bracket = ">";
+			} else 	if (TryRead('/')) {
+				if (TryRead('>')) {
+					bracket = "/>";
+				} else {
+					bracket = "/";
+				}
+			} else 	if (TryRead('?')) {
+				if (TryRead('>')) {
+					bracket = "?>";
+				} else {
+					bracket = "?";
+				}
+			} else if (TryReadPartOf("-->")) {
+				bracket = GetText(start, currentLocation);
+			} else if (TryReadPartOf("]]>")) {
+				bracket = GetText(start, currentLocation);
+			} else {
+				bracket = null;
+				return false;
+			}
+			return true;
 		}
 		
 		RawAttribute ReadAttribulte()
 		{
-			Debug.Assert(HasMoreData());
+			AssertHasMoreData();
 			
 			RawAttribute attr = ReadFromCache<RawAttribute>(currentLocation);
 			if (attr != null) return attr;
@ -340,33 +500,23 @@ namespace ICSharpCode.AvalonEdit.XmlParser
				@@ -340,33 +500,23 @@ namespace ICSharpCode.AvalonEdit.XmlParser
 			attr = new RawAttribute();
 			
 			attr.StartOffset = currentLocation;
-			if (HasMoreData()) {
-				attr.Name = ReadName();
-				if (TryRead(':')) {
-					attr.Namesapce = attr.Name;
-					attr.Name = ReadName();
-				}
-			}
+			if (HasMoreData()) attr.Name = ReadName();
 			int checkpoint = currentLocation;
 			attr.EqualsSign = string.Empty; 
-			if (IsWhiteSpace() == true) attr.EqualsSign += ReadWhiteSpace().Value;
+			if (TryPeekWhiteSpace()) attr.EqualsSign += ReadWhiteSpace().Value;
 			if (TryRead('=')) {
 				attr.EqualsSign += "=";
-				if (IsWhiteSpace() == true) attr.EqualsSign += ReadWhiteSpace().Value;
-				if (IsWhiteSpaceOrReserved() == false) {
-					// Read attribute value
-					int start = currentLocation;
-					if (TryRead('"')) {
-						TryMoveTo('"', '<');
-						TryRead('"');
-						attr.Value = GetText(start, currentLocation);
-					} else if (TryRead('\'')) {
-						TryMoveTo('\'', '<');
-						TryRead('\'');
-						attr.Value = GetText(start, currentLocation);
-					} else {
-						attr.Value = ReadName();
-					}
+				if (TryPeekWhiteSpace()) attr.EqualsSign += ReadWhiteSpace().Value;
+				// Read attribute value
+				int start = currentLocation;
+				if (TryRead('"')) {
+					TryMoveToAnyOf('"', '<');
+					TryRead('"');
+					attr.Value = GetText(start, currentLocation);
+				} else if (TryRead('\'')) {
+					TryMoveToAnyOf('\'', '<');
+					TryRead('\'');
+					attr.Value = GetText(start, currentLocation);
 				}
 			} else {
 				attr.EqualsSign = null;
@ -378,6 +528,27 @@ namespace ICSharpCode.AvalonEdit.XmlParser
				@@ -378,6 +528,27 @@ namespace ICSharpCode.AvalonEdit.XmlParser
 			return attr;
 		}
 		
+		RawText ReadWhiteSpace()
+		{
+			AssertHasMoreData();
+			
+			RawText ws = ReadFromCache<RawText>(currentLocation);
+			if (ws != null) return ws;
+			
+			ws = new RawText();
+			
+			ws.StartOffset = currentLocation;
+			int start = currentLocation;
+			while(TryPeekWhiteSpace()) TryMoveNext();
+			ws.Value = GetText(start, currentLocation);
+			ws.EndOffset = currentLocation;
+			
+			Debug.Assert(ws.Value.Length > 0);
+			
+			parsedItems.Add(ws);
+			return ws;
+		}
+		
 		RawText ReadCharacterData()
 		{
 			Debug.Assert(HasMoreData());
@ -393,8 +564,36 @@ namespace ICSharpCode.AvalonEdit.XmlParser
				@@ -393,8 +564,36 @@ namespace ICSharpCode.AvalonEdit.XmlParser
 			charData.Value = GetText(start, currentLocation);
 			charData.EndOffset = currentLocation;
 			
+			Debug.Assert(charData.Value.Length > 0);
+			
 			parsedItems.Add(charData);
 			return charData;
 		}
+		
+		IEnumerable<RawObject> ReadTextUntil(string closingText)
+		{
+			Debug.Assert(HasMoreData());
+			
+			RawText charData = ReadFromCache<RawText>(currentLocation);
+			// TODO: How many return?  Ensure the output is same as before
+			if (charData != null) yield return charData;
+			
+			charData = new RawText();
+			
+			charData.StartOffset = currentLocation;
+			int start = currentLocation;
+			while(true) {
+				if (!TryMoveTo(closingText[0])) break; // End of file
+				if (TryPeek(closingText)) break; // Match
+				TryMoveNext();
+			}
+			charData.Value = GetText(start, currentLocation);
+			charData.EndOffset = currentLocation;
+			
+			Debug.Assert(charData.Value.Length > 0);
+			
+			parsedItems.Add(charData);
+			yield return charData;
+		}
 	}
 }