// Copyright (c) AlphaSierraPapa for the SharpDevelop Team (for details please see \doc\copyright.txt)
// This code is distributed under the GNU LGPL (for details please see \doc\license.txt)

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
using System.Linq;
using System.Threading;

using ICSharpCode.AvalonEdit.Document;

namespace ICSharpCode.AvalonEdit.Xml
{
    /// <summary>
    /// Creates object tree from XML document.
    /// </summary>
    /// <remarks>
    /// The created tree fully describes the document and thus the original XML file can be
    /// exactly reproduced.
    ///
    /// Any further parses will reparse only the changed parts and the existing tree will
    /// be updated with the changes.  The user can add event handlers to be notified of
    /// the changes.  The parser tries to minimize the number of changes to the tree.
    /// (for example, it will add a single child at the start of collection rather than
    /// clearing the collection and adding new children)
    ///
    /// The object tree consists of following types:
    ///   RawObject - Abstract base class for all types
    ///     RawContainer - Abstract base class for all types that can contain child nodes
    ///       RawDocument - The root object of the XML document
    ///       RawElement - Logical grouping of other nodes together.  The first child is always the start tag.
    ///       RawTag - Represents any markup starting with "&lt;" and (hopefully) ending with "&gt;"
    ///     RawAttribute - Name-value pair in a tag
    ///     RawText - Whitespace or character data
    ///
    /// NOTE(review): the "Raw*" names appear to predate the "AXml*" naming used by the code
    /// in this file (e.g. AXmlDocument); presumably RawX corresponds to AXmlX - confirm.
    ///
    /// For example, see the following XML and the produced object tree:
    /// <![CDATA[
    ///   <!-- My favourite quote -->
    ///   <quote author="Albert Einstein">
    ///     Make everything as simple as possible, but not simpler.
    ///   </quote>
    ///
    ///   RawDocument
    ///     RawTag "<!--" "-->"
    ///       RawText " My favourite quote "
    ///     RawElement
    ///       RawTag "<" "quote" ">"
    ///         RawText " "
    ///         RawAttribute 'author="Albert Einstein"'
    ///       RawText "\n Make everything as simple as possible, but not simpler.\n"
    ///       RawTag "</" "quote" ">"
    /// ]]>
    ///
    /// The precise content of RawTag depends on what it represents:
    /// <![CDATA[
    ///   Start tag:   "<"  Name?  (RawText+ RawAttribute)* RawText* (">" | "/>")
    ///   End tag:     "</" Name?  RawText* ">"
    ///   P.instr.:    "<?" Name?  (RawText)* "?>"
    ///   Comment:     "<!--" (RawText)* "-->"
    ///   CData:       "<![CDATA[" (RawText)* "]]" ">"
    ///   DTD:         "<!DOCTYPE" (RawText+ RawTag)* RawText* ">"    (DOCTYPE or other DTD names)
    ///   UnknownBang: "<!" (RawText)* ">"
    /// ]]>
    ///
    /// The type of tag can be identified by the opening bracket.
    /// There are helper properties in the RawTag class to identify the type, exactly
    /// one of the properties will be true.
    ///
    /// The closing bracket may be missing or may be different for malformed XML.
    ///
    /// Note that there can always be multiple consecutive RawText nodes.
    /// This is to ensure that individual texts are not too long.
    ///
    /// XML Spec:  http://www.w3.org/TR/xml/
    /// XML EBNF:  http://www.jelks.nu/XML/xmlebnf.html
    ///
    /// Internals:
    ///
    /// - "Try" methods can silently fail by returning false.
    /// - MoveTo methods do not move if they are already at the given target.
    /// - If methods return some object, it must be non-empty.  It is up to the caller to ensure
    ///   the context is appropriate for reading.
    /// </remarks>
    public class AXmlParser
    {
        // The long-lived document instance handed out to callers; Parse() updates it in place.
        AXmlDocument userDocument;

        /// <summary>Segment tracker that Parse() updates/invalidates before re-reading tags.</summary>
        internal TrackedSegmentCollection TrackedSegments { get; private set; }

        /// <summary>
        /// Generate syntax error when seeing entity reference other than the built-in ones.
        /// Reset to <c>true</c> whenever the parser is constructed or cleared.
        /// </summary>
        public bool UnknownEntityReferenceIsError { get; set; }

        /// <summary>Create new parser</summary>
        public AXmlParser()
        {
            this.Lock = new ReaderWriterLockSlim(LockRecursionPolicy.SupportsRecursion);
            ClearInternal();
        }

        /// <summary>Throws exception if condition is false (active in all build configurations)</summary>
        /// <param name="condition">The condition that is expected to hold.</param>
        /// <param name="message">Text appended to "Assertion failed: " in the exception message.</param>
        internal static void Assert(bool condition, string message)
        {
            if (!condition) {
                throw new InternalException("Assertion failed: " + message);
            }
        }

        /// <summary>
        /// Throws exception if condition is false.
        /// Calls are compiled only when the DEBUG symbol is defined.
        /// </summary>
        /// <param name="condition">The condition that is expected to hold.</param>
        /// <param name="message">Text appended to "Assertion failed: " in the exception message.</param>
        [Conditional("DEBUG")]
        internal static void DebugAssert(bool condition, string message)
        {
            if (!condition) {
                throw new InternalException("Assertion failed: " + message);
            }
        }

        /// <summary>
        /// Debug logging hook.  Calls are compiled only when the DEBUG symbol is defined;
        /// the actual output statement is currently commented out, so this does nothing.
        /// </summary>
        /// <param name="text">Format string of the message.</param>
        /// <param name="pars">Format arguments for <paramref name="text"/>.</param>
        [Conditional("DEBUG")]
        internal static void Log(string text, params object[] pars)
        {
            //System.Diagnostics.Debug.WriteLine(string.Format(CultureInfo.InvariantCulture, "XML: " + text, pars));
        }

        /// <summary>
        /// Incrementally parse the given text.
        /// You have to hold the write lock.
        /// </summary>
        /// <param name="input">
        /// The full XML text of the new document.
        /// </param>
        /// <param name="changesSinceLastParse">
        /// Changes since last parse.  Null will cause full reparse.
        /// </param>
        /// <returns>The parser's document instance, updated in place to match the parsed input.</returns>
        /// <exception cref="InvalidOperationException">No write lock is held by the current thread.</exception>
        /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
        // NOTE(review): the element type of changesSinceLastParse appears to have been lost
        // (presumably IEnumerable<DocumentChangeEventArgs> from ICSharpCode.AvalonEdit.Document);
        // confirm against TrackedSegmentCollection.UpdateOffsetsAndInvalidate.
        public AXmlDocument Parse(string input, IEnumerable changesSinceLastParse)
        {
            if (!Lock.IsWriteLockHeld) {
                throw new InvalidOperationException("Lock needed!");
            }
            // Reject null text before any parser state is touched, so a bad call
            // cannot leave the tracked segments invalidated without a matching reparse.
            if (input == null) {
                throw new ArgumentNullException("input");
            }

            // Use changes to invalidate cache
            if (changesSinceLastParse != null) {
                this.TrackedSegments.UpdateOffsetsAndInvalidate(changesSinceLastParse);
            } else {
                this.TrackedSegments.InvalidateAll();
            }

            // Read the flat tag list first, then match the tags up into a document tree.
            TagReader tagReader = new TagReader(this, input);
            var tags = tagReader.ReadAllTags();
            AXmlDocument parsedDocument = new TagMatchingHeuristics(this, input, tags).ReadDocument();
            tagReader.PrintStringCacheStats();

            // Merge the freshly parsed tree into the document instance that callers hold.
            AXmlParser.Log("Updating main DOM tree...");
            userDocument.UpdateTreeFrom(parsedDocument);
            userDocument.DebugCheckConsistency(true);

            // Sanity check: after the merge both trees must contain the same number of nodes.
            int updatedCount = userDocument.GetSelfAndAllChildren().Count();
            int parsedCount = parsedDocument.GetSelfAndAllChildren().Count();
            Assert(updatedCount == parsedCount, "Parsed document and updated document have different number of children");

            return userDocument;
        }

        /// <summary>
        /// Makes calls to Parse() thread-safe. Use Lock everywhere Parse() is called.
        /// The lock is created with <see cref="LockRecursionPolicy.SupportsRecursion"/>.
        /// </summary>
        public ReaderWriterLockSlim Lock { get; private set; }

        /// <summary>
        /// Returns the last cached version of the document.
        /// </summary>
        /// <exception cref="InvalidOperationException">
        /// The current thread holds none of the read, upgradeable read or write locks.
        /// </exception>
        public AXmlDocument LastDocument
        {
            get {
                // A thread in upgradeable-read or write mode also excludes concurrent writers,
                // so it may read the document without additionally entering read mode.
                bool mayRead = Lock.IsReadLockHeld
                    || Lock.IsUpgradeableReadLockHeld
                    || Lock.IsWriteLockHeld;
                if (!mayRead) {
                    throw new InvalidOperationException("Read lock needed!");
                }
                return userDocument;
            }
        }

        /// <summary>
        /// Clears the parser data.
        /// </summary>
        /// <exception cref="InvalidOperationException">No write lock is held by the current thread.</exception>
        public void Clear()
        {
            if (!Lock.IsWriteLockHeld) {
                throw new InvalidOperationException("Write lock needed!");
            }
            ClearInternal();
        }

        /// <summary>
        /// Resets the parser to its initial state without checking the lock:
        /// restores the default option value, replaces the segment tracker and
        /// creates a new, empty user document.
        /// </summary>
        void ClearInternal()
        {
            this.UnknownEntityReferenceIsError = true;
            this.TrackedSegments = new TrackedSegmentCollection();

            // The root document refers to itself as its owning document.
            this.userDocument = new AXmlDocument() { Parser = this };
            this.userDocument.Document = this.userDocument;

            // Track the document
            this.TrackedSegments.AddParsedObject(this.userDocument, null);
            this.userDocument.IsCached = false;
        }
    }
}