mirror of https://github.com/icsharpcode/ILSpy.git
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
439 lines
14 KiB
439 lines
14 KiB
// Copyright (c) AlphaSierraPapa for the SharpDevelop Team (for details please see \doc\copyright.txt) |
|
// This code is distributed under the GNU LGPL (for details please see \doc\license.txt) |
|
|
|
using System; |
|
using System.Collections.Generic; |
|
using System.Linq; |
|
using System.Text; |
|
|
|
using ICSharpCode.AvalonEdit.Utils; |
|
|
|
namespace ICSharpCode.AvalonEdit.Xml |
|
{ |
|
class TagMatchingHeuristics |
|
{ |
|
const int maxConfigurationCount = 10; |
|
|
|
AXmlParser parser; |
|
TrackedSegmentCollection trackedSegments; |
|
string input; |
|
List<AXmlObject> tags; |
|
|
|
public TagMatchingHeuristics(AXmlParser parser, string input, List<AXmlObject> tags) |
|
{ |
|
this.parser = parser; |
|
this.trackedSegments = parser.TrackedSegments; |
|
this.input = input; |
|
this.tags = tags; |
|
} |
|
|
|
public AXmlDocument ReadDocument() |
|
{ |
|
AXmlDocument doc = new AXmlDocument() { Parser = parser }; |
|
|
|
// AXmlParser.Log("Flat stream: {0}", PrintObjects(tags)); |
|
List<AXmlObject> valid = MatchTags(tags); |
|
// AXmlParser.Log("Fixed stream: {0}", PrintObjects(valid)); |
|
IEnumerator<AXmlObject> validStream = valid.GetEnumerator(); |
|
validStream.MoveNext(); // Move to first |
|
while(true) { |
|
// End of stream? |
|
try { |
|
if (validStream.Current == null) break; |
|
} catch (InvalidCastException) { |
|
break; |
|
} |
|
doc.AddChild(ReadTextOrElement(validStream)); |
|
} |
|
|
|
if (doc.Children.Count > 0) { |
|
doc.StartOffset = doc.FirstChild.StartOffset; |
|
doc.EndOffset = doc.LastChild.EndOffset; |
|
} |
|
|
|
// Check well formed |
|
foreach(AXmlTag xmlDeclaration in doc.Children.OfType<AXmlTag>().Where(t => t.IsProcessingInstruction && string.Equals(t.Name, "xml", StringComparison.OrdinalIgnoreCase))) { |
|
if (xmlDeclaration.StartOffset != 0) |
|
TagReader.OnSyntaxError(doc, xmlDeclaration.StartOffset, xmlDeclaration.StartOffset + 5, |
|
"XML declaration must be at the start of document"); |
|
} |
|
int elemCount = doc.Children.OfType<AXmlElement>().Count(); |
|
if (elemCount == 0) |
|
TagReader.OnSyntaxError(doc, doc.EndOffset, doc.EndOffset, |
|
"Root element is missing"); |
|
if (elemCount > 1) { |
|
AXmlElement next = doc.Children.OfType<AXmlElement>().Skip(1).First(); |
|
TagReader.OnSyntaxError(doc, next.StartOffset, next.StartOffset, |
|
"Only one root element is allowed"); |
|
} |
|
foreach(AXmlTag tag in doc.Children.OfType<AXmlTag>()) { |
|
if (tag.IsCData) |
|
TagReader.OnSyntaxError(doc, tag.StartOffset, tag.EndOffset, |
|
"CDATA not allowed in document root"); |
|
} |
|
foreach(AXmlText text in doc.Children.OfType<AXmlText>()) { |
|
if (!text.ContainsOnlyWhitespace) |
|
TagReader.OnSyntaxError(doc, text.StartOffset, text.EndOffset, |
|
"Only whitespace is allowed in document root"); |
|
} |
|
|
|
|
|
AXmlParser.Log("Constructed {0}", doc); |
|
trackedSegments.AddParsedObject(doc, null); |
|
return doc; |
|
} |
|
|
|
static AXmlObject ReadSingleObject(IEnumerator<AXmlObject> objStream) |
|
{ |
|
AXmlObject obj = objStream.Current; |
|
objStream.MoveNext(); |
|
return obj; |
|
} |
|
|
|
AXmlObject ReadTextOrElement(IEnumerator<AXmlObject> objStream) |
|
{ |
|
AXmlObject curr = objStream.Current; |
|
if (curr is AXmlText || curr is AXmlElement) { |
|
return ReadSingleObject(objStream); |
|
} else { |
|
AXmlTag currTag = (AXmlTag)curr; |
|
if (currTag == StartTagPlaceholder) { |
|
return ReadElement(objStream); |
|
} else if (currTag.IsStartOrEmptyTag) { |
|
return ReadElement(objStream); |
|
} else { |
|
return ReadSingleObject(objStream); |
|
} |
|
} |
|
} |
|
|
|
AXmlElement ReadElement(IEnumerator<AXmlObject> objStream) |
|
{ |
|
AXmlElement element = new AXmlElement(); |
|
element.IsProperlyNested = true; |
|
|
|
// Read start tag |
|
AXmlTag startTag = ReadSingleObject(objStream) as AXmlTag; |
|
AXmlParser.DebugAssert(startTag != null, "Start tag expected"); |
|
AXmlParser.DebugAssert(startTag.IsStartOrEmptyTag || startTag == StartTagPlaceholder, "Start tag expected"); |
|
if (startTag == StartTagPlaceholder) { |
|
element.HasStartOrEmptyTag = false; |
|
element.IsProperlyNested = false; |
|
TagReader.OnSyntaxError(element, objStream.Current.StartOffset, objStream.Current.EndOffset, |
|
"Matching openning tag was not found"); |
|
} else { |
|
element.HasStartOrEmptyTag = true; |
|
element.AddChild(startTag); |
|
} |
|
|
|
// Read content and end tag |
|
if (startTag == StartTagPlaceholder || // Check first in case the start tag is null |
|
element.StartTag.IsStartTag) |
|
{ |
|
while(true) { |
|
AXmlTag currTag = objStream.Current as AXmlTag; // Peek |
|
if (currTag == EndTagPlaceholder) { |
|
TagReader.OnSyntaxError(element, element.LastChild.EndOffset, element.LastChild.EndOffset, |
|
"Expected '</{0}>'", element.StartTag.Name); |
|
ReadSingleObject(objStream); |
|
element.HasEndTag = false; |
|
element.IsProperlyNested = false; |
|
break; |
|
} else if (currTag != null && currTag.IsEndTag) { |
|
if (element.HasStartOrEmptyTag && currTag.Name != element.StartTag.Name) { |
|
TagReader.OnSyntaxError(element, currTag.StartOffset + 2, currTag.StartOffset + 2 + currTag.Name.Length, |
|
"Expected '{0}'. End tag must have same name as start tag.", element.StartTag.Name); |
|
} |
|
element.AddChild(ReadSingleObject(objStream)); |
|
element.HasEndTag = true; |
|
break; |
|
} |
|
AXmlObject nested = ReadTextOrElement(objStream); |
|
|
|
AXmlElement nestedAsElement = nested as AXmlElement; |
|
if (nestedAsElement != null) { |
|
if (!nestedAsElement.IsProperlyNested) |
|
element.IsProperlyNested = false; |
|
element.AddChildren(Split(nestedAsElement).ToList()); |
|
} else { |
|
element.AddChild(nested); |
|
} |
|
} |
|
} else { |
|
element.HasEndTag = false; |
|
} |
|
|
|
element.StartOffset = element.FirstChild.StartOffset; |
|
element.EndOffset = element.LastChild.EndOffset; |
|
|
|
AXmlParser.Assert(element.HasStartOrEmptyTag || element.HasEndTag, "Must have at least start or end tag"); |
|
|
|
AXmlParser.Log("Constructed {0}", element); |
|
trackedSegments.AddParsedObject(element, null); // Need all elements in cache for offset tracking |
|
return element; |
|
} |
|
|
|
IEnumerable<AXmlObject> Split(AXmlElement elem) |
|
{ |
|
int myIndention = GetIndentLevel(elem); |
|
// Has start tag and no end tag ? (other then empty-element tag) |
|
if (elem.HasStartOrEmptyTag && elem.StartTag.IsStartTag && !elem.HasEndTag && myIndention != -1) { |
|
int lastAccepted = 0; // Accept start tag |
|
while (lastAccepted + 1 < elem.Children.Count) { |
|
AXmlObject nextItem = elem.Children[lastAccepted + 1]; |
|
if (nextItem is AXmlText) { |
|
lastAccepted++; continue; // Accept |
|
} else { |
|
// Include all more indented items |
|
if (GetIndentLevel(nextItem) > myIndention) { |
|
lastAccepted++; continue; // Accept |
|
} else { |
|
break; // Reject |
|
} |
|
} |
|
} |
|
// Accepted everything? |
|
if (lastAccepted + 1 == elem.Children.Count) { |
|
yield return elem; |
|
yield break; |
|
} |
|
AXmlParser.Log("Splitting {0} - take {1} of {2} nested", elem, lastAccepted, elem.Children.Count - 1); |
|
AXmlElement topHalf = new AXmlElement(); |
|
topHalf.HasStartOrEmptyTag = elem.HasStartOrEmptyTag; |
|
topHalf.HasEndTag = elem.HasEndTag; |
|
topHalf.AddChildren(elem.Children.Take(1 + lastAccepted)); // Start tag + nested |
|
topHalf.StartOffset = topHalf.FirstChild.StartOffset; |
|
topHalf.EndOffset = topHalf.LastChild.EndOffset; |
|
TagReader.OnSyntaxError(topHalf, topHalf.LastChild.EndOffset, topHalf.LastChild.EndOffset, |
|
"Expected '</{0}>'", topHalf.StartTag.Name); |
|
|
|
AXmlParser.Log("Constructed {0}", topHalf); |
|
trackedSegments.AddParsedObject(topHalf, null); |
|
yield return topHalf; |
|
for(int i = lastAccepted + 1; i < elem.Children.Count; i++) { |
|
yield return elem.Children[i]; |
|
} |
|
} else { |
|
yield return elem; |
|
} |
|
} |
|
|
|
int GetIndentLevel(AXmlObject obj) |
|
{ |
|
int offset = obj.StartOffset - 1; |
|
int level = 0; |
|
while(true) { |
|
if (offset < 0) break; |
|
char c = input[offset]; |
|
if (c == ' ') { |
|
level++; |
|
} else if (c == '\t') { |
|
level += 4; |
|
} else if (c == '\r' || c == '\n') { |
|
break; |
|
} else { |
|
return -1; |
|
} |
|
offset--; |
|
} |
|
return level; |
|
} |
|
|
|
/// <summary> |
|
/// Stack of still unmatched start tags. |
|
/// It includes the cost and backtack information. |
|
/// </summary> |
|
class Configuration |
|
{ |
|
/// <summary> Unmatched start tags </summary> |
|
public ImmutableStack<AXmlTag> StartTags { get; set; } |
|
/// <summary> Properly nested tags </summary> |
|
public ImmutableStack<AXmlObject> Document { get; set; } |
|
/// <summary> Number of needed modificaitons to the document </summary> |
|
public int Cost { get; set; } |
|
} |
|
|
|
/// <summary> |
|
/// Dictionary which stores the cheapest configuration |
|
/// </summary> |
|
class Configurations: Dictionary<ImmutableStack<AXmlTag>, Configuration> |
|
{ |
|
public Configurations() |
|
{ |
|
} |
|
|
|
public Configurations(IEnumerable<Configuration> configs) |
|
{ |
|
foreach(Configuration config in configs) { |
|
this.Add(config); |
|
} |
|
} |
|
|
|
/// <summary> Overwrite only if cheaper </summary> |
|
public void Add(Configuration newConfig) |
|
{ |
|
Configuration oldConfig; |
|
if (this.TryGetValue(newConfig.StartTags, out oldConfig)) { |
|
if (newConfig.Cost < oldConfig.Cost) { |
|
this[newConfig.StartTags] = newConfig; |
|
} |
|
} else { |
|
base.Add(newConfig.StartTags, newConfig); |
|
} |
|
} |
|
|
|
public override string ToString() |
|
{ |
|
StringBuilder sb = new StringBuilder(); |
|
foreach(var kvp in this) { |
|
sb.Append("\n - '"); |
|
foreach(AXmlTag startTag in kvp.Value.StartTags.Reverse()) { |
|
sb.Append('<'); |
|
sb.Append(startTag.Name); |
|
sb.Append('>'); |
|
} |
|
sb.AppendFormat("' = {0}", kvp.Value.Cost); |
|
} |
|
return sb.ToString(); |
|
} |
|
} |
|
|
|
// Tags used to guide the element creation |
|
readonly AXmlTag StartTagPlaceholder = new AXmlTag(); |
|
readonly AXmlTag EndTagPlaceholder = new AXmlTag(); |
|
|
|
/// <summary> |
|
/// Add start or end tag placeholders so that the documment is properly nested |
|
/// </summary> |
|
List<AXmlObject> MatchTags(IEnumerable<AXmlObject> objs) |
|
{ |
|
Configurations configurations = new Configurations(); |
|
configurations.Add(new Configuration { |
|
StartTags = ImmutableStack<AXmlTag>.Empty, |
|
Document = ImmutableStack<AXmlObject>.Empty, |
|
Cost = 0, |
|
}); |
|
foreach(AXmlObject obj in objs) { |
|
configurations = ProcessObject(configurations, obj); |
|
} |
|
// Close any remaining start tags |
|
foreach(Configuration conifg in configurations.Values) { |
|
while(!conifg.StartTags.IsEmpty) { |
|
conifg.StartTags = conifg.StartTags.Pop(); |
|
conifg.Document = conifg.Document.Push(EndTagPlaceholder); |
|
conifg.Cost += 1; |
|
} |
|
} |
|
// AXmlParser.Log("Configurations after closing all remaining tags:" + configurations.ToString()); |
|
Configuration bestConfig = configurations.Values.OrderBy(v => v.Cost).First(); |
|
AXmlParser.Log("Best configuration has cost {0}", bestConfig.Cost); |
|
|
|
return bestConfig.Document.Reverse().ToList(); |
|
} |
|
|
|
/// <summary> Get posible configurations after considering given object </summary> |
|
Configurations ProcessObject(Configurations oldConfigs, AXmlObject obj) |
|
{ |
|
AXmlParser.Log("Processing {0}", obj); |
|
|
|
AXmlTag objAsTag = obj as AXmlTag; |
|
AXmlElement objAsElement = obj as AXmlElement; |
|
AXmlParser.DebugAssert(objAsTag != null || objAsElement != null || obj is AXmlText, obj.GetType().Name + " not expected"); |
|
if (objAsElement != null) |
|
AXmlParser.Assert(objAsElement.IsProperlyNested, "Element not properly nested"); |
|
|
|
Configurations newConfigs = new Configurations(); |
|
|
|
foreach(var kvp in oldConfigs) { |
|
Configuration oldConfig = kvp.Value; |
|
var oldStartTags = oldConfig.StartTags; |
|
var oldDocument = oldConfig.Document; |
|
int oldCost = oldConfig.Cost; |
|
|
|
if (objAsTag != null && objAsTag.IsStartTag) { |
|
newConfigs.Add(new Configuration { // Push start-tag (cost 0) |
|
StartTags = oldStartTags.Push(objAsTag), |
|
Document = oldDocument.Push(objAsTag), |
|
Cost = oldCost, |
|
}); |
|
} else if (objAsTag != null && objAsTag.IsEndTag) { |
|
newConfigs.Add(new Configuration { // Ignore (cost 1) |
|
StartTags = oldStartTags, |
|
Document = oldDocument.Push(StartTagPlaceholder).Push(objAsTag), |
|
Cost = oldCost + 1, |
|
}); |
|
if (!oldStartTags.IsEmpty && oldStartTags.Peek().Name != objAsTag.Name) { |
|
newConfigs.Add(new Configuration { // Pop 1 item (cost 1) - not mathcing |
|
StartTags = oldStartTags.Pop(), |
|
Document = oldDocument.Push(objAsTag), |
|
Cost = oldCost + 1, |
|
}); |
|
} |
|
int popedCount = 0; |
|
var startTags = oldStartTags; |
|
var doc = oldDocument; |
|
foreach(AXmlTag poped in oldStartTags) { |
|
popedCount++; |
|
if (poped.Name == objAsTag.Name) { |
|
newConfigs.Add(new Configuration { // Pop 'x' items (cost x-1) - last one is matching |
|
StartTags = startTags.Pop(), |
|
Document = doc.Push(objAsTag), |
|
Cost = oldCost + popedCount - 1, |
|
}); |
|
} |
|
startTags = startTags.Pop(); |
|
doc = doc.Push(EndTagPlaceholder); |
|
} |
|
} else { |
|
// Empty tag or other tag type or text or properly nested element |
|
newConfigs.Add(new Configuration { // Ignore (cost 0) |
|
StartTags = oldStartTags, |
|
Document = oldDocument.Push(obj), |
|
Cost = oldCost, |
|
}); |
|
} |
|
} |
|
|
|
// Log("New configurations:" + newConfigs.ToString()); |
|
|
|
Configurations bestNewConfigurations = new Configurations( |
|
newConfigs.Values.OrderBy(v => v.Cost).Take(maxConfigurationCount) |
|
); |
|
|
|
// AXmlParser.Log("Best new configurations:" + bestNewConfigurations.ToString()); |
|
|
|
return bestNewConfigurations; |
|
} |
|
|
|
#region Helper methods |
|
/* |
|
string PrintObjects(IEnumerable<AXmlObject> objs) |
|
{ |
|
StringBuilder sb = new StringBuilder(); |
|
foreach(AXmlObject obj in objs) { |
|
if (obj is AXmlTag) { |
|
if (obj == StartTagPlaceholder) { |
|
sb.Append("#StartTag#"); |
|
} else if (obj == EndTagPlaceholder) { |
|
sb.Append("#EndTag#"); |
|
} else { |
|
sb.Append(((AXmlTag)obj).OpeningBracket); |
|
sb.Append(((AXmlTag)obj).Name); |
|
sb.Append(((AXmlTag)obj).ClosingBracket); |
|
} |
|
} else if (obj is AXmlElement) { |
|
sb.Append('['); |
|
sb.Append(PrintObjects(((AXmlElement)obj).Children)); |
|
sb.Append(']'); |
|
} else if (obj is AXmlText) { |
|
sb.Append('~'); |
|
} else { |
|
throw new InternalException("Should not be here: " + obj); |
|
} |
|
} |
|
return sb.ToString(); |
|
} |
|
*/ |
|
#endregion |
|
} |
|
}
|
|
|