Browse Source

XML Parser: Support DTD just enough so that it is properly parsed and skipped

git-svn-id: svn://svn.sharpdevelop.net/sharpdevelop/trunk@4602 1ccf3a8d-04fe-1044-b7c0-cef0b8235c61
shortcuts
David Srbecký 16 years ago
parent
commit
af248d6973
  1. 12
      src/Libraries/AvalonEdit/ICSharpCode.AvalonEdit/XmlParser/RawObjects.cs
  2. 122
      src/Libraries/AvalonEdit/ICSharpCode.AvalonEdit/XmlParser/XmlParser.cs

12
src/Libraries/AvalonEdit/ICSharpCode.AvalonEdit/XmlParser/RawObjects.cs

@ -362,6 +362,8 @@ namespace ICSharpCode.AvalonEdit.XmlParser @@ -362,6 +362,8 @@ namespace ICSharpCode.AvalonEdit.XmlParser
/// </summary>
public class RawTag: RawContainer
{
public static readonly string[] DTDNames = new string[] {"<!DOCTYPE", "<!NOTATION", "<!ELEMENT", "<!ATTLIST", "<!ENTITY"};
public string OpeningBracket { get; set; }
public string Name { get; set; }
public string ClosingBracket { get; set; }
@ -370,9 +372,10 @@ namespace ICSharpCode.AvalonEdit.XmlParser @@ -370,9 +372,10 @@ namespace ICSharpCode.AvalonEdit.XmlParser
public bool IsStartTag { get { return OpeningBracket == "<"; } }
public bool IsEndTag { get { return OpeningBracket == "</"; } }
public bool IsProcessingInstruction { get { return OpeningBracket == "<?"; } }
public bool IsComment { get { return OpeningBracket.StartsWith("<!") && !IsDocumentType && !IsCData; } }
public bool IsDocumentType { get { return OpeningBracket.StartsWith("<!D"); } }
public bool IsCData { get { return OpeningBracket.StartsWith("<!["); } }
public bool IsComment { get { return OpeningBracket == "<!--"; } }
public bool IsCData { get { return OpeningBracket == "<![CDATA["; } }
public bool IsDocumentType { get { return DTDNames.Contains(OpeningBracket); } }
public bool IsUnknownBang { get { return OpeningBracket == "<!"; } }
public override void UpdateDataFrom(RawObject source)
{
@ -549,7 +552,8 @@ namespace ICSharpCode.AvalonEdit.XmlParser @@ -549,7 +552,8 @@ namespace ICSharpCode.AvalonEdit.XmlParser
CharacterData,
Comment,
CData,
DocumentTypeDefinition
DocumentTypeDefinition,
Other
}
/// <summary>

122
src/Libraries/AvalonEdit/ICSharpCode.AvalonEdit/XmlParser/XmlParser.cs

@ -57,12 +57,13 @@ namespace ICSharpCode.AvalonEdit.XmlParser @@ -57,12 +57,13 @@ namespace ICSharpCode.AvalonEdit.XmlParser
///
/// The precise content of RawTag depends on what it represents:
/// <![CDATA[
/// Start tag: "<" Name? (RawText+ RawAttribute)* RawText* (">" | "/>")
/// End tag: "</" Name? (RawText+ RawAttribute)* RawText* ">"
/// P.instr.: "<?" Name? (RawText+ RawAttribute)* RawText* "?>"
/// Comment: "<!" partof("--")? (RawText)* "-->" (Name is always null)
/// DTD: "<!" partof("DOCTYPE") (RawText)* ">" (Name is always null)
/// CData: "<!" partof("[CDATA[") (RawText)* "]]" ">" (Name is always null)
/// Start tag: "<" Name? (RawText+ RawAttribute)* RawText* (">" | "/>")
/// End tag: "</" Name? (RawText+ RawAttribute)* RawText* ">"
/// P.instr.: "<?" Name? (RawText+ RawAttribute)* RawText* "?>"
/// Comment: "<!--" (RawText)* "-->"
/// CData: "<![CDATA[" (RawText)* "]]" ">"
/// DTD: "<!DOCTYPE" (RawText+ RawTag)* RawText* ">" (DOCTYPE or other DTD names)
/// UnknownBang: "<!" (RawText)* ">"
/// ]]>
///
/// The type of tag can be identified by the opening bracket.
@ -412,13 +413,6 @@ namespace ICSharpCode.AvalonEdit.XmlParser @@ -412,13 +413,6 @@ namespace ICSharpCode.AvalonEdit.XmlParser
return element;
}
// Start tag: "<" Name? (RawText+ RawAttribute)* RawText* (">" | "/>")
// End tag: "</" Name? (RawText+ RawAttribute)* RawText* ">"
// P.instr.: "<?" Name? (RawText+ RawAttribute)* RawText* "?>"
// Comment: "<!" partof("--")? (RawText)* "-->" (Name is always null)
// CData: "<!" partof("[CDATA[") (RawText)* "]]" ">" (Name is always null)
// DTD: "<!" partof("DOCTYPE") (RawText)* ">" (Name is always null)
RawTag ReadTag()
{
AssertHasMoreData();
@ -432,14 +426,11 @@ namespace ICSharpCode.AvalonEdit.XmlParser @@ -432,14 +426,11 @@ namespace ICSharpCode.AvalonEdit.XmlParser
// It identifies the type of tag and parsing behavior for the rest of it
tag.OpeningBracket = ReadOpeningBracket();
// Read the name
if (tag.IsStartTag || tag.IsEndTag || tag.IsProcessingInstruction) {
// Read the name
if (HasMoreData()) {
tag.Name = ReadName();
}
}
if (tag.IsStartTag || tag.IsEndTag || tag.IsProcessingInstruction) {
// Read attributes for the tag
while(true) {
if (TryPeekWhiteSpace()) {
@ -457,23 +448,29 @@ namespace ICSharpCode.AvalonEdit.XmlParser @@ -457,23 +448,29 @@ namespace ICSharpCode.AvalonEdit.XmlParser
}
break; // End of file
}
} else {
// Simple tag types
if (tag.IsComment) {
// TODO: Be strict only if the opening bracket is complete
tag.AddChildren(ReadText(RawTextType.Comment));
} else if (tag.IsCData) {
// TODO: Be strict only if the opening bracket is complete
tag.AddChildren(ReadText(RawTextType.CData));
} else if (tag.IsDocumentType) {
// TODO: Nested definition
tag.AddChildren(ReadText(RawTextType.DocumentTypeDefinition));
} else if (tag.IsComment) {
// TODO: Backtrack if file end reached
tag.AddChildren(ReadText(RawTextType.Comment));
} else if (tag.IsCData) {
// TODO: Backtrack if file end reached
tag.AddChildren(ReadText(RawTextType.CData));
} else if (tag.IsDocumentType) {
tag.AddChildren(ReadContentOfDTD());
} else if (tag.IsUnknownBang) {
if (HasMoreData()) {
int start = currentLocation;
TryMoveToAnyOf('<', '>');
tag.AddChild(MakeText(start, currentLocation));
}
} else {
throw new Exception(string.Format("Unknown opening bracket '{0}'", tag.OpeningBracket));
}
if (tag.ClosingBracket == null) {
string bracket;
if (TryReadClosingBracket(out bracket)) {
tag.ClosingBracket = bracket;
}
if (TryReadClosingBracket(out bracket)) tag.ClosingBracket = bracket;
}
tag.EndOffset = currentLocation;
OnParsed(tag);
@ -491,22 +488,20 @@ namespace ICSharpCode.AvalonEdit.XmlParser @@ -491,22 +488,20 @@ namespace ICSharpCode.AvalonEdit.XmlParser
if (TryRead('<')) {
if (TryRead('/')) {
return "</";
} else if (TryRead('?')) {
return "<?";
} else if (TryRead('!')) {
if (TryRead('-')) {
if (TryRead('-')) {
return "<!--";
} else {
return "<!-";
}
} else if (TryReadPartOf("[CDATA[")) {
return GetText(start, currentLocation);
} else if (TryReadPartOf("DOCTYPE")) {
return GetText(start, currentLocation);
if (TryRead("--")) {
return "<!--";
} else if (TryRead("[CDATA[")) {
return "<![CDATA[";
} else {
foreach(string dtdName in RawTag.DTDNames) {
// the dtdName includes "<!"
if (TryRead(dtdName.Remove(0, 2))) return dtdName;
}
return "<!";
}
} else if (TryRead('?')) {
return "<?";
} else {
return "<";
}
@ -548,6 +543,37 @@ namespace ICSharpCode.AvalonEdit.XmlParser @@ -548,6 +543,37 @@ namespace ICSharpCode.AvalonEdit.XmlParser
return true;
}
/// <summary>
/// Reads the content of a DTD tag (everything after the opening bracket such as
/// "&lt;!DOCTYPE") just well enough to skip over it.
/// </summary>
/// <remarks>
/// Content outside a nested "[ ... ]" section is collected into RawText objects.
/// Inside a nested section every '&lt;' starts a nested tag which is read with
/// <c>ReadTag()</c>; the text preceding it is yielded first, but only when it is
/// not empty, so two directly adjacent nested tags do not produce a zero-length
/// RawText between them.
/// The method stops (without reading the character) at '&gt;' or at an
/// unexpected '&lt;', or at the end of file.
/// This is an iterator, so the input is consumed lazily as the result is enumerated.
/// </remarks>
/// <returns>The RawText and nested tag objects making up the DTD content, in document order.</returns>
IEnumerable<RawObject> ReadContentOfDTD()
{
	int start = currentLocation;
	while(true) {
		if (IsEndOfFile()) break;            // End of file
		TryMoveToNonWhiteSpace();            // Skip whitespace
		if (TryRead('\'')) TryMoveTo('\'');  // Skip single quoted string
		if (TryRead('\"')) TryMoveTo('\"');  // Skip double quoted string
		if (TryRead('[')) {                  // Start of nested infoset
			// Reading infoset
			while(true) {
				if (IsEndOfFile()) break;
				TryMoveToAnyOf('<', ']');
				if (TryPeek('<')) {
					// Two tags may follow each other directly; do not emit empty text then
					if (start != currentLocation) {
						yield return MakeText(start, currentLocation);
					}
					yield return ReadTag();
					start = currentLocation;
				}
				if (TryPeek(']')) break;
			}
		}
		TryRead(']');                        // End of nested infoset
		if (TryPeek('>')) break;             // Proper closing
		if (TryPeek('<')) break;             // Malformed XML
		TryMoveNext();                       // Skip anything else
	}
	// Remaining text after the last nested tag (or all of it when there was none)
	if (start != currentLocation) {
		yield return MakeText(start, currentLocation);
	}
}
RawAttribute ReadAttribulte()
{
AssertHasMoreData();
@ -582,6 +608,16 @@ namespace ICSharpCode.AvalonEdit.XmlParser @@ -582,6 +608,16 @@ namespace ICSharpCode.AvalonEdit.XmlParser
return attr;
}
/// <summary>
/// Creates a RawText object covering the given range of the input.
/// </summary>
/// <param name="start">Offset stored as the StartOffset of the text</param>
/// <param name="end">Offset stored as the EndOffset of the text</param>
/// <returns>New RawText whose Value is the result of GetText(start, end)</returns>
RawText MakeText(int start, int end)
{
	RawText result = new RawText();
	result.StartOffset = start;
	result.EndOffset = end;
	result.Value = GetText(start, end);
	return result;
}
const int maxEntityLenght = 12; // 6 for built-in ones
const int maxTextFragmentSize = 8;
// One and a half times maxTextFragmentSize (integer arithmetic; 12 for the value above)
const int lookAheadLenght = (3 * maxTextFragmentSize) / 2;

Loading…
Cancel
Save