Browse Source

Improved C# lexer performance.

git-svn-id: svn://svn.sharpdevelop.net/sharpdevelop/trunk@907 1ccf3a8d-04fe-1044-b7c0-cef0b8235c61
shortcuts
Daniel Grunwald 20 years ago
parent
commit
c24a108bb4
  1. 6
      src/Libraries/NRefactory/Project/NRefactory.csproj
  2. 39
      src/Libraries/NRefactory/Project/Src/Lexer/AbstractLexer.cs
  3. 147
      src/Libraries/NRefactory/Project/Src/Lexer/CSharp/Lexer.cs
  4. 9
      src/Libraries/NRefactory/Project/Src/Lexer/ILexer.cs
  5. 2
      src/Libraries/NRefactory/Project/Src/Lexer/Special/SpecialTracker.cs
  6. 1
      src/Libraries/NRefactory/Project/Src/Parser/AbstractParser.cs
  7. 2044
      src/Libraries/NRefactory/Project/Src/Parser/CSharp/Parser.cs
  8. 4
      src/Libraries/NRefactory/Project/Src/Parser/CSharp/cs.ATG
  9. 2081
      src/Libraries/NRefactory/Project/Src/Parser/VBNet/Parser.cs
  10. 2
      src/Libraries/NRefactory/Project/Src/Parser/VBNet/VBNET.ATG
  11. 42
      src/Libraries/NRefactory/Test/Lexer/CSharp/CustomLexerTests.cs
  12. 17
      src/Libraries/NRefactory/Test/Lexer/CSharp/NumberLexerTest.cs
  13. 1
      src/Libraries/NRefactory/Test/Lexer/VBNet/LiteralsTests.cs

6
src/Libraries/NRefactory/Project/NRefactory.csproj

@ -15,6 +15,12 @@
<OutputType>Library</OutputType> <OutputType>Library</OutputType>
<SignAssembly>true</SignAssembly> <SignAssembly>true</SignAssembly>
<AssemblyOriginatorKeyFile>Resources\ICSharpCode.NRefactory.snk</AssemblyOriginatorKeyFile> <AssemblyOriginatorKeyFile>Resources\ICSharpCode.NRefactory.snk</AssemblyOriginatorKeyFile>
<AllowUnsafeBlocks>True</AllowUnsafeBlocks>
<RegisterForComInterop>False</RegisterForComInterop>
<GenerateSerializationAssemblies>Auto</GenerateSerializationAssemblies>
<BaseAddress>4194304</BaseAddress>
<PlatformTarget>AnyCPU</PlatformTarget>
<FileAlignment>4096</FileAlignment>
</PropertyGroup> </PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "> <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
<Optimize>False</Optimize> <Optimize>False</Optimize>

39
src/Libraries/NRefactory/Project/Src/Lexer/AbstractLexer.cs

@ -37,6 +37,17 @@ namespace ICSharpCode.NRefactory.Parser
// used for the original value of strings (with escape sequences). // used for the original value of strings (with escape sequences).
protected StringBuilder originalValue = new StringBuilder(); protected StringBuilder originalValue = new StringBuilder();
// Backing field for SkipAllComments. It is protected so that derived lexers can test it directly.
protected bool skipAllComments = false;
/// <summary>
/// Gets or sets whether the lexer skips comments instead of passing them
/// to the special tracker.
/// </summary>
public bool SkipAllComments {
get {
return skipAllComments;
}
set {
skipAllComments = value;
}
}
protected int Line { protected int Line {
get { get {
return line; return line;
@ -170,7 +181,7 @@ namespace ICSharpCode.NRefactory.Parser
if (curToken == null) { if (curToken == null) {
curToken = Next(); curToken = Next();
specialTracker.InformToken(curToken.kind); specialTracker.InformToken(curToken.kind);
// Console.WriteLine(ICSharpCode.NRefactory.Parser.CSharp.Tokens.GetTokenString(curToken.kind) + " -- " + curToken.val + "(" + curToken.kind + ")"); //Console.WriteLine(ICSharpCode.NRefactory.Parser.CSharp.Tokens.GetTokenString(curToken.kind) + " -- " + curToken.val + "(" + curToken.kind + ")");
return curToken; return curToken;
} }
@ -184,7 +195,7 @@ namespace ICSharpCode.NRefactory.Parser
} }
curToken = curToken.next; curToken = curToken.next;
// Console.WriteLine(ICSharpCode.NRefactory.Parser.CSharp.Tokens.GetTokenString(curToken.kind) + " -- " + curToken.val + "(" + curToken.kind + ")"); //Console.WriteLine(ICSharpCode.NRefactory.Parser.CSharp.Tokens.GetTokenString(curToken.kind) + " -- " + curToken.val + "(" + curToken.kind + ")");
return curToken; return curToken;
} }
@ -201,9 +212,17 @@ namespace ICSharpCode.NRefactory.Parser
throw new NotSupportedException(); throw new NotSupportedException();
} }
protected bool IsIdentifierPart(char ch) protected bool IsIdentifierPart(int ch)
{ {
return Char.IsLetterOrDigit(ch) || ch == '_'; // char.IsLetter is slow, so optimize for raw ASCII
if (ch < 48) return false; // 48 = '0'
if (ch <= 57) return true; // 57 = '9'
if (ch < 65) return false; // 65 = 'A'
if (ch <= 90) return true; // 90 = 'Z'
if (ch == 95) return true; // 95 = '_'
if (ch < 97) return false; // 97 = 'a'
if (ch <= 122) return true; // 122 = 'z'
return char.IsLetter((char)ch); // accept unicode letters
} }
protected bool IsHex(char digit) protected bool IsHex(char digit)
@ -250,6 +269,16 @@ namespace ICSharpCode.NRefactory.Parser
return false; return false;
} }
/// <summary>
/// Reads and discards characters from the reader until HandleLineEnd reports
/// a line end for the character just read, or until the end of input is reached.
/// </summary>
protected void SkipToEOL()
{
	for (;;) {
		int c = reader.Read();
		// -1 signals end of input; otherwise stop as soon as a line end is handled.
		if (c == -1 || HandleLineEnd((char)c)) {
			return;
		}
	}
}
protected string ReadToEOL() protected string ReadToEOL()
{ {
sb.Length = 0; sb.Length = 0;
@ -259,7 +288,7 @@ namespace ICSharpCode.NRefactory.Parser
// Return read string, if EOL is reached // Return read string, if EOL is reached
if (HandleLineEnd(ch)) { if (HandleLineEnd(ch)) {
return sb.ToString();; return sb.ToString();
} }
sb.Append(ch); sb.Append(ch);

147
src/Libraries/NRefactory/Project/Src/Lexer/CSharp/Lexer.cs

@ -27,6 +27,8 @@ namespace ICSharpCode.NRefactory.Parser.CSharp
int nextChar; int nextChar;
while ((nextChar = ReaderRead()) != -1) { while ((nextChar = ReaderRead()) != -1) {
char ch = (char)nextChar; char ch = (char)nextChar;
if (ch == ' ' || ch == '\t')
continue;
if (Char.IsWhiteSpace(ch)) { if (Char.IsWhiteSpace(ch)) {
HandleLineEnd(ch); HandleLineEnd(ch);
@ -112,17 +114,17 @@ namespace ICSharpCode.NRefactory.Parser.CSharp
string ReadIdent(char ch) string ReadIdent(char ch)
{ {
int peek;
int curPos = 1; int curPos = 1;
identBuffer[0] = ch; identBuffer[0] = ch;
int peek; while (IsIdentifierPart(peek = ReaderPeek())) {
while ((peek = ReaderPeek()) != -1 && (Char.IsLetterOrDigit(ch = (char)peek) || ch == '_')) {
ReaderRead(); ReaderRead();
if (curPos < MAX_IDENTIFIER_LENGTH) { if (curPos < MAX_IDENTIFIER_LENGTH) {
identBuffer[curPos++] = ch; identBuffer[curPos++] = (char)peek;
} else { } else {
errors.Error(Line, Col, String.Format("Identifier too long")); errors.Error(Line, Col, String.Format("Identifier too long"));
while ((peek = ReaderPeek()) != -1 && (Char.IsLetterOrDigit(ch = (char)peek) || ch == '_')) { while (IsIdentifierPart(ReaderPeek())) {
ReaderRead(); ReaderRead();
} }
break; break;
@ -160,7 +162,7 @@ namespace ICSharpCode.NRefactory.Parser.CSharp
ReaderRead(); // skip 'x' ReaderRead(); // skip 'x'
sb.Length = 0; // Remove '0' from 0x prefix from the stringvalue sb.Length = 0; // Remove '0' from 0x prefix from the stringvalue
while (IsHex((char)ReaderPeek())) { while (IsHex((char)ReaderPeek())) {
sb.Append(Char.ToUpper((char)ReaderRead(), CultureInfo.InvariantCulture)); sb.Append((char)ReaderRead());
} }
if (sb.Length == 0) { if (sb.Length == 0) {
sb.Append('0'); // dummy value to prevent exception sb.Append('0'); // dummy value to prevent exception
@ -176,7 +178,7 @@ namespace ICSharpCode.NRefactory.Parser.CSharp
peek = (char)ReaderPeek(); peek = (char)ReaderPeek();
} }
Token nextToken = null; // if we accedently read a 'dot' Token nextToken = null; // if we accidentally read a 'dot'
if (peek == '.') { // read floating point number if (peek == '.') { // read floating point number
ReaderRead(); ReaderRead();
peek = (char)ReaderPeek(); peek = (char)ReaderPeek();
@ -258,7 +260,7 @@ namespace ICSharpCode.NRefactory.Parser.CSharp
} }
if (isdecimal) { if (isdecimal) {
try { try {
return new Token(Tokens.Literal, x, y, stringValue, Decimal.Parse(digit, CultureInfo.InvariantCulture)); return new Token(Tokens.Literal, x, y, stringValue, Decimal.Parse(digit, NumberStyles.Any, CultureInfo.InvariantCulture));
} catch (Exception) { } catch (Exception) {
errors.Error(y, x, String.Format("Can't parse decimal {0}", digit)); errors.Error(y, x, String.Format("Can't parse decimal {0}", digit));
return new Token(Tokens.Literal, x, y, stringValue, 0m); return new Token(Tokens.Literal, x, y, stringValue, 0m);
@ -702,8 +704,11 @@ namespace ICSharpCode.NRefactory.Parser.CSharp
string ReadCommentToEOL() string ReadCommentToEOL()
{ {
if (specialCommentHash == null) {
return ReadToEOL();
}
sb.Length = 0; sb.Length = 0;
StringBuilder curWord = specialCommentHash != null ? new StringBuilder() : null; StringBuilder curWord = new StringBuilder();
int nextChar; int nextChar;
while ((nextChar = ReaderRead()) != -1) { while ((nextChar = ReaderRead()) != -1) {
@ -714,19 +719,17 @@ namespace ICSharpCode.NRefactory.Parser.CSharp
} }
sb.Append(ch); sb.Append(ch);
if (specialCommentHash != null) { if (IsIdentifierPart(nextChar)) {
if (Char.IsLetter(ch)) { curWord.Append(ch);
curWord.Append(ch); } else {
} else { string tag = curWord.ToString();
string tag = curWord.ToString(); curWord.Length = 0;
curWord.Length = 0; if (specialCommentHash.ContainsKey(tag)) {
if (specialCommentHash.ContainsKey(tag)) { Point p = new Point(Col, Line);
Point p = new Point(Col, Line); string comment = ch + ReadToEOL();
string comment = ch + ReadToEOL(); tagComments.Add(new TagComment(tag, comment, p, new Point(Col, Line)));
tagComments.Add(new TagComment(tag, comment, p, new Point(Col, Line))); sb.Append(comment);
sb.Append(comment); break;
break;
}
} }
} }
} }
@ -735,32 +738,46 @@ namespace ICSharpCode.NRefactory.Parser.CSharp
void ReadSingleLineComment(CommentType commentType) void ReadSingleLineComment(CommentType commentType)
{ {
specialTracker.StartComment(commentType, new Point(Col, Line)); if (skipAllComments) {
specialTracker.AddString(ReadCommentToEOL()); SkipToEOL();
specialTracker.FinishComment(new Point(Col, Line)); } else {
specialTracker.StartComment(commentType, new Point(Col, Line));
specialTracker.AddString(ReadCommentToEOL());
specialTracker.FinishComment(new Point(Col, Line));
}
} }
void ReadMultiLineComment() void ReadMultiLineComment()
{ {
specialTracker.StartComment(CommentType.Block, new Point(Col, Line));
int nextChar; int nextChar;
while ((nextChar = ReaderRead()) != -1) { if (skipAllComments) {
char ch = (char)nextChar; while ((nextChar = ReaderRead()) != -1) {
char ch = (char)nextChar;
if (HandleLineEnd(ch)) { if (ch == '*' && ReaderPeek() == '/') {
specialTracker.AddChar('\n'); ReaderRead();
continue; return;
}
} }
} else {
// End of multiline comment reached ? specialTracker.StartComment(CommentType.Block, new Point(Col, Line));
if (ch == '*' && ReaderPeek() == '/') { while ((nextChar = ReaderRead()) != -1) {
ReaderRead(); char ch = (char)nextChar;
specialTracker.FinishComment(new Point(Col, Line));
return; if (HandleLineEnd(ch)) {
specialTracker.AddChar('\n');
continue;
}
// End of multiline comment reached ?
if (ch == '*' && ReaderPeek() == '/') {
ReaderRead();
specialTracker.FinishComment(new Point(Col, Line));
return;
}
specialTracker.AddChar(ch);
} }
specialTracker.AddChar(ch); specialTracker.FinishComment(new Point(Col, Line));
} }
specialTracker.FinishComment(new Point(Col, Line));
// Reached EOF before end of multiline comment. // Reached EOF before end of multiline comment.
errors.Error(Line, Col, String.Format("Reached EOF before the end of a multiline comment")); errors.Error(Line, Col, String.Format("Reached EOF before the end of a multiline comment"));
} }
@ -774,16 +791,58 @@ namespace ICSharpCode.NRefactory.Parser.CSharp
public override void SkipCurrentBlock() public override void SkipCurrentBlock()
{ {
int braceCount = 0; int braceCount = 0;
Token t; while (curToken != null) {
while ((t = LookAhead).kind != Tokens.EOF) { if (curToken.kind == Tokens.OpenCurlyBrace) {
if (t.kind == Tokens.OpenCurlyBrace) {
++braceCount; ++braceCount;
} else if (t.kind == Tokens.CloseCurlyBrace) { } else if (curToken.kind == Tokens.CloseCurlyBrace) {
if (--braceCount < 0) if (--braceCount < 0)
return; return;
} }
NextToken(); lastToken = curToken;
curToken = curToken.next;
}
int nextChar;
while ((nextChar = ReaderRead()) != -1) {
switch (nextChar) {
case '{':
braceCount++;
break;
case '}':
if (--braceCount < 0) {
curToken = new Token(Tokens.CloseCurlyBrace, Col, Line);
return;
}
break;
case '/':
int peek = ReaderPeek();
if (peek == '/' || peek == '*') {
ReadComment();
}
break;
case '#':
SkipToEOL();
break;
case '"':
ReadString();
break;
case '\'':
ReadChar();
break;
case '\r':
case '\n':
HandleLineEnd((char)nextChar);
break;
case '@':
int next = ReaderRead();
if (next == -1) {
errors.Error(Line, Col, String.Format("EOF after @"));
} else if (next == '"') {
ReadVerbatimString();
}
break;
}
} }
curToken = new Token(Tokens.EOF, Col, Line);
} }
} }
} }

9
src/Libraries/NRefactory/Project/Src/Lexer/ILexer.cs

@ -42,6 +42,15 @@ namespace ICSharpCode.NRefactory.Parser
set; set;
} }
/// <summary>
/// Gets or sets whether the lexer skips comments without adding them to the
/// special tracker. Set this property to true to improve lexing performance
/// when the comment contents are not needed.
/// </summary>
bool SkipAllComments {
get;
set;
}
/// <summary> /// <summary>
/// Returns the comments that had been read and containing tag key words. /// Returns the comments that had been read and containing tag key words.
/// </summary> /// </summary>

2
src/Libraries/NRefactory/Project/Src/Lexer/Special/SpecialTracker.cs

@ -57,7 +57,7 @@ namespace ICSharpCode.NRefactory.Parser
{ {
this.currentCommentType = commentType; this.currentCommentType = commentType;
this.startPosition = startPosition; this.startPosition = startPosition;
this.sb.Length = 0; this.sb.Length = 0;
} }
public void AddChar(char c) public void AddChar(char c)

1
src/Libraries/NRefactory/Project/Src/Parser/AbstractParser.cs

@ -59,7 +59,6 @@ namespace ICSharpCode.NRefactory.Parser
this.errors = lexer.Errors; this.errors = lexer.Errors;
this.lexer = lexer; this.lexer = lexer;
errors.SynErr = new ErrorCodeProc(SynErr); errors.SynErr = new ErrorCodeProc(SynErr);
lexer.NextToken();
} }
public abstract void Parse(); public abstract void Parse();

2044
src/Libraries/NRefactory/Project/Src/Parser/CSharp/Parser.cs

File diff suppressed because it is too large Load Diff

4
src/Libraries/NRefactory/Project/Src/Parser/CSharp/cs.ATG

@ -44,6 +44,7 @@ public void Error(string s)
public override Expression ParseExpression() public override Expression ParseExpression()
{ {
lexer.NextToken();
Expression expr; Expression expr;
Expr(out expr); Expr(out expr);
return expr; return expr;
@ -648,7 +649,8 @@ PRODUCTIONS
/*--- compilation unit: */ /*--- compilation unit: */
CS CS
(. compilationUnit = new CompilationUnit(); .) (. lexer.NextToken(); /* get the first token */
compilationUnit = new CompilationUnit(); .)
= =
{ UsingDirective } { UsingDirective }
{ IF (IsGlobalAttrTarget()) GlobalAttributeSection } { IF (IsGlobalAttrTarget()) GlobalAttributeSection }

2081
src/Libraries/NRefactory/Project/Src/Parser/VBNet/Parser.cs

File diff suppressed because it is too large Load Diff

2
src/Libraries/NRefactory/Project/Src/Parser/VBNet/VBNET.ATG

@ -62,6 +62,7 @@ public void Error(string s)
public override Expression ParseExpression() public override Expression ParseExpression()
{ {
lexer.NextToken();
Expression expr; Expression expr;
Expr(out expr); Expr(out expr);
return expr; return expr;
@ -476,6 +477,7 @@ PRODUCTIONS
VBNET VBNET
(. (.
lexer.NextToken(); // get the first token
compilationUnit = new CompilationUnit(); compilationUnit = new CompilationUnit();
withStatements = new Stack(); withStatements = new Stack();
.) = .) =

42
src/Libraries/NRefactory/Test/Lexer/CSharp/CustomLexerTests.cs

@ -32,6 +32,15 @@ namespace ICSharpCode.NRefactory.Tests.Lexer.CSharp
Assert.AreEqual(Tokens.EOF, lexer.NextToken().kind); Assert.AreEqual(Tokens.EOF, lexer.NextToken().kind);
} }
/// <summary>
/// Checks that an identifier mixing lower-case letters, an underscore, an
/// upper-case letter and digits is lexed as one Identifier token whose value
/// is the complete source text.
/// </summary>
[Test]
public void TestIdentifier()
{
	const string source = "a_Bc05";
	ILexer lexer = GenerateLexer(new StringReader(source));
	Token token = lexer.NextToken();
	Assert.AreEqual(Tokens.Identifier, token.kind);
	Assert.AreEqual(source, token.val);
}
[Test] [Test]
public void TestSkippedEmptyBlock() public void TestSkippedEmptyBlock()
{ {
@ -55,5 +64,38 @@ namespace ICSharpCode.NRefactory.Tests.Lexer.CSharp
Assert.AreEqual(Tokens.Plus, lexer.NextToken().kind); Assert.AreEqual(Tokens.Plus, lexer.NextToken().kind);
Assert.AreEqual(Tokens.EOF, lexer.NextToken().kind); Assert.AreEqual(Tokens.EOF, lexer.NextToken().kind);
} }
// Checks SkipCurrentBlock when tokens inside the block have already been peeked.
// The block contains a '}' inside a string literal, '}' characters inside a line
// comment, and a nested { } pair; none of these may end the skipped block early.
[Test]
public void TestSkippedNonEmptyBlockWithPeek()
{
ILexer lexer = GenerateLexer(new StringReader("{ TestMethod(\"}\"); // }}}\n" +
"while(1) {break;} }+"));
Assert.AreEqual(Tokens.OpenCurlyBrace, lexer.NextToken().kind);
// Advance one token into the block, then peek three tokens ahead before skipping.
lexer.NextToken();
lexer.StartPeek();
lexer.Peek();
lexer.Peek();
lexer.Peek();
lexer.SkipCurrentBlock();
// After skipping, the current token must be the brace that closes the outer block.
Assert.AreEqual(Tokens.CloseCurlyBrace, lexer.LookAhead.kind);
Assert.AreEqual(Tokens.Plus, lexer.NextToken().kind);
Assert.AreEqual(Tokens.EOF, lexer.NextToken().kind);
}
// Checks SkipCurrentBlock on an empty block ("{}") after peeking ahead.
// Lexing continues past the end of the block via Peek before the skip is requested.
[Test]
public void TestSkippedEmptyBlockWithPeek()
{
ILexer lexer = GenerateLexer(new StringReader("{}+"));
Assert.AreEqual(Tokens.OpenCurlyBrace, lexer.NextToken().kind);
// Second NextToken call, followed by three peeks, all before SkipCurrentBlock.
lexer.NextToken();
lexer.StartPeek();
lexer.Peek();
lexer.Peek();
lexer.Peek();
lexer.SkipCurrentBlock();
// The closing brace must still be reported as the current token, followed by '+' and EOF.
Assert.AreEqual(Tokens.CloseCurlyBrace, lexer.LookAhead.kind);
Assert.AreEqual(Tokens.Plus, lexer.NextToken().kind);
Assert.AreEqual(Tokens.EOF, lexer.NextToken().kind);
}
} }
} }

17
src/Libraries/NRefactory/Test/Lexer/CSharp/NumberLexerTest.cs

@ -27,6 +27,7 @@ namespace ICSharpCode.NRefactory.Tests.Lexer.CSharp
ILexer lexer = GenerateLexer(new StringReader(text)); ILexer lexer = GenerateLexer(new StringReader(text));
Token t = lexer.NextToken(); Token t = lexer.NextToken();
Assert.AreEqual(Tokens.EOF, lexer.NextToken().kind, "Tokens.EOF"); Assert.AreEqual(Tokens.EOF, lexer.NextToken().kind, "Tokens.EOF");
Assert.AreEqual("", lexer.Errors.ErrorOutput);
return t; return t;
} }
@ -34,6 +35,7 @@ namespace ICSharpCode.NRefactory.Tests.Lexer.CSharp
{ {
Token t = GetSingleToken(text); Token t = GetSingleToken(text);
Assert.AreEqual(Tokens.Literal, t.kind, "Tokens.Literal"); Assert.AreEqual(Tokens.Literal, t.kind, "Tokens.Literal");
Assert.AreEqual(text, t.val, "value");
Assert.IsNotNull(t.literalValue, "literalValue is null"); Assert.IsNotNull(t.literalValue, "literalValue is null");
Assert.AreEqual(val, t.literalValue, "literalValue"); Assert.AreEqual(val, t.literalValue, "literalValue");
} }
@ -90,12 +92,27 @@ namespace ICSharpCode.NRefactory.Tests.Lexer.CSharp
public void TestDouble() public void TestDouble()
{ {
CheckToken("1.0", 1.0); CheckToken("1.0", 1.0);
CheckToken("1.1", 1.1);
CheckToken("1.1e-2", 1.1e-2);
} }
[Test] [Test]
public void TestFloat() public void TestFloat()
{ {
CheckToken("1f", 1f);
CheckToken("1.0f", 1.0f); CheckToken("1.0f", 1.0f);
CheckToken("1.1f", 1.1f);
CheckToken("1.1e-2f", 1.1e-2f);
}
// Checks decimal literals (suffix 'm') in integer, fractional and exponent form.
// Each literal is passed both as the source text and as the expected parsed value.
[Test]
public void TestDecimal()
{
CheckToken("1m", 1m);
CheckToken("1.0m", 1.0m);
CheckToken("1.1m", 1.1m);
// Exponent notation: presumably the reason the lexer parses decimals with NumberStyles.Any.
CheckToken("1.1e-2m", 1.1e-2m);
CheckToken("2.0e-5m", 2.0e-5m);
}
} }
} }

1
src/Libraries/NRefactory/Test/Lexer/VBNet/LiteralsTests.cs

@ -28,6 +28,7 @@ namespace ICSharpCode.NRefactory.Tests.Lexer.VB
Token t = lexer.NextToken(); Token t = lexer.NextToken();
Assert.AreEqual(Tokens.EOL, lexer.NextToken().kind, "Tokens.EOL"); Assert.AreEqual(Tokens.EOL, lexer.NextToken().kind, "Tokens.EOL");
Assert.AreEqual(Tokens.EOF, lexer.NextToken().kind, "Tokens.EOF"); Assert.AreEqual(Tokens.EOF, lexer.NextToken().kind, "Tokens.EOF");
Assert.AreEqual("", lexer.Errors.ErrorOutput);
return t; return t;
} }

Loading…
Cancel
Save