Browse Source

Improved C# lexer performance.

git-svn-id: svn://svn.sharpdevelop.net/sharpdevelop/trunk@907 1ccf3a8d-04fe-1044-b7c0-cef0b8235c61
shortcuts
Daniel Grunwald 20 years ago
parent
commit
c24a108bb4
  1. 6
      src/Libraries/NRefactory/Project/NRefactory.csproj
  2. 39
      src/Libraries/NRefactory/Project/Src/Lexer/AbstractLexer.cs
  3. 147
      src/Libraries/NRefactory/Project/Src/Lexer/CSharp/Lexer.cs
  4. 9
      src/Libraries/NRefactory/Project/Src/Lexer/ILexer.cs
  5. 2
      src/Libraries/NRefactory/Project/Src/Lexer/Special/SpecialTracker.cs
  6. 1
      src/Libraries/NRefactory/Project/Src/Parser/AbstractParser.cs
  7. 2044
      src/Libraries/NRefactory/Project/Src/Parser/CSharp/Parser.cs
  8. 4
      src/Libraries/NRefactory/Project/Src/Parser/CSharp/cs.ATG
  9. 2081
      src/Libraries/NRefactory/Project/Src/Parser/VBNet/Parser.cs
  10. 2
      src/Libraries/NRefactory/Project/Src/Parser/VBNet/VBNET.ATG
  11. 42
      src/Libraries/NRefactory/Test/Lexer/CSharp/CustomLexerTests.cs
  12. 17
      src/Libraries/NRefactory/Test/Lexer/CSharp/NumberLexerTest.cs
  13. 1
      src/Libraries/NRefactory/Test/Lexer/VBNet/LiteralsTests.cs

6
src/Libraries/NRefactory/Project/NRefactory.csproj

@ -15,6 +15,12 @@ @@ -15,6 +15,12 @@
<OutputType>Library</OutputType>
<SignAssembly>true</SignAssembly>
<AssemblyOriginatorKeyFile>Resources\ICSharpCode.NRefactory.snk</AssemblyOriginatorKeyFile>
<AllowUnsafeBlocks>True</AllowUnsafeBlocks>
<RegisterForComInterop>False</RegisterForComInterop>
<GenerateSerializationAssemblies>Auto</GenerateSerializationAssemblies>
<BaseAddress>4194304</BaseAddress>
<PlatformTarget>AnyCPU</PlatformTarget>
<FileAlignment>4096</FileAlignment>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
<Optimize>False</Optimize>

39
src/Libraries/NRefactory/Project/Src/Lexer/AbstractLexer.cs

@ -37,6 +37,17 @@ namespace ICSharpCode.NRefactory.Parser @@ -37,6 +37,17 @@ namespace ICSharpCode.NRefactory.Parser
// used for the original value of strings (with escape sequences).
protected StringBuilder originalValue = new StringBuilder();
protected bool skipAllComments = false;
/// <summary>
/// Gets or sets the value of the <c>skipAllComments</c> field.
/// </summary>
public bool SkipAllComments {
	get { return skipAllComments; }
	set { skipAllComments = value; }
}
protected int Line {
get {
return line;
@ -170,7 +181,7 @@ namespace ICSharpCode.NRefactory.Parser @@ -170,7 +181,7 @@ namespace ICSharpCode.NRefactory.Parser
if (curToken == null) {
curToken = Next();
specialTracker.InformToken(curToken.kind);
// Console.WriteLine(ICSharpCode.NRefactory.Parser.CSharp.Tokens.GetTokenString(curToken.kind) + " -- " + curToken.val + "(" + curToken.kind + ")");
//Console.WriteLine(ICSharpCode.NRefactory.Parser.CSharp.Tokens.GetTokenString(curToken.kind) + " -- " + curToken.val + "(" + curToken.kind + ")");
return curToken;
}
@ -184,7 +195,7 @@ namespace ICSharpCode.NRefactory.Parser @@ -184,7 +195,7 @@ namespace ICSharpCode.NRefactory.Parser
}
curToken = curToken.next;
// Console.WriteLine(ICSharpCode.NRefactory.Parser.CSharp.Tokens.GetTokenString(curToken.kind) + " -- " + curToken.val + "(" + curToken.kind + ")");
//Console.WriteLine(ICSharpCode.NRefactory.Parser.CSharp.Tokens.GetTokenString(curToken.kind) + " -- " + curToken.val + "(" + curToken.kind + ")");
return curToken;
}
@ -201,9 +212,17 @@ namespace ICSharpCode.NRefactory.Parser @@ -201,9 +212,17 @@ namespace ICSharpCode.NRefactory.Parser
throw new NotSupportedException();
}
protected bool IsIdentifierPart(char ch)
protected bool IsIdentifierPart(int ch)
{
return Char.IsLetterOrDigit(ch) || ch == '_';
// char.IsLetter is slow, so optimize for raw ASCII
if (ch < 48) return false; // 48 = '0'
if (ch <= 57) return true; // 57 = '9'
if (ch < 65) return false; // 65 = 'A'
if (ch <= 90) return true; // 90 = 'Z'
if (ch == 95) return true; // 95 = '_'
if (ch < 97) return false; // 97 = 'a'
if (ch <= 122) return true; // 122 = 'z'
return char.IsLetter((char)ch); // accept unicode letters
}
protected bool IsHex(char digit)
@ -250,6 +269,16 @@ namespace ICSharpCode.NRefactory.Parser @@ -250,6 +269,16 @@ namespace ICSharpCode.NRefactory.Parser
return false;
}
/// <summary>
/// Reads and discards characters from the reader until <c>HandleLineEnd</c>
/// reports true for a character or the reader returns -1.
/// </summary>
protected void SkipToEOL()
{
	for (int current = reader.Read(); current != -1; current = reader.Read()) {
		// Stop as soon as the character just read was handled as a line end.
		if (HandleLineEnd((char)current)) {
			return;
		}
	}
}
protected string ReadToEOL()
{
sb.Length = 0;
@ -259,7 +288,7 @@ namespace ICSharpCode.NRefactory.Parser @@ -259,7 +288,7 @@ namespace ICSharpCode.NRefactory.Parser
// Return read string, if EOL is reached
if (HandleLineEnd(ch)) {
return sb.ToString();;
return sb.ToString();
}
sb.Append(ch);

147
src/Libraries/NRefactory/Project/Src/Lexer/CSharp/Lexer.cs

@ -27,6 +27,8 @@ namespace ICSharpCode.NRefactory.Parser.CSharp @@ -27,6 +27,8 @@ namespace ICSharpCode.NRefactory.Parser.CSharp
int nextChar;
while ((nextChar = ReaderRead()) != -1) {
char ch = (char)nextChar;
if (ch == ' ' || ch == '\t')
continue;
if (Char.IsWhiteSpace(ch)) {
HandleLineEnd(ch);
@ -112,17 +114,17 @@ namespace ICSharpCode.NRefactory.Parser.CSharp @@ -112,17 +114,17 @@ namespace ICSharpCode.NRefactory.Parser.CSharp
string ReadIdent(char ch)
{
int peek;
int curPos = 1;
identBuffer[0] = ch;
int peek;
while ((peek = ReaderPeek()) != -1 && (Char.IsLetterOrDigit(ch = (char)peek) || ch == '_')) {
while (IsIdentifierPart(peek = ReaderPeek())) {
ReaderRead();
if (curPos < MAX_IDENTIFIER_LENGTH) {
identBuffer[curPos++] = ch;
identBuffer[curPos++] = (char)peek;
} else {
errors.Error(Line, Col, String.Format("Identifier too long"));
while ((peek = ReaderPeek()) != -1 && (Char.IsLetterOrDigit(ch = (char)peek) || ch == '_')) {
while (IsIdentifierPart(ReaderPeek())) {
ReaderRead();
}
break;
@ -160,7 +162,7 @@ namespace ICSharpCode.NRefactory.Parser.CSharp @@ -160,7 +162,7 @@ namespace ICSharpCode.NRefactory.Parser.CSharp
ReaderRead(); // skip 'x'
sb.Length = 0; // Remove '0' from 0x prefix from the stringvalue
while (IsHex((char)ReaderPeek())) {
sb.Append(Char.ToUpper((char)ReaderRead(), CultureInfo.InvariantCulture));
sb.Append((char)ReaderRead());
}
if (sb.Length == 0) {
sb.Append('0'); // dummy value to prevent exception
@ -176,7 +178,7 @@ namespace ICSharpCode.NRefactory.Parser.CSharp @@ -176,7 +178,7 @@ namespace ICSharpCode.NRefactory.Parser.CSharp
peek = (char)ReaderPeek();
}
Token nextToken = null; // if we accedently read a 'dot'
Token nextToken = null; // if we accidentally read a 'dot'
if (peek == '.') { // read floating point number
ReaderRead();
peek = (char)ReaderPeek();
@ -258,7 +260,7 @@ namespace ICSharpCode.NRefactory.Parser.CSharp @@ -258,7 +260,7 @@ namespace ICSharpCode.NRefactory.Parser.CSharp
}
if (isdecimal) {
try {
return new Token(Tokens.Literal, x, y, stringValue, Decimal.Parse(digit, CultureInfo.InvariantCulture));
return new Token(Tokens.Literal, x, y, stringValue, Decimal.Parse(digit, NumberStyles.Any, CultureInfo.InvariantCulture));
} catch (Exception) {
errors.Error(y, x, String.Format("Can't parse decimal {0}", digit));
return new Token(Tokens.Literal, x, y, stringValue, 0m);
@ -702,8 +704,11 @@ namespace ICSharpCode.NRefactory.Parser.CSharp @@ -702,8 +704,11 @@ namespace ICSharpCode.NRefactory.Parser.CSharp
string ReadCommentToEOL()
{
if (specialCommentHash == null) {
return ReadToEOL();
}
sb.Length = 0;
StringBuilder curWord = specialCommentHash != null ? new StringBuilder() : null;
StringBuilder curWord = new StringBuilder();
int nextChar;
while ((nextChar = ReaderRead()) != -1) {
@ -714,19 +719,17 @@ namespace ICSharpCode.NRefactory.Parser.CSharp @@ -714,19 +719,17 @@ namespace ICSharpCode.NRefactory.Parser.CSharp
}
sb.Append(ch);
if (specialCommentHash != null) {
if (Char.IsLetter(ch)) {
curWord.Append(ch);
} else {
string tag = curWord.ToString();
curWord.Length = 0;
if (specialCommentHash.ContainsKey(tag)) {
Point p = new Point(Col, Line);
string comment = ch + ReadToEOL();
tagComments.Add(new TagComment(tag, comment, p, new Point(Col, Line)));
sb.Append(comment);
break;
}
if (IsIdentifierPart(nextChar)) {
curWord.Append(ch);
} else {
string tag = curWord.ToString();
curWord.Length = 0;
if (specialCommentHash.ContainsKey(tag)) {
Point p = new Point(Col, Line);
string comment = ch + ReadToEOL();
tagComments.Add(new TagComment(tag, comment, p, new Point(Col, Line)));
sb.Append(comment);
break;
}
}
}
@ -735,32 +738,46 @@ namespace ICSharpCode.NRefactory.Parser.CSharp @@ -735,32 +738,46 @@ namespace ICSharpCode.NRefactory.Parser.CSharp
void ReadSingleLineComment(CommentType commentType)
{
specialTracker.StartComment(commentType, new Point(Col, Line));
specialTracker.AddString(ReadCommentToEOL());
specialTracker.FinishComment(new Point(Col, Line));
if (skipAllComments) {
SkipToEOL();
} else {
specialTracker.StartComment(commentType, new Point(Col, Line));
specialTracker.AddString(ReadCommentToEOL());
specialTracker.FinishComment(new Point(Col, Line));
}
}
void ReadMultiLineComment()
{
specialTracker.StartComment(CommentType.Block, new Point(Col, Line));
int nextChar;
while ((nextChar = ReaderRead()) != -1) {
char ch = (char)nextChar;
if (HandleLineEnd(ch)) {
specialTracker.AddChar('\n');
continue;
if (skipAllComments) {
while ((nextChar = ReaderRead()) != -1) {
char ch = (char)nextChar;
if (ch == '*' && ReaderPeek() == '/') {
ReaderRead();
return;
}
}
// End of multiline comment reached ?
if (ch == '*' && ReaderPeek() == '/') {
ReaderRead();
specialTracker.FinishComment(new Point(Col, Line));
return;
} else {
specialTracker.StartComment(CommentType.Block, new Point(Col, Line));
while ((nextChar = ReaderRead()) != -1) {
char ch = (char)nextChar;
if (HandleLineEnd(ch)) {
specialTracker.AddChar('\n');
continue;
}
// End of multiline comment reached ?
if (ch == '*' && ReaderPeek() == '/') {
ReaderRead();
specialTracker.FinishComment(new Point(Col, Line));
return;
}
specialTracker.AddChar(ch);
}
specialTracker.AddChar(ch);
specialTracker.FinishComment(new Point(Col, Line));
}
specialTracker.FinishComment(new Point(Col, Line));
// Reached EOF before end of multiline comment.
errors.Error(Line, Col, String.Format("Reached EOF before the end of a multiline comment"));
}
@ -774,16 +791,58 @@ namespace ICSharpCode.NRefactory.Parser.CSharp @@ -774,16 +791,58 @@ namespace ICSharpCode.NRefactory.Parser.CSharp
public override void SkipCurrentBlock()
{
int braceCount = 0;
Token t;
while ((t = LookAhead).kind != Tokens.EOF) {
if (t.kind == Tokens.OpenCurlyBrace) {
while (curToken != null) {
if (curToken.kind == Tokens.OpenCurlyBrace) {
++braceCount;
} else if (t.kind == Tokens.CloseCurlyBrace) {
} else if (curToken.kind == Tokens.CloseCurlyBrace) {
if (--braceCount < 0)
return;
}
NextToken();
lastToken = curToken;
curToken = curToken.next;
}
int nextChar;
while ((nextChar = ReaderRead()) != -1) {
switch (nextChar) {
case '{':
braceCount++;
break;
case '}':
if (--braceCount < 0) {
curToken = new Token(Tokens.CloseCurlyBrace, Col, Line);
return;
}
break;
case '/':
int peek = ReaderPeek();
if (peek == '/' || peek == '*') {
ReadComment();
}
break;
case '#':
SkipToEOL();
break;
case '"':
ReadString();
break;
case '\'':
ReadChar();
break;
case '\r':
case '\n':
HandleLineEnd((char)nextChar);
break;
case '@':
int next = ReaderRead();
if (next == -1) {
errors.Error(Line, Col, String.Format("EOF after @"));
} else if (next == '"') {
ReadVerbatimString();
}
break;
}
}
curToken = new Token(Tokens.EOF, Col, Line);
}
}
}

9
src/Libraries/NRefactory/Project/Src/Lexer/ILexer.cs

@ -42,6 +42,15 @@ namespace ICSharpCode.NRefactory.Parser @@ -42,6 +42,15 @@ namespace ICSharpCode.NRefactory.Parser
set;
}
/// <summary>
/// When set to true, the lexer does not add comments to the special tracker.
/// Enable this to improve lexing performance.
/// </summary>
bool SkipAllComments { get; set; }
/// <summary>
/// Returns the comments that have been read and that contain tag keywords.
/// </summary>

2
src/Libraries/NRefactory/Project/Src/Lexer/Special/SpecialTracker.cs

@ -57,7 +57,7 @@ namespace ICSharpCode.NRefactory.Parser @@ -57,7 +57,7 @@ namespace ICSharpCode.NRefactory.Parser
{
this.currentCommentType = commentType;
this.startPosition = startPosition;
this.sb.Length = 0;
this.sb.Length = 0;
}
public void AddChar(char c)

1
src/Libraries/NRefactory/Project/Src/Parser/AbstractParser.cs

@ -59,7 +59,6 @@ namespace ICSharpCode.NRefactory.Parser @@ -59,7 +59,6 @@ namespace ICSharpCode.NRefactory.Parser
this.errors = lexer.Errors;
this.lexer = lexer;
errors.SynErr = new ErrorCodeProc(SynErr);
lexer.NextToken();
}
public abstract void Parse();

2044
src/Libraries/NRefactory/Project/Src/Parser/CSharp/Parser.cs

File diff suppressed because it is too large Load Diff

4
src/Libraries/NRefactory/Project/Src/Parser/CSharp/cs.ATG

@ -44,6 +44,7 @@ public void Error(string s) @@ -44,6 +44,7 @@ public void Error(string s)
public override Expression ParseExpression()
{
lexer.NextToken();
Expression expr;
Expr(out expr);
return expr;
@ -648,7 +649,8 @@ PRODUCTIONS @@ -648,7 +649,8 @@ PRODUCTIONS
/*--- compilation unit: */
CS
(. compilationUnit = new CompilationUnit(); .)
(. lexer.NextToken(); /* get the first token */
compilationUnit = new CompilationUnit(); .)
=
{ UsingDirective }
{ IF (IsGlobalAttrTarget()) GlobalAttributeSection }

2081
src/Libraries/NRefactory/Project/Src/Parser/VBNet/Parser.cs

File diff suppressed because it is too large Load Diff

2
src/Libraries/NRefactory/Project/Src/Parser/VBNet/VBNET.ATG

@ -62,6 +62,7 @@ public void Error(string s) @@ -62,6 +62,7 @@ public void Error(string s)
public override Expression ParseExpression()
{
lexer.NextToken();
Expression expr;
Expr(out expr);
return expr;
@ -476,6 +477,7 @@ PRODUCTIONS @@ -476,6 +477,7 @@ PRODUCTIONS
VBNET
(.
lexer.NextToken(); // get the first token
compilationUnit = new CompilationUnit();
withStatements = new Stack();
.) =

42
src/Libraries/NRefactory/Test/Lexer/CSharp/CustomLexerTests.cs

@ -32,6 +32,15 @@ namespace ICSharpCode.NRefactory.Tests.Lexer.CSharp @@ -32,6 +32,15 @@ namespace ICSharpCode.NRefactory.Tests.Lexer.CSharp
Assert.AreEqual(Tokens.EOF, lexer.NextToken().kind);
}
/// <summary>
/// An identifier mixing lower-case letters, an underscore, an upper-case letter
/// and digits must come back as one Identifier token with its text unchanged.
/// </summary>
[Test]
public void TestIdentifier()
{
	const string source = "a_Bc05";
	ILexer lexer = GenerateLexer(new StringReader(source));

	Token identifier = lexer.NextToken();

	Assert.AreEqual(Tokens.Identifier, identifier.kind);
	Assert.AreEqual(source, identifier.val);
}
[Test]
public void TestSkippedEmptyBlock()
{
@ -55,5 +64,38 @@ namespace ICSharpCode.NRefactory.Tests.Lexer.CSharp @@ -55,5 +64,38 @@ namespace ICSharpCode.NRefactory.Tests.Lexer.CSharp
Assert.AreEqual(Tokens.Plus, lexer.NextToken().kind);
Assert.AreEqual(Tokens.EOF, lexer.NextToken().kind);
}
/// <summary>
/// Calls SkipCurrentBlock after three tokens have been peeked. The block body
/// contains closing braces inside a string literal and a line comment, plus a
/// nested block. Afterwards the look-ahead must be the closing brace, followed
/// by '+' and EOF.
/// </summary>
[Test]
public void TestSkippedNonEmptyBlockWithPeek()
{
	string source = "{ TestMethod(\"}\"); // }}}\n" +
	                "while(1) {break;} }+";
	ILexer lexer = GenerateLexer(new StringReader(source));

	Assert.AreEqual(Tokens.OpenCurlyBrace, lexer.NextToken().kind);
	lexer.NextToken();

	// Peek three tokens ahead before skipping.
	lexer.StartPeek();
	for (int peeked = 0; peeked < 3; peeked++) {
		lexer.Peek();
	}

	lexer.SkipCurrentBlock();

	Assert.AreEqual(Tokens.CloseCurlyBrace, lexer.LookAhead.kind);
	Assert.AreEqual(Tokens.Plus, lexer.NextToken().kind);
	Assert.AreEqual(Tokens.EOF, lexer.NextToken().kind);
}
/// <summary>
/// Calls SkipCurrentBlock on an empty block after three Peek calls. Afterwards
/// the look-ahead must be the closing brace, followed by '+' and EOF.
/// </summary>
[Test]
public void TestSkippedEmptyBlockWithPeek()
{
	string source = "{}+";
	ILexer lexer = GenerateLexer(new StringReader(source));

	Assert.AreEqual(Tokens.OpenCurlyBrace, lexer.NextToken().kind);
	lexer.NextToken();

	// Issue three Peek calls before skipping.
	lexer.StartPeek();
	for (int peeked = 0; peeked < 3; peeked++) {
		lexer.Peek();
	}

	lexer.SkipCurrentBlock();

	Assert.AreEqual(Tokens.CloseCurlyBrace, lexer.LookAhead.kind);
	Assert.AreEqual(Tokens.Plus, lexer.NextToken().kind);
	Assert.AreEqual(Tokens.EOF, lexer.NextToken().kind);
}
}
}

17
src/Libraries/NRefactory/Test/Lexer/CSharp/NumberLexerTest.cs

@ -27,6 +27,7 @@ namespace ICSharpCode.NRefactory.Tests.Lexer.CSharp @@ -27,6 +27,7 @@ namespace ICSharpCode.NRefactory.Tests.Lexer.CSharp
ILexer lexer = GenerateLexer(new StringReader(text));
Token t = lexer.NextToken();
Assert.AreEqual(Tokens.EOF, lexer.NextToken().kind, "Tokens.EOF");
Assert.AreEqual("", lexer.Errors.ErrorOutput);
return t;
}
@ -34,6 +35,7 @@ namespace ICSharpCode.NRefactory.Tests.Lexer.CSharp @@ -34,6 +35,7 @@ namespace ICSharpCode.NRefactory.Tests.Lexer.CSharp
{
Token t = GetSingleToken(text);
Assert.AreEqual(Tokens.Literal, t.kind, "Tokens.Literal");
Assert.AreEqual(text, t.val, "value");
Assert.IsNotNull(t.literalValue, "literalValue is null");
Assert.AreEqual(val, t.literalValue, "literalValue");
}
@ -90,12 +92,27 @@ namespace ICSharpCode.NRefactory.Tests.Lexer.CSharp @@ -90,12 +92,27 @@ namespace ICSharpCode.NRefactory.Tests.Lexer.CSharp
// Double literals: each source text is passed to CheckToken together with
// the double value it is expected to produce.
public void TestDouble()
{
CheckToken("1.0", 1.0);
CheckToken("1.1", 1.1);
// exponent notation
CheckToken("1.1e-2", 1.1e-2);
}
// Float literals (suffix 'f'): integer, fractional and exponent forms, each
// passed to CheckToken together with the float value it is expected to produce.
[Test]
public void TestFloat()
{
CheckToken("1f", 1f);
CheckToken("1.0f", 1.0f);
CheckToken("1.1f", 1.1f);
// exponent notation combined with the suffix
CheckToken("1.1e-2f", 1.1e-2f);
}
// Decimal literals (suffix 'm'): integer, fractional and exponent forms, each
// passed to CheckToken together with the decimal value it is expected to produce.
[Test]
public void TestDecimal()
{
CheckToken("1m", 1m);
CheckToken("1.0m", 1.0m);
CheckToken("1.1m", 1.1m);
// exponent notation combined with the suffix
CheckToken("1.1e-2m", 1.1e-2m);
CheckToken("2.0e-5m", 2.0e-5m);
}
}
}

1
src/Libraries/NRefactory/Test/Lexer/VBNet/LiteralsTests.cs

@ -28,6 +28,7 @@ namespace ICSharpCode.NRefactory.Tests.Lexer.VB @@ -28,6 +28,7 @@ namespace ICSharpCode.NRefactory.Tests.Lexer.VB
Token t = lexer.NextToken();
Assert.AreEqual(Tokens.EOL, lexer.NextToken().kind, "Tokens.EOL");
Assert.AreEqual(Tokens.EOF, lexer.NextToken().kind, "Tokens.EOF");
Assert.AreEqual("", lexer.Errors.ErrorOutput);
return t;
}

Loading…
Cancel
Save