Browse Source

C# parser: allow unicode escape sequences in identifier names (yes, "int numb\u0065r;" is valid C#!)

git-svn-id: svn://svn.sharpdevelop.net/sharpdevelop/branches/2.1@2637 1ccf3a8d-04fe-1044-b7c0-cef0b8235c61
shortcuts
Daniel Grunwald 18 years ago
parent
commit
9dbd77c0d1
  1. 60
      src/Libraries/NRefactory/Project/Src/Lexer/CSharp/Lexer.cs
  2. 48
      src/Libraries/NRefactory/Test/Lexer/CSharp/CustomLexerTests.cs

60
src/Libraries/NRefactory/Project/Src/Lexer/CSharp/Lexer.cs

@@ -21,7 +21,8 @@ namespace ICSharpCode.NRefactory.Parser.CSharp
void ReadPreProcessingDirective()
{
Location start = new Location(Col - 1, Line);
string directive = ReadIdent('#');
bool canBeKeyword;
string directive = ReadIdent('#', out canBeKeyword);
string argument = ReadToEndOfLine();
this.specialTracker.AddPreprocessingDirective(directive, argument.Trim(), start, new Location(start.X + directive.Length + argument.Length, start.Y));
}
@@ -80,7 +81,8 @@ namespace ICSharpCode.NRefactory.Parser.CSharp
if (ch == '"') {
token = ReadVerbatimString();
} else if (Char.IsLetterOrDigit(ch) || ch == '_') {
token = new Token(Tokens.Identifier, x - 1, y, ReadIdent(ch));
bool canBeKeyword;
token = new Token(Tokens.Identifier, x - 1, y, ReadIdent(ch, out canBeKeyword));
} else {
errors.Error(y, x, String.Format("Unexpected char in Lexer.Next() : {0}", ch));
continue;
@@ -89,13 +91,16 @@ namespace ICSharpCode.NRefactory.Parser.CSharp
break;
default:
ch = (char)nextChar;
if (Char.IsLetter(ch) || ch == '_') {
if (Char.IsLetter(ch) || ch == '_' || ch == '\\') {
int x = Col - 1; // Col was incremented above, but we want the start of the identifier
int y = Line;
string s = ReadIdent(ch);
int keyWordToken = Keywords.GetToken(s);
if (keyWordToken >= 0) {
return new Token(keyWordToken, x, y);
bool canBeKeyword;
string s = ReadIdent(ch, out canBeKeyword);
if (canBeKeyword) {
int keyWordToken = Keywords.GetToken(s);
if (keyWordToken >= 0) {
return new Token(keyWordToken, x, y);
}
}
return new Token(Tokens.Identifier, x, y, s);
} else if (Char.IsDigit(ch)) {
@@ -120,16 +125,39 @@ namespace ICSharpCode.NRefactory.Parser.CSharp
const int MAX_IDENTIFIER_LENGTH = 512;
char[] identBuffer = new char[MAX_IDENTIFIER_LENGTH];
string ReadIdent(char ch)
string ReadIdent(char ch, out bool canBeKeyword)
{
int peek;
int curPos = 1;
identBuffer[0] = ch;
while (IsIdentifierPart(peek = ReaderPeek())) {
ReaderRead();
int curPos = 0;
canBeKeyword = true;
while (true) {
if (ch == '\\') {
peek = ReaderPeek();
if (peek != 'u' && peek != 'U') {
errors.Error(Line, Col, "Identifiers can only contain unicode escape sequences");
}
canBeKeyword = false;
string surrogatePair;
ReadEscapeSequence(out ch, out surrogatePair);
if (surrogatePair != null) {
if (!char.IsLetterOrDigit(surrogatePair, 0)) {
errors.Error(Line, Col, "Unicode escape sequences in identifiers cannot be used to represent characters that are invalid in identifiers");
}
for (int i = 0; i < surrogatePair.Length - 1; i++) {
if (curPos < MAX_IDENTIFIER_LENGTH) {
identBuffer[curPos++] = surrogatePair[i];
}
}
ch = surrogatePair[surrogatePair.Length - 1];
} else {
if (!IsIdentifierPart(ch)) {
errors.Error(Line, Col, "Unicode escape sequences in identifiers cannot be used to represent characters that are invalid in identifiers");
}
}
}
if (curPos < MAX_IDENTIFIER_LENGTH) {
identBuffer[curPos++] = (char)peek;
identBuffer[curPos++] = ch;
} else {
errors.Error(Line, Col, String.Format("Identifier too long"));
while (IsIdentifierPart(ReaderPeek())) {
@@ -137,6 +165,12 @@ namespace ICSharpCode.NRefactory.Parser.CSharp
}
break;
}
peek = ReaderPeek();
if (IsIdentifierPart(peek) || peek == '\\') {
ch = (char)ReaderRead();
} else {
break;
}
}
return new String(identBuffer, 0, curPos);
}

48
src/Libraries/NRefactory/Test/Lexer/CSharp/CustomLexerTests.cs

@@ -32,41 +32,57 @@ namespace ICSharpCode.NRefactory.Tests.Lexer.CSharp
Assert.AreEqual(Tokens.EOF, lexer.NextToken().kind);
}
[Test]
public void TestIdentifier()
void CheckIdentifier(string text, string actualIdentifier)
{
ILexer lexer = GenerateLexer(new StringReader("a_Bc05"));
ILexer lexer = GenerateLexer(new StringReader(text));
Token t = lexer.NextToken();
Assert.AreEqual(Tokens.Identifier, t.kind);
Assert.AreEqual("a_Bc05", t.val);
Assert.AreEqual(actualIdentifier, t.val);
t = lexer.NextToken();
Assert.AreEqual(Tokens.EOF, t.kind);
Assert.AreEqual("", lexer.Errors.ErrorOutput);
}
[Test]
public void TestIdentifier()
{
CheckIdentifier("a_Bc05", "a_Bc05");
}
[Test]
public void TestIdentifierStartingWithUnderscore()
{
ILexer lexer = GenerateLexer(new StringReader("_Bc05"));
Token t = lexer.NextToken();
Assert.AreEqual(Tokens.Identifier, t.kind);
Assert.AreEqual("_Bc05", t.val);
CheckIdentifier("_Bc05", "_Bc05");
}
[Test]
public void TestIdentifierStartingWithEscapeSequence()
{
CheckIdentifier(@"\u006cexer", "lexer");
}
[Test]
public void TestIdentifierContainingEscapeSequence()
{
CheckIdentifier(@"l\U00000065xer", "lexer");
}
[Test]
public void TestKeyWordAsIdentifier()
{
ILexer lexer = GenerateLexer(new StringReader("@int"));
Token t = lexer.NextToken();
Assert.AreEqual(Tokens.Identifier, t.kind);
Assert.AreEqual("int", t.val);
CheckIdentifier("@int", "int");
}
[Test]
public void TestKeywordWithEscapeSequenceIsIdentifier()
{
CheckIdentifier(@"i\u006et", "int");
}
[Test]
public void TestKeyWordAsIdentifierStartingWithUnderscore()
{
ILexer lexer = GenerateLexer(new StringReader("@_int"));
Token t = lexer.NextToken();
Assert.AreEqual(Tokens.Identifier, t.kind);
Assert.AreEqual("_int", t.val);
CheckIdentifier("@_int", "_int");
}
[Test]

Loading…
Cancel
Save