Browse Source

C# lexer: add support for 32-bit unicode escape sequences (including those that represent surrogate pairs)

git-svn-id: svn://svn.sharpdevelop.net/sharpdevelop/branches/2.1@2636 1ccf3a8d-04fe-1044-b7c0-cef0b8235c61
shortcuts
Daniel Grunwald 18 years ago
parent
commit
9d73b7042e
  1. 55
      src/Libraries/NRefactory/Project/Src/Lexer/CSharp/Lexer.cs
  2. 14
      src/Libraries/NRefactory/Test/Lexer/CSharp/NumberLexerTest.cs

55
src/Libraries/NRefactory/Project/Src/Lexer/CSharp/Lexer.cs

@ -377,8 +377,13 @@ namespace ICSharpCode.NRefactory.Parser.CSharp
if (ch == '\\') { if (ch == '\\') {
originalValue.Append('\\'); originalValue.Append('\\');
originalValue.Append(ReadEscapeSequence(out ch)); string surrogatePair;
sb.Append(ch); originalValue.Append(ReadEscapeSequence(out ch, out surrogatePair));
if (surrogatePair != null) {
sb.Append(surrogatePair);
} else {
sb.Append(ch);
}
} else if (ch == '\n') { } else if (ch == '\n') {
errors.Error(y, x, String.Format("No new line is allowed inside a string literal")); errors.Error(y, x, String.Format("No new line is allowed inside a string literal"));
break; break;
@ -431,14 +436,28 @@ namespace ICSharpCode.NRefactory.Parser.CSharp
} }
char[] escapeSequenceBuffer = new char[12]; char[] escapeSequenceBuffer = new char[12];
string ReadEscapeSequence(out char ch)
/// <summary>
/// Reads an escape sequence.
/// </summary>
/// <param name="ch">The character represented by the escape sequence,
/// or '\0' if there was an error or the escape sequence represents a character that
/// can be represented only by a surrogate pair</param>
/// <param name="surrogatePair">Null, except when the character represented
/// by the escape sequence can only be represented by a surrogate pair (then the string
/// contains the surrogate pair)</param>
/// <returns>The escape sequence</returns>
string ReadEscapeSequence(out char ch, out string surrogatePair)
{ {
surrogatePair = null;
int nextChar = ReaderRead(); int nextChar = ReaderRead();
if (nextChar == -1) { if (nextChar == -1) {
errors.Error(Line, Col, String.Format("End of file reached inside escape sequence")); errors.Error(Line, Col, String.Format("End of file reached inside escape sequence"));
ch = '\0'; ch = '\0';
return String.Empty; return String.Empty;
} }
int number;
char c = (char)nextChar; char c = (char)nextChar;
int curPos = 1; int curPos = 1;
escapeSequenceBuffer[0] = c; escapeSequenceBuffer[0] = c;
@ -478,8 +497,9 @@ namespace ICSharpCode.NRefactory.Parser.CSharp
break; break;
case 'u': case 'u':
case 'x': case 'x':
// 16 bit unicode character
c = (char)ReaderRead(); c = (char)ReaderRead();
int number = GetHexNumber(c); number = GetHexNumber(c);
escapeSequenceBuffer[curPos++] = c; escapeSequenceBuffer[curPos++] = c;
if (number < 0) { if (number < 0) {
@ -497,6 +517,27 @@ namespace ICSharpCode.NRefactory.Parser.CSharp
} }
ch = (char)number; ch = (char)number;
break; break;
case 'U':
// 32 bit unicode character
number = 0;
for (int i = 0; i < 8; ++i) {
if (IsHex((char)ReaderPeek())) {
c = (char)ReaderRead();
int idx = GetHexNumber(c);
escapeSequenceBuffer[curPos++] = c;
number = 16 * number + idx;
} else {
errors.Error(Line, Col - 1, String.Format("Invalid char in literal : {0}", (char)ReaderPeek()));
break;
}
}
if (number > 0xffff) {
ch = '\0';
surrogatePair = char.ConvertFromUtf32(number);
} else {
ch = (char)number;
}
break;
default: default:
errors.Error(Line, Col, String.Format("Unexpected escape sequence : {0}", c)); errors.Error(Line, Col, String.Format("Unexpected escape sequence : {0}", c));
ch = '\0'; ch = '\0';
@ -518,7 +559,11 @@ namespace ICSharpCode.NRefactory.Parser.CSharp
char chValue = ch; char chValue = ch;
string escapeSequence = String.Empty; string escapeSequence = String.Empty;
if (ch == '\\') { if (ch == '\\') {
escapeSequence = ReadEscapeSequence(out chValue); string surrogatePair;
escapeSequence = ReadEscapeSequence(out chValue, out surrogatePair);
if (surrogatePair != null) {
errors.Error(y, x, String.Format("The unicode character must be represented by a surrogate pair and does not fit into a System.Char"));
}
} }
unchecked { unchecked {

14
src/Libraries/NRefactory/Test/Lexer/CSharp/NumberLexerTest.cs

@ -124,6 +124,20 @@ namespace ICSharpCode.NRefactory.Tests.Lexer.CSharp
{ {
CheckToken(@"@""-->""""<--""", @"-->""<--"); CheckToken(@"@""-->""""<--""", @"-->""<--");
CheckToken(@"""-->\""<--""", "-->\"<--"); CheckToken(@"""-->\""<--""", "-->\"<--");
CheckToken(@"""\U00000041""", "\U00000041");
CheckToken(@"""\U00010041""", "\U00010041");
}
[Test]
public void TestCharLiteral()
{
CheckToken(@"'a'", 'a');
CheckToken(@"'\u0041'", '\u0041');
CheckToken(@"'\x41'", '\x41');
CheckToken(@"'\x041'", '\x041');
CheckToken(@"'\x0041'", '\x0041');
CheckToken(@"'\U00000041'", '\U00000041');
} }
} }
} }

Loading…
Cancel
Save