diff --git a/src/Libraries/NRefactory/Project/Src/Lexer/CSharp/Lexer.cs b/src/Libraries/NRefactory/Project/Src/Lexer/CSharp/Lexer.cs
index fc392520b3..19e993c9ff 100644
--- a/src/Libraries/NRefactory/Project/Src/Lexer/CSharp/Lexer.cs
+++ b/src/Libraries/NRefactory/Project/Src/Lexer/CSharp/Lexer.cs
@@ -377,8 +377,13 @@ namespace ICSharpCode.NRefactory.Parser.CSharp
if (ch == '\\') {
originalValue.Append('\\');
- originalValue.Append(ReadEscapeSequence(out ch));
- sb.Append(ch);
+ string surrogatePair;
+ originalValue.Append(ReadEscapeSequence(out ch, out surrogatePair));
+ if (surrogatePair != null) {
+ sb.Append(surrogatePair);
+ } else {
+ sb.Append(ch);
+ }
} else if (ch == '\n') {
errors.Error(y, x, String.Format("No new line is allowed inside a string literal"));
break;
@@ -431,14 +436,28 @@ namespace ICSharpCode.NRefactory.Parser.CSharp
}
char[] escapeSequenceBuffer = new char[12];
- string ReadEscapeSequence(out char ch)
+
+ /// <summary>
+ /// Reads an escape sequence.
+ /// </summary>
+ /// <param name="ch">The character represented by the escape sequence,
+ /// or '\0' if there was an error or the escape sequence represents a character that
+ /// can be represented only by a surrogate pair</param>
+ /// <param name="surrogatePair">Null, except when the character represented
+ /// by the escape sequence can only be represented by a surrogate pair (then the string
+ /// contains the surrogate pair)</param>
+ /// <returns>The escape sequence</returns>
+ string ReadEscapeSequence(out char ch, out string surrogatePair)
{
+ surrogatePair = null;
+
int nextChar = ReaderRead();
if (nextChar == -1) {
errors.Error(Line, Col, String.Format("End of file reached inside escape sequence"));
ch = '\0';
return String.Empty;
}
+ int number;
char c = (char)nextChar;
int curPos = 1;
escapeSequenceBuffer[0] = c;
@@ -478,8 +497,9 @@ namespace ICSharpCode.NRefactory.Parser.CSharp
break;
case 'u':
case 'x':
+ // 16 bit unicode character
c = (char)ReaderRead();
- int number = GetHexNumber(c);
+ number = GetHexNumber(c);
escapeSequenceBuffer[curPos++] = c;
if (number < 0) {
@@ -497,6 +517,27 @@ namespace ICSharpCode.NRefactory.Parser.CSharp
}
ch = (char)number;
break;
+ case 'U':
+ // 32 bit unicode character
+ number = 0;
+ for (int i = 0; i < 8; ++i) {
+ if (IsHex((char)ReaderPeek())) {
+ c = (char)ReaderRead();
+ int idx = GetHexNumber(c);
+ escapeSequenceBuffer[curPos++] = c;
+ number = 16 * number + idx;
+ } else {
+ errors.Error(Line, Col - 1, String.Format("Invalid char in literal : {0}", (char)ReaderPeek()));
+ break;
+ }
+ }
+ if (number > 0xffff) {
+ ch = '\0';
+ surrogatePair = char.ConvertFromUtf32(number);
+ } else {
+ ch = (char)number;
+ }
+ break;
default:
errors.Error(Line, Col, String.Format("Unexpected escape sequence : {0}", c));
ch = '\0';
@@ -518,7 +559,11 @@ namespace ICSharpCode.NRefactory.Parser.CSharp
char chValue = ch;
string escapeSequence = String.Empty;
if (ch == '\\') {
- escapeSequence = ReadEscapeSequence(out chValue);
+ string surrogatePair;
+ escapeSequence = ReadEscapeSequence(out chValue, out surrogatePair);
+ if (surrogatePair != null) {
+ errors.Error(y, x, String.Format("The unicode character must be represented by a surrogate pair and does not fit into a System.Char"));
+ }
}
unchecked {
diff --git a/src/Libraries/NRefactory/Test/Lexer/CSharp/NumberLexerTest.cs b/src/Libraries/NRefactory/Test/Lexer/CSharp/NumberLexerTest.cs
index 7b53e21112..969c5a8cdc 100644
--- a/src/Libraries/NRefactory/Test/Lexer/CSharp/NumberLexerTest.cs
+++ b/src/Libraries/NRefactory/Test/Lexer/CSharp/NumberLexerTest.cs
@@ -124,6 +124,20 @@ namespace ICSharpCode.NRefactory.Tests.Lexer.CSharp
{
CheckToken(@"@""-->""""<--""", @"-->""<--");
CheckToken(@"""-->\""<--""", "-->\"<--");
+
+ CheckToken(@"""\U00000041""", "\U00000041");
+ CheckToken(@"""\U00010041""", "\U00010041");
+ }
+
+ [Test]
+ public void TestCharLiteral() // Char literals in plain and escaped form; CheckToken is defined outside this view — presumably it lexes the first argument and compares the token value with the second (TODO confirm)
+ {
+ CheckToken(@"'a'", 'a'); // plain, unescaped character
+ CheckToken(@"'\u0041'", '\u0041'); // \u escape with four hex digits
+ CheckToken(@"'\x41'", '\x41'); // \x escape with two hex digits
+ CheckToken(@"'\x041'", '\x041'); // \x escape with three hex digits (leading zero)
+ CheckToken(@"'\x0041'", '\x0041'); // \x escape with four hex digits
+ CheckToken(@"'\U00000041'", '\U00000041'); // \U escape with eight hex digits whose value still fits in a single char
}
}
}