diff --git a/src/Libraries/NRefactory/Project/Src/Lexer/CSharp/Lexer.cs b/src/Libraries/NRefactory/Project/Src/Lexer/CSharp/Lexer.cs index fc392520b3..19e993c9ff 100644 --- a/src/Libraries/NRefactory/Project/Src/Lexer/CSharp/Lexer.cs +++ b/src/Libraries/NRefactory/Project/Src/Lexer/CSharp/Lexer.cs @@ -377,8 +377,13 @@ namespace ICSharpCode.NRefactory.Parser.CSharp if (ch == '\\') { originalValue.Append('\\'); - originalValue.Append(ReadEscapeSequence(out ch)); - sb.Append(ch); + string surrogatePair; + originalValue.Append(ReadEscapeSequence(out ch, out surrogatePair)); + if (surrogatePair != null) { + sb.Append(surrogatePair); + } else { + sb.Append(ch); + } } else if (ch == '\n') { errors.Error(y, x, String.Format("No new line is allowed inside a string literal")); break; @@ -431,14 +436,28 @@ namespace ICSharpCode.NRefactory.Parser.CSharp } char[] escapeSequenceBuffer = new char[12]; - string ReadEscapeSequence(out char ch) + + /// <summary> + /// reads an escape sequence + /// </summary> + /// <param name="ch">The character represented by the escape sequence, + /// or '\0' if there was an error or the escape sequence represents a character that + /// can be represented only by a surrogate pair</param> + /// <param name="surrogatePair">Null, except when the character represented + /// by the escape sequence can only be represented by a surrogate pair (then the string + /// contains the surrogate pair)</param> + /// <returns>The escape sequence</returns> + string ReadEscapeSequence(out char ch, out string surrogatePair) { + surrogatePair = null; + int nextChar = ReaderRead(); if (nextChar == -1) { errors.Error(Line, Col, String.Format("End of file reached inside escape sequence")); ch = '\0'; return String.Empty; } + int number; char c = (char)nextChar; int curPos = 1; escapeSequenceBuffer[0] = c; @@ -478,8 +497,9 @@ namespace ICSharpCode.NRefactory.Parser.CSharp break; case 'u': case 'x': + // 16 bit unicode character c = (char)ReaderRead(); - int number = GetHexNumber(c); + number = GetHexNumber(c); escapeSequenceBuffer[curPos++] = c; if (number < 0) { @@ 
-497,6 +517,27 @@ namespace ICSharpCode.NRefactory.Parser.CSharp } ch = (char)number; break; + case 'U': + // 32 bit unicode character + number = 0; + for (int i = 0; i < 8; ++i) { + if (IsHex((char)ReaderPeek())) { + c = (char)ReaderRead(); + int idx = GetHexNumber(c); + escapeSequenceBuffer[curPos++] = c; + number = 16 * number + idx; + } else { + errors.Error(Line, Col - 1, String.Format("Invalid char in literal : {0}", (char)ReaderPeek())); + break; + } + } + if (number > 0xffff) { + ch = '\0'; + surrogatePair = char.ConvertFromUtf32(number); + } else { + ch = (char)number; + } + break; default: errors.Error(Line, Col, String.Format("Unexpected escape sequence : {0}", c)); ch = '\0'; @@ -518,7 +559,11 @@ namespace ICSharpCode.NRefactory.Parser.CSharp char chValue = ch; string escapeSequence = String.Empty; if (ch == '\\') { - escapeSequence = ReadEscapeSequence(out chValue); + string surrogatePair; + escapeSequence = ReadEscapeSequence(out chValue, out surrogatePair); + if (surrogatePair != null) { + errors.Error(y, x, String.Format("The unicode character must be represented by a surrogate pair and does not fit into a System.Char")); + } } unchecked { diff --git a/src/Libraries/NRefactory/Test/Lexer/CSharp/NumberLexerTest.cs b/src/Libraries/NRefactory/Test/Lexer/CSharp/NumberLexerTest.cs index 7b53e21112..969c5a8cdc 100644 --- a/src/Libraries/NRefactory/Test/Lexer/CSharp/NumberLexerTest.cs +++ b/src/Libraries/NRefactory/Test/Lexer/CSharp/NumberLexerTest.cs @@ -124,6 +124,20 @@ namespace ICSharpCode.NRefactory.Tests.Lexer.CSharp { CheckToken(@"@""-->""""<--""", @"-->""<--"); CheckToken(@"""-->\""<--""", "-->\"<--"); + + CheckToken(@"""\U00000041""", "\U00000041"); + CheckToken(@"""\U00010041""", "\U00010041"); + } + + [Test] + public void TestCharLiteral() + { + CheckToken(@"'a'", 'a'); + CheckToken(@"'\u0041'", '\u0041'); + CheckToken(@"'\x41'", '\x41'); + CheckToken(@"'\x041'", '\x041'); + CheckToken(@"'\x0041'", '\x0041'); + CheckToken(@"'\U00000041'", 
'\U00000041'); } } }