Properly fix the integer conversion mess.

It's not actually all that hard once you realize: a) the ECMA-335 specification is incorrect: whether to sign- or zero-extend does not always depend on the source type; b) the C# compiler generates completely weird code when casting between (U)IntPtr and integers. Map all IntPtr casts to a few semi-sane cases, and we can obtain the correct semantics without introducing any helper methods.
9 years ago · d07298b747
6 changed files with 161 additions and 104 deletions
--- a/ICSharpCode.Decompiler/CSharp/CSharpDecompiler.cs
+++ b/ICSharpCode.Decompiler/CSharp/CSharpDecompiler.cs
@ -485,11 +485,6 @@ namespace ICSharpCode.Decompiler.CSharp
				@@ -485,11 +485,6 @@ namespace ICSharpCode.Decompiler.CSharp
 			}
 		}
 		
-		/// <summary>
-		/// Whether we need to generate helper methods for u4->i or u->i8 conversions.
-		/// </summary>
-		internal bool needs_conv_i_ovf_un, needs_conv_i8_ovf_un;
-		
 		EntityDeclaration DoDecompile(ITypeDefinition typeDef, ITypeResolveContext decompilationContext)
 		{
 			Debug.Assert(decompilationContext.CurrentTypeDefinition == typeDef);
@ -547,30 +542,6 @@ namespace ICSharpCode.Decompiler.CSharp
				@@ -547,30 +542,6 @@ namespace ICSharpCode.Decompiler.CSharp
 						section.Remove();
 				}
 			}
-			if (needs_conv_i_ovf_un) {
-				typeDecl.Members.Add(GenerateConvHelper(
-					"conv_i_ovf_un", KnownTypeCode.UInt32, KnownTypeCode.IntPtr, typeSystemAstBuilder,
-					// on 32-bit, 'conv.ovf u4->i' is like 'conv.ovf u4->i4'
-					new CheckedExpression(new CastExpression(
-						new NRefactory.CSharp.PrimitiveType("int"),
-						new IdentifierExpression("input")
-					)),
-					// on 64-bit, 'conv.ovf u4->i' is like 'conv.ovf u4->i8'
-					new IdentifierExpression("input")
-				));
-				needs_conv_i_ovf_un = false;
-			}
-			if (needs_conv_i8_ovf_un) {
-				typeDecl.Members.Add(GenerateConvHelper(
-					"conv_i8_ovf_un", KnownTypeCode.UIntPtr, KnownTypeCode.Int64, typeSystemAstBuilder,
-					// on 32-bit, 'conv.ovf u->i8' is like 'conv.ovf u4->i8'
-					new IdentifierExpression("input"),
-					// on 64-bit, 'conv.ovf u->i8' is like 'conv.ovf u8->i8'
-					new IdentifierExpression("input")
-				));
-				needs_conv_i8_ovf_un = false;
-			}
-			
 			return typeDecl;
 		}

@ -675,9 +646,6 @@ namespace ICSharpCode.Decompiler.CSharp
				@@ -675,9 +646,6 @@ namespace ICSharpCode.Decompiler.CSharp
 			var statementBuilder = new StatementBuilder(decompilationContext, method);
 			var body = statementBuilder.ConvertAsBlock(function.Body);
 			
-			needs_conv_i_ovf_un |= statementBuilder.exprBuilder.needs_conv_i_ovf_un;
-			needs_conv_i8_ovf_un |= statementBuilder.exprBuilder.needs_conv_i8_ovf_un;
-
 			entityDecl.AddChild(body, Roles.Body);
 		}

--- a/ICSharpCode.Decompiler/CSharp/ExpressionBuilder.cs
+++ b/ICSharpCode.Decompiler/CSharp/ExpressionBuilder.cs
@ -609,11 +609,6 @@ namespace ICSharpCode.Decompiler.CSharp
				@@ -609,11 +609,6 @@ namespace ICSharpCode.Decompiler.CSharp
 			return result;
 		}
 		
-		/// <summary>
-		/// Whether we need to generate helper methods for u4->i or u->i8 conversions.
-		/// </summary>
-		internal bool needs_conv_i_ovf_un, needs_conv_i8_ovf_un;
-		
 		protected internal override TranslatedExpression VisitConv(Conv inst)
 		{
 			var arg = Translate(inst.Argument);
@ -627,7 +622,9 @@ namespace ICSharpCode.Decompiler.CSharp
				@@ -627,7 +622,9 @@ namespace ICSharpCode.Decompiler.CSharp
 			
 			// Also, we need to be very careful with regards to the conversions we emit:
 			// In C#, zero vs. sign-extension depends on the input type,
-			// but in the ILAst Conv instruction it depends on the output type.
+			// but in the ILAst conv instruction it depends on the output type.
+			// However, in the conv.ovf instructions, the .NET runtime behavior seems to depend on the input type,
+			// in violation of the ECMA-335 spec!
 			
 			if (inst.CheckForOverflow || inst.Kind == ConversionKind.IntToFloat) {
 				// We need to first convert the argument to the expected sign.
@ -636,52 +633,11 @@ namespace ICSharpCode.Decompiler.CSharp
				@@ -636,52 +633,11 @@ namespace ICSharpCode.Decompiler.CSharp
 				if (arg.Type.GetSize() > inputStackType.GetSize() || arg.Type.GetSign() != inst.InputSign) {
 					arg = arg.ConvertTo(compilation.FindType(inputStackType.ToKnownTypeCode(inst.InputSign)), this);
 				}
-				if (inst.Kind == ConversionKind.ZeroExtend) {
-					// Zero extension with overflow check -> throws if the input value is negative.
-					// C# sign/zero extension depends on the source type, so we can't directly
-					// cast from the signed input type to the target type.
-					// Instead, perform a checked cast to the unsigned version of the input type
-					// (to throw an exception for negative values).
-					// Then the actual zero extension can be left to our parent instruction
-					// (due to the ExpressionBuilder post-condition being flexible with regards to the integer type width).
-					return arg.ConvertTo(compilation.FindType(inputStackType.ToKnownTypeCode(Sign.Unsigned)), this, true)
-						.WithILInstruction(inst);
-				} else if (inst.Kind == ConversionKind.SignExtend) {
-					// Sign extension with overflow check.
-					// Sign-extending conversions can fail when the "larger type" isn't actually larger, that is, in exactly two cases:
-					// * U4 -> I  on 32-bit
-					// * U -> I8 on 64-bit
-					if (inst.InputSign == Sign.Unsigned && inputStackType == StackType.I4 && inst.TargetType == IL.PrimitiveType.I) {
-						// conv u4->i
-						// on 32-bit, this is a sign-changing conversion with overflow-check
-						// on 64-bit, this is a sign extension
-						needs_conv_i_ovf_un = true;
-						arg = arg.ConvertTo(compilation.FindType(KnownTypeCode.UInt32), this);
-						return new InvocationExpression(new IdentifierExpression("conv_i_ovf_un"), arg.Expression)
-							.WithRR(new ResolveResult(compilation.FindType(KnownTypeCode.IntPtr)))
-							.WithILInstruction(inst);
-					} else if (inst.InputSign == Sign.Unsigned && (inputStackType == StackType.I && inst.TargetType == IL.PrimitiveType.I8)) {
-						// conv u->i8
-						// on 32-bit, this is a sign extension
-						// on 64-bit, this is a sign-changing conversion with overflow-check
-						needs_conv_i8_ovf_un = true;
-						arg = arg.ConvertTo(compilation.FindType(KnownTypeCode.UIntPtr), this);
-						return new InvocationExpression(new IdentifierExpression("conv_i8_ovf_un"), arg.Expression)
-							.WithRR(new ResolveResult(compilation.FindType(KnownTypeCode.IntPtr)))
-							.WithILInstruction(inst);
-					} else {
-						// The overflow check cannot actually fail, so we can take the simple solution of performing
-						// an unchecked cast to signed int and let the parent instruction handle the actual sign extension.
-						return arg.ConvertTo(compilation.FindType(inputStackType.ToKnownTypeCode(Sign.Signed)), this)
-							.WithILInstruction(inst);
-					}
-				} else {
-					// Size-preserving sign-changing conversion, or int-to-float conversion:
-					// We can directly cast to the target type.
+				// Because casts with overflow check match C# semantics (zero/sign-extension depends on source type),
+				// we can just directly cast to the target type.
 				return arg.ConvertTo(compilation.FindType(inst.TargetType.ToKnownTypeCode()), this, true)
 					.WithILInstruction(inst);
 			}
-			}
 			
 			switch (inst.Kind) {
 				case ConversionKind.StopGCTracking:
--- a/ICSharpCode.Decompiler/CSharp/TranslatedExpression.cs
+++ b/ICSharpCode.Decompiler/CSharp/TranslatedExpression.cs
@ -164,6 +164,9 @@ namespace ICSharpCode.Decompiler.CSharp
				@@ -164,6 +164,9 @@ namespace ICSharpCode.Decompiler.CSharp
 		/// sign of the source type.
 		/// This fits with the ExpressionBuilder's post-condition, so e.g. an assignment can simply
 		/// call <c>Translate(stloc.Value).ConvertTo(stloc.Variable.Type)</c> and have the overall C# semantics match the IL semantics.
+		/// 
+		/// From the caller's perspective, IntPtr/UIntPtr behave like normal C# integers except that they have native int size.
+		/// All the special cases necessary to make IntPtr/UIntPtr behave sanely are handled internally in ConvertTo().
 		/// </remarks>
 		public TranslatedExpression ConvertTo(IType targetType, ExpressionBuilder expressionBuilder, bool checkForOverflow = false)
 		{
@ -179,31 +182,66 @@ namespace ICSharpCode.Decompiler.CSharp
				@@ -179,31 +182,66 @@ namespace ICSharpCode.Decompiler.CSharp
 					LdcI4(compilation, 0).ConvertTo(targetType, expressionBuilder, checkForOverflow)
 				).WithoutILInstruction().WithRR(new ResolveResult(targetType));
 			}
-			// Special-case IntPtr and UIntPtr: they behave slightly weird, e.g. converting to them always checks for overflow,
-			// but converting from them never checks for overflow.
-			if (checkForOverflow && type.IsKnownType(KnownTypeCode.IntPtr) && !targetType.IsKnownType(KnownTypeCode.Int64)) {
-				// Convert through `long` instead.
+			if (targetType.IsKnownType(KnownTypeCode.Boolean)) {
+				// convert to boolean through byte, to simulate the truncation to 8 bits
+				return this.ConvertTo(compilation.FindType(KnownTypeCode.Byte), expressionBuilder, checkForOverflow)
+					.ConvertToBoolean(expressionBuilder);
+			}
+			
+			// Special-case IntPtr and UIntPtr: they behave extremely weirdly; see IntPtr.txt for details.
+			if (type.IsKnownType(KnownTypeCode.IntPtr)) { // Conversion from IntPtr
+				// Direct cast only works correctly for IntPtr -> long.
+				// IntPtr -> int works correctly only in checked context.
+				// Everything else can be worked around by casting via long.
+				if (!(targetType.IsKnownType(KnownTypeCode.Int64) || checkForOverflow && targetType.IsKnownType(KnownTypeCode.Int32))) {
 					return this.ConvertTo(compilation.FindType(KnownTypeCode.Int64), expressionBuilder, checkForOverflow)
 						.ConvertTo(targetType, expressionBuilder, checkForOverflow);
-			} else if (checkForOverflow && type.IsKnownType(KnownTypeCode.UIntPtr) && !targetType.IsKnownType(KnownTypeCode.UInt64)) {
-				// Convert through `ulong` instead.
+				}
+			} else if (type.IsKnownType(KnownTypeCode.UIntPtr)) { // Conversion from UIntPtr
+				// Direct cast only works correctly for UIntPtr -> ulong.
+				// UIntPtr -> uint works correctly only in checked context.
+				// Everything else can be worked around by casting via ulong.
+				if (!(targetType.IsKnownType(KnownTypeCode.UInt64) || checkForOverflow && targetType.IsKnownType(KnownTypeCode.UInt32))) {
 					return this.ConvertTo(compilation.FindType(KnownTypeCode.UInt64), expressionBuilder, checkForOverflow)
 						.ConvertTo(targetType, expressionBuilder, checkForOverflow);
 				}
-			
-			if (targetType.IsKnownType(KnownTypeCode.Boolean)) {
-				// convert to boolean through byte, to simulate the truncation to 8 bits
-				return this.ConvertTo(compilation.FindType(KnownTypeCode.Byte), expressionBuilder, checkForOverflow)
-					.ConvertToBoolean(expressionBuilder);
 			}
-			if ((targetType.IsKnownType(KnownTypeCode.IntPtr) || targetType.IsKnownType(KnownTypeCode.UIntPtr))
-			    && type.Kind != TypeKind.Pointer && !checkForOverflow)
-			{
-				// (u)long -> (U)IntPtr casts in C# can throw overflow exceptions in 32-bit mode, even in unchecked context.
-				// To avoid those, convert via `void*`.
+			if (targetType.IsKnownType(KnownTypeCode.IntPtr)) { // Conversion to IntPtr
+				if (type.IsKnownType(KnownTypeCode.Int32)) {
+					// normal casts work for int (both in checked and unchecked context)
+				} else if (checkForOverflow) {
+					// if overflow-checking is enabled, we can simply cast via long:
+					// (and long itself works directly in checked context)
+					if (!type.IsKnownType(KnownTypeCode.Int64)) {
+						return this.ConvertTo(compilation.FindType(KnownTypeCode.Int64), expressionBuilder, checkForOverflow)
+							.ConvertTo(targetType, expressionBuilder, checkForOverflow);
+					}
+				} else {
+					// If overflow-checking is disabled, the only way to truncate to native size
+					// without throwing an exception in 32-bit mode is to use a pointer type.
+					if (type.Kind != TypeKind.Pointer) {
+						return this.ConvertTo(new PointerType(compilation.FindType(KnownTypeCode.Void)), expressionBuilder, checkForOverflow)
+							.ConvertTo(targetType, expressionBuilder, checkForOverflow);
+					}
+				}
+			} else if (targetType.IsKnownType(KnownTypeCode.UIntPtr)) { // Conversion to UIntPtr
+				if (type.IsKnownType(KnownTypeCode.UInt32) || type.Kind == TypeKind.Pointer) {
+					// normal casts work for uint and pointers (both in checked and unchecked context)
+				} else if (checkForOverflow) {
+					// if overflow-checking is enabled, we can simply cast via ulong:
+					// (and ulong itself works directly in checked context)
+					if (!type.IsKnownType(KnownTypeCode.UInt64)) {
+						return this.ConvertTo(compilation.FindType(KnownTypeCode.UInt64), expressionBuilder, checkForOverflow)
+							.ConvertTo(targetType, expressionBuilder, checkForOverflow);
+					}
+				} else {
+					// If overflow-checking is disabled, the only way to truncate to native size
+					// without throwing an exception in 32-bit mode is to use a pointer type.
 					return this.ConvertTo(new PointerType(compilation.FindType(KnownTypeCode.Void)), expressionBuilder, checkForOverflow)
 						.ConvertTo(targetType, expressionBuilder, checkForOverflow);
 				}
+			}
+
 			if (targetType.Kind == TypeKind.Pointer && type.Kind == TypeKind.Enum) {
 				// enum to pointer: C# doesn't allow such casts
 				// -> convert via underlying type
--- a/ICSharpCode.Decompiler/Tests/RoundtripAssembly.cs
+++ b/ICSharpCode.Decompiler/Tests/RoundtripAssembly.cs
@ -98,7 +98,7 @@ namespace ICSharpCode.Decompiler.Tests
				@@ -98,7 +98,7 @@ namespace ICSharpCode.Decompiler.Tests
 		{
 			try {
 				RunWithOutput("Random Tests\\TestCases", "TestCase-1.exe");
-			} catch (AssertionException ex) {
+			} catch (CompilationFailedException ex) {
 				Assert.Ignore(ex.Message);
 			}
 		}
--- a/ILSpy.sln
+++ b/ILSpy.sln
@ -8,6 +8,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "doc", "doc", "{F45DB999-7E7
				@@ -8,6 +8,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "doc", "doc", "{F45DB999-7E7
 	ProjectSection(SolutionItems) = preProject
 		doc\Command Line.txt = doc\Command Line.txt
 		doc\ILAst.txt = doc\ILAst.txt
+		doc\IntPtr.txt = doc\IntPtr.txt
 	EndProjectSection
 EndProject
 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ILSpy", "ILSpy\ILSpy.csproj", "{1E85EFF9-E370-4683-83E4-8A3D063FF791}"
--- a/doc/IntPtr.txt
+++ b/doc/IntPtr.txt
@ -0,0 +1,94 @@
				@@ -0,0 +1,94 @@
+C# casts to/from IntPtr and UIntPtr don't behave like one would expect from the normal C# primitive types.
+For example, they don't fully respect the checked/unchecked context.
+
+First, let's consider what methods we have available for converting between (U)IntPtr and normal C# types.
+
+Primitives for constructing IntPtr/UIntPtr:
+ * new IntPtr(int)    equivalent to: conv i4->i <sign extend>
+ * new IntPtr(long)   equivalent to: conv.ovf i8->i
+ * new IntPtr(void*)  equivalent to: nop
+ * new UIntPtr(uint)  equivalent to: conv u4->u <zero extend>
+ * new UIntPtr(ulong) equivalent to: conv.ovf u8->u
+ * new UIntPtr(void*) equivalent to: nop
+
+Primitives for getting the value back out:
+ * IntPtr.ToInt32()    equivalent to: conv.ovf i->i4
+ * IntPtr.ToInt64()    equivalent to: conv i->i8 <sign extend>
+ * IntPtr.ToPointer()  equivalent to: nop
+ * UIntPtr.ToUInt32()  equivalent to: conv.ovf u->u4
+ * UIntPtr.ToUInt64()  equivalent to: conv u->u8 <zero extend>
+ * UIntPtr.ToPointer() equivalent to: nop
+
+The (U)IntPtr.op_Explicit implementations are equivalent to the corresponding primitives.
+(void*) is a useful type because all (U)IntPtr<->void* conversions are no-ops.
+C# pointer types act like a normal C# unsigned integer type (of 'native int' size), so we can
+use `void*` whenever the target type is unsigned or the sign does not matter (overflow checking disabled).
+
+Next, we'll consider what the C# compiler does when casting between integer types and IntPtr.
+I tried all these conversions in both checked and unchecked mode, and the C# compiler was
+always generating the same code in both modes!
+
+OK = cast behavior is as if IntPtr was a built-in type and the context does not matter:
+	* sign/zero extension depending on source type
+	* never throws OverflowException, and is never supposed to
+CC = cast behavior is as if IntPtr was a built-in type and we are in a checked context:
+	* sign/zero extension depending on source type
+	* performs correct overflow checking as in a direct cast to native (u)int
+from -> to = C# cast
+generated opcode sequence = what csc.exe produces for that cast
+
+     from ->    to  : generated opcode sequence            overall effect equivalent to
+OK short  ->  IntPtr: call op_Explicit(int32)              conv i2->i <sign extend>
+OK ushort ->  IntPtr: call op_Explicit(int32)              conv u2->u <zero extend>
+                      Sign extension in op_Explicit does not matter because sign bit is always 0 at that point.
+OK int    ->  IntPtr: call op_Explicit(int32)              conv i4->i <sign extend>
+CC uint   ->  IntPtr: conv.u8 + call op_Explicit(int64)    conv.ovf u4->i <zero extend>
+CC long   ->  IntPtr: call op_Explicit(int64)              conv.ovf i8->i
+   ulong  ->  IntPtr: call op_Explicit(int64)              conv.ovf i8->i
+   short  -> UIntPtr: conv.i8 + call op_Explicit(uint64)   32-bit: conv.ovf i2->u <sign extend>;  64-bit: conv i2->i <sign extend>
+                                                              OverflowException for negative input values only on 32-bit!
+OK ushort -> UIntPtr: call op_Explicit(uint32)             conv u2->u <zero extend>
+   int    -> UIntPtr: conv.i8 + call op_Explicit(uint64)   32-bit: conv.ovf i4->u <sign extend>;  64-bit: conv i4->i <sign extend>
+                                                              OverflowException for negative input values only on 32-bit!
+OK uint   -> UIntPtr: call op_Explicit(uint32)             conv u4->u <zero extend>
+   long   -> UIntPtr: call op_Explicit(uint64)             conv.ovf u8->u
+CC ulong  -> UIntPtr: call op_Explicit(uint64)             conv.ovf u8->u
+
+If an unchecked conversion is desired and the desired entry is not marked 'OK',
+we work around the problem by casting sourceType->void*->(U)IntPtr.
+
+If a checked conversion is desired and the desired entry is not marked 'OK' or 'CC', we have to find a replacement.
+  signed type -> UIntPtr: (UIntPtr)(void*)value
+  ulong -> IntPtr:        (IntPtr)(long)value
+
+Continuing the conversion table for the other direction, (U)IntPtr to primitive types:
+     from ->    to  : generated opcode sequence            overall effect equivalent to
+   IntPtr -> short:  call int32 op_Explicit + conv.i2      conv.ovf i->i4; conv i4->i2
+   IntPtr -> ushort: call int32 op_Explicit + conv.u2      conv.ovf i->i4; conv i4->u2
+CC IntPtr -> int:    call int32 op_Explicit                conv.ovf i->i4
+   IntPtr -> uint:   call int32 op_Explicit                conv.ovf i->i4
+OK IntPtr -> long:   call int64 op_Explicit                conv i->i8 <sign extend>
+   IntPtr -> ulong:  call int64 op_Explicit                conv i->i8 <sign extend>
+   UIntPtr -> short:  call uint32 op_Explicit + conv.i2    conv.ovf u->u4; conv u4->i2
+   UIntPtr -> ushort: call uint32 op_Explicit + conv.u2    conv.ovf u->u4; conv u4->u2
+   UIntPtr -> int:    call uint32 op_Explicit              conv.ovf u->u4
+CC UIntPtr -> uint:   call uint32 op_Explicit              conv.ovf u->u4
+   UIntPtr -> long:   call uint64 op_Explicit              conv u->u8 <zero extend>
+OK UIntPtr -> ulong:  call uint64 op_Explicit              conv u->u8 <zero extend>
+
+If an unchecked conversion is desired and the desired entry is not marked 'OK',
+we work around the problem by casting (U)IntPtr->(u)long->targetType.
+(`void*` would also work instead of `ulong`/`long`, but let's avoid unsafe code where possible)
+
+If a checked conversion is desired and the desired entry is not marked 'OK' or 'CC',
+we also have to work around the problem, and this again works by casting via (u)long: (U)IntPtr->(u)long->targetType
+(note that `void*` is not always a valid alternative in this case)
+
+Finally, conversions between IntPtr and UIntPtr, or between IntPtr and `void*`, need special consideration:
+  * C# does not allow directly casting IntPtr <-> UIntPtr
+  * Casting via `void*` works but is always unchecked.
+  * These should work for checked conversions:
+     IntPtr -> UIntPtr: cast IntPtr->long->ulong->UIntPtr
+     IntPtr -> void*:   cast IntPtr->long->void*
+     UIntPtr -> IntPtr: cast UIntPtr->ulong->long->IntPtr
+     void*  -> IntPtr:  cast void*->long->IntPtr