From 481e05eabb737ac3fb03ed3b5edd79ed59828f71 Mon Sep 17 00:00:00 2001 From: Chicken-Bones Date: Mon, 13 Aug 2018 22:21:23 +1000 Subject: [PATCH] Better shortcircuit detection to avoid single condition switch statements --- .../TestCases/Pretty/Switch.cs | 10 +- .../TestCases/Pretty/Switch.il | 90 ++++++++--- .../TestCases/Pretty/Switch.opt.il | 51 +++++- .../TestCases/Pretty/Switch.opt.roslyn.il | 51 +++++- .../TestCases/Pretty/Switch.roslyn.il | 86 +++++++++-- .../IL/ControlFlow/SwitchDetection.cs | 145 +++++++++++++++--- 6 files changed, 358 insertions(+), 75 deletions(-) diff --git a/ICSharpCode.Decompiler.Tests/TestCases/Pretty/Switch.cs b/ICSharpCode.Decompiler.Tests/TestCases/Pretty/Switch.cs index ed55a23ae..3bb096a87 100644 --- a/ICSharpCode.Decompiler.Tests/TestCases/Pretty/Switch.cs +++ b/ICSharpCode.Decompiler.Tests/TestCases/Pretty/Switch.cs @@ -1023,13 +1023,21 @@ namespace ICSharpCode.Decompiler.Tests.TestCases.Pretty public static bool Loop8(char c, bool b, Func getChar) { if (b) { - while (c == ' ' || c == '\t') { + while ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) { c = getChar(); } } return true; } + + public static void Loop9(Func getChar) + { + char c; + do { + c = getChar(); + } while (c != -1 && c != '\n' && c != '\u2028' && c != '\u2029'); + } #endregion // Ensure correctness of SwitchDetection.UseCSharpSwitch control flow heuristics diff --git a/ICSharpCode.Decompiler.Tests/TestCases/Pretty/Switch.il b/ICSharpCode.Decompiler.Tests/TestCases/Pretty/Switch.il index eea9abefe..5f70f6ec0 100644 --- a/ICSharpCode.Decompiler.Tests/TestCases/Pretty/Switch.il +++ b/ICSharpCode.Decompiler.Tests/TestCases/Pretty/Switch.il @@ -3468,7 +3468,7 @@ bool b, class [mscorlib]System.Func`1 getChar) cil managed { - // Code size 47 (0x2f) + // Code size 64 (0x40) .maxstack 2 .locals init (bool V_0, bool V_1) @@ -3478,7 +3478,7 @@ IL_0003: ceq IL_0005: stloc.1 IL_0006: ldloc.1 - IL_0007: brtrue.s IL_0029 + IL_0007: brtrue.s IL_003a IL_0009: nop IL_000a: br.s IL_0016 @@ -3489,29 +3489,83 @@ IL_0013: starg.s c IL_0015: nop IL_0016: ldarg.0 - IL_0017: ldc.i4.s 32 - IL_0019: beq.s IL_0022 + IL_0017: ldc.i4.s 97 + IL_0019: blt.s IL_0020 IL_001b: ldarg.0 - IL_001c: ldc.i4.s 9 - IL_001e: ceq - IL_0020: br.s IL_0023 + IL_001c: ldc.i4.s 122 + IL_001e: ble.s IL_0033 - IL_0022: ldc.i4.1 - IL_0023: nop - IL_0024: stloc.1 - IL_0025: ldloc.1 - IL_0026: brtrue.s IL_000c + IL_0020: ldarg.0 + IL_0021: ldc.i4.s 65 + IL_0023: blt.s IL_002f - IL_0028: nop - IL_0029: ldc.i4.1 - IL_002a: stloc.0 - IL_002b: br.s IL_002d + IL_0025: ldarg.0 + IL_0026: ldc.i4.s 90 + IL_0028: cgt + IL_002a: ldc.i4.0 + IL_002b: ceq + IL_002d: br.s IL_0030 - IL_002d: ldloc.0 - IL_002e: ret + IL_002f: ldc.i4.0 + IL_0030: nop + IL_0031: br.s IL_0034 + + IL_0033: ldc.i4.1 + IL_0034: nop + IL_0035: stloc.1 + IL_0036: ldloc.1 + IL_0037: brtrue.s IL_000c + + IL_0039: nop + IL_003a: ldc.i4.1 + IL_003b: stloc.0 + IL_003c: br.s IL_003e + + IL_003e: ldloc.0 + IL_003f: ret } // end of method Switch::Loop8 + .method public hidebysig static void Loop9(class [mscorlib]System.Func`1 getChar) cil managed + { + // Code size 47 (0x2f) + .maxstack 2 + .locals init (char V_0, + bool V_1) + IL_0000: nop + IL_0001: nop + IL_0002: ldarg.0 + IL_0003: callvirt instance !0 class [mscorlib]System.Func`1::Invoke() + IL_0008: stloc.0 + IL_0009: nop + IL_000a: ldloc.0 + IL_000b: ldc.i4.m1 + IL_000c: beq.s IL_0028 + + IL_000e: ldloc.0 + IL_000f: ldc.i4.s 10 + IL_0011: beq.s IL_0028 + + IL_0013: ldloc.0 + IL_0014: ldc.i4 0x2028 + IL_0019: beq.s IL_0028 + + IL_001b: ldloc.0 + IL_001c: ldc.i4 0x2029 + IL_0021: ceq + IL_0023: ldc.i4.0 + IL_0024: ceq + IL_0026: br.s IL_0029 + + IL_0028: ldc.i4.0 + IL_0029: nop + IL_002a: stloc.1 + IL_002b: ldloc.1 + IL_002c: brtrue.s IL_0001 + + IL_002e: ret + } // end of method Switch::Loop9 + .method public hidebysig static void SwitchWithBreakCase(int32 i, bool b) cil managed { diff --git a/ICSharpCode.Decompiler.Tests/TestCases/Pretty/Switch.opt.il b/ICSharpCode.Decompiler.Tests/TestCases/Pretty/Switch.opt.il index b9809e001..c17a273e4 100644 --- a/ICSharpCode.Decompiler.Tests/TestCases/Pretty/Switch.opt.il +++ b/ICSharpCode.Decompiler.Tests/TestCases/Pretty/Switch.opt.il @@ -2622,10 +2622,10 @@ bool b, class [mscorlib]System.Func`1 getChar) cil managed { - // Code size 25 (0x19) + // Code size 35 (0x23) .maxstack 8 IL_0000: ldarg.1 - IL_0001: brfalse.s IL_0017 + IL_0001: brfalse.s IL_0021 IL_0003: br.s IL_000d @@ -2633,17 +2633,52 @@ IL_0006: callvirt instance !0 class [mscorlib]System.Func`1::Invoke() IL_000b: starg.s c IL_000d: ldarg.0 - IL_000e: ldc.i4.s 32 - IL_0010: beq.s IL_0005 + IL_000e: ldc.i4.s 97 + IL_0010: blt.s IL_0017 IL_0012: ldarg.0 - IL_0013: ldc.i4.s 9 - IL_0015: beq.s IL_0005 + IL_0013: ldc.i4.s 122 + IL_0015: ble.s IL_0005 - IL_0017: ldc.i4.1 - IL_0018: ret + IL_0017: ldarg.0 + IL_0018: ldc.i4.s 65 + IL_001a: blt.s IL_0021 + + IL_001c: ldarg.0 + IL_001d: ldc.i4.s 90 + IL_001f: ble.s IL_0005 + + IL_0021: ldc.i4.1 + IL_0022: ret } // end of method Switch::Loop8 + .method public hidebysig static void Loop9(class [mscorlib]System.Func`1 getChar) cil managed + { + // Code size 33 (0x21) + .maxstack 2 + .locals init (char V_0) + IL_0000: ldarg.0 + IL_0001: callvirt instance !0 class [mscorlib]System.Func`1::Invoke() + IL_0006: stloc.0 + IL_0007: ldloc.0 + IL_0008: ldc.i4.m1 + IL_0009: beq.s IL_0020 + + IL_000b: ldloc.0 + IL_000c: ldc.i4.s 10 + IL_000e: beq.s IL_0020 + + IL_0010: ldloc.0 + IL_0011: ldc.i4 0x2028 + IL_0016: beq.s IL_0020 + + IL_0018: ldloc.0 + IL_0019: ldc.i4 0x2029 + IL_001e: bne.un.s IL_0000 + + IL_0020: ret + } // end of method Switch::Loop9 + .method public hidebysig static void SwitchWithBreakCase(int32 i, bool b) cil managed { diff --git a/ICSharpCode.Decompiler.Tests/TestCases/Pretty/Switch.opt.roslyn.il b/ICSharpCode.Decompiler.Tests/TestCases/Pretty/Switch.opt.roslyn.il index 526a58edd..a5c1af55f 100644 --- a/ICSharpCode.Decompiler.Tests/TestCases/Pretty/Switch.opt.roslyn.il +++ b/ICSharpCode.Decompiler.Tests/TestCases/Pretty/Switch.opt.roslyn.il @@ -2790,10 +2790,10 @@ bool b, class [mscorlib]System.Func`1 getChar) cil managed { - // Code size 25 (0x19) + // Code size 35 (0x23) .maxstack 8 IL_0000: ldarg.1 - IL_0001: brfalse.s IL_0017 + IL_0001: brfalse.s IL_0021 IL_0003: br.s IL_000d @@ -2801,17 +2801,52 @@ IL_0006: callvirt instance !0 class [mscorlib]System.Func`1::Invoke() IL_000b: starg.s c IL_000d: ldarg.0 - IL_000e: ldc.i4.s 32 - IL_0010: beq.s IL_0005 + IL_000e: ldc.i4.s 97 + IL_0010: blt.s IL_0017 IL_0012: ldarg.0 - IL_0013: ldc.i4.s 9 - IL_0015: beq.s IL_0005 + IL_0013: ldc.i4.s 122 + IL_0015: ble.s IL_0005 - IL_0017: ldc.i4.1 - IL_0018: ret + IL_0017: ldarg.0 + IL_0018: ldc.i4.s 65 + IL_001a: blt.s IL_0021 + + IL_001c: ldarg.0 + IL_001d: ldc.i4.s 90 + IL_001f: ble.s IL_0005 + + IL_0021: ldc.i4.1 + IL_0022: ret } // end of method Switch::Loop8 + .method public hidebysig static void Loop9(class [mscorlib]System.Func`1 getChar) cil managed + { + // Code size 33 (0x21) + .maxstack 2 + .locals init (char V_0) + IL_0000: ldarg.0 + IL_0001: callvirt instance !0 class [mscorlib]System.Func`1::Invoke() + IL_0006: stloc.0 + IL_0007: ldloc.0 + IL_0008: ldc.i4.m1 + IL_0009: beq.s IL_0020 + + IL_000b: ldloc.0 + IL_000c: ldc.i4.s 10 + IL_000e: beq.s IL_0020 + + IL_0010: ldloc.0 + IL_0011: ldc.i4 0x2028 + IL_0016: beq.s IL_0020 + + IL_0018: ldloc.0 + IL_0019: ldc.i4 0x2029 + IL_001e: bne.un.s IL_0000 + + IL_0020: ret + } // end of method Switch::Loop9 + .method public hidebysig static void SwitchWithBreakCase(int32 i, bool b) cil managed { diff --git a/ICSharpCode.Decompiler.Tests/TestCases/Pretty/Switch.roslyn.il b/ICSharpCode.Decompiler.Tests/TestCases/Pretty/Switch.roslyn.il index e5f3b8b50..c08307c3d 100644 --- a/ICSharpCode.Decompiler.Tests/TestCases/Pretty/Switch.roslyn.il +++ b/ICSharpCode.Decompiler.Tests/TestCases/Pretty/Switch.roslyn.il @@ -3830,7 +3830,7 @@ bool b, class [mscorlib]System.Func`1 getChar) cil managed { - // Code size 43 (0x2b) + // Code size 59 (0x3b) .maxstack 2 .locals init (bool V_0, bool V_1, @@ -3839,7 +3839,7 @@ IL_0001: ldarg.1 IL_0002: stloc.0 IL_0003: ldloc.0 - IL_0004: brfalse.s IL_0025 + IL_0004: brfalse.s IL_0035 IL_0006: nop IL_0007: br.s IL_0013 @@ -3850,28 +3850,80 @@ IL_0010: starg.s c IL_0012: nop IL_0013: ldarg.0 - IL_0014: ldc.i4.s 32 - IL_0016: beq.s IL_001f + IL_0014: ldc.i4.s 97 + IL_0016: blt.s IL_001d IL_0018: ldarg.0 - IL_0019: ldc.i4.s 9 - IL_001b: ceq - IL_001d: br.s IL_0020 + IL_0019: ldc.i4.s 122 + IL_001b: ble.s IL_002f - IL_001f: ldc.i4.1 - IL_0020: stloc.1 - IL_0021: ldloc.1 - IL_0022: brtrue.s IL_0009 + IL_001d: ldarg.0 + IL_001e: ldc.i4.s 65 + IL_0020: blt.s IL_002c - IL_0024: nop - IL_0025: ldc.i4.1 - IL_0026: stloc.2 - IL_0027: br.s IL_0029 + IL_0022: ldarg.0 + IL_0023: ldc.i4.s 90 + IL_0025: cgt + IL_0027: ldc.i4.0 + IL_0028: ceq + IL_002a: br.s IL_002d - IL_0029: ldloc.2 - IL_002a: ret + IL_002c: ldc.i4.0 + IL_002d: br.s IL_0030 + + IL_002f: ldc.i4.1 + IL_0030: stloc.1 + IL_0031: ldloc.1 + IL_0032: brtrue.s IL_0009 + + IL_0034: nop + IL_0035: ldc.i4.1 + IL_0036: stloc.2 + IL_0037: br.s IL_0039 + + IL_0039: ldloc.2 + IL_003a: ret } // end of method Switch::Loop8 + .method public hidebysig static void Loop9(class [mscorlib]System.Func`1 getChar) cil managed + { + // Code size 46 (0x2e) + .maxstack 2 + .locals init (char V_0, + bool V_1) + IL_0000: nop + IL_0001: nop + IL_0002: ldarg.0 + IL_0003: callvirt instance !0 class [mscorlib]System.Func`1::Invoke() + IL_0008: stloc.0 + IL_0009: nop + IL_000a: ldloc.0 + IL_000b: ldc.i4.m1 + IL_000c: beq.s IL_0028 + + IL_000e: ldloc.0 + IL_000f: ldc.i4.s 10 + IL_0011: beq.s IL_0028 + + IL_0013: ldloc.0 + IL_0014: ldc.i4 0x2028 + IL_0019: beq.s IL_0028 + + IL_001b: ldloc.0 + IL_001c: ldc.i4 0x2029 + IL_0021: ceq + IL_0023: ldc.i4.0 + IL_0024: ceq + IL_0026: br.s IL_0029 + + IL_0028: ldc.i4.0 + IL_0029: stloc.1 + IL_002a: ldloc.1 + IL_002b: brtrue.s IL_0001 + + IL_002d: ret + } // end of method Switch::Loop9 + .method public hidebysig static void SwitchWithBreakCase(int32 i, bool b) cil managed { diff --git a/ICSharpCode.Decompiler/IL/ControlFlow/SwitchDetection.cs b/ICSharpCode.Decompiler/IL/ControlFlow/SwitchDetection.cs index 737387edd..d95fedb5b 100644 --- a/ICSharpCode.Decompiler/IL/ControlFlow/SwitchDetection.cs +++ b/ICSharpCode.Decompiler/IL/ControlFlow/SwitchDetection.cs @@ -73,9 +73,7 @@ namespace ICSharpCode.Decompiler.IL.ControlFlow n.Successors.ForEach(Analyze); } contextNode.Successors.ForEach(Analyze); - - foreach (var node in cfg.cfg) - node.Visited = false; + ResetVisited(cfg.cfg); int l = 1; foreach (var loopHead in loopHeads.OrderBy(n => n.PostOrderNumber)) @@ -286,15 +284,19 @@ namespace ICSharpCode.Decompiler.IL.ControlFlow // good enough indicator that the surrounding code also forms a switch statement if (analysis.ContainsILSwitch || MatchRoslynSwitchOnString()) return true; - + + // heuristic to determine if a block would be better represented as an if statement rather than switch int ifCount = analysis.InnerBlocks.Count + 1; - int labelCount = analysis.Sections.Where(s => !s.Key.SetEquals(defaultSectionKey)).Sum(s => s.Key.Intervals.Length); - // heuristic to determine if a block would be better represented as an if statement rather than a case statement - if (ifCount < labelCount) + int intervalCount = analysis.Sections.Where(s => !s.Key.SetEquals(defaultSectionKey)).Sum(s => s.Key.Intervals.Length); + if (ifCount < intervalCount) return false; + + (var flowNodes, var caseNodes) = AnalyzeControlFlow(); - // don't create switch statements with only one non-default label (provided the if option is short enough) - if (analysis.Sections.Count == 2 && ifCount <= 2) + // don't create switch statements with only one non-default label when the corresponding condition tree is flat + // it may be important that the switch-like conditions be inlined + // for example, a loop condition: while (c == '\n' || c == '\r') + if (analysis.Sections.Count == 2 && IsSingleCondition(flowNodes, caseNodes)) return false; // if there is no ILSwitch, there's still many control flow patterns that @@ -305,9 +307,10 @@ namespace ICSharpCode.Decompiler.IL.ControlFlow // These goto statements may be "goto case x" or "goto default", but these are a hint that the original code was not a switch, // and that the switch statement may be very poor quality. // Thus the rule of thumb is no goto statements if the original code didn't include them - if (SwitchUsesGoto(out var breakBlock)) + if (SwitchUsesGoto(flowNodes, caseNodes, out var breakBlock)) return false; + // valid switch construction, all code can be inlined if (breakBlock == null) return true; @@ -318,10 +321,6 @@ namespace ICSharpCode.Decompiler.IL.ControlFlow /// /// stloc switchValueVar(call ComputeStringHash(switchValue)) - /// - /// Previously, the roslyn case block heads were added to the flowBlocks for case control flow analysis. - /// This forbade goto case statements (as is the purpose of ValidatePotentialSwitchFlow) - /// Identifying the roslyn string switch head is a better indicator for UseCSharpSwitch /// private bool MatchRoslynSwitchOnString() { @@ -330,9 +329,10 @@ namespace ICSharpCode.Decompiler.IL.ControlFlow } /// - /// Determines if the analysed switch can be constructed without any gotos + /// Builds the control flow graph for the current container (if necessary), establishes loopContext + /// and returns the ControlFlowNodes corresponding to the inner flow and case blocks of the potential switch /// - private bool SwitchUsesGoto(out Block breakBlock) + private (List flowNodes, List caseNodes) AnalyzeControlFlow() { if (controlFlowGraph == null) controlFlowGraph = new ControlFlowGraph(currentContainer, context.CancellationToken); @@ -340,6 +340,9 @@ namespace ICSharpCode.Decompiler.IL.ControlFlow var switchHead = controlFlowGraph.GetNode(analysis.RootBlock); loopContext = new LoopContext(controlFlowGraph, switchHead); + var flowNodes = new List { switchHead }; + flowNodes.AddRange(analysis.InnerBlocks.Select(controlFlowGraph.GetNode)); + // grab the control flow nodes for blocks targetted by each section var caseNodes = new List(); foreach (var s in analysis.Sections) { @@ -351,13 +354,22 @@ namespace ICSharpCode.Decompiler.IL.ControlFlow caseNodes.Add(node); } - var flowBlocks = analysis.InnerBlocks.ToHashSet(); - flowBlocks.Add(analysis.RootBlock); - AddNullCase(flowBlocks, caseNodes); - + AddNullCase(flowNodes, caseNodes); + + Debug.Assert(flowNodes.SelectMany(n => n.Successors) + .All(n => flowNodes.Contains(n) || caseNodes.Contains(n) || loopContext.MatchContinue(n))); + + return (flowNodes, caseNodes); + } + + /// + /// Determines if the analysed switch can be constructed without any gotos + /// + private bool SwitchUsesGoto(List flowNodes, List caseNodes, out Block breakBlock) + { // cases with predecessors that aren't part of the switch logic // must either require "goto case" statements, or consist of a single "break;" - var externalCases = caseNodes.Where(c => c.Predecessors.Any(n => !flowBlocks.Contains(n.UserData))).ToList(); + var externalCases = caseNodes.Where(c => c.Predecessors.Any(n => !flowNodes.Contains(n))).ToList(); breakBlock = null; if (externalCases.Count > 1) @@ -381,7 +393,7 @@ namespace ICSharpCode.Decompiler.IL.ControlFlow /// Does some of the analysis of SwitchOnNullableTransform to add the null case control flow /// to the results of SwitchAnaylsis /// - private void AddNullCase(HashSet flowBlocks, List caseNodes) + private void AddNullCase(List flowNodes, List caseNodes) { if (analysis.RootBlock.IncomingEdgeCount != 1) return; @@ -406,7 +418,94 @@ namespace ICSharpCode.Decompiler.IL.ControlFlow return; //add the null case logic to the incoming flow blocks - flowBlocks.Add(nullableBlock); + flowNodes.Add(controlFlowGraph.GetNode(nullableBlock)); + } + /// + /// Pattern matching for short circuit expressions + /// p + /// |\ + /// | n + /// |/ \ + /// s c + /// + /// where + /// p: if (a) goto n; goto s; + /// n: if (b) goto c; goto s; + /// + /// Can simplify to + /// p|n + /// / \ + /// s c + /// + /// where: + /// p|n: if (a && b) goto c; goto s; + /// + /// Note that if n has only 1 successor, but is still a flow node, then a short circuit expression + /// has a target (c) with no corresponding block (leave) + /// + /// A node with 2 successors + /// The successor index to consider n (the other successor will be the common sibling) + private static bool IsShortCircuit(ControlFlowNode parent, int side) + { + var node = parent.Successors[side]; + var sibling = parent.Successors[side ^ 1]; + + if (!IsFlowNode(node) || node.Successors.Count > 2 || node.Predecessors.Count != 1) + return false; + + return node.Successors.Contains(sibling); + } + + /// + /// A flow node contains only two instructions, the first of which is an IfInstruction + /// A short circuit expression is comprised of a root block ending in an IfInstruction and one or more flow nodes + /// + static bool IsFlowNode(ControlFlowNode n) => ((Block)n.UserData).Instructions.FirstOrDefault() is IfInstruction; + + /// + /// Determines whether the flowNodes are can be reduced to a single condition via short circuit operators + /// + private bool IsSingleCondition(List flowNodes, List caseNodes) + { + if (flowNodes.Count == 1) + return true; + + var rootNode = controlFlowGraph.GetNode(analysis.RootBlock); + rootNode.Visited = true; + + // search down the tree, marking nodes as visited while they continue the current condition + var n = rootNode; + while (n.Successors.Count > 0 && (n == rootNode || IsFlowNode(n))) { + if (n.Successors.Count == 1) { + // if there is more than one case node, then a flow node with only one successor is not part of the initial condition + if (caseNodes.Count > 1) + break; + + n = n.Successors[0]; + } + else { // 2 successors + if (IsShortCircuit(n, 0)) + n = n.Successors[0]; + else if (IsShortCircuit(n, 1)) + n = n.Successors[1]; + else + break; + } + + n.Visited = true; + if (loopContext.MatchContinue(n)) + break; + } + + var ret = flowNodes.All(f => f.Visited); + ResetVisited(controlFlowGraph.cfg); + return ret; + } + + private static void ResetVisited(IEnumerable nodes) + { + foreach (var n in nodes) + n.Visited = false; } } }