// Copyright (c) 2014 Daniel Grunwald
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE
// FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using ICSharpCode.Decompiler.FlowAnalysis;
using ICSharpCode.Decompiler.IL.Transforms;
using ICSharpCode.Decompiler.Util;

namespace ICSharpCode.Decompiler.IL.ControlFlow
{
	/// <summary>
	/// Detect loops in IL AST.
	/// </summary>
	/// <remarks>
	/// Transform ordering:
	/// * LoopDetection should run before other control flow structures are detected.
	/// * Blocks should be basic blocks (not extended basic blocks) so that the natural loops
	///   don't include more instructions than strictly necessary.
	/// * Loop detection should run after the 'return block' is duplicated (ControlFlowSimplification).
	/// </remarks>
	public class LoopDetection : IBlockTransform
	{
		BlockTransformContext context;

		/// <summary>Block container corresponding to the current cfg.</summary>
		BlockContainer currentBlockContainer;

		/// <summary>
		/// Enabled during DetectSwitchBody, used by ExtendLoop and children
		/// </summary>
		private bool isSwitch;

		/// <summary>
		/// Used when isSwitch == true, to determine appropriate exit points within loops
		/// </summary>
		private SwitchDetection.LoopContext loopContext;

		/// <summary>
		/// Check whether 'block' is a loop head; and construct a loop instruction
		/// (nested BlockContainer) if it is.
		/// </summary>
		public void Run(Block block, BlockTransformContext context)
		{
			this.context = context;
			// LoopDetection runs early enough so that block should still
			// be in the original container at this point.
			Debug.Assert(block.Parent == context.ControlFlowGraph.Container);
			this.currentBlockContainer = context.ControlFlowGraph.Container;

			// Because this is a post-order block transform, we can assume that
			// any nested loops within this loop have already been constructed.

			if (block.Instructions.Last() is SwitchInstruction switchInst) {
				// Switch instructions support "break;" just like loops
				DetectSwitchBody(block, switchInst);
			}

			ControlFlowNode h = context.ControlFlowNode; // CFG node for our potential loop head
			Debug.Assert(h.UserData == block);
			Debug.Assert(!TreeTraversal.PreOrder(h, n => n.DominatorTreeChildren).Any(n => n.Visited));

			List<ControlFlowNode> loop = null;
			foreach (var t in h.Predecessors) {
				if (h.Dominates(t)) {
					// h->t is a back edge, and h is a loop header
					// Add the natural loop of t->h to the loop.

					// Definitions:
					// * A back edge is an edge t->h so that h dominates t.
					// * The natural loop of the back edge is the smallest set of nodes
					//   that includes the back edge and has no predecessors outside the set
					//   except for the predecessor of the header.

					if (loop == null) {
						loop = new List<ControlFlowNode>();
						loop.Add(h);
						// Mark loop header as visited so that the pre-order traversal
						// stops at the loop header.
						h.Visited = true;
					}
					t.TraversePreOrder(n => n.Predecessors, loop.Add);
				}
			}
			if (loop != null) {
				var headBlock = (Block)h.UserData;
				context.Step($"Construct loop with head {headBlock.Label}", headBlock);
				// loop now is the union of all natural loops with loop head h.

				// Ensure any block included into nested loops is also considered part of this loop:
				IncludeNestedContainers(loop);

				// Try to extend the loop to reduce the number of exit points:
				ExtendLoop(h, loop, out var exitPoint);
				IncludeUnreachablePredecessors(loop);

				// Sort blocks in the loop in reverse post-order to make the output look a bit nicer.
				// (if the loop doesn't contain nested loops, this is a topological sort)
				loop.Sort((a, b) => b.PostOrderNumber.CompareTo(a.PostOrderNumber));
				Debug.Assert(loop[0] == h);
				foreach (var node in loop) {
					node.Visited = false; // reset visited flag so that we can find outer loops
					Debug.Assert(h.Dominates(node) || !node.IsReachable, "The loop body must be dominated by the loop head");
				}
				ConstructLoop(loop, exitPoint);
			}
		}

		/// <summary>
		/// For each block in the input loop that is the head of a nested loop or switch,
		/// include all blocks from the nested container into the loop.
		///
		/// This ensures that all blocks that were included into inner loops are also
		/// included into the outer loop, thus keeping our loops well-nested.
		/// </summary>
		/// <remarks>
		/// More details for why this is necessary are here:
		/// https://github.com/icsharpcode/ILSpy/issues/915
		///
		/// Pre+Post-Condition: node.Visited iff loop.Contains(node)
		/// </remarks>
		void IncludeNestedContainers(List<ControlFlowNode> loop)
		{
			for (int i = 0; i < loop.Count; i++) {
				IncludeBlock((Block)loop[i].UserData);
			}

			void IncludeBlock(Block block)
			{
				foreach (var nestedContainer in block.Instructions.OfType<BlockContainer>()) {
					// Just in case the block has multiple nested containers (e.g. due to loop and switch),
					// also check the entry point:
					IncludeBlock(nestedContainer.EntryPoint);
					// Use normal processing for all non-entry-point blocks
					// (the entry-point itself doesn't have a CFG node, because it's newly created by this transform)
					for (int i = 1; i < nestedContainer.Blocks.Count; i++) {
						var node = context.ControlFlowGraph.GetNode(nestedContainer.Blocks[i]);
						Debug.Assert(loop[0].Dominates(node) || !node.IsReachable);
						if (!node.Visited) {
							node.Visited = true;
							loop.Add(node);
							// note: this block will be re-visited when the "i < loop.Count"
							// gets around to the new entry
						}
					}
				}
			}
		}

		#region ExtendLoop
		/// <summary>
		/// Given a natural loop, add additional CFG nodes to the loop in order
		/// to reduce the number of exit points out of the loop.
		/// We do this because C# only allows reaching a single exit point (with 'break'
		/// statements or when the loop condition evaluates to false), so we'd have
		/// to introduce 'goto' statements for any additional exit points.
		/// </summary>
		/// <remarks>
		/// Definition:
		/// A "reachable exit" is a branch/leave target that is reachable from the loop,
		/// but not dominated by the loop head. A reachable exit may or may not have a
		/// corresponding CFG node (depending on whether it is a block in the current block container).
		///   -> reachable exits are leaving the code region dominated by the loop
		///
		/// Definition:
		/// A loop "exit point" is a CFG node that is not itself part of the loop,
		/// but has at least one predecessor which is part of the loop.
		///   -> exit points are leaving the loop itself
		///
		/// Nodes can only be added to the loop if they are dominated by the loop head.
		/// When adding a node to the loop, we must also add all of that node's predecessors
		/// to the loop. (this ensures that the loop keeps its single entry point)
		///
		/// Goal: If possible, find a set of nodes that can be added to the loop so that there
		/// remains only a single exit point.
		/// Add as little code as possible to the loop to reach this goal.
		///
		/// This means we need to partition the set of nodes dominated by the loop entry point
		/// into two sets (in-loop and out-of-loop).
		/// Constraints:
		///  * the loop head itself is in-loop
		///  * there must not be any edge from an out-of-loop node to an in-loop node
		///    -> all predecessors of in-loop nodes are also in-loop
		///    -> all nodes in a cycle are part of the same partition
		/// Optimize:
		///  * use only a single exit point if at all possible
		///  * minimize the amount of code in the in-loop partition
		///    (thus: maximize the amount of code in the out-of-loop partition)
		///   "amount of code" could be measured as:
		///     * number of basic blocks
		///     * number of instructions directly in those basic blocks (~= number of statements)
		///     * number of instructions in those basic blocks (~= number of expressions)
		///       (we currently use the number of statements)
		///
		/// Observations:
		///  * If a node is in-loop, so are all its ancestors in the dominator tree (up to the loop entry point)
		///  * If there are no exits reachable from a node (i.e. all paths from that node lead to a return/throw instruction),
		///    it is valid to put the group of nodes dominated by that node into either partition independently of
		///    any other nodes except for the ancestors in the dominator tree.
		///       (exception: the loop head itself must always be in-loop)
		///
		/// There are two different cases we need to consider:
		/// 1) There are no exits reachable at all from the loop head.
		///    -> it is possible to create a loop with zero exit points by adding all nodes
		///       dominated by the loop to the loop.
		///    -> the only way to exit the loop is by "return;" or "throw;"
		/// 2) There are some exits reachable from the loop head.
		///
		/// In case 1, we can pick a single exit point freely by picking any node that has no reachable exits
		/// (other than the loop head).
		/// All nodes dominated by the exit point are out-of-loop, all other nodes are in-loop.
		/// See PickExitPoint() for the heuristic that picks the exit point in this case.
		///
		/// In case 2, we need to pick our exit point so that all paths from the loop head
		/// to the reachable exits run through that exit point.
		///
		/// This is a form of postdominance where the reachable exits are considered exit nodes,
		/// while "return;" or "throw;" instructions are not considered exit nodes.
		///
		/// Using this form of postdominance, we are looking for an exit point that post-dominates all nodes in the natural loop.
		/// --> a common ancestor in post-dominator tree.
		/// To minimize the amount of code in-loop, we pick the lowest common ancestor.
		/// All nodes dominated by the exit point are out-of-loop, all other nodes are in-loop.
		/// (using normal dominance as in case 1, not post-dominance!)
		///
		/// If it is impossible to use a single exit point for the loop, the lowest common ancestor will be the fake "exit node"
		/// used by the post-dominance analysis. In this case, we fall back to the old heuristic algorithm.
		///
		/// Requires and maintains the invariant that a node is marked as visited iff it is contained in the loop.
		/// </remarks>
		void ExtendLoop(ControlFlowNode loopHead, List<ControlFlowNode> loop, out ControlFlowNode exitPoint)
		{
			exitPoint = FindExitPoint(loopHead, loop);
			Debug.Assert(!loop.Contains(exitPoint), "Cannot pick an exit point that is part of the natural loop");
			if (exitPoint != null) {
				// Either we are in case 1 and just picked an exit that maximizes the amount of code
				// outside the loop, or we are in case 2 and found an exit point via post-dominance.
				// Note that if exitPoint == NoExitPoint, we end up adding all dominated blocks to the loop.
				var ep = exitPoint; // copy: 'out' parameters cannot be captured by the lambda below
				foreach (var node in TreeTraversal.PreOrder(loopHead, n => DominatorTreeChildren(n, ep))) {
					if (!node.Visited) {
						node.Visited = true;
						loop.Add(node);
					}
				}
			} else {
				// We are in case 2, but could not find a suitable exit point.
				// Heuristically try to minimize the number of exit points
				// (but we'll always end up with more than 1 exit and will require goto statements).
				ExtendLoopHeuristic(loopHead, loop, loopHead);
			}
		}

		/// <summary>
		/// Special control flow node (not part of any graph) that signifies that we want to construct a loop
		/// without any exit point.
		/// </summary>
		static readonly ControlFlowNode NoExitPoint = new ControlFlowNode();

		/// <summary>
		/// Finds a suitable single exit point for the specified loop.
		/// </summary>
		/// <returns>
		/// 1) If a suitable exit point was found: the control flow block that should be reached when breaking from the loop
		/// 2) If the loop should not have any exit point (extend by all dominated blocks): NoExitPoint
		/// 3) otherwise (exit point unknown, heuristically extend loop): null
		/// </returns>
		/// <remarks>This method must not write to the Visited flags on the CFG.</remarks>
		internal ControlFlowNode FindExitPoint(ControlFlowNode loopHead, IReadOnlyList<ControlFlowNode> naturalLoop)
		{
			bool hasReachableExit = HasReachableExit(loopHead);
			if (!hasReachableExit) {
				// Case 1:
				// There are no nodes n so that loopHead dominates a predecessor of n but not n itself
				// -> we could build a loop with zero exit points.
				if (IsPossibleForeachLoop((Block)loopHead.UserData, out var exitBranch)) {
					if (exitBranch != null) {
						// let's see if the target of the exit branch is a suitable exit point
						var cfgNode = loopHead.Successors.FirstOrDefault(n => n.UserData == exitBranch.TargetBlock);
						if (cfgNode != null && loopHead.Dominates(cfgNode) && !context.ControlFlowGraph.HasReachableExit(cfgNode)) {
							return cfgNode;
						}
					}
					return NoExitPoint;
				}
				ControlFlowNode exitPoint = null;
				int exitPointILOffset = -1;
				foreach (var node in loopHead.DominatorTreeChildren) {
					PickExitPoint(node, ref exitPoint, ref exitPointILOffset);
				}
				return exitPoint;
			} else {
				// Case 2:
				// We need to pick our exit point so that all paths from the loop head
				// to the reachable exits run through that exit point.
				var cfg = context.ControlFlowGraph.cfg;
				var revCfg = PrepareReverseCFG(loopHead, out int exitNodeArity);
				//ControlFlowNode.ExportGraph(cfg).Show("cfg");
				//ControlFlowNode.ExportGraph(revCfg).Show("rev");
				ControlFlowNode commonAncestor = revCfg[loopHead.UserIndex];
				Debug.Assert(commonAncestor.IsReachable);
				foreach (ControlFlowNode cfgNode in naturalLoop) {
					ControlFlowNode revNode = revCfg[cfgNode.UserIndex];
					if (revNode.IsReachable) {
						commonAncestor = Dominance.FindCommonDominator(commonAncestor, revNode);
					}
				}
				// All paths from within the loop to a reachable exit run through 'commonAncestor'.
				// However, this doesn't mean that 'commonAncestor' is valid as an exit point.
				// We walk up the post-dominator tree until we've got a valid exit point:
				ControlFlowNode exitPoint;
				while (commonAncestor.UserIndex >= 0) {
					exitPoint = cfg[commonAncestor.UserIndex];
					Debug.Assert(exitPoint.Visited == naturalLoop.Contains(exitPoint));
					// It's possible that 'commonAncestor' is itself part of the natural loop.
					// If so, it's not a valid exit point.
					if (!exitPoint.Visited && ValidateExitPoint(loopHead, exitPoint)) {
						// we found an exit point
						return exitPoint;
					}
					commonAncestor = commonAncestor.ImmediateDominator;
				}
				// least common post-dominator is the artificial exit node
				// This means we're in one of two cases:
				// * The loop might have multiple exit points.
				//     -> we should return null
				// * The loop has a single exit point that wasn't considered during post-dominance analysis.
				//     (which means the single exit isn't dominated by the loop head)
				//     -> we should return NoExitPoint so that all code dominated by the loop head is included into the loop
				if (exitNodeArity > 1)
					return null;

				// Exit node is on the very edge of the tree, and isn't important for determining inclusion
				// Still necessary for switch detection to insert correct leave statements
				if (exitNodeArity == 1 && isSwitch)
					return loopContext.GetBreakTargets(loopHead).Distinct().Single();

				// If exitNodeArity == 0, we should maybe test whether our exits out of the block container are all compatible?
				// but I don't think it hurts to have a bit too much code inside the loop in this rare case.
				return NoExitPoint;
			}
		}

		/// <summary>
		/// Validates an exit point.
		///
		/// An exit point is invalid iff there is a node reachable from the exit point that
		/// is dominated by the loop head, but not by the exit point.
		/// (i.e. this method returns false iff the exit point's dominance frontier contains
		/// a node dominated by the loop head. but we implement this the slow way because
		/// we don't have dominance frontiers precomputed)
		/// </summary>
		/// <remarks>
		/// We need this because it's possible that there's a return block (thus reverse-unreachable node ignored by post-dominance)
		/// that is reachable both directly from the loop, and from the exit point.
		/// </remarks>
		bool ValidateExitPoint(ControlFlowNode loopHead, ControlFlowNode exitPoint)
		{
			var cfg = context.ControlFlowGraph;
			return IsValid(exitPoint);

			bool IsValid(ControlFlowNode node)
			{
				if (!cfg.HasReachableExit(node)) {
					// Optimization: if the dominance frontier is empty, we don't need
					// to check every node.
					return true;
				}
				foreach (var succ in node.Successors) {
					if (loopHead != succ && loopHead.Dominates(succ) && !exitPoint.Dominates(succ))
						return false;
				}
				foreach (var child in node.DominatorTreeChildren) {
					if (!IsValid(child))
						return false;
				}
				return true;
			}
		}

		/// <summary>
		/// Extension of ControlFlowGraph.HasReachableExit
		/// Uses loopContext.GetBreakTargets().Any() when analyzing switches to avoid
		/// classifying continue blocks as reachable exits.
		/// </summary>
		bool HasReachableExit(ControlFlowNode node) => isSwitch
			? loopContext.GetBreakTargets(node).Any()
			: context.ControlFlowGraph.HasReachableExit(node);

		/// <summary>
		/// Returns the children in a loop dominator tree, with an optional exit point
		/// Avoids returning continue statements when analysing switches (because increment blocks can be dominated)
		/// </summary>
		IEnumerable<ControlFlowNode> DominatorTreeChildren(ControlFlowNode n, ControlFlowNode exitPoint) =>
			n.DominatorTreeChildren.Where(c => c != exitPoint && (!isSwitch || !loopContext.MatchContinue(c)));

		/// <summary>
		/// Pick exit point by picking any node that has no reachable exits.
		///
		/// In the common case where the code was compiled with a compiler that emits IL code
		/// in source order (like the C# compiler), we can find the "real" exit point
		/// by simply picking the block with the highest IL offset.
		/// So let's do that instead of maximizing amount of code.
		/// </summary>
		/// <param name="node">Dominator tree node at which to start the search.</param>
		/// <param name="exitPoint">Best exit point found so far; replaced when a candidate with a higher IL offset is found.</param>
		/// <param name="exitPointILOffset">Start IL offset of <paramref name="exitPoint"/>; updated together with it.</param>
		/// <remarks>This method must not write to the Visited flags on the CFG.</remarks>
		void PickExitPoint(ControlFlowNode node, ref ControlFlowNode exitPoint, ref int exitPointILOffset)
		{
			if (isSwitch && loopContext.MatchContinue(node))
				return;

			Block block = (Block)node.UserData;
			if (block.StartILOffset > exitPointILOffset
				&& !HasReachableExit(node)
				&& block.Parent == currentBlockContainer)
			{
				// HasReachableExit(node) == false
				// -> there are no nodes n so that `node` dominates a predecessor of n but not n itself
				// -> there is no control flow out of `node` back into the loop, so it's usable as exit point

				// Additionally, we require that the block wasn't already moved into a nested loop,
				// since there's no way to jump into the middle of that loop when we need to exit.
				// NB: this is the only reason why we detect nested loops before outer loops:
				// If we detected the outer loop first, the outer loop might pick an exit point
				// that prevents us from finding a nice exit for the inner loops, causing
				// unnecessary gotos.
				exitPoint = node;
				exitPointILOffset = block.StartILOffset;
				return; // don't visit children, they are likely to have even later IL offsets and we'd end up
						// moving almost all of the code into the loop.
			}
			foreach (var child in node.DominatorTreeChildren) {
				PickExitPoint(child, ref exitPoint, ref exitPointILOffset);
			}
		}

		/// <summary>
		/// Constructs a new control flow graph.
		/// Each node cfg[i] has a corresponding node rev[i].
		/// Edges are only created for nodes dominated by loopHead, and are in reverse from their direction
		/// in the primary CFG.
		/// An artificial exit node is used for edges that leave the set of nodes dominated by loopHead,
		/// or that leave the block Container.
		/// </summary>
		/// <param name="loopHead">Entry point of the loop.</param>
		/// <param name="exitNodeArity">out: The number of different CFG nodes.
		/// Possible values:
		///  0 = no CFG nodes used as exit nodes (although edges leaving the block container might still be exits);
		///  1 = a single CFG node (not dominated by loopHead) was used as an exit node;
		///  2 = more than one CFG node (not dominated by loopHead) was used as an exit node.
		/// </param>
		/// <returns>
		/// The reverse graph; the last array element is the artificial exit node (UserIndex == -1).
		/// </returns>
		ControlFlowNode[] PrepareReverseCFG(ControlFlowNode loopHead, out int exitNodeArity)
		{
			ControlFlowNode[] cfg = context.ControlFlowGraph.cfg;
			ControlFlowNode[] rev = new ControlFlowNode[cfg.Length + 1];
			for (int i = 0; i < cfg.Length; i++) {
				rev[i] = new ControlFlowNode { UserIndex = i, UserData = cfg[i].UserData };
			}
			ControlFlowNode nodeTreatedAsExitNode = null;
			bool multipleNodesTreatedAsExitNodes = false;
			ControlFlowNode exitNode = new ControlFlowNode { UserIndex = -1 };
			rev[cfg.Length] = exitNode;
			for (int i = 0; i < cfg.Length; i++) {
				if (!loopHead.Dominates(cfg[i]) || isSwitch && cfg[i] != loopHead && loopContext.MatchContinue(cfg[i]))
					continue;

				// Add reverse edges for all edges in cfg
				foreach (var succ in cfg[i].Successors) {
					// edges to outer loops still count as exits (labelled continue not implemented)
					if (isSwitch && loopContext.MatchContinue(succ, 1))
						continue;

					if (loopHead.Dominates(succ)) {
						rev[succ.UserIndex].AddEdgeTo(rev[i]);
					} else {
						if (nodeTreatedAsExitNode == null)
							nodeTreatedAsExitNode = succ;
						if (nodeTreatedAsExitNode != succ)
							multipleNodesTreatedAsExitNodes = true;
						exitNode.AddEdgeTo(rev[i]);
					}
				}
				if (context.ControlFlowGraph.HasDirectExitOutOfContainer(cfg[i])) {
					exitNode.AddEdgeTo(rev[i]);
				}
			}
			if (multipleNodesTreatedAsExitNodes)
				exitNodeArity = 2; // more than 1
			else if (nodeTreatedAsExitNode != null)
				exitNodeArity = 1;
			else
				exitNodeArity = 0;
			Dominance.ComputeDominance(exitNode, context.CancellationToken);
			return rev;
		}

		/// <summary>
		/// Checks whether <paramref name="loopHead"/> has the shape of a foreach loop head:
		/// it must consist of exactly an if-instruction (whose condition is a possibly negated
		/// call to a method named "MoveNext" on a local variable) plus one more instruction,
		/// and it must be the (effective) entry point of the try-block of a try-finally.
		/// </summary>
		/// <param name="loopHead">The candidate loop head block.</param>
		/// <param name="exitBranch">
		/// The instruction executed when MoveNext() returns false, if that instruction is a Branch;
		/// otherwise null. May be non-null even if the method returns false.
		/// </param>
		static bool IsPossibleForeachLoop(Block loopHead, out Branch exitBranch)
		{
			exitBranch = null;
			var container = (BlockContainer)loopHead.Parent;
			if (!(container.SlotInfo == TryInstruction.TryBlockSlot && container.Parent is TryFinally))
				return false;
			if (loopHead.Instructions.Count != 2)
				return false;
			if (!loopHead.Instructions[0].MatchIfInstruction(out var condition, out var trueInst))
				return false;
			var falseInst = loopHead.Instructions[1];
			// Strip logical negations; each one swaps the meaning of the two branches.
			while (condition.MatchLogicNot(out var arg)) {
				condition = arg;
				ExtensionMethods.Swap(ref trueInst, ref falseInst);
			}
			if (!(condition is CallInstruction call && call.Method.Name == "MoveNext"))
				return false;
			if (!(call.Arguments.Count == 1 && call.Arguments[0].MatchLdLocRef(out var enumeratorVar)))
				return false;
			exitBranch = falseInst as Branch;

			// Check that loopHead is entry-point of try-block:
			Block entryPoint = container.EntryPoint;
			while (entryPoint.IncomingEdgeCount == 1 && entryPoint.Instructions.Count == 1 && entryPoint.Instructions[0].MatchBranch(out var targetBlock)) {
				// skip blocks that only branch to another block
				entryPoint = targetBlock;
			}
			return entryPoint == loopHead;
		}
		#endregion

		#region ExtendLoop (fall-back heuristic)
		/// <summary>
		/// This function implements a heuristic algorithm that tries to reduce the number of exit
		/// points. It is only used as fall-back when it is impossible to use a single exit point.
		/// </summary>
		/// <remarks>
		/// This heuristic loop extension algorithm traverses the loop head's dominator tree in pre-order.
		/// For each candidate node, we detect whether adding it to the loop reduces the number of exit points.
		/// If it does, the candidate is added to the loop.
		///
		/// Adding a node to the loop has two effects on the number of exit points:
		/// * exit points that were added to the loop are no longer exit points, thus reducing the total number of exit points
		/// * successors of the newly added nodes might be new, additional exit points
		///
		/// Requires and maintains the invariant that a node is marked as visited iff it is contained in the loop.
		/// </remarks>
		void ExtendLoopHeuristic(ControlFlowNode loopHead, List<ControlFlowNode> loop, ControlFlowNode candidate)
		{
			Debug.Assert(candidate.Visited == loop.Contains(candidate));
			if (!candidate.Visited) {
				// This node not yet part of the loop, but might be added
				List<ControlFlowNode> additionalNodes = new List<ControlFlowNode>();
				// Find additionalNodes nodes and mark them as visited.
				candidate.TraversePreOrder(n => n.Predecessors, additionalNodes.Add);
				// This means Visited now represents the candidate extended loop.
				// Determine new exit points that are reachable from the additional nodes
				// (note: some of these might have previously been exit points, too)
				var newExitPoints = additionalNodes.SelectMany(n => n.Successors).Where(n => !n.Visited).ToHashSet();
				// Make visited represent the unextended loop, so that we can measure the exit points
				// in the old state.
				foreach (var node in additionalNodes)
					node.Visited = false;
				// Measure number of added and removed exit points
				int removedExitPoints = additionalNodes.Count(IsExitPoint);
				int addedExitPoints = newExitPoints.Count(n => !IsExitPoint(n));
				if (removedExitPoints > addedExitPoints) {
					// We can reduce the number of exit points by adding the candidate node to the loop.
					candidate.TraversePreOrder(n => n.Predecessors, loop.Add);
				}
			}
			// Pre-order traversal of dominator tree
			foreach (var node in candidate.DominatorTreeChildren) {
				ExtendLoopHeuristic(loopHead, loop, node);
			}
		}

		/// <summary>
		/// Gets whether 'node' is an exit point for the loop marked by the Visited flag.
		/// </summary>
		bool IsExitPoint(ControlFlowNode node)
		{
			if (node.Visited)
				return false; // nodes in the loop are not exit points
			foreach (var pred in node.Predecessors) {
				if (pred.Visited)
					return true;
			}
			return false;
		}
		#endregion

		/// <summary>
		/// While our normal dominance logic ensures the loop has just a single reachable entry point,
		/// it's possible that there are unreachable code blocks that have jumps into the loop.
		/// We'll also include those into the loop.
		///
		/// Requires and maintains the invariant that a node is marked as visited iff it is contained in the loop.
		/// </summary>
		private void IncludeUnreachablePredecessors(List<ControlFlowNode> loop)
		{
			// Starts at 1: predecessors of the loop head (loop[0]) are legitimately outside the loop.
			// The list may grow while iterating; newly added nodes are processed as well.
			for (int i = 1; i < loop.Count; i++) {
				Debug.Assert(loop[i].Visited);
				foreach (var pred in loop[i].Predecessors) {
					if (!pred.Visited) {
						if (pred.IsReachable) {
							Debug.Fail("All jumps into the loop body should go through the entry point");
						} else {
							pred.Visited = true;
							loop.Add(pred);
						}
					}
				}
			}
		}

		/// <summary>
		/// Move the blocks associated with the loop into a new block container.
		/// </summary>
		/// <param name="loop">The nodes forming the loop; loop[0] is the loop head.</param>
		/// <param name="exitPoint">
		/// The exit point of the loop. If its UserData is a block, branches to that block from
		/// within the loop are replaced by 'leave' instructions; may be null.
		/// </param>
		void ConstructLoop(List<ControlFlowNode> loop, ControlFlowNode exitPoint)
		{
			Block oldEntryPoint = (Block)loop[0].UserData;
			Block exitTargetBlock = (Block)exitPoint?.UserData;

			BlockContainer loopContainer = new BlockContainer(ContainerKind.Loop);
			Block newEntryPoint = new Block();
			loopContainer.Blocks.Add(newEntryPoint);
			// Move contents of oldEntryPoint to newEntryPoint
			// (we can't move the block itself because it might be the target of branch instructions outside the loop)
			newEntryPoint.Instructions.ReplaceList(oldEntryPoint.Instructions);
			newEntryPoint.AddILRange(oldEntryPoint);
			oldEntryPoint.Instructions.ReplaceList(new[] { loopContainer });
			if (exitTargetBlock != null)
				oldEntryPoint.Instructions.Add(new Branch(exitTargetBlock));

			loopContainer.AddILRange(newEntryPoint);
			MoveBlocksIntoContainer(loop, loopContainer);

			// Rewrite branches within the loop from oldEntryPoint to newEntryPoint:
			foreach (var branch in loopContainer.Descendants.OfType<Branch>()) {
				if (branch.TargetBlock == oldEntryPoint) {
					branch.TargetBlock = newEntryPoint;
				} else if (branch.TargetBlock == exitTargetBlock) {
					branch.ReplaceWith(new Leave(loopContainer).WithILRange(branch));
				}
			}
		}

		/// <summary>
		/// Moves the blocks of all nodes in <paramref name="loop"/> except the head (index 0)
		/// from the current block container into <paramref name="loopContainer"/>.
		/// </summary>
		private void MoveBlocksIntoContainer(List<ControlFlowNode> loop, BlockContainer loopContainer)
		{
			// Move other blocks into the loop body: they're all dominated by the loop header,
			// and thus cannot be the target of branch instructions outside the loop.
			for (int i = 1; i < loop.Count; i++) {
				Block block = (Block)loop[i].UserData;
				// some blocks might already be in use by nested loops that were detected earlier;
				// don't move those (they'll be implicitly moved when the block containing the
				// nested loop container is moved).
				if (block.Parent == currentBlockContainer) {
					Debug.Assert(block.ChildIndex != 0);
					int oldChildIndex = block.ChildIndex;
					loopContainer.Blocks.Add(block);
					currentBlockContainer.Blocks.SwapRemoveAt(oldChildIndex);
				}
			}
			for (int i = 1; i < loop.Count; i++) {
				// Verify that we moved all loop blocks into the loop container.
				// If we wanted to move any blocks already in use by a nested loop,
				// this means we check that the whole nested loop got moved.
				Block block = (Block)loop[i].UserData;
				Debug.Assert(block.IsDescendantOf(loopContainer));
			}
		}

		/// <summary>
		/// Constructs a switch container (nested BlockContainer) for the switch instruction
		/// at the end of <paramref name="block"/>, moving the blocks that belong to the
		/// switch body into it.
		/// </summary>
		/// <param name="block">The block that ends with <paramref name="switchInst"/>.</param>
		/// <param name="switchInst">The switch instruction; must be the last instruction of <paramref name="block"/>.</param>
		private void DetectSwitchBody(Block block, SwitchInstruction switchInst)
		{
			Debug.Assert(block.Instructions.Last() == switchInst);
			ControlFlowNode h = context.ControlFlowNode; // CFG node for our switch head
			Debug.Assert(h.UserData == block);
			Debug.Assert(!TreeTraversal.PreOrder(h, n => n.DominatorTreeChildren).Any(n => n.Visited));

			isSwitch = true;
			try {
				loopContext = new SwitchDetection.LoopContext(context.ControlFlowGraph, h);

				var nodesInSwitch = new List<ControlFlowNode>();
				nodesInSwitch.Add(h);
				h.Visited = true;
				ExtendLoop(h, nodesInSwitch, out var exitPoint);
				if (exitPoint != null && h.Dominates(exitPoint) && exitPoint.Predecessors.Count == 1 && !HasReachableExit(exitPoint)) {
					// If the exit point is reachable from just one single "break;",
					// it's better to move the code into the switch.
					// (unlike loops which should not be nested unless necessary,
					//  nesting switches makes it clearer in which cases a piece of code is reachable)
					nodesInSwitch.AddRange(TreeTraversal.PreOrder(exitPoint, p => p.DominatorTreeChildren));
					foreach (var node in nodesInSwitch) {
						node.Visited = true;
					}
					exitPoint = null;
				}

				IncludeUnreachablePredecessors(nodesInSwitch);

				context.Step("Create BlockContainer for switch", switchInst);
				// Sort blocks in the loop in reverse post-order to make the output look a bit nicer.
				// (if the loop doesn't contain nested loops, this is a topological sort)
				nodesInSwitch.Sort((a, b) => b.PostOrderNumber.CompareTo(a.PostOrderNumber));
				Debug.Assert(nodesInSwitch[0] == h);
				foreach (var node in nodesInSwitch) {
					node.Visited = false; // reset visited flag so that we can find outer loops
					Debug.Assert(h.Dominates(node) || !node.IsReachable, "The switch body must be dominated by the switch head");
				}

				BlockContainer switchContainer = new BlockContainer(ContainerKind.Switch);
				Block newEntryPoint = new Block();
				newEntryPoint.AddILRange(switchInst);
				switchContainer.Blocks.Add(newEntryPoint);
				newEntryPoint.Instructions.Add(switchInst);
				block.Instructions[block.Instructions.Count - 1] = switchContainer;

				Block exitTargetBlock = (Block)exitPoint?.UserData;
				if (exitTargetBlock != null) {
					block.Instructions.Add(new Branch(exitTargetBlock));
				}

				switchContainer.AddILRange(newEntryPoint);
				MoveBlocksIntoContainer(nodesInSwitch, switchContainer);

				// Replace branches to the exit target within the switch by 'leave' instructions:
				foreach (var branch in switchContainer.Descendants.OfType<Branch>()) {
					if (branch.TargetBlock == exitTargetBlock) {
						branch.ReplaceWith(new Leave(switchContainer).WithILRange(branch));
					}
				}
			} finally {
				// Always leave switch mode, even if an exception (e.g. cancellation) propagates;
				// otherwise later loop detection on this instance would wrongly consult loopContext.
				isSwitch = false;
			}
		}
	}
}