// Copyright (c) 2014 Daniel Grunwald
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this
// software and associated documentation files (the "Software"), to deal in the Software
// without restriction, including without limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or
// substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE
// FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using ICSharpCode.Decompiler.FlowAnalysis;

namespace ICSharpCode.Decompiler.IL.ControlFlow
{
	/// <summary>
	/// Detect loops in IL AST.
	/// </summary>
	/// <remarks>
	/// Transform ordering:
	/// * LoopDetection should run before other control flow structures are detected.
	/// * Blocks should be basic blocks (not extended basic blocks) so that the natural loops
	///   don't include more instructions than strictly necessary.
	/// * (depending on future loop detection improvements:) Loop detection should run after the 'return block' is duplicated (ControlFlowSimplification).
	/// </remarks>
	public class LoopDetection : IILTransform
	{
		#region Construct Control Flow Graph
		/// <summary>
		/// Constructs a control flow graph for the blocks in the given block container.
		/// The graph nodes will have the same indices as the blocks in the block container.
		/// Return statements, exceptions, or branches leaving the block container are not
		/// modeled by the control flow graph.
		/// </summary>
		/// <param name="bc">The block container whose blocks become the graph nodes.</param>
		/// <returns>One node per block; <c>nodes[i].UserData</c> is <c>bc.Blocks[i]</c>.</returns>
		internal static ControlFlowNode[] BuildCFG(BlockContainer bc)
		{
			ControlFlowNode[] nodes = new ControlFlowNode[bc.Blocks.Count];
			for (int i = 0; i < nodes.Length; i++) {
				nodes[i] = new ControlFlowNode { UserData = bc.Blocks[i] };
			}

			// Create edges:
			for (int i = 0; i < bc.Blocks.Count; i++) {
				var block = bc.Blocks[i];
				var sourceNode = nodes[i];
				foreach (var branch in block.Descendants.OfType<Branch>()) {
					if (branch.TargetBlock.Parent == bc) {
						sourceNode.AddEdgeTo(nodes[bc.Blocks.IndexOf(branch.TargetBlock)]);
					} else {
						// Note: edges into different block containers are ignored:
						// Either they point to a nested block container in the source block,
						// in which case we can ignore them for control flow purposes;
						// or they jump to a parent block container, in which case they act
						// like a return statement or exceptional exit.
					}
				}
			}

			return nodes;
		}
		#endregion

		/// <summary>
		/// Run loop detection for all block containers in the function (including nested lambda functions).
		/// </summary>
		/// <param name="function">The function whose descendant block containers are processed.</param>
		/// <param name="context">Transform context; supplies the cancellation token used for the dominance computation.</param>
		public void Run(ILFunction function, ILTransformContext context)
		{
			foreach (var blockContainer in function.Descendants.OfType<BlockContainer>()) {
				Run(blockContainer, context);
			}
		}

		/// <summary>
		/// Run loop detection for blocks in the block container.
		/// </summary>
		/// <param name="blockContainer">The container to analyze; its first block is used as the entry point.</param>
		/// <param name="context">Transform context; supplies the cancellation token used for the dominance computation.</param>
		public void Run(BlockContainer blockContainer, ILTransformContext context)
		{
			var cfg = BuildCFG(blockContainer);
			var entryPoint = cfg[0];
			Dominance.ComputeDominance(entryPoint, context.CancellationToken);
			FindLoops(entryPoint);
		}

		/// <summary>
		/// Recurse into the dominator tree and find back edges/natural loops.
		/// </summary>
		/// <remarks>
		/// A back edge is an edge t->h so that h dominates t.
		/// The natural loop of the back edge is the smallest set of nodes that includes the back edge
		/// and has no predecessors outside the set except for the predecessor of the header.
		///
		/// Preconditions:
		/// * dominance was computed for h
		/// * all blocks in the dominator subtree starting at h are in the same BlockContainer
		/// * the visited flag is set to false
		/// </remarks>
		/// <param name="h">The candidate loop header; the dominator subtree below it is searched recursively.</param>
		void FindLoops(ControlFlowNode h)
		{
			List<ControlFlowNode> loop = null;
			foreach (var t in h.Predecessors) {
				if (h.Dominates(t)) {
					// t->h is a back edge, and h is a loop header
					// Add the natural loop of t->h to the loop.
					if (loop == null) {
						loop = new List<ControlFlowNode>();
						loop.Add(h);
						// Mark loop header as visited so that the pre-order traversal
						// stops at the loop header.
						h.Visited = true;
					}
					t.TraversePreOrder(n => n.Predecessors, loop.Add);
				}
			}
			if (loop != null) {
				// loop now is the union of all natural loops with loop head h.
				// Try to extend the loop to reduce the number of exit points:
				ExtendLoop(h, loop, h);

				// Sort blocks in the loop in reverse post-order to make the output look a bit nicer.
				// (if the loop doesn't contain nested loops, this is a topological sort)
				loop.Sort((a, b) => b.PostOrderNumber.CompareTo(a.PostOrderNumber));
				Debug.Assert(loop[0] == h);
				foreach (var node in loop) {
					node.Visited = false; // reset visited flag so that we can find nested loops
					Debug.Assert(h.Dominates(node), "The loop body must be dominated by the loop head");
				}
				ConstructLoop(loop);
			}
			// Recurse into the dominator tree to find other possible loop heads
			foreach (var child in h.DominatorTreeChildren) {
				FindLoops(child);
			}
		}

		/// <summary>
		/// Given a natural loop, add additional CFG nodes to the loop in order
		/// to reduce the number of exit points out of the loop.
		/// We do this because C# only allows reaching a single exit point (with 'break'
		/// statements or when the loop condition evaluates to false), so we'd have
		/// to introduce 'goto' statements for any additional exit points.
		///
		/// Definition: a loop exit point is a CFG node that is not itself part of the loop,
		/// but has at least one predecessor which is part of the loop.
		///
		/// Nodes can only be added to the loop if they are dominated by the loop head.
		/// When adding a node to the loop, we implicitly also add all of that node's predecessors
		/// to the loop. (this ensures that the loop keeps its single entry point)
		///
		/// Adding a node to the loop has two effects on the number of exit points:
		/// * exit points that were added to the loop are no longer exit points, thus reducing the total number of exit points
		/// * successors of the newly added nodes might be new, additional exit points
		///
		/// The loop extension algorithm traverses the loop head's dominator tree in pre-order.
		/// For each candidate node, we detect whether adding it to the loop reduces the number of exit points.
		/// If it does, the candidate is added to the loop.
		/// </summary>
		/// <remarks>
		/// Requires and maintains the invariant that a node is marked as visited iff it is contained in the loop.
		///
		/// Note: I don't think this works reliably to minimize the number of exit points,
		/// it's just a heuristic that should reduce the number of exit points in most cases.
		/// I think what we're really looking for is a minimum vertex cut of the following flow graph:
		/// * all nodes that are part of the natural loop are combined into a single node (the source node)
		/// * all control flow nodes that are dominated by the loop head (but not part of the loop)
		///   are nodes in the graph
		/// * all nodes that are in the loop's dominance frontier are nodes in the graph
		/// * connections are as usual in the CFG
		/// * the nodes in the loop's dominance frontier are additionally connected to the sink node.
		///
		/// Also, if the only way to leave the loop is through 'ret' or 'leave' instructions, or 'br' instructions
		/// that leave the block container, this method has the effect of adding more code than necessary to the loop,
		/// as those instructions do not have corresponding control flow edges.
		/// Ideally, 'leave' and 'br' should be also considered exit points; and if there are no other exit points,
		/// we can afford to introduce an additional exit point so that 'ret' instructions and nested infinite loops
		/// don't have to be moved into the loop.
		/// </remarks>
		/// <param name="loopHead">The loop header; only passed along to the recursive calls.</param>
		/// <param name="loop">The nodes currently in the loop; extended in place.</param>
		/// <param name="candidate">The dominator tree node currently being considered for inclusion.</param>
		void ExtendLoop(ControlFlowNode loopHead, List<ControlFlowNode> loop, ControlFlowNode candidate)
		{
			Debug.Assert(candidate.Visited == loop.Contains(candidate));
			if (!candidate.Visited) {
				// This node is not yet part of the loop, but might be added
				List<ControlFlowNode> additionalNodes = new List<ControlFlowNode>();
				// Find additionalNodes nodes and mark them as visited.
				candidate.TraversePreOrder(n => n.Predecessors, additionalNodes.Add);
				// This means Visited now represents the candidate extended loop.
				// Determine new exit points that are reachable from the additional nodes
				// (note: some of these might have previously been exit points, too)
				var newExitPoints = additionalNodes.SelectMany(n => n.Successors).Where(n => !n.Visited).ToHashSet();
				// Make visited represent the unextended loop, so that we can measure the exit points
				// in the old state.
				foreach (var node in additionalNodes)
					node.Visited = false;
				// Measure number of added and removed exit points
				int removedExitPoints = additionalNodes.Count(IsExitPoint);
				int addedExitPoints = newExitPoints.Count(n => !IsExitPoint(n));
				if (removedExitPoints > addedExitPoints) {
					// We can reduce the number of exit points by adding the candidate node to the loop.
					candidate.TraversePreOrder(n => n.Predecessors, loop.Add);
				}
			}
			// Pre-order traversal of dominator tree
			foreach (var node in candidate.DominatorTreeChildren) {
				ExtendLoop(loopHead, loop, node);
			}
		}

		/// <summary>
		/// Gets whether 'node' is an exit point for the loop marked by the Visited flag.
		/// </summary>
		/// <param name="node">The node to test.</param>
		/// <returns>
		/// true if the node is not marked as visited but at least one of its predecessors is;
		/// false otherwise.
		/// </returns>
		bool IsExitPoint(ControlFlowNode node)
		{
			if (node.Visited)
				return false; // nodes in the loop are not exit points
			foreach (var pred in node.Predecessors) {
				if (pred.Visited)
					return true;
			}
			return false;
		}

		/// <summary>
		/// Move the blocks associated with the loop into a new block container.
		/// </summary>
		/// <param name="loop">
		/// The nodes of the loop; <c>loop[0]</c> is treated as the loop header (the old entry point).
		/// </param>
		void ConstructLoop(List<ControlFlowNode> loop)
		{
			Block oldEntryPoint = (Block)loop[0].UserData;
			BlockContainer oldContainer = (BlockContainer)oldEntryPoint.Parent;

			BlockContainer loopContainer = new BlockContainer();
			Block newEntryPoint = new Block();
			loopContainer.Blocks.Add(newEntryPoint);
			// Move contents of oldEntryPoint to newEntryPoint
			// (we can't move the block itself because it might be the target of branch instructions outside the loop)
			newEntryPoint.Instructions.ReplaceList(oldEntryPoint.Instructions);
			newEntryPoint.FinalInstruction = oldEntryPoint.FinalInstruction;
			newEntryPoint.ILRange = oldEntryPoint.ILRange;
			oldEntryPoint.Instructions.ReplaceList(new[] { loopContainer });
			oldEntryPoint.FinalInstruction = new Nop();

			// Move other blocks into the loop body: they're all dominated by the loop header,
			// and thus cannot be the target of branch instructions outside the loop.
			for (int i = 1; i < loop.Count; i++) {
				Block block = (Block)loop[i].UserData;
				Debug.Assert(block.Parent == oldContainer);
				loopContainer.Blocks.Add(block);
			}
			// Remove all blocks that were moved into the body from the old container
			oldContainer.Blocks.RemoveAll(b => b.Parent != oldContainer);

			// Rewrite branches within the loop from oldEntryPoint to newEntryPoint:
			foreach (var branch in loopContainer.Descendants.OfType<Branch>()) {
				if (branch.TargetBlock == oldEntryPoint)
					branch.TargetBlock = newEntryPoint;
			}
		}
	}
}