diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp index b69045b4d61f28b47ad70377002a3d938b9bcbeb..d93be6a065f802e7a2d3cbf3d505d54807de635a 100644 --- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -2216,13 +2216,6 @@ MachineBlockPlacement::findBestLoopExit(const MachineLoop &L, continue; if (Succ == MBB) continue; - BlockChain &SuccChain = *BlockToChain[Succ]; - // Don't split chains, either this chain or the successor's chain. - if (&Chain == &SuccChain) { - LLVM_DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> " - << getBlockName(Succ) << " (chain conflict)\n"); - continue; - } auto SuccProb = MBPI->getEdgeProbability(MBB, Succ); if (LoopBlockSet.count(Succ)) { @@ -2322,12 +2315,29 @@ MachineBlockPlacement::hasViableTopFallthrough( return false; } -/// Attempt to rotate an exiting block to the bottom of the loop. -/// -/// Once we have built a chain, try to rotate it to line up the hot exit block -/// with fallthrough out of the loop if doing so doesn't introduce unnecessary -/// branches. For example, if the loop has fallthrough into its header and out -/// of its bottom already, don't rotate it. +// Compute the fallthrough gains from rotating the loop, and rotate only when gains > 0. +// +// In the following diagram, B0,B1...,Bn is a previously built loop chain, +// Bk is the new bottom found by findBestLoopExit, edges marked as "-" are reduced fallthrough, +// edges marked as "+" are increased fallthrough. This function computes +// +// SUM(increased fallthrough) - SUM(decreased fallthrough) +// +// | +// | - +// V +// ---> B0 +// | B1 +// | . + +// | Bk ---> +// +| |- +// | V +// | Bk+1 +// | .
+// | Bn-1 +// --- Bn <--- +// |- +// void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, const MachineBasicBlock *ExitingBB, BlockFrequency ExitFreq, @@ -2346,57 +2356,53 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, if (Top->isEntryBlock()) return; - bool ViableTopFallthrough = hasViableTopFallthrough(Top, LoopBlockSet); - - // If the header has viable fallthrough, check whether the current loop - // bottom is a viable exiting block. If so, bail out as rotating will - // introduce an unnecessary branch. - if (ViableTopFallthrough) { - for (MachineBasicBlock *Succ : Bottom->successors()) { - BlockChain *SuccChain = BlockToChain[Succ]; - if (!LoopBlockSet.count(Succ) && - (!SuccChain || Succ == *SuccChain->begin())) - return; - } + // Ignore when Bottom has more than 2 successors (similar to findBestLoopTop) + if (Bottom->succ_size() > 2) + return; + + BlockFrequency FallThrough2Exit = BlockFrequency(0); - // Rotate will destroy the top fallthrough, we need to ensure the new exit - // frequency is larger than top fallthrough. - BlockFrequency FallThrough2Top = TopFallThroughFreq(Top, LoopBlockSet); - if (FallThrough2Top >= ExitFreq) - return; + if (Bottom->succ_size() == 2) { + MachineBasicBlock *Succ = *Bottom->succ_begin(); + if (Succ == Top) + Succ = *Bottom->succ_rbegin(); + BlockChain *SuccChain = BlockToChain[Succ]; + // FallThrough2Exit exists only when Succ is outside the current loop and is either in no chain or the head of its chain + if (!LoopBlockSet.count(Succ) && + (!SuccChain || Succ == *SuccChain->begin())) + FallThrough2Exit = + MBFI->getBlockFreq(Bottom) * MBPI->getEdgeProbability(Bottom, Succ); } BlockChain::iterator ExitIt = llvm::find(LoopChain, ExitingBB); if (ExitIt == LoopChain.end()) return; - - // Rotating a loop exit to the bottom when there is a fallthrough to top - // trades the entry fallthrough for an exit fallthrough.
- // If there is no bottom->top edge, but the chosen exit block does have - // a fallthrough, we break that fallthrough for nothing in return. - - // Let's consider an example. We have a built chain of basic blocks - // B1, B2, ..., Bn, where Bk is a ExitingBB - chosen exit block. - // By doing a rotation we get - // Bk+1, ..., Bn, B1, ..., Bk - // Break of fallthrough to B1 is compensated by a fallthrough from Bk. - // If we had a fallthrough Bk -> Bk+1 it is broken now. - // It might be compensated by fallthrough Bn -> B1. - // So we have a condition to avoid creation of extra branch by loop rotation. - // All below must be true to avoid loop rotation: - // If there is a fallthrough to top (B1) - // There was fallthrough from chosen exit block (Bk) to next one (Bk+1) - // There is no fallthrough from bottom (Bn) to top (B1). - // Please note that there is no exit fallthrough from Bn because we checked it - // above. - if (ViableTopFallthrough) { - assert(std::next(ExitIt) != LoopChain.end() && - "Exit should not be last BB"); - MachineBasicBlock *NextBlockInChain = *std::next(ExitIt); - if (ExitingBB->isSuccessor(NextBlockInChain)) - if (!Bottom->isSuccessor(Top)) - return; - } + + assert(std::next(ExitIt) != LoopChain.end() && "Exit should not be last BB"); + MachineBasicBlock *NextBlockInChain = *std::next(ExitIt); + + BlockFrequency FallThroughFromPred = BlockFrequency(0); + BlockFrequency BackEdgeFreq = BlockFrequency(0); + + // fallthrough from bk to bk+1 + if (ExitingBB->isSuccessor(NextBlockInChain)) + FallThroughFromPred = MBFI->getBlockFreq(ExitingBB) * + MBPI->getEdgeProbability(ExitingBB, NextBlockInChain); + + // fallthrough from bottom to top + if (Bottom->isSuccessor(Top)) + BackEdgeFreq = + MBFI->getBlockFreq(Bottom) * MBPI->getEdgeProbability(Bottom, Top); + + BlockFrequency NewFreq = ExitFreq; + BlockFrequency FallThrough2Top = TopFallThroughFreq(Top, LoopBlockSet); + + BlockFrequency Gains = BackEdgeFreq + NewFreq; + BlockFrequency Lost = + 
FallThrough2Top + FallThrough2Exit + FallThroughFromPred; + + if (Lost >= Gains) + return; LLVM_DEBUG(dbgs() << "Rotating loop to put exit " << getBlockName(ExitingBB) << " at bottom\n"); diff --git a/llvm/test/CodeGen/X86/bb_rotate.ll b/llvm/test/CodeGen/X86/bb_rotate.ll deleted file mode 100644 index 55a7b0138026328402fe0866e185b0b7d600c4a8..0000000000000000000000000000000000000000 --- a/llvm/test/CodeGen/X86/bb_rotate.ll +++ /dev/null @@ -1,53 +0,0 @@ -; RUN: llc -mtriple=i686-linux < %s | FileCheck %s - -define i1 @no_viable_top_fallthrough() { -; CHECK-LABEL: no_viable_top_fallthrough -; CHECK: %.entry -; CHECK: %.bb1 -; CHECK: %.bb2 -; CHECK: %.middle -; CHECK: %.backedge -; CHECK: %.bb3 -; CHECK: %.header -; CHECK: %.exit -; CHECK: %.stop -.entry: - %val1 = call i1 @foo() - br i1 %val1, label %.bb1, label %.header, !prof !10 - -.bb1: - %val2 = call i1 @foo() - br i1 %val2, label %.stop, label %.exit, !prof !10 - -.header: - %val3 = call i1 @foo() - br i1 %val3, label %.bb2, label %.exit - -.bb2: - %val4 = call i1 @foo() - br i1 %val4, label %.middle, label %.bb3, !prof !10 - -.middle: - %val5 = call i1 @foo() - br i1 %val5, label %.header, label %.backedge - -.backedge: - %val6 = call i1 @foo() - br label %.header - -.bb3: - %val7 = call i1 @foo() - br label %.middle - -.exit: - %val8 = call i1 @foo() - br label %.stop - -.stop: - %result = call i1 @foo() - ret i1 %result -} - -declare i1 @foo() - -!10 = !{!"branch_weights", i32 90, i32 10} diff --git a/llvm/test/CodeGen/X86/loop-rotate.ll b/llvm/test/CodeGen/X86/loop-rotate.ll deleted file mode 100644 index 3f0a390e7c1bb439e4ef81f77dbc95bbe11eac40..0000000000000000000000000000000000000000 --- a/llvm/test/CodeGen/X86/loop-rotate.ll +++ /dev/null @@ -1,120 +0,0 @@ -; RUN: llc -mtriple=i686-linux < %s | FileCheck %s - -; Don't rotate the loop if the number of fall through to exit is not larger -; than the number of fall through to header. 
-define void @no_rotate() { -; CHECK-LABEL: no_rotate -; CHECK: %entry -; CHECK: %header -; CHECK: %middle -; CHECK: %latch1 -; CHECK: %latch2 -; CHECK: %end -entry: - br label %header - -header: - %val1 = call i1 @foo() - br i1 %val1, label %middle, label %end - -middle: - %val2 = call i1 @foo() - br i1 %val2, label %latch1, label %end - -latch1: - %val3 = call i1 @foo() - br i1 %val3, label %latch2, label %header - -latch2: - %val4 = call i1 @foo() - br label %header - -end: - ret void -} - -define void @do_rotate() { -; CHECK-LABEL: do_rotate -; CHECK: %entry -; CHECK: %then -; CHECK: %else -; CHECK: %latch1 -; CHECK: %latch2 -; CHECK: %header -; CHECK: %end -entry: - %val0 = call i1 @foo() - br i1 %val0, label %then, label %else - -then: - call void @a() - br label %header - -else: - call void @b() - br label %header - -header: - %val1 = call i1 @foo() - br i1 %val1, label %latch1, label %end - -latch1: - %val3 = call i1 @foo() - br i1 %val3, label %latch2, label %header - -latch2: - %val4 = call i1 @foo() - br label %header - -end: - ret void -} - -; The loop structure is same as in @no_rotate, but the loop header's predecessor -; doesn't fall through to it, so it should be rotated to get exit fall through. 
-define void @do_rotate2() { -; CHECK-LABEL: do_rotate2 -; CHECK: %entry -; CHECK: %then -; CHECK: %middle -; CHECK: %latch1 -; CHECK: %latch2 -; CHECK: %header -; CHECK: %exit -entry: - %val0 = call i1 @foo() - br i1 %val0, label %then, label %header, !prof !1 - -then: - call void @a() - br label %end - -header: - %val1 = call i1 @foo() - br i1 %val1, label %middle, label %exit - -middle: - %val2 = call i1 @foo() - br i1 %val2, label %latch1, label %exit - -latch1: - %val3 = call i1 @foo() - br i1 %val3, label %latch2, label %header - -latch2: - %val4 = call i1 @foo() - br label %header - -exit: - call void @b() - br label %end - -end: - ret void -} - -declare i1 @foo() -declare void @a() -declare void @b() - -!1 = !{!"branch_weights", i32 10, i32 1} diff --git a/llvm/test/CodeGen/X86/lsr-negative-stride.ll b/llvm/test/CodeGen/X86/lsr-negative-stride.ll index 26c6128ab48db5fa0dcfbaa913964ac88f02ded7..924fd25ac62d9891a9a124df1657c91e0f65bba5 100644 --- a/llvm/test/CodeGen/X86/lsr-negative-stride.ll +++ b/llvm/test/CodeGen/X86/lsr-negative-stride.ll @@ -24,6 +24,12 @@ define i32 @t(i32 %a, i32 %b) nounwind { ; CHECK-NEXT: movl %edx, %eax ; CHECK-NEXT: retl ; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_5: # %cond_false +; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT: subl %edx, %ecx +; CHECK-NEXT: cmpl %edx, %ecx +; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: je .LBB0_6 ; CHECK-NEXT: .LBB0_2: # %bb.outer ; CHECK-NEXT: # =>This Loop Header: Depth=1 ; CHECK-NEXT: # Child Loop BB0_3 Depth 2 @@ -39,14 +45,6 @@ define i32 @t(i32 %a, i32 %b) nounwind { ; CHECK-NEXT: cmpl %eax, %ecx ; CHECK-NEXT: movl %eax, %edx ; CHECK-NEXT: jne .LBB0_3 -; CHECK-NEXT: jmp .LBB0_6 -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_5: # %cond_false -; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1 -; CHECK-NEXT: subl %edx, %ecx -; CHECK-NEXT: cmpl %edx, %ecx -; CHECK-NEXT: movl %edx, %eax -; CHECK-NEXT: jne .LBB0_2 ; CHECK-NEXT: .LBB0_6: # %bb17 ; CHECK-NEXT: retl entry: