diff --git a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp index e9d3a5a5cb79746c1990052901cd1fa1b47e4f5d..7572adea93520dd1b6e31dd70e2cdead1fbd2ed1 100644 --- a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp +++ b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp @@ -207,7 +207,6 @@ private: bool legalLoopInstructions(); bool legalLoopMemoryAccesses(); bool isLoopAlreadyVisited(); - void setNoAliasToLoop(Loop *VerLoop); bool instructionSafeForVersioning(Instruction *I); bool legalLoopVersioningOverlap(); }; @@ -383,6 +382,13 @@ bool LoopVersioningLICM::instructionSafeForVersioning(Instruction *I) { } LoadAndStoreCounter++; Value *Ptr = St->getPointerOperand(); + // Don't allow stores that we don't have runtime checks for, as we won't be + // able to mark them noalias meaning they would prevent any code motion. + auto &Pointers = LAI->getRuntimePointerChecking()->Pointers; + if (!any_of(Pointers, [&](auto &P) { return P.PointerValue == Ptr; })) { + LLVM_DEBUG(dbgs() << " Found a store without a runtime check.\n"); + return false; + } // Check loop invariant. if (SE->isLoopInvariant(SE->getSCEV(Ptr), CurLoop)) InvariantCounter++; @@ -400,6 +406,13 @@ bool LoopVersioningLICM::legalLoopInstructions() { InvariantCounter = 0; IsReadOnlyLoop = true; using namespace ore; + // Get LoopAccessInfo from current loop via the proxy. + LAI = &LAIs.getInfo(*CurLoop); + // Check LoopAccessInfo for need of runtime check. + if (LAI->getRuntimePointerChecking()->getChecks().empty()) { + LLVM_DEBUG(dbgs() << " LAA: Runtime check not found !!\n"); + return false; + } // Iterate over loop blocks and instructions of each block and check // instruction safety. for (auto *Block : CurLoop->getBlocks()) @@ -413,13 +426,6 @@ bool LoopVersioningLICM::legalLoopInstructions() { return false; } } - // Get LoopAccessInfo from current loop via the proxy. - LAI = &LAIs.getInfo(*CurLoop); - // Check LoopAccessInfo for need of runtime check. - if (LAI->getRuntimePointerChecking()->getChecks().empty()) { - LLVM_DEBUG(dbgs() << " LAA: Runtime check not found !!\n"); - return false; - } // Number of runtime-checks should be less then RuntimeMemoryCheckThreshold if (LAI->getNumRuntimePointerChecks() > VectorizerParams::RuntimeMemoryCheckThreshold) { @@ -616,41 +622,6 @@ bool LoopVersioningLICM::isLegalForVersioning() { return true; } -/// Update loop with aggressive aliasing assumptions. -/// It marks no-alias to any pairs of memory operations by assuming -/// loop should not have any must-alias memory accesses pairs. -/// During LoopVersioningLICM legality we ignore loops having must -/// aliasing memory accesses. -void LoopVersioningLICM::setNoAliasToLoop(Loop *VerLoop) { - // Get latch terminator instruction. - Instruction *I = VerLoop->getLoopLatch()->getTerminator(); - // Create alias scope domain. - MDBuilder MDB(I->getContext()); - MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("LVDomain"); - StringRef Name = "LVAliasScope"; - MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name); - SmallVector Scopes{NewScope}, NoAliases{NewScope}; - // Iterate over each instruction of loop. - // set no-alias for all load & store instructions. - for (auto *Block : CurLoop->getBlocks()) { - for (auto &Inst : *Block) { - // Only interested in instruction that may modify or read memory. - if (!Inst.mayReadFromMemory() && !Inst.mayWriteToMemory()) - continue; - // Set no-alias for current instruction. - Inst.setMetadata( - LLVMContext::MD_noalias, - MDNode::concatenate(Inst.getMetadata(LLVMContext::MD_noalias), - MDNode::get(Inst.getContext(), NoAliases))); - // set alias-scope for current instruction. - Inst.setMetadata( - LLVMContext::MD_alias_scope, - MDNode::concatenate(Inst.getMetadata(LLVMContext::MD_alias_scope), - MDNode::get(Inst.getContext(), Scopes))); - } - } -} - bool LoopVersioningLICMLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) { if (skipLoop(L)) return false; @@ -757,7 +728,7 @@ bool LoopVersioningLICM::run(DominatorTree *DT) { addStringMetadataToLoop(LVer.getVersionedLoop(), "llvm.mem.parallel_loop_access"); // Update version loop with aggressive aliasing assumption. - setNoAliasToLoop(LVer.getVersionedLoop()); + LVer.annotateLoopWithNoAlias(); Changed = true; } return Changed; diff --git a/llvm/test/CodeGen/AArch64/fno-plt.c b/llvm/test/CodeGen/AArch64/fno-plt.c index 81ed912ce927e1b3dd3e2071e53efca78b84dc58..6cc59b683da20aa2af1191f598780c213fe22765 100644 --- a/llvm/test/CodeGen/AArch64/fno-plt.c +++ b/llvm/test/CodeGen/AArch64/fno-plt.c @@ -1,8 +1,8 @@ -// RUN: clang %s -shared -fno-plt -O2 -fno-inline -fPIC -o noplt.so +// RUN: clang %s -shared -fno-plt -O2 -fno-inline -fPIC --target=aarch64-linux-gnu -fuse-ld=lld -nostdlib -o noplt.so // RUN: llvm-objdump -d noplt.so | FileCheck %s --check-prefix=CHECK-NO-PLT -// RUN: clang %s -shared -O2 -fno-inline -fPIC -o plt.so -// RUN: llvm-objdump -d plt.so | FileCheck %s --check-prefix=CHECK-PLT +// RUN: clang %s -shared -O2 -fno-inline -fPIC --target=aarch64-linux-gnu -fuse-ld=lld -nostdlib -o plt.so +// RUN: llvm-objdump -d plt.so | FileCheck %s --check-prefix=CHECK-PLT // CHECK-PLT: bar@plt // CHECK-PLT: bar1@plt diff --git a/llvm/test/CodeGen/AArch64/fno-plt.cpp b/llvm/test/CodeGen/AArch64/fno-plt.cpp deleted file mode 100644 index c5a1f2f24b376fc59f4fd4197c7f085a1b7ce867..0000000000000000000000000000000000000000 --- a/llvm/test/CodeGen/AArch64/fno-plt.cpp +++ /dev/null @@ -1,35 +0,0 @@ -// RUN: clang -x c++ %s -shared -fno-plt -O2 -fno-inline -fPIC -o noplt.so -// RUN: llvm-objdump -d noplt.so | FileCheck %s --check-prefix=CHECK-NO-PLT - -// RUN: clang -x c++ %s -shared -O0 -fPIC -o plt.so -// RUN: llvm-objdump -d plt.so | FileCheck %s --check-prefix=CHECK-PLT - -// RUN: clang -x c++ %s -shared -O2 -fno-inline -fPIC -o plt.so -// RUN: llvm-objdump -d plt.so | FileCheck %s --check-prefix=CHECK-PLT - -// CHECK-PLT: bar@plt -// CHECK-PLT: bar1@plt -// CHECK-NO-PLT-NOT: bar@plt -// CHECK-NO-PLT-NOT: bar1@plt -// CHECK-NO-PLT-NOT: bar2@plt - -__attribute__((optnone)) -void bar(int a) { - return; -} - -__attribute__((optnone)) -extern void bar1(int); - -__attribute__((optnone)) -static void bar2(int a) { - return; -} - -void foo(int a) { - bar(a); - bar1(a); - bar2(a); - return; -} - diff --git a/llvm/test/Transforms/LoopVersioningLICM/load-from-unknown-address.ll b/llvm/test/Transforms/LoopVersioningLICM/load-from-unknown-address.ll new file mode 100644 index 0000000000000000000000000000000000000000..c0d9f062a99f9a5d0c830898b55d768d00fd0174 --- /dev/null +++ b/llvm/test/Transforms/LoopVersioningLICM/load-from-unknown-address.ll @@ -0,0 +1,307 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt < %s -S -passes='function(loop-versioning-licm,loop-mssa(licm))' | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32" + +; In these tests we have a loop where we can calculate the bounds of some memory +; accesses but not others. + +; Load from a gep whose bounds can't be calculated as the offset is loaded from memory +; FIXME: Not knowing the bounds of the gep shouldn't stop us from hoisting the load of rval +define void @gep_loaded_offset(ptr %p, ptr %q, ptr %r, i32 %n) { +; CHECK-LABEL: define void @gep_loaded_offset +; CHECK-SAME: (ptr [[P:%.*]], ptr [[Q:%.*]], ptr [[R:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[WHILE_BODY:%.*]] +; CHECK: while.body: +; CHECK-NEXT: [[N_ADDR:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[N]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[P_ADDR:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[WHILE_BODY]] ], [ [[P]], [[ENTRY]] ] +; CHECK-NEXT: [[DEC]] = add nsw i32 [[N_ADDR]], -1 +; CHECK-NEXT: [[RVAL:%.*]] = load i64, ptr [[R]], align 4 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[Q]], i64 [[RVAL]] +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[P_ADDR]], i64 4 +; CHECK-NEXT: store i32 [[VAL]], ptr [[P_ADDR]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[DEC]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]] +; CHECK: while.end: +; CHECK-NEXT: ret void +; +entry: + br label %while.body + +while.body: + %n.addr = phi i32 [ %dec, %while.body ], [ %n, %entry ] + %p.addr = phi ptr [ %incdec.ptr, %while.body ], [ %p, %entry ] + %dec = add nsw i32 %n.addr, -1 + %rval = load i64, ptr %r, align 4 + %arrayidx = getelementptr inbounds i32, ptr %q, i64 %rval + %val = load i32, ptr %arrayidx, align 4 + %incdec.ptr = getelementptr inbounds i8, ptr %p.addr, i64 4 + store i32 %val, ptr %p.addr, align 4 + %tobool.not = icmp eq i32 %dec, 0 + br i1 %tobool.not, label %while.end, label %while.body + +while.end: + ret void +} + +; As above but with a store to the loaded address. This should prevent the loop +; from being versioned, as we wouldn't be able to do any code motion. +define void @gep_loaded_offset_with_store(ptr %p, ptr %q, ptr %r, i32 %n) { +; CHECK-LABEL: define void @gep_loaded_offset_with_store +; CHECK-SAME: (ptr [[P:%.*]], ptr [[Q:%.*]], ptr [[R:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[WHILE_BODY:%.*]] +; CHECK: while.body: +; CHECK-NEXT: [[N_ADDR:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[N]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[P_ADDR:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[WHILE_BODY]] ], [ [[P]], [[ENTRY]] ] +; CHECK-NEXT: [[DEC]] = add nsw i32 [[N_ADDR]], -1 +; CHECK-NEXT: [[RVAL:%.*]] = load i64, ptr [[R]], align 4 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[Q]], i64 [[RVAL]] +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[P_ADDR]], i64 4 +; CHECK-NEXT: store i32 [[VAL]], ptr [[P_ADDR]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[DEC]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]] +; CHECK: while.end: +; CHECK-NEXT: ret void +; +entry: + br label %while.body + +while.body: + %n.addr = phi i32 [ %dec, %while.body ], [ %n, %entry ] + %p.addr = phi ptr [ %incdec.ptr, %while.body ], [ %p, %entry ] + %dec = add nsw i32 %n.addr, -1 + %rval = load i64, ptr %r, align 4 + %arrayidx = getelementptr inbounds i32, ptr %q, i64 %rval + %val = load i32, ptr %arrayidx, align 4 + store i32 0, ptr %arrayidx, align 4 + %incdec.ptr = getelementptr inbounds i8, ptr %p.addr, i64 4 + store i32 %val, ptr %p.addr, align 4 + %tobool.not = icmp eq i32 %dec, 0 + br i1 %tobool.not, label %while.end, label %while.body + +while.end: + ret void +} + +; Load from a gep whose bounds can't be calculated as the pointer is loaded from memory +; FIXME: Not knowing the bounds of the gep shouldn't stop us from hoisting the load of rval +define void @gep_loaded_base(ptr %p, ptr %q, ptr %r, i32 %n) { +; CHECK-LABEL: define void @gep_loaded_base +; CHECK-SAME: (ptr [[P:%.*]], ptr [[Q:%.*]], ptr [[R:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[WHILE_BODY:%.*]] +; CHECK: while.body: +; CHECK-NEXT: [[N_ADDR:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[N]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[P_ADDR:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[WHILE_BODY]] ], [ [[P]], [[ENTRY]] ] +; CHECK-NEXT: [[DEC]] = add nsw i32 [[N_ADDR]], -1 +; CHECK-NEXT: [[RVAL:%.*]] = load ptr, ptr [[R]], align 4 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[RVAL]], i64 0 +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[P_ADDR]], i64 4 +; CHECK-NEXT: store i32 [[VAL]], ptr [[P_ADDR]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[DEC]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]] +; CHECK: while.end: +; CHECK-NEXT: ret void +; +entry: + br label %while.body + +while.body: + %n.addr = phi i32 [ %dec, %while.body ], [ %n, %entry ] + %p.addr = phi ptr [ %incdec.ptr, %while.body ], [ %p, %entry ] + %dec = add nsw i32 %n.addr, -1 + %rval = load ptr, ptr %r, align 4 + %arrayidx = getelementptr inbounds i32, ptr %rval, i64 0 + %val = load i32, ptr %arrayidx, align 4 + %incdec.ptr = getelementptr inbounds i8, ptr %p.addr, i64 4 + store i32 %val, ptr %p.addr, align 4 + %tobool.not = icmp eq i32 %dec, 0 + br i1 %tobool.not, label %while.end, label %while.body + +while.end: + ret void +} + +; Load from a gep with an offset that scalar evolution can't describe +; FIXME: Not knowing the bounds of the gep shouldn't stop us from hoisting the load of qval +define void @gep_strange_offset(ptr %p, ptr %q, ptr %r, i32 %n) { +; CHECK-LABEL: define void @gep_strange_offset +; CHECK-SAME: (ptr [[P:%.*]], ptr [[Q:%.*]], ptr [[R:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[WHILE_BODY:%.*]] +; CHECK: while.body: +; CHECK-NEXT: [[N_ADDR:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[N]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[P_ADDR:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[WHILE_BODY]] ], [ [[P]], [[ENTRY]] ] +; CHECK-NEXT: [[DEC]] = add nsw i32 [[N_ADDR]], -1 +; CHECK-NEXT: [[QVAL:%.*]] = load i32, ptr [[Q]], align 4 +; CHECK-NEXT: [[REM:%.*]] = srem i32 [[DEC]], 2 +; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[REM]] to i64 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[R]], i64 [[IDXPROM]] +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[VAL]], [[QVAL]] +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[P_ADDR]], i64 4 +; CHECK-NEXT: store i32 [[ADD]], ptr [[P_ADDR]], align 4 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[DEC]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]] +; CHECK: while.end: +; CHECK-NEXT: ret void +; +entry: + br label %while.body + +while.body: + %n.addr = phi i32 [ %dec, %while.body ], [ %n, %entry ] + %p.addr = phi ptr [ %incdec.ptr, %while.body ], [ %p, %entry ] + %dec = add nsw i32 %n.addr, -1 + %qval = load i32, ptr %q, align 4 + %rem = srem i32 %dec, 2 + %idxprom = sext i32 %rem to i64 + %arrayidx = getelementptr inbounds i32, ptr %r, i64 %idxprom + %val = load i32, ptr %arrayidx, align 4 + %add = add nsw i32 %val, %qval + %incdec.ptr = getelementptr inbounds i8, ptr %p.addr, i64 4 + store i32 %add, ptr %p.addr, align 4 + %tobool.not = icmp eq i32 %dec, 0 + br i1 %tobool.not, label %while.end, label %while.body + +while.end: + ret void +} + +; A memcpy-like loop where the source address is loaded from a pointer +; FIXME: We should be able to hoist the load of the source address pointer +define void @memcpy_load_src(ptr %dst, ptr %src, i32 %n) { +; CHECK-LABEL: define void @memcpy_load_src +; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[WHILE_BODY:%.*]] +; CHECK: while.body: +; CHECK-NEXT: [[N_VAL:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[N]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[DST_VAL:%.*]] = phi ptr [ [[DST_VAL_NEXT:%.*]], [[WHILE_BODY]] ], [ [[DST]], [[ENTRY]] ] +; CHECK-NEXT: [[DEC]] = add nsw i32 [[N_VAL]], -1 +; CHECK-NEXT: [[SRC_VAL:%.*]] = load ptr, ptr [[SRC]], align 8 +; CHECK-NEXT: [[SRC_VAL_NEXT:%.*]] = getelementptr inbounds i8, ptr [[SRC_VAL]], i64 1 +; CHECK-NEXT: [[DST_VAL_NEXT]] = getelementptr inbounds i8, ptr [[DST_VAL]], i64 1 +; CHECK-NEXT: store ptr [[SRC_VAL_NEXT]], ptr [[SRC]], align 8 +; CHECK-NEXT: [[VAL:%.*]] = load i8, ptr [[SRC_VAL]], align 1 +; CHECK-NEXT: store i8 [[VAL]], ptr [[DST_VAL]], align 1 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[DEC]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]] +; CHECK: while.end: +; CHECK-NEXT: ret void +; +entry: + br label %while.body + +while.body: + %n_val = phi i32 [ %dec, %while.body ], [ %n, %entry ] + %dst_val = phi ptr [ %dst_val.next, %while.body ], [ %dst, %entry ] + %dec = add nsw i32 %n_val, -1 + %src_val = load ptr, ptr %src, align 8 + %src_val.next = getelementptr inbounds i8, ptr %src_val, i64 1 + %dst_val.next = getelementptr inbounds i8, ptr %dst_val, i64 1 + store ptr %src_val.next, ptr %src, align 8 + %val = load i8, ptr %src_val, align 1 + store i8 %val, ptr %dst_val, align 1 + %tobool.not = icmp eq i32 %dec, 0 + br i1 %tobool.not, label %while.end, label %while.body + +while.end: + ret void +} + +; A memcpy-like loop where the destination address is loaded from a pointer +; FIXME: We could hoist the load of the destination address, but doing the +; bounds check of the store through that pointer itself requires using the +; hoisted load. +define void @memcpy_load_dst(ptr %dst, ptr %src, i32 %n) { +; CHECK-LABEL: define void @memcpy_load_dst +; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[WHILE_BODY:%.*]] +; CHECK: while.body: +; CHECK-NEXT: [[N_VAL:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[N]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[SRC_VAL:%.*]] = phi ptr [ [[SRC_VAL_NEXT:%.*]], [[WHILE_BODY]] ], [ [[SRC]], [[ENTRY]] ] +; CHECK-NEXT: [[DEC]] = add nsw i32 [[N_VAL]], -1 +; CHECK-NEXT: [[DST_VAL:%.*]] = load ptr, ptr [[DST]], align 8 +; CHECK-NEXT: [[SRC_VAL_NEXT]] = getelementptr inbounds i8, ptr [[SRC_VAL]], i64 1 +; CHECK-NEXT: [[DST_VAL_NEXT:%.*]] = getelementptr inbounds i8, ptr [[DST_VAL]], i64 1 +; CHECK-NEXT: store ptr [[DST_VAL_NEXT]], ptr [[DST]], align 8 +; CHECK-NEXT: [[VAL:%.*]] = load i8, ptr [[SRC_VAL]], align 1 +; CHECK-NEXT: store i8 [[VAL]], ptr [[DST_VAL]], align 1 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[DEC]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]] +; CHECK: while.end: +; CHECK-NEXT: ret void +; +entry: + br label %while.body + +while.body: + %n_val = phi i32 [ %dec, %while.body ], [ %n, %entry ] + %src_val = phi ptr [ %src_val.next, %while.body ], [ %src, %entry ] + %dec = add nsw i32 %n_val, -1 + %dst_val = load ptr, ptr %dst, align 8 + %src_val.next = getelementptr inbounds i8, ptr %src_val, i64 1 + %dst_val.next = getelementptr inbounds i8, ptr %dst_val, i64 1 + store ptr %dst_val.next, ptr %dst, align 8 + %val = load i8, ptr %src_val, align 1 + store i8 %val, ptr %dst_val, align 1 + %tobool.not = icmp eq i32 %dec, 0 + br i1 %tobool.not, label %while.end, label %while.body + +while.end: + ret void +} + +; A memcpy-like loop where both the source and destination pointers are loaded from pointers +; FIXME: We could hoist the loads of both addresses, but doing the bounds check +; of the store through the destination address itself requires using the hoisted +; load. +define void @memcpy_load_src_dst(ptr %dst, ptr %src, i32 %n) { +; CHECK-LABEL: define void @memcpy_load_src_dst +; CHECK-SAME: (ptr [[DST:%.*]], ptr [[SRC:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[WHILE_BODY:%.*]] +; CHECK: while.body: +; CHECK-NEXT: [[N_VAL:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[N]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[DEC]] = add nsw i32 [[N_VAL]], -1 +; CHECK-NEXT: [[SRC_VAL:%.*]] = load ptr, ptr [[SRC]], align 8 +; CHECK-NEXT: [[DST_VAL:%.*]] = load ptr, ptr [[DST]], align 8 +; CHECK-NEXT: [[SRC_VAL_NEXT:%.*]] = getelementptr inbounds i8, ptr [[SRC_VAL]], i64 1 +; CHECK-NEXT: [[DST_VAL_NEXT:%.*]] = getelementptr inbounds i8, ptr [[DST_VAL]], i64 1 +; CHECK-NEXT: store ptr [[SRC_VAL_NEXT]], ptr [[SRC]], align 8 +; CHECK-NEXT: store ptr [[DST_VAL_NEXT]], ptr [[DST]], align 8 +; CHECK-NEXT: [[VAL:%.*]] = load i8, ptr [[SRC_VAL]], align 1 +; CHECK-NEXT: store i8 [[VAL]], ptr [[DST_VAL]], align 1 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[DEC]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY]] +; CHECK: while.end: +; CHECK-NEXT: ret void +; +entry: + br label %while.body + +while.body: + %n_val = phi i32 [ %dec, %while.body ], [ %n, %entry ] + %dec = add nsw i32 %n_val, -1 + %src_val = load ptr, ptr %src, align 8 + %dst_val = load ptr, ptr %dst, align 8 + %src_val.next = getelementptr inbounds i8, ptr %src_val, i64 1 + %dst_val.next = getelementptr inbounds i8, ptr %dst_val, i64 1 + store ptr %src_val.next, ptr %src, align 8 + store ptr %dst_val.next, ptr %dst, align 8 + %val = load i8, ptr %src_val, align 1 + store i8 %val, ptr %dst_val, align 1 + %tobool.not = icmp eq i32 %dec, 0 + br i1 %tobool.not, label %while.end, label %while.body + +while.end: + ret void +} diff --git a/llvm/test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll b/llvm/test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll index eec772c52bbb69fc533efe76ad6e71b90e5a7955..8337a2d2c9c8c5497a142315a8fd5ea85144408f 100644 --- a/llvm/test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll +++ b/llvm/test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll @@ -57,23 +57,23 @@ define i32 @foo(ptr nocapture %var1, ptr nocapture readnone %var2, ptr nocapture ; CHECK-NEXT: [[CMP2_LVER_ORIG:%.*]] = icmp ult i32 [[INC_LVER_ORIG]], [[ITR]] ; CHECK-NEXT: br i1 [[CMP2_LVER_ORIG]], label [[FOR_BODY3_LVER_ORIG]], label [[FOR_INC11_LOOPEXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: for.body3.ph: -; CHECK-NEXT: [[ARRAYIDX7_PROMOTED:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4, !alias.scope !2, !noalias !2 +; CHECK-NEXT: [[ARRAYIDX7_PROMOTED:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4, !alias.scope !2 ; CHECK-NEXT: br label [[FOR_BODY3:%.*]] ; CHECK: for.body3: ; CHECK-NEXT: [[ADD86:%.*]] = phi i32 [ [[ARRAYIDX7_PROMOTED]], [[FOR_BODY3_PH]] ], [ [[ADD8:%.*]], [[FOR_BODY3]] ] ; CHECK-NEXT: [[J_113:%.*]] = phi i32 [ [[J_016]], [[FOR_BODY3_PH]] ], [ [[INC:%.*]], [[FOR_BODY3]] ] ; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[J_113]] to i64 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VAR1]], i64 [[IDXPROM]] -; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4, !alias.scope !2, !noalias !2 +; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4, !alias.scope !5, !noalias !2 ; CHECK-NEXT: [[ADD8]] = add nsw i32 [[ADD86]], [[ADD]] ; CHECK-NEXT: [[INC]] = add nuw i32 [[J_113]], 1 ; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[INC]], [[ITR]] -; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_BODY3]], label [[FOR_INC11_LOOPEXIT_LOOPEXIT5:%.*]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_BODY3]], label [[FOR_INC11_LOOPEXIT_LOOPEXIT5:%.*]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: for.inc11.loopexit.loopexit: ; CHECK-NEXT: br label [[FOR_INC11_LOOPEXIT:%.*]] ; CHECK: for.inc11.loopexit.loopexit5: ; CHECK-NEXT: [[ADD8_LCSSA:%.*]] = phi i32 [ [[ADD8]], [[FOR_BODY3]] ] -; CHECK-NEXT: store i32 [[ADD8_LCSSA]], ptr [[ARRAYIDX7]], align 4, !alias.scope !2, !noalias !2 +; CHECK-NEXT: store i32 [[ADD8_LCSSA]], ptr [[ARRAYIDX7]], align 4, !alias.scope !2 ; CHECK-NEXT: br label [[FOR_INC11_LOOPEXIT]] ; CHECK: for.inc11.loopexit: ; CHECK-NEXT: br label [[FOR_INC11]] diff --git a/llvm/test/Transforms/LoopVersioningLICM/loopversioningLICM2.ll b/llvm/test/Transforms/LoopVersioningLICM/loopversioningLICM2.ll index 22ca534be7ae09d956a576481ce87912f6eca766..a31da2a212ea55488da19ad9cade2b2d39a41284 100644 --- a/llvm/test/Transforms/LoopVersioningLICM/loopversioningLICM2.ll +++ b/llvm/test/Transforms/LoopVersioningLICM/loopversioningLICM2.ll @@ -9,7 +9,7 @@ ; ; CHECK: for.cond1.for.inc17_crit_edge.us.loopexit5: ; preds = %for.body3.us ; CHECK-NEXT: %add14.us.lcssa = phi float [ %add14.us, %for.body3.us ] -; CHECK-NEXT: store float %add14.us.lcssa, ptr %arrayidx.us, align 4, !alias.scope !0, !noalias !0 +; CHECK-NEXT: store float %add14.us.lcssa, ptr %arrayidx.us, align 4, !alias.scope !3 ; CHECK-NEXT: br label %for.cond1.for.inc17_crit_edge.us ; define i32 @foo(ptr nocapture %var2, ptr nocapture readonly %var3, i32 %itr) #0 {