diff --git a/llvm/include/llvm/Analysis/HashRecognize.h b/llvm/include/llvm/Analysis/HashRecognize.h new file mode 100644 index 0000000000000000000000000000000000000000..6dea3d24885ffacf3087642c083b5f71e4124b8d --- /dev/null +++ b/llvm/include/llvm/Analysis/HashRecognize.h @@ -0,0 +1,107 @@ +//===- HashRecognize.h ------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Interface for the HashRecognize analysis, which identifies hash functions +// that can be optimized using a lookup-table or with target-specific +// instructions. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_HASHRECOGNIZE_H +#define LLVM_ANALYSIS_HASHRECOGNIZE_H + +#include "llvm/ADT/APInt.h" +#include "llvm/Analysis/LoopAnalysisManager.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/Value.h" +#include + +namespace llvm { + +class LPMUpdater; + +/// A custom std::array with 256 entries, that also has a print function. +struct CRCTable : public std::array { + void print(raw_ostream &OS) const; + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + LLVM_DUMP_METHOD void dump() const; +#endif +}; + +/// The structure that is returned when a polynomial algorithm was recognized by +/// the analysis. Currently, only the CRC algorithm is recognized. +struct PolynomialInfo { + // The small constant trip-count of the analyzed loop. + unsigned TripCount; + + // The LHS in a polynomial operation, or the initial variable of the + // computation, since all polynomial operations must have a constant RHS, + // which is the generating polynomial. 
It is the LHS of the polynomial + // division in the case of CRC. Since polynomial division is an XOR in + // GF(2^m), this variable must be XOR'ed with RHS in a loop to yield the + // ComputedValue. + Value *LHS; + + // The generating polynomial, or the RHS of the polynomial division in the + // case of CRC. + APInt RHS; + + // The final computed value. This is a remainder of a polynomial division in + // the case of CRC, which must be zero. + Value *ComputedValue; + + // Set to true in the case of big-endian. + bool ByteOrderSwapped; + + // An optional auxiliary checksum that augments the LHS. In the case of CRC, + // it is XOR'ed with the LHS, so that the computation's final remainder is + // zero. + Value *LHSAux; + + PolynomialInfo(unsigned TripCount, Value *LHS, const APInt &RHS, + Value *ComputedValue, bool ByteOrderSwapped, + Value *LHSAux = nullptr); +}; + +/// The analysis. +class HashRecognize { + const Loop &L; + ScalarEvolution &SE; + +public: + HashRecognize(const Loop &L, ScalarEvolution &SE); + + // The main analysis entry points. + std::variant recognizeCRC() const; + std::optional getResult() const; + + // Auxiliary entry point after analysis to interleave the generating + // polynomial and return a 256-entry CRC table. 
+ static CRCTable genSarwateTable(const APInt &GenPoly, bool ByteOrderSwapped); + + void print(raw_ostream &OS) const; + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + LLVM_DUMP_METHOD void dump() const; +#endif +}; + +class HashRecognizePrinterPass + : public PassInfoMixin { + raw_ostream &OS; + +public: + explicit HashRecognizePrinterPass(raw_ostream &OS) : OS(OS) {} + PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LPMUpdater &); +}; +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/Analysis/ScalarEvolutionPatternMatch.h b/llvm/include/llvm/Analysis/ScalarEvolutionPatternMatch.h new file mode 100644 index 0000000000000000000000000000000000000000..7a45ae93b185b3b107300e1b1ec014ea66b71556 --- /dev/null +++ b/llvm/include/llvm/Analysis/ScalarEvolutionPatternMatch.h @@ -0,0 +1,279 @@ +//===----------------------------------------------------------------------===// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides a simple and efficient mechanism for performing general +// tree-based pattern matches on SCEVs, based on LLVM's IR pattern matchers. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_SCALAREVOLUTIONPATTERNMATCH_H +#define LLVM_ANALYSIS_SCALAREVOLUTIONPATTERNMATCH_H + +#include "llvm/Analysis/ScalarEvolutionExpressions.h" + +namespace llvm { +namespace SCEVPatternMatch { + +template bool match(const SCEV *S, const Pattern &P) { + return P.match(S); +} + +template struct cst_pred_ty : public Predicate { + cst_pred_ty() = default; + cst_pred_ty(uint64_t V) : Predicate(V) {} + bool match(const SCEV *S) const { + assert((isa(S) || !S->getType()->isVectorTy()) && + "no vector types expected from SCEVs"); + auto *C = dyn_cast(S); + return C && this->isValue(C->getAPInt()); + } +}; + +struct is_zero { + bool isValue(const APInt &C) const { return C.isZero(); } +}; + +/// Match an integer 0. +inline cst_pred_ty m_scev_Zero() { return cst_pred_ty(); } + +struct is_one { + bool isValue(const APInt &C) const { return C.isOne(); } +}; + +/// Match an integer 1. +inline cst_pred_ty m_scev_One() { return cst_pred_ty(); } + +struct is_all_ones { + bool isValue(const APInt &C) const { return C.isAllOnes(); } +}; + +/// Match an integer with all bits set. +inline cst_pred_ty m_scev_AllOnes() { + return cst_pred_ty(); +} + +template struct class_match { + template bool match(ITy *V) const { return isa(V); } +}; + +inline class_match m_SCEV() { return class_match(); } +inline class_match m_SCEVConstant() { + return class_match(); +} +inline class_match m_SCEVVScale() { + return class_match(); +} + +template struct bind_ty { + Class *&VR; + + bind_ty(Class *&V) : VR(V) {} + + template bool match(ITy *V) const { + if (auto *CV = dyn_cast(V)) { + VR = CV; + return true; + } + return false; + } +}; + +/// Match a SCEV, capturing it if we match. 
+inline bind_ty m_SCEV(const SCEV *&V) { return V; } +inline bind_ty m_SCEVConstant(const SCEVConstant *&V) { + return V; +} +inline bind_ty m_SCEVUnknown(const SCEVUnknown *&V) { + return V; +} + +inline bind_ty m_scev_Add(const SCEVAddExpr *&V) { + return V; +} + +/// Match a specified const SCEV *. +struct specificscev_ty { + const SCEV *Expr; + + specificscev_ty(const SCEV *Expr) : Expr(Expr) {} + + template bool match(ITy *S) const { return S == Expr; } +}; + +/// Match if we have a specific specified SCEV. +inline specificscev_ty m_scev_Specific(const SCEV *S) { return S; } + +struct is_specific_cst { + uint64_t CV; + is_specific_cst(uint64_t C) : CV(C) {} + bool isValue(const APInt &C) const { return C == CV; } +}; + +/// Match an SCEV constant with a plain unsigned integer. +inline cst_pred_ty m_scev_SpecificInt(uint64_t V) { return V; } + +struct is_specific_signed_cst { + int64_t CV; + is_specific_signed_cst(int64_t C) : CV(C) {} + bool isValue(const APInt &C) const { return C.trySExtValue() == CV; } +}; + +/// Match an SCEV constant with a plain signed integer (sign-extended value will +/// be matched) +inline cst_pred_ty m_scev_SpecificSInt(int64_t V) { + return V; +} + +struct bind_cst_ty { + const APInt *&CR; + + bind_cst_ty(const APInt *&Op0) : CR(Op0) {} + + bool match(const SCEV *S) const { + assert((isa(S) || !S->getType()->isVectorTy()) && + "no vector types expected from SCEVs"); + auto *C = dyn_cast(S); + if (!C) + return false; + CR = &C->getAPInt(); + return true; + } +}; + +/// Match an SCEV constant and bind it to an APInt. +inline bind_cst_ty m_scev_APInt(const APInt *&C) { return C; } + +/// Match a unary SCEV. 
+template struct SCEVUnaryExpr_match { + Op0_t Op0; + + SCEVUnaryExpr_match(Op0_t Op0) : Op0(Op0) {} + + bool match(const SCEV *S) const { + auto *E = dyn_cast(S); + return E && E->getNumOperands() == 1 && Op0.match(E->getOperand(0)); + } +}; + +template +inline SCEVUnaryExpr_match m_scev_Unary(const Op0_t &Op0) { + return SCEVUnaryExpr_match(Op0); +} + +template +inline SCEVUnaryExpr_match +m_scev_SExt(const Op0_t &Op0) { + return m_scev_Unary(Op0); +} + +template +inline SCEVUnaryExpr_match +m_scev_ZExt(const Op0_t &Op0) { + return m_scev_Unary(Op0); +} + +template +inline SCEVUnaryExpr_match +m_scev_PtrToInt(const Op0_t &Op0) { + return SCEVUnaryExpr_match(Op0); +} + +/// Match a binary SCEV. +template +struct SCEVBinaryExpr_match { + Op0_t Op0; + Op1_t Op1; + + SCEVBinaryExpr_match(Op0_t Op0, Op1_t Op1) : Op0(Op0), Op1(Op1) {} + + bool match(const SCEV *S) const { + auto *E = dyn_cast(S); + return E && E->getNumOperands() == 2 && + ((Op0.match(E->getOperand(0)) && Op1.match(E->getOperand(1))) || + (Commutable && Op0.match(E->getOperand(1)) && + Op1.match(E->getOperand(0)))); + } +}; + +template +inline SCEVBinaryExpr_match +m_scev_Binary(const Op0_t &Op0, const Op1_t &Op1) { + return SCEVBinaryExpr_match(Op0, Op1); +} + +template +inline SCEVBinaryExpr_match +m_scev_Add(const Op0_t &Op0, const Op1_t &Op1) { + return m_scev_Binary(Op0, Op1); +} + +template +inline SCEVBinaryExpr_match +m_scev_Mul(const Op0_t &Op0, const Op1_t &Op1) { + return m_scev_Binary(Op0, Op1); +} + +template +inline SCEVBinaryExpr_match +m_scev_c_Mul(const Op0_t &Op0, const Op1_t &Op1) { + return m_scev_Binary(Op0, Op1); +} + +template +inline SCEVBinaryExpr_match +m_scev_UDiv(const Op0_t &Op0, const Op1_t &Op1) { + return m_scev_Binary(Op0, Op1); +} + +inline class_match m_Loop() { return class_match(); } + +/// Match an affine SCEVAddRecExpr. 
+template +struct SCEVAffineAddRec_match { + SCEVBinaryExpr_match Ops; + Loop_t Loop; + + SCEVAffineAddRec_match(Op0_t Op0, Op1_t Op1, Loop_t Loop) + : Ops(Op0, Op1), Loop(Loop) {} + + bool match(const SCEV *S) const { + return Ops.match(S) && Loop.match(cast(S)->getLoop()); + } +}; + +/// Match a specified const Loop*. +struct specificloop_ty { + const Loop *L; + + specificloop_ty(const Loop *L) : L(L) {} + + bool match(const Loop *L) const { return L == this->L; } +}; + +inline specificloop_ty m_SpecificLoop(const Loop *L) { return L; } + +inline bind_ty m_Loop(const Loop *&L) { return L; } + +template +inline SCEVAffineAddRec_match> +m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1) { + return SCEVAffineAddRec_match>( + Op0, Op1, m_Loop()); +} + +template +inline SCEVAffineAddRec_match +m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1, const Loop_t &L) { + return SCEVAffineAddRec_match(Op0, Op1, L); +} + +} // namespace SCEVPatternMatch +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/IR/BasicBlock.h b/llvm/include/llvm/IR/BasicBlock.h index 19bf9549a8caec0f11472a3584ef604f173b93e9..3e1c7ebe1033ce5d3602fa21a8df8271ab544516 100644 --- a/llvm/include/llvm/IR/BasicBlock.h +++ b/llvm/include/llvm/IR/BasicBlock.h @@ -173,6 +173,12 @@ public: static_cast(this)->getFirstNonPHI()); } + const_iterator getFirstNonPHIIt() const; + iterator getFirstNonPHIIt() { + return static_cast( + this)->getFirstNonPHIIt().getNonConst(); + } + /// Returns a pointer to the first instruction in this block that is not a /// PHINode or a debug intrinsic, or any pseudo operation if \c SkipPseudoOp /// is true. 
diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h index 621eba6bd0b679b80e912b5ced9994fc079e823d..4889e7df2cd195e614efae99a4926921852be642 100644 --- a/llvm/include/llvm/IR/PatternMatch.h +++ b/llvm/include/llvm/IR/PatternMatch.h @@ -1689,6 +1689,15 @@ m_ZExtOrSExtOrSelf(const OpTy &Op) { return m_CombineOr(m_ZExtOrSExt(Op), Op); } +template +inline match_combine_or< + match_combine_or, + CastClass_match>, + OpTy> +m_ZExtOrTruncOrSelf(const OpTy &Op) { + return m_CombineOr(m_CombineOr(m_ZExt(Op), m_Trunc(Op)), Op); +} + template inline CastClass_match m_UIToFP(const OpTy &Op) { return CastClass_match(Op); diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt index b35764c7fb4b4858b588b23aa897feaa32b743c4..43f0f9a8813518fe5bf88ad96f4c6979a33e189a 100644 --- a/llvm/lib/Analysis/CMakeLists.txt +++ b/llvm/lib/Analysis/CMakeLists.txt @@ -94,6 +94,7 @@ add_llvm_component_library(LLVMAnalysis FunctionPropertiesAnalysis.cpp GlobalsModRef.cpp GuardUtils.cpp + HashRecognize.cpp HeatUtils.cpp IRSimilarityIdentifier.cpp IVDescriptors.cpp diff --git a/llvm/lib/Analysis/HashRecognize.cpp b/llvm/lib/Analysis/HashRecognize.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ea49361947eb300a5111d40df057279b43694faf --- /dev/null +++ b/llvm/lib/Analysis/HashRecognize.cpp @@ -0,0 +1,588 @@ +//===- HashRecognize.cpp ----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// The HashRecognize analysis recognizes unoptimized polynomial hash functions +// with operations over a Galois field of characteristic 2, also called binary +// fields, or GF(2^n). 2^n is termed the order of the Galois field. 
This class +// of hash functions can be optimized using a lookup-table-driven +// implementation, or with target-specific instructions. +// +// Examples: +// +// 1. Cyclic redundancy check (CRC), which is a polynomial division in GF(2). +// 2. Rabin fingerprint, a component of the Rabin-Karp algorithm, which is a +// rolling hash polynomial division in GF(2). +// 3. Rijndael MixColumns, a step in AES computation, which is a polynomial +// multiplication in GF(2^8). +// 4. GHASH, the authentication mechanism in AES Galois/Counter Mode (GCM), +// which is a polynomial evaluation in GF(2^128). +// +// All of them use an irreducible generating polynomial of degree m, +// +// c_m * x^m + c_(m-1) * x^(m-1) + ... + c_0 * x^0 +// +// where each coefficient c can take values 0 or 1. The polynomial is simply +// represented by m+1 bits, corresponding to the coefficients. The different +// variants of CRC are named by degree of generating polynomial used: so CRC-32 +// would use a polynomial of degree 32. +// +// The reason algorithms on GF(2^n) can be optimized with a lookup-table is the +// following: in such fields, polynomial addition and subtraction are identical +// and equivalent to XOR, polynomial multiplication is an AND, and polynomial +// division is identity: the XOR and AND operations in unoptimized +// implementations are performed bit-wise, and can be optimized to be performed +// chunk-wise, by interleaving copies of the generating polynomial, and storing +// the pre-computed values in a table. +// +// A generating polynomial of m bits always has the MSB set, so we usually +// omit it. An example of a 16-bit polynomial is the CRC-16-CCITT polynomial: +// +// (x^16) + x^12 + x^5 + 1 = (1) 0001 0000 0010 0001 = 0x1021 +// +// Transmissions are either in big-endian or little-endian form, and hash +// algorithms are written according to this. For example, IEEE 802 and RS-232 +// specify little-endian transmission. 
+// +//===----------------------------------------------------------------------===// +// +// At the moment, we only recognize the CRC algorithm. +// Documentation on CRC32 from the kernel: +// https://www.kernel.org/doc/Documentation/crc32.txt +// +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/HashRecognize.h" +#include "llvm/ADT/APInt.h" +#include "llvm/Analysis/LoopAnalysisManager.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionPatternMatch.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/Support/KnownBits.h" + +using namespace llvm; +using namespace PatternMatch; +using namespace SCEVPatternMatch; + +#define DEBUG_TYPE "hash-recognize" + +/// Checks if there's a stray instruction in the loop \p L outside of the +/// use-def chains from \p Roots, or if we escape the loop during the use-def +/// walk. +static bool containsUnreachable(const Loop &L, + ArrayRef Roots) { + SmallPtrSet Visited; + BasicBlock *Latch = L.getLoopLatch(); + + SmallVector Worklist(Roots); + while (!Worklist.empty()) { + const Instruction *I = Worklist.pop_back_val(); + Visited.insert(I); + + if (isa(I)) + continue; + + for (const Use &U : I->operands()) { + if (auto *UI = dyn_cast(U)) { + if (!L.contains(UI)) + return true; + Worklist.push_back(UI); + } + } + } + return std::distance(Latch->begin(), Latch->end()) != Visited.size(); +} + +/// A structure that can hold either a Simple Recurrence or a Conditional +/// Recurrence. Note that in the case of a Simple Recurrence, Step is an operand +/// of the BO, while in a Conditional Recurrence, it is a SelectInst. 
+struct RecurrenceInfo { + const Loop &L; + const PHINode *Phi = nullptr; + BinaryOperator *BO = nullptr; + Value *Start = nullptr; + Value *Step = nullptr; + std::optional ExtraConst; + + RecurrenceInfo(const Loop &L) : L(L) {} + operator bool() const { return BO; } + + void print(raw_ostream &OS, unsigned Indent = 0) const { + OS.indent(Indent) << "Phi: "; + Phi->print(OS); + OS << "\n"; + OS.indent(Indent) << "BinaryOperator: "; + BO->print(OS); + OS << "\n"; + OS.indent(Indent) << "Start: "; + Start->print(OS); + OS << "\n"; + OS.indent(Indent) << "Step: "; + Step->print(OS); + OS << "\n"; + if (ExtraConst) { + OS.indent(Indent) << "ExtraConst: "; + ExtraConst->print(OS, false); + OS << "\n"; + } + } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + LLVM_DUMP_METHOD void dump() const { print(dbgs()); } +#endif + + bool matchSimpleRecurrence(const PHINode *P); + bool matchConditionalRecurrence( + const PHINode *P, + Instruction::BinaryOps BOWithConstOpToMatch = Instruction::BinaryOpsEnd); + +private: + BinaryOperator *digRecurrence( + Instruction *V, + Instruction::BinaryOps BOWithConstOpToMatch = Instruction::BinaryOpsEnd); +}; + +/// Check the well-formedness of the (most|least) significant bit check given \p +/// ConditionalRecurrence, \p SimpleRecurrence, depending on \p +/// ByteOrderSwapped. We check that ConditionalRecurrence.Step is a +/// Select(Cmp()) where the compare is `>= 0` in the big-endian case, and `== 0` +/// in the little-endian case (or the inverse, in which case the branches of the +/// compare are swapped). We check that the LHS is (ConditionalRecurrence.Phi +/// [xor SimpleRecurrence.Phi]) in the big-endian case, and additionally check +/// for an AND with one in the little-endian case. We then check AllowedByR +/// against CheckAllowedByR, which is [0, smin) in the big-endian case, and is +/// [0, 1) in the little-endian case. 
CheckAllowedByR checks for +/// significant-bit-clear, and we match the corresponding arms of the select +/// against bit-shift and bit-shift-and-xor-gen-poly. +static bool +isSignificantBitCheckWellFormed(const RecurrenceInfo &ConditionalRecurrence, + const RecurrenceInfo &SimpleRecurrence, + bool ByteOrderSwapped) { + auto *SI = cast(ConditionalRecurrence.Step); + ICmpInst::Predicate Pred; + const Value *L; + const APInt *R; + Instruction *TV, *FV; + if (!match(SI, m_Select(m_ICmp(Pred, m_Value(L), m_APInt(R)), + m_Instruction(TV), m_Instruction(FV)))) + return false; + + // Match predicate with or without a SimpleRecurrence (the corresponding data + // is LHSAux). + auto MatchPred = m_CombineOr( + m_Specific(ConditionalRecurrence.Phi), + m_c_Xor(m_ZExtOrTruncOrSelf(m_Specific(ConditionalRecurrence.Phi)), + m_ZExtOrTruncOrSelf(m_Specific(SimpleRecurrence.Phi)))); + bool LWellFormed = ByteOrderSwapped ? match(L, MatchPred) + : match(L, m_c_And(MatchPred, m_One())); + if (!LWellFormed) + return false; + + KnownBits KnownR = KnownBits::makeConstant(*R); + unsigned BW = KnownR.getBitWidth(); + auto RCR = ConstantRange::fromKnownBits(KnownR, false); + auto AllowedByR = ConstantRange::makeAllowedICmpRegion(Pred, RCR); + ConstantRange CheckAllowedByR(APInt::getZero(BW), + ByteOrderSwapped ? APInt::getSignedMinValue(BW) + : APInt(BW, 1)); + + BinaryOperator *BitShift = ConditionalRecurrence.BO; + if (AllowedByR == CheckAllowedByR) + return TV == BitShift && + match(FV, m_c_Xor(m_Specific(BitShift), + m_SpecificInt(*ConditionalRecurrence.ExtraConst))); + if (AllowedByR.inverse() == CheckAllowedByR) + return FV == BitShift && + match(TV, m_c_Xor(m_Specific(BitShift), + m_SpecificInt(*ConditionalRecurrence.ExtraConst))); + return false; +} + +/// Wraps llvm::matchSimpleRecurrence. Match a simple first order recurrence +/// cycle of the form: +/// +/// loop: +/// %rec = phi [%start, %entry], [%BO, %loop] +/// ... 
+/// %BO = binop %rec, %step +/// +/// or +/// +/// loop: +/// %rec = phi [%start, %entry], [%BO, %loop] +/// ... +/// %BO = binop %step, %rec +/// +bool RecurrenceInfo::matchSimpleRecurrence(const PHINode *P) { + if (llvm::matchSimpleRecurrence(P, BO, Start, Step)) { + Phi = P; + return true; + } + return false; +} + +/// Digs for a recurrence starting with \p V hitting the PHI node in a use-def +/// chain. Used by matchConditionalRecurrence. +BinaryOperator * +RecurrenceInfo::digRecurrence(Instruction *V, + Instruction::BinaryOps BOWithConstOpToMatch) { + SmallVector Worklist; + Worklist.push_back(V); + while (!Worklist.empty()) { + Instruction *I = Worklist.pop_back_val(); + + // Don't add a PHI's operands to the Worklist. + if (isa(I)) + continue; + + // Find a recurrence over a BinOp, by matching either of its operands + // with the PHINode. + if (match(I, m_c_BinOp(m_Value(), m_Specific(Phi)))) + return cast(I); + + // Bind ExtraConst from an opcode match; bail out if it is already bound. + if (I->getOpcode() == BOWithConstOpToMatch) { + if (ExtraConst) + return nullptr; + const APInt *C = nullptr; + if (match(I, m_c_BinOp(m_APInt(C), m_Value()))) + ExtraConst = *C; + } + + // Continue along the use-def chain. + for (Use &U : I->operands()) + if (auto *UI = dyn_cast(U)) + if (L.contains(UI)) + Worklist.push_back(UI); + } + return nullptr; +} + +/// A Conditional Recurrence is a recurrence of the form: +/// +/// loop: +/// %rec = phi [%start, %entry], [%step, %loop] +/// ... +/// %step = select _, %tv, %fv +/// +/// where %tv and %fv ultimately end up using %rec via the same %BO instruction, +/// after digging through the use-def chain. +/// +/// ExtraConst is relevant if \p BOWithConstOpToMatch is supplied: when digging +/// the use-def chain, a BinOp with opcode \p BOWithConstOpToMatch is matched, +/// and ExtraConst is a constant operand of that BinOp. 
This peculiarity exists, +/// because in a CRC algorithm, the \p BOWithConstOpToMatch is an XOR, and the +/// ExtraConst ends up being the generating polynomial. +bool RecurrenceInfo::matchConditionalRecurrence( + const PHINode *P, Instruction::BinaryOps BOWithConstOpToMatch) { + Phi = P; + if (Phi->getNumIncomingValues() != 2) + return false; + + for (unsigned Idx = 0; Idx != 2; ++Idx) { + Value *FoundStep = Phi->getIncomingValue(Idx); + Value *FoundStart = Phi->getIncomingValue(!Idx); + + Instruction *TV, *FV; + if (!match(FoundStep, + m_Select(m_Cmp(), m_Instruction(TV), m_Instruction(FV)))) + continue; + + // For a conditional recurrence, both the true and false values of the + // select must ultimately end up in the same recurrent BinOp. + BinaryOperator *FoundBO = digRecurrence(TV, BOWithConstOpToMatch); + BinaryOperator *AltBO = digRecurrence(FV, BOWithConstOpToMatch); + if (!FoundBO || FoundBO != AltBO) + return false; + + if (BOWithConstOpToMatch != Instruction::BinaryOpsEnd && !ExtraConst) { + LLVM_DEBUG(dbgs() << "HashRecognize: Unable to match single BinaryOp " + "with constant in conditional recurrence\n"); + return false; + } + + BO = FoundBO; + Start = FoundStart; + Step = FoundStep; + return true; + } + return false; +} + +/// Iterates over all the phis in \p LoopLatch, and attempts to extract a +/// Conditional Recurrence and an optional Simple Recurrence. 
+static std::optional> +getRecurrences(BasicBlock *LoopLatch, const PHINode *IndVar, const Loop &L) { + auto Phis = LoopLatch->phis(); + unsigned NumPhis = std::distance(Phis.begin(), Phis.end()); + if (NumPhis != 2 && NumPhis != 3) + return {}; + + RecurrenceInfo SimpleRecurrence(L); + RecurrenceInfo ConditionalRecurrence(L); + for (PHINode &P : Phis) { + if (&P == IndVar) + continue; + if (!SimpleRecurrence) + SimpleRecurrence.matchSimpleRecurrence(&P); + if (!ConditionalRecurrence) + ConditionalRecurrence.matchConditionalRecurrence( + &P, Instruction::BinaryOps::Xor); + } + if (NumPhis == 3 && (!SimpleRecurrence || !ConditionalRecurrence)) + return {}; + return std::make_pair(SimpleRecurrence, ConditionalRecurrence); +} + +PolynomialInfo::PolynomialInfo(unsigned TripCount, Value *LHS, const APInt &RHS, + Value *ComputedValue, bool ByteOrderSwapped, + Value *LHSAux) + : TripCount(TripCount), LHS(LHS), RHS(RHS), ComputedValue(ComputedValue), + ByteOrderSwapped(ByteOrderSwapped), LHSAux(LHSAux) {} + +/// Generate a lookup table of 256 entries by interleaving the generating +/// polynomial. The optimization technique of table-lookup for CRC is also +/// called the Sarwate algorithm. +CRCTable HashRecognize::genSarwateTable(const APInt &GenPoly, + bool ByteOrderSwapped) { + unsigned BW = GenPoly.getBitWidth(); + CRCTable Table; + Table[0] = APInt::getZero(BW); + + if (ByteOrderSwapped) { + APInt CRCInit = APInt::getSignedMinValue(BW); + for (unsigned I = 1; I < 256; I <<= 1) { + CRCInit = CRCInit.shl(1) ^ + (CRCInit.isSignBitSet() ? GenPoly : APInt::getZero(BW)); + for (unsigned J = 0; J < I; ++J) + Table[I + J] = CRCInit ^ Table[J]; + } + return Table; + } + + APInt CRCInit(BW, 1); + for (unsigned I = 128; I; I >>= 1) { + CRCInit = CRCInit.lshr(1) ^ (CRCInit[0] ? 
GenPoly : APInt::getZero(BW)); + for (unsigned J = 0; J < 256; J += (I << 1)) + Table[I + J] = CRCInit ^ Table[J]; + } + return Table; +} + +/// Checks that \p P1 and \p P2 are used together in an XOR in the use-def chain +/// of \p SI's condition, ignoring any casts. The purpose of this function is to +/// ensure that LHSAux from the SimpleRecurrence is used correctly in the CRC +/// computation. +/// +/// In other words, it checks for the following pattern: +/// +/// loop: +/// %P1 = phi [_, %entry], [%P1.next, %loop] +/// %P2 = phi [_, %entry], [%P2.next, %loop] +/// ... +/// %xor = xor (CastOrSelf %P1), (CastOrSelf %P2) +/// +/// where %xor is in the use-def chain of \p SI's condition. +static bool isConditionalOnXorOfPHIs(const SelectInst *SI, const PHINode *P1, + const PHINode *P2, const Loop &L) { + SmallVector Worklist; + + // matchConditionalRecurrence has already ensured that the SelectInst's + // condition is an Instruction. + Worklist.push_back(cast(SI->getCondition())); + + while (!Worklist.empty()) { + const Instruction *I = Worklist.pop_back_val(); + + // Don't add a PHI's operands to the Worklist. + if (isa(I)) + continue; + + // If we match an XOR of the two PHIs ignoring casts, we're done. + if (match(I, m_c_Xor(m_ZExtOrTruncOrSelf(m_Specific(P1)), + m_ZExtOrTruncOrSelf(m_Specific(P2))))) + return true; + + // Continue along the use-def chain. + for (const Use &U : I->operands()) + if (auto *UI = dyn_cast(U)) + if (L.contains(UI)) + Worklist.push_back(UI); + } + return false; +} + +// Recognizes a multiplication or division by the constant two, using SCEV. By +// doing this, we're immune to whether the IR expression is mul/udiv or +// equivalently shl/lshr. Return false when it is a UDiv, true when it is a Mul, +// and std::nullopt otherwise. 
+static std::optional isBigEndianBitShift(Value *V, ScalarEvolution &SE) { + if (!V->getType()->isIntegerTy()) + return {}; + + const SCEV *E = SE.getSCEV(V); + if (match(E, m_scev_UDiv(m_SCEV(), m_scev_SpecificInt(2)))) + return false; + if (match(E, m_scev_Mul(m_scev_SpecificInt(2), m_SCEV()))) + return true; + return {}; +} + +/// The main entry point for analyzing a loop and recognizing the CRC algorithm. +/// Returns a PolynomialInfo on success, and a StringRef on failure. +std::variant HashRecognize::recognizeCRC() const { + if (!L.isInnermost()) + return "Loop is not innermost"; + BasicBlock *Latch = L.getLoopLatch(); + BasicBlock *Exit = L.getExitBlock(); + const PHINode *IndVar = L.getCanonicalInductionVariable(); + if (!Latch || !Exit || !IndVar || L.getNumBlocks() != 1) + return "Loop not in canonical form"; + unsigned TC = SE.getSmallConstantTripCount(&L); + if (!TC || TC % 8) + return "Unable to find a small constant byte-multiple trip count"; + + auto R = getRecurrences(Latch, IndVar, L); + if (!R) + return "Found stray PHI"; + auto [SimpleRecurrence, ConditionalRecurrence] = *R; + if (!ConditionalRecurrence) + return "Unable to find conditional recurrence"; + + // Make sure that all recurrences are either all SCEVMul with two or SCEVDiv + // with two, or in other words, that they're single bit-shifts. + std::optional ByteOrderSwapped = + isBigEndianBitShift(ConditionalRecurrence.BO, SE); + if (!ByteOrderSwapped) + return "Loop with non-unit bitshifts"; + if (SimpleRecurrence) { + if (isBigEndianBitShift(SimpleRecurrence.BO, SE) != ByteOrderSwapped) + return "Loop with non-unit bitshifts"; + + // Ensure that the PHIs have exactly two uses: + // the bit-shift, and the XOR (or a cast feeding into the XOR). 
+ if (!ConditionalRecurrence.Phi->hasNUses(2) || + !SimpleRecurrence.Phi->hasNUses(2)) + return "Recurrences have stray uses"; + + // Check that the SelectInst ConditionalRecurrence.Step is conditional on + // the XOR of SimpleRecurrence.Phi and ConditionalRecurrence.Phi. + if (!isConditionalOnXorOfPHIs(cast(ConditionalRecurrence.Step), + SimpleRecurrence.Phi, + ConditionalRecurrence.Phi, L)) + return "Recurrences not intertwined with XOR"; + } + + // Make sure that the TC doesn't exceed the bitwidth of LHSAux, or LHS. + Value *LHS = ConditionalRecurrence.Start; + Value *LHSAux = SimpleRecurrence ? SimpleRecurrence.Start : nullptr; + if (TC > (LHSAux ? LHSAux->getType()->getIntegerBitWidth() + : LHS->getType()->getIntegerBitWidth())) + return "Loop iterations exceed bitwidth of data"; + + // Make sure that the computed value is used in the exit block: this should be + // true even if it is only really used in an outer loop's exit block, since + // the loop is in LCSSA form. + auto *ComputedValue = cast(ConditionalRecurrence.Step); + if (none_of(ComputedValue->users(), [Exit](User *U) { + auto *UI = dyn_cast(U); + return UI && UI->getParent() == Exit; + })) + return "Unable to find use of computed value in loop exit block"; + + assert(ConditionalRecurrence.ExtraConst && + "Expected ExtraConst in conditional recurrence"); + const APInt &GenPoly = *ConditionalRecurrence.ExtraConst; + + if (!isSignificantBitCheckWellFormed(ConditionalRecurrence, SimpleRecurrence, + *ByteOrderSwapped)) + return "Malformed significant-bit check"; + + SmallVector Roots( + {ComputedValue, + cast(IndVar->getIncomingValueForBlock(Latch)), + L.getLatchCmpInst(), Latch->getTerminator()}); + if (SimpleRecurrence) + Roots.push_back(SimpleRecurrence.BO); + if (containsUnreachable(L, Roots)) + return "Found stray unvisited instructions"; + + return PolynomialInfo(TC, LHS, GenPoly, ComputedValue, *ByteOrderSwapped, + LHSAux); +} + +void CRCTable::print(raw_ostream &OS) const { + for (unsigned I = 
0; I < 256; I++) { + (*this)[I].print(OS, false); + OS << (I % 16 == 15 ? '\n' : ' '); + } +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void CRCTable::dump() const { print(dbgs()); } +#endif + +void HashRecognize::print(raw_ostream &OS) const { + if (!L.isInnermost()) + return; + OS << "HashRecognize: Checking a loop in '" + << L.getHeader()->getParent()->getName() + << "'\n"; + auto Ret = recognizeCRC(); + if (!std::holds_alternative(Ret)) { + OS << "Did not find a hash algorithm\n"; + if (std::holds_alternative(Ret)) + OS << "Reason: " << std::get(Ret) << "\n"; + return; + } + + auto Info = std::get(Ret); + OS << "Found" << (Info.ByteOrderSwapped ? " big-endian " : " little-endian ") + << "CRC-" << Info.RHS.getBitWidth() << " loop with trip count " + << Info.TripCount << "\n"; + OS.indent(2) << "Initial CRC: "; + Info.LHS->print(OS); + OS << "\n"; + OS.indent(2) << "Generating polynomial: "; + Info.RHS.print(OS, false); + OS << "\n"; + OS.indent(2) << "Computed CRC: "; + Info.ComputedValue->print(OS); + OS << "\n"; + if (Info.LHSAux) { + OS.indent(2) << "Auxiliary data: "; + Info.LHSAux->print(OS); + OS << "\n"; + } + OS.indent(2) << "Computed CRC lookup table:\n"; + genSarwateTable(Info.RHS, Info.ByteOrderSwapped).print(OS); +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void HashRecognize::dump() const { print(dbgs()); } +#endif + +std::optional HashRecognize::getResult() const { + auto Res = recognizeCRC(); + if (std::holds_alternative(Res)) + return std::get(Res); + return std::nullopt; +} + +HashRecognize::HashRecognize(const Loop &L, ScalarEvolution &SE) + : L(L), SE(SE) {} + +PreservedAnalyses HashRecognizePrinterPass::run(Loop &L, + LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &) { + HashRecognize(L, AR.SE).print(OS); + return PreservedAnalyses::all(); +} diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp index 
14e1787c2b14b75eabe661e6f45f6d338695795e..bff652ac01f41d6e31e152ceab77afa8f4b14d58 100644 --- a/llvm/lib/IR/BasicBlock.cpp +++ b/llvm/lib/IR/BasicBlock.cpp @@ -220,6 +220,15 @@ const Instruction* BasicBlock::getFirstNonPHI() const { return nullptr; } +BasicBlock::const_iterator BasicBlock::getFirstNonPHIIt() const { + for (const Instruction &I : *this) { + if (isa(I)) + continue; + return I.getIterator(); + } + return end(); +} + const Instruction *BasicBlock::getFirstNonPHIOrDbg(bool SkipPseudoOp) const { for (const Instruction &I : *this) { if (isa(I) || isa(I)) diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 2ae9bb8f8d658be12381941655d167c74e01d34f..a990c871357be4fc13cb957bf16d7ec49053fb07 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -38,6 +38,7 @@ #include "llvm/Analysis/DominanceFrontier.h" #include "llvm/Analysis/FunctionPropertiesAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/HashRecognize.h" #include "llvm/Analysis/IRSimilarityIdentifier.h" #include "llvm/Analysis/IVUsers.h" #include "llvm/Analysis/InlineAdvisor.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index f6df0fcb7f803fb6b9f66a8e8d499e33246ae1fe..060f91ae96df968fb08c9bf3099de8146b679c74 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -635,6 +635,7 @@ LOOP_PASS("loop-reduce", LoopStrengthReducePass()) LOOP_PASS("indvars", IndVarSimplifyPass()) LOOP_PASS("loop-unroll-full", LoopFullUnrollPass()) LOOP_PASS("print", DDGAnalysisPrinterPass(dbgs())) +LOOP_PASS("print", HashRecognizePrinterPass(dbgs())) LOOP_PASS("print", IVUsersPrinterPass(dbgs())) LOOP_PASS("print", LoopNestPrinterPass(dbgs())) LOOP_PASS("print", LoopCachePrinterPass(dbgs())) diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 
8572a442e784ae2fb5bc93548a72daa70522cae5..004c9bfde842c3366ae3d26dfa009fc4b29fee97 100644 --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -48,6 +48,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CmpInstAnalysis.h" +#include "llvm/Analysis/HashRecognize.h" #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" @@ -76,6 +77,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsAArch64.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" @@ -141,6 +143,11 @@ static cl::opt UseLIRCodeSizeHeurs( "with -Os/-Oz"), cl::init(true), cl::Hidden); +static cl::opt EnableCRCRecognize( + "enable-crc-recognize", + cl::desc("Enable crc loop recognize and optimization (default-false)"), + cl::init(false), cl::Hidden); + namespace { class LoopIdiomRecognize { @@ -229,6 +236,7 @@ private: const SCEV *BECount); bool avoidLIRForMultiBlockLoop(bool IsMemset = false, bool IsLoopMemset = false); + bool optimizeCRCLoop(const PolynomialInfo &Info); /// @} /// \name Noncountable Loop Idiom Handling @@ -308,7 +316,7 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L) { HasMemsetPattern = TLI->has(LibFunc_memset_pattern16); HasMemcpy = TLI->has(LibFunc_memcpy); - if (HasMemset || HasMemsetPattern || HasMemcpy) + if (HasMemset || HasMemsetPattern || HasMemcpy || EnableCRCRecognize) if (SE->hasLoopInvariantBackedgeTakenCount(L)) return runOnCountableLoop(); @@ -352,6 +360,13 @@ bool LoopIdiomRecognize::runOnCountableLoop() { MadeChange |= runOnLoopBlock(BB, BECount, ExitBlocks); } + + // Optimize a CRC loop if HashRecognize found one, provided we're not + // optimizing for size. 
+ if (EnableCRCRecognize && !ApplyCodeSizeHeuristics) + if (auto Res = HashRecognize(*CurLoop, *SE).getResult()) + optimizeCRCLoop(*Res); + return MadeChange; } @@ -2868,3 +2883,208 @@ bool LoopIdiomRecognize::recognizeShiftUntilZero() { ++NumShiftUntilZero; return MadeChange; } + +bool hasCRCTargetFeature(Function *F) { + Attribute Attr = F->getFnAttribute("target-features"); + if (!Attr.isValid()) + return false; + StringRef Features = Attr.getValueAsString(); + return Features.contains("+crc"); +} + +bool LoopIdiomRecognize::optimizeCRCLoop(const PolynomialInfo &Info) { + // Try optimize to crc32 intrinsic. + Module &M = *CurLoop->getHeader()->getModule(); + Triple TT(M.getTargetTriple()); + Function *F = CurLoop->getHeader()->getParent(); + // Check target feature has +crc + if (TT.getArch() == Triple::aarch64 && hasCRCTargetFeature(F)) { + Instruction *Terminator = CurLoop->getLoopPreheader()->getTerminator(); + LLVMContext &Context = CurLoop->getLoopPreheader()->getContext(); + IRBuilder<> Builder(Terminator); + + Value *Crc = Info.LHS; + Value *Zero = ConstantInt::get(Crc->getType(), 0); + Value *Data = Info.LHSAux == nullptr ? 
Zero : Info.LHSAux; + Value *ComputedValue = Info.ComputedValue; + + static const APInt SpecialValue(32, 0xEDB88320); + + // Check POLY const variable is 0xEDB88320 + // Check is right shift (ByteOrderSwapped == false) + // Check is CRC 32 bit + if (Crc->getType() == Type::getInt32Ty(Context) && Info.ByteOrderSwapped == false + && Info.RHS.getBitWidth() == 32 && Info.RHS.eq(SpecialValue)) { + Function *Crc32Intrinsic = nullptr; + switch (Info.TripCount) { + case 8: + Crc32Intrinsic = Intrinsic::getDeclaration(&M, Intrinsic::aarch64_crc32b); + break; + case 16: + Crc32Intrinsic = Intrinsic::getDeclaration(&M, Intrinsic::aarch64_crc32h); + break; + case 32: + Crc32Intrinsic = Intrinsic::getDeclaration(&M, Intrinsic::aarch64_crc32w); + break; + default: + break; + } + // If data's type is not i32, use zero extension to i32 + if (Data->getType() == Type::getInt16Ty(Context) + || Data->getType() == Type::getInt8Ty(Context)) { + Data = Builder.CreateZExt(Data, Type::getInt32Ty(Context)); + } + if (Data->getType() == Type::getInt32Ty(Context) && Crc32Intrinsic) { + // Insert intrinsic call at the end of loop header, and get result value + auto NewCrc = Builder.CreateCall(Crc32Intrinsic, {Crc, Data}); + ComputedValue->replaceAllUsesWith(NewCrc); + // after replace uses, the loop body is dead and will be delete by LoopDeletionPass + return true; + } + } + } + // If can not be optimized to crc intrinsic, try optimize to lookup-table algorithm + + // First, create a new GlobalVariable corresponding to the + // Sarwate-lookup-table. 
+ Type *CRCTy = Info.LHS->getType(); + unsigned CRCBW = CRCTy->getIntegerBitWidth(); + std::array CRCConstants; + transform(HashRecognize::genSarwateTable(Info.RHS, Info.ByteOrderSwapped), + CRCConstants.begin(), + [CRCTy](const APInt &E) { return ConstantInt::get(CRCTy, E); }); + Constant *ConstArray = + ConstantArray::get(ArrayType::get(CRCTy, 256), CRCConstants); + GlobalVariable *GV = + new GlobalVariable(M, ConstArray->getType(), true, + GlobalValue::PrivateLinkage, ConstArray, ".crctable"); + + PHINode *IV = CurLoop->getCanonicalInductionVariable(); + SmallVector Cleanup; + + // Next, mark all PHIs for removal except IV. + { + for (PHINode &PN : CurLoop->getHeader()->phis()) { + if (&PN == IV) + continue; + PN.replaceAllUsesWith(PoisonValue::get(PN.getType())); + Cleanup.push_back(&PN); + } + } + + // Next, fix up the trip count. + { + unsigned NewBTC = (Info.TripCount / 8) - 1; + BasicBlock *LoopBlk = CurLoop->getLoopLatch(); + BranchInst *BrInst = cast(LoopBlk->getTerminator()); + ICmpInst::Predicate ExitPred = BrInst->getSuccessor(0) == LoopBlk + ? ICmpInst::Predicate::ICMP_NE + : ICmpInst::Predicate::ICMP_EQ; + Instruction *ExitCond = CurLoop->getLatchCmpInst(); + Value *ExitLimit = ConstantInt::get(IV->getType(), NewBTC); + IRBuilder<> Builder(ExitCond); + Value *NewExitCond = + Builder.CreateICmp(ExitPred, IV, ExitLimit, "exit.cond"); + ExitCond->replaceAllUsesWith(NewExitCond); + deleteDeadInstruction(ExitCond); + } + + // Finally, fill the loop with the Sarwate-table-lookup logic, and replace all + // uses of ComputedValue. + // + // Little-endian: + // crc = (crc >> 8) ^ tbl[(iv'th byte of data) ^ (bottom byte of crc)] + // Big-Endian: + // crc = (crc << 8) ^ tbl[(iv'th byte of data) ^ (top byte of crc)] + { + auto LoByte = [](IRBuilderBase &Builder, Value *Op, const Twine &Name) { + Type *OpTy = Op->getType(); + unsigned OpBW = OpTy->getIntegerBitWidth(); + return OpBW > 8 + ? 
Builder.CreateAnd(Op, ConstantInt::get(OpTy, 0XFF), Name) + : Op; + }; + auto HiIdx = [LoByte, CRCBW](IRBuilderBase &Builder, Value *Op, + const Twine &Name) { + Type *OpTy = Op->getType(); + + // When the bitwidth of the CRC mismatches the Op's bitwidth, we need to + // use the CRC's bitwidth as the reference for shifting right. + return LoByte(Builder, + CRCBW > 8 ? Builder.CreateLShr( + Op, ConstantInt::get(OpTy, CRCBW - 8), Name) + : Op, + Name + ".lo.byte"); + }; + + IRBuilder<> Builder(CurLoop->getHeader(), + CurLoop->getHeader()->getFirstNonPHIIt()); + + // Create the CRC PHI, and initialize its incoming value to the initial + // value of CRC. + PHINode *CRCPhi = Builder.CreatePHI(CRCTy, 2, "crc"); + CRCPhi->addIncoming(Info.LHS, CurLoop->getLoopPreheader()); + + // CRC is now an evolving variable, initialized to the PHI. + Value *CRC = CRCPhi; + + // TableIndexer = ((top|bottom) byte of CRC). It is XOR'ed with (iv'th byte + // of LHSAux), if LHSAux is non-nullptr. + Value *Indexer = CRC; + if (Value *Data = Info.LHSAux) { + Type *DataTy = Data->getType(); + + // To index into the (iv'th byte of LHSAux), we multiply iv by 8, and we + // shift right by that amount, and take the lo-byte (in the little-endian + // case), or shift left by that amount, and take the hi-idx (in the + // big-endian case). + Value *IVBits = Builder.CreateZExtOrTrunc( + Builder.CreateShl(IV, 3, "iv.bits"), DataTy, "iv.indexer"); + Value *DataIndexer = + Info.ByteOrderSwapped + ? Builder.CreateShl(Data, IVBits, "data.indexer") + : Builder.CreateLShr(Data, IVBits, "data.indexer"); + Indexer = Builder.CreateXor( + DataIndexer, + Builder.CreateZExtOrTrunc(Indexer, DataTy, "crc.indexer.cast"), + "crc.data.indexer"); + } + + Indexer = Info.ByteOrderSwapped ? HiIdx(Builder, Indexer, "indexer.hi") + : LoByte(Builder, Indexer, "indexer.lo"); + + // Always index into a GEP using the index type. 
+ Indexer = Builder.CreateZExt( + Indexer, SE->getDataLayout().getIndexType(GV->getType()), + "indexer.ext"); + + // CRCTableLd = CRCTable[(iv'th byte of data) ^ (top|bottom) byte of CRC]. + Value *CRCTableGEP = + Builder.CreateInBoundsGEP(CRCTy, GV, Indexer, "tbl.ptradd"); + Value *CRCTableLd = Builder.CreateLoad(CRCTy, CRCTableGEP, "tbl.ld"); + + // CRCNext = (CRC (<<|>>) 8) ^ CRCTableLd, or simply CRCTableLd in case of + // CRC-8. + Value *CRCNext = CRCTableLd; + if (CRCBW > 8) { + Value *CRCShift = Info.ByteOrderSwapped + ? Builder.CreateShl(CRC, 8, "crc.be.shift") + : Builder.CreateLShr(CRC, 8, "crc.le.shift"); + CRCNext = Builder.CreateXor(CRCShift, CRCTableLd, "crc.next"); + } + + // Connect the back-edge for the loop, and RAUW the ComputedValue. + CRCPhi->addIncoming(CRCNext, CurLoop->getLoopLatch()); + Info.ComputedValue->replaceUsesOutsideBlock(CRCNext, + CurLoop->getLoopLatch()); + } + + // Cleanup. + { + for (PHINode *PN : Cleanup) + RecursivelyDeleteDeadPHINode(PN); + SE->forgetLoop(CurLoop); + } + return true; +} + diff --git a/llvm/test/Transforms/LoopIdiom/crc-loop-to-crc32intrinsic-check.ll b/llvm/test/Transforms/LoopIdiom/crc-loop-to-crc32intrinsic-check.ll new file mode 100644 index 0000000000000000000000000000000000000000..029efb46ab999ffd868a2bef55372f39d3fa2f48 --- /dev/null +++ b/llvm/test/Transforms/LoopIdiom/crc-loop-to-crc32intrinsic-check.ll @@ -0,0 +1,106 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5 +; RUN: opt -passes=loop-idiom --enable-crc-recognize -S %s | FileCheck %s + +target triple = "aarch64-unknown-linux-gnu" + +define i32 @crc32.32(i32 %msg, i32 %checksum) #0 { +; CHECK-LABEL: define i32 @crc32.32( +; CHECK-SAME: i32 [[MSG:%.*]], i32 [[CHECKSUM:%.*]]) +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.aarch64.crc32w(i32 [[CHECKSUM]], i32 [[MSG]]) +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK: 
[[CRC_NEXT_LCSSA:%.*]] = phi i32 [ [[RES]], %[[LOOP]] ] +; CHECK-NEXT: ret i32 [[CRC_NEXT_LCSSA]] +entry: + br label %loop + +loop: ; preds = %loop, %entry + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + %crc = phi i32 [ %checksum, %entry ], [ %crc.next, %loop ] + %data = phi i32 [ %msg, %entry ], [ %data.next, %loop ] + %xor.data.crc = xor i32 %data, %crc + %and.data.crc = and i32 %xor.data.crc, 1 + %data.next = lshr i32 %data, 1 + %check.sb = icmp eq i32 %and.data.crc, 0 + %crc.lshr = lshr i32 %crc, 1 + %xor = xor i32 %crc.lshr, -306674912 + %crc.next = select i1 %check.sb, i32 %crc.lshr, i32 %xor + %iv.next = add nuw nsw i32 %iv, 1 + %exit.cond = icmp ult i32 %iv, 31 + br i1 %exit.cond, label %loop, label %exit + +exit: ; preds = %loop + %lcssa = phi i32 [ %crc.next, %loop ] + ret i32 %lcssa +} + + +define i32 @crc32.8(i8 %msg, i32 %checksum) #0 { +; CHECK-LABEL: define i32 @crc32.8( +; CHECK-SAME: i8 [[MSG:%.*]], i32 [[CHECKSUM:%.*]]) +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[EXT:%.*]] = zext i8 [[MSG]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.aarch64.crc32b(i32 [[CHECKSUM]], i32 [[EXT]]) +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK: [[CRC_NEXT_LCSSA:%.*]] = phi i32 [ [[RES]], %[[LOOP]] ] +; CHECK-NEXT: ret i32 [[CRC_NEXT_LCSSA]] +entry: + br label %loop + +loop: ; preds = %loop, %entry + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + %crc = phi i32 [ %checksum, %entry ], [ %crc.next, %loop ] + %data = phi i8 [ %msg, %entry ], [ %data.next, %loop ] + %crc.trunc = trunc i32 %crc to i8 + %xor.data.crc = xor i8 %data, %crc.trunc + %and.data.crc = and i8 %xor.data.crc, 1 + %data.next = lshr i8 %data, 1 + %check.sb = icmp eq i8 %and.data.crc, 0 + %crc.lshr = lshr i32 %crc, 1 + %xor = xor i32 %crc.lshr, -306674912 + %crc.next = select i1 %check.sb, i32 %crc.lshr, i32 %xor + %iv.next = add nuw nsw i32 %iv, 1 + %exit.cond = icmp ult i32 %iv, 7 + br i1 %exit.cond, label %loop, label %exit + +exit: ; preds = %loop + %lcssa = 
phi i32 [ %crc.next, %loop ] + ret i32 %lcssa +} + +define i32 @crc32.16(i16 %msg, i32 %checksum) #0 { +; CHECK-LABEL: define i32 @crc32.16( +; CHECK-SAME: i16 [[MSG:%.*]], i32 [[CHECKSUM:%.*]]) +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[EXT:%.*]] = zext i16 [[MSG]] to i32 +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.aarch64.crc32h(i32 [[CHECKSUM]], i32 [[EXT]]) +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK: [[CRC_NEXT_LCSSA:%.*]] = phi i32 [ [[RES]], %[[LOOP]] ] +; CHECK-NEXT: ret i32 [[CRC_NEXT_LCSSA]] +entry: + br label %loop + +loop: ; preds = %loop, %entry + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + %crc = phi i32 [ %checksum, %entry ], [ %crc.next, %loop ] + %data = phi i16 [ %msg, %entry ], [ %data.next, %loop ] + %crc.trunc = trunc i32 %crc to i16 + %xor.data.crc = xor i16 %data, %crc.trunc + %and.data.crc = and i16 %xor.data.crc, 1 + %data.next = lshr i16 %data, 1 + %check.sb = icmp eq i16 %and.data.crc, 0 + %crc.lshr = lshr i32 %crc, 1 + %xor = xor i32 %crc.lshr, -306674912 + %crc.next = select i1 %check.sb, i32 %crc.lshr, i32 %xor + %iv.next = add nuw nsw i32 %iv, 1 + %exit.cond = icmp ult i32 %iv, 15 + br i1 %exit.cond, label %loop, label %exit + +exit: ; preds = %loop + %lcssa = phi i32 [ %crc.next, %loop ] + ret i32 %lcssa +} +attributes #0 = {"target-features"="+crc"} diff --git a/llvm/test/Transforms/LoopIdiom/crc-loop-to-table-check.ll b/llvm/test/Transforms/LoopIdiom/crc-loop-to-table-check.ll new file mode 100644 index 0000000000000000000000000000000000000000..3ae34f20964edaa88e1976e6f30de31cae1385b9 --- /dev/null +++ b/llvm/test/Transforms/LoopIdiom/crc-loop-to-table-check.ll @@ -0,0 +1,542 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5 +; RUN: opt -passes=loop-idiom --enable-crc-recognize -S %s | FileCheck %s + +;. 
+; CHECK: @.crctable = private constant [256 x i16] [i16 0, i16 -16191, i16 -15999, i16 320, i16 -15615, i16 960, i16 640, i16 -15807, i16 -14847, i16 1728, i16 1920, i16 -14527, i16 1280, i16 -14911, i16 -15231, i16 1088, i16 -13311, i16 3264, i16 3456, i16 -12991, i16 3840, i16 -12351, i16 -12671, i16 3648, i16 2560, i16 -13631, i16 -13439, i16 2880, i16 -14079, i16 2496, i16 2176, i16 -14271, i16 -10239, i16 6336, i16 6528, i16 -9919, i16 6912, i16 -9279, i16 -9599, i16 6720, i16 7680, i16 -8511, i16 -8319, i16 8000, i16 -8959, i16 7616, i16 7296, i16 -9151, i16 5120, i16 -11071, i16 -10879, i16 5440, i16 -10495, i16 6080, i16 5760, i16 -10687, i16 -11775, i16 4800, i16 4992, i16 -11455, i16 4352, i16 -11839, i16 -12159, i16 4160, i16 -4095, i16 12480, i16 12672, i16 -3775, i16 13056, i16 -3135, i16 -3455, i16 12864, i16 13824, i16 -2367, i16 -2175, i16 14144, i16 -2815, i16 13760, i16 13440, i16 -3007, i16 15360, i16 -831, i16 -639, i16 15680, i16 -255, i16 16320, i16 16000, i16 -447, i16 -1535, i16 15040, i16 15232, i16 -1215, i16 14592, i16 -1599, i16 -1919, i16 14400, i16 10240, i16 -5951, i16 -5759, i16 10560, i16 -5375, i16 11200, i16 10880, i16 -5567, i16 -4607, i16 11968, i16 12160, i16 -4287, i16 11520, i16 -4671, i16 -4991, i16 11328, i16 -7167, i16 9408, i16 9600, i16 -6847, i16 9984, i16 -6207, i16 -6527, i16 9792, i16 8704, i16 -7487, i16 -7295, i16 9024, i16 -7935, i16 8640, i16 8320, i16 -8127, i16 -24575, i16 24768, i16 24960, i16 -24255, i16 25344, i16 -23615, i16 -23935, i16 25152, i16 26112, i16 -22847, i16 -22655, i16 26432, i16 -23295, i16 26048, i16 25728, i16 -23487, i16 27648, i16 -21311, i16 -21119, i16 27968, i16 -20735, i16 28608, i16 28288, i16 -20927, i16 -22015, i16 27328, i16 27520, i16 -21695, i16 26880, i16 -22079, i16 -22399, i16 26688, i16 30720, i16 -18239, i16 -18047, i16 31040, i16 -17663, i16 31680, i16 31360, i16 -17855, i16 -16895, i16 32448, i16 32640, i16 -16575, i16 32000, i16 -16959, i16 -17279, i16 31808, i16 -19455, 
i16 29888, i16 30080, i16 -19135, i16 30464, i16 -18495, i16 -18815, i16 30272, i16 29184, i16 -19775, i16 -19583, i16 29504, i16 -20223, i16 29120, i16 28800, i16 -20415, i16 20480, i16 -28479, i16 -28287, i16 20800, i16 -27903, i16 21440, i16 21120, i16 -28095, i16 -27135, i16 22208, i16 22400, i16 -26815, i16 21760, i16 -27199, i16 -27519, i16 21568, i16 -25599, i16 23744, i16 23936, i16 -25279, i16 24320, i16 -24639, i16 -24959, i16 24128, i16 23040, i16 -25919, i16 -25727, i16 23360, i16 -26367, i16 22976, i16 22656, i16 -26559, i16 -30719, i16 18624, i16 18816, i16 -30399, i16 19200, i16 -29759, i16 -30079, i16 19008, i16 19968, i16 -28991, i16 -28799, i16 20288, i16 -29439, i16 19904, i16 19584, i16 -29631, i16 17408, i16 -31551, i16 -31359, i16 17728, i16 -30975, i16 18368, i16 18048, i16 -31167, i16 -32255, i16 17088, i16 17280, i16 -31935, i16 16640, i16 -32319, i16 -32639, i16 16448] +; CHECK: @.crctable.1 = private constant [256 x i16] [i16 0, i16 -16191, i16 -15999, i16 320, i16 -15615, i16 960, i16 640, i16 -15807, i16 -14847, i16 1728, i16 1920, i16 -14527, i16 1280, i16 -14911, i16 -15231, i16 1088, i16 -13311, i16 3264, i16 3456, i16 -12991, i16 3840, i16 -12351, i16 -12671, i16 3648, i16 2560, i16 -13631, i16 -13439, i16 2880, i16 -14079, i16 2496, i16 2176, i16 -14271, i16 -10239, i16 6336, i16 6528, i16 -9919, i16 6912, i16 -9279, i16 -9599, i16 6720, i16 7680, i16 -8511, i16 -8319, i16 8000, i16 -8959, i16 7616, i16 7296, i16 -9151, i16 5120, i16 -11071, i16 -10879, i16 5440, i16 -10495, i16 6080, i16 5760, i16 -10687, i16 -11775, i16 4800, i16 4992, i16 -11455, i16 4352, i16 -11839, i16 -12159, i16 4160, i16 -4095, i16 12480, i16 12672, i16 -3775, i16 13056, i16 -3135, i16 -3455, i16 12864, i16 13824, i16 -2367, i16 -2175, i16 14144, i16 -2815, i16 13760, i16 13440, i16 -3007, i16 15360, i16 -831, i16 -639, i16 15680, i16 -255, i16 16320, i16 16000, i16 -447, i16 -1535, i16 15040, i16 15232, i16 -1215, i16 14592, i16 -1599, i16 -1919, i16 
14400, i16 10240, i16 -5951, i16 -5759, i16 10560, i16 -5375, i16 11200, i16 10880, i16 -5567, i16 -4607, i16 11968, i16 12160, i16 -4287, i16 11520, i16 -4671, i16 -4991, i16 11328, i16 -7167, i16 9408, i16 9600, i16 -6847, i16 9984, i16 -6207, i16 -6527, i16 9792, i16 8704, i16 -7487, i16 -7295, i16 9024, i16 -7935, i16 8640, i16 8320, i16 -8127, i16 -24575, i16 24768, i16 24960, i16 -24255, i16 25344, i16 -23615, i16 -23935, i16 25152, i16 26112, i16 -22847, i16 -22655, i16 26432, i16 -23295, i16 26048, i16 25728, i16 -23487, i16 27648, i16 -21311, i16 -21119, i16 27968, i16 -20735, i16 28608, i16 28288, i16 -20927, i16 -22015, i16 27328, i16 27520, i16 -21695, i16 26880, i16 -22079, i16 -22399, i16 26688, i16 30720, i16 -18239, i16 -18047, i16 31040, i16 -17663, i16 31680, i16 31360, i16 -17855, i16 -16895, i16 32448, i16 32640, i16 -16575, i16 32000, i16 -16959, i16 -17279, i16 31808, i16 -19455, i16 29888, i16 30080, i16 -19135, i16 30464, i16 -18495, i16 -18815, i16 30272, i16 29184, i16 -19775, i16 -19583, i16 29504, i16 -20223, i16 29120, i16 28800, i16 -20415, i16 20480, i16 -28479, i16 -28287, i16 20800, i16 -27903, i16 21440, i16 21120, i16 -28095, i16 -27135, i16 22208, i16 22400, i16 -26815, i16 21760, i16 -27199, i16 -27519, i16 21568, i16 -25599, i16 23744, i16 23936, i16 -25279, i16 24320, i16 -24639, i16 -24959, i16 24128, i16 23040, i16 -25919, i16 -25727, i16 23360, i16 -26367, i16 22976, i16 22656, i16 -26559, i16 -30719, i16 18624, i16 18816, i16 -30399, i16 19200, i16 -29759, i16 -30079, i16 19008, i16 19968, i16 -28991, i16 -28799, i16 20288, i16 -29439, i16 19904, i16 19584, i16 -29631, i16 17408, i16 -31551, i16 -31359, i16 17728, i16 -30975, i16 18368, i16 18048, i16 -31167, i16 -32255, i16 17088, i16 17280, i16 -31935, i16 16640, i16 -32319, i16 -32639, i16 16448] +; CHECK: @.crctable.2 = private constant [256 x i16] [i16 0, i16 -16191, i16 -15999, i16 320, i16 -15615, i16 960, i16 640, i16 -15807, i16 -14847, i16 1728, i16 1920, i16 
-14527, i16 1280, i16 -14911, i16 -15231, i16 1088, i16 -13311, i16 3264, i16 3456, i16 -12991, i16 3840, i16 -12351, i16 -12671, i16 3648, i16 2560, i16 -13631, i16 -13439, i16 2880, i16 -14079, i16 2496, i16 2176, i16 -14271, i16 -10239, i16 6336, i16 6528, i16 -9919, i16 6912, i16 -9279, i16 -9599, i16 6720, i16 7680, i16 -8511, i16 -8319, i16 8000, i16 -8959, i16 7616, i16 7296, i16 -9151, i16 5120, i16 -11071, i16 -10879, i16 5440, i16 -10495, i16 6080, i16 5760, i16 -10687, i16 -11775, i16 4800, i16 4992, i16 -11455, i16 4352, i16 -11839, i16 -12159, i16 4160, i16 -4095, i16 12480, i16 12672, i16 -3775, i16 13056, i16 -3135, i16 -3455, i16 12864, i16 13824, i16 -2367, i16 -2175, i16 14144, i16 -2815, i16 13760, i16 13440, i16 -3007, i16 15360, i16 -831, i16 -639, i16 15680, i16 -255, i16 16320, i16 16000, i16 -447, i16 -1535, i16 15040, i16 15232, i16 -1215, i16 14592, i16 -1599, i16 -1919, i16 14400, i16 10240, i16 -5951, i16 -5759, i16 10560, i16 -5375, i16 11200, i16 10880, i16 -5567, i16 -4607, i16 11968, i16 12160, i16 -4287, i16 11520, i16 -4671, i16 -4991, i16 11328, i16 -7167, i16 9408, i16 9600, i16 -6847, i16 9984, i16 -6207, i16 -6527, i16 9792, i16 8704, i16 -7487, i16 -7295, i16 9024, i16 -7935, i16 8640, i16 8320, i16 -8127, i16 -24575, i16 24768, i16 24960, i16 -24255, i16 25344, i16 -23615, i16 -23935, i16 25152, i16 26112, i16 -22847, i16 -22655, i16 26432, i16 -23295, i16 26048, i16 25728, i16 -23487, i16 27648, i16 -21311, i16 -21119, i16 27968, i16 -20735, i16 28608, i16 28288, i16 -20927, i16 -22015, i16 27328, i16 27520, i16 -21695, i16 26880, i16 -22079, i16 -22399, i16 26688, i16 30720, i16 -18239, i16 -18047, i16 31040, i16 -17663, i16 31680, i16 31360, i16 -17855, i16 -16895, i16 32448, i16 32640, i16 -16575, i16 32000, i16 -16959, i16 -17279, i16 31808, i16 -19455, i16 29888, i16 30080, i16 -19135, i16 30464, i16 -18495, i16 -18815, i16 30272, i16 29184, i16 -19775, i16 -19583, i16 29504, i16 -20223, i16 29120, i16 28800, i16 
-20415, i16 20480, i16 -28479, i16 -28287, i16 20800, i16 -27903, i16 21440, i16 21120, i16 -28095, i16 -27135, i16 22208, i16 22400, i16 -26815, i16 21760, i16 -27199, i16 -27519, i16 21568, i16 -25599, i16 23744, i16 23936, i16 -25279, i16 24320, i16 -24639, i16 -24959, i16 24128, i16 23040, i16 -25919, i16 -25727, i16 23360, i16 -26367, i16 22976, i16 22656, i16 -26559, i16 -30719, i16 18624, i16 18816, i16 -30399, i16 19200, i16 -29759, i16 -30079, i16 19008, i16 19968, i16 -28991, i16 -28799, i16 20288, i16 -29439, i16 19904, i16 19584, i16 -29631, i16 17408, i16 -31551, i16 -31359, i16 17728, i16 -30975, i16 18368, i16 18048, i16 -31167, i16 -32255, i16 17088, i16 17280, i16 -31935, i16 16640, i16 -32319, i16 -32639, i16 16448] +; CHECK: @.crctable.3 = private constant [256 x i8] c"\00\09\12\1B\1F\16\0D\04\05\0C\17\1E\1A\13\08\01\0A\03\18\11\15\1C\07\0E\0F\06\1D\14\10\19\02\0B\14\1D\06\0F\0B\02\19\10\11\18\03\0A\0E\07\1C\15\1E\17\0C\05\01\08\13\1A\1B\12\09\00\04\0D\16\1F\13\1A\01\08\0C\05\1E\17\16\1F\04\0D\09\00\1B\12\19\10\0B\02\06\0F\14\1D\1C\15\0E\07\03\0A\11\18\07\0E\15\1C\18\11\0A\03\02\0B\10\19\1D\14\0F\06\0D\04\1F\16\12\1B\00\09\08\01\1A\13\17\1E\05\0C\1D\14\0F\06\02\0B\10\19\18\11\0A\03\07\0E\15\1C\17\1E\05\0C\08\01\1A\13\12\1B\00\09\0D\04\1F\16\09\00\1B\12\16\1F\04\0D\0C\05\1E\17\13\1A\01\08\03\0A\11\18\1C\15\0E\07\06\0F\14\1D\19\10\0B\02\0E\07\1C\15\11\18\03\0A\0B\02\19\10\14\1D\06\0F\04\0D\16\1F\1B\12\09\00\01\08\13\1A\1E\17\0C\05\1A\13\08\01\05\0C\17\1E\1F\16\0D\04\00\09\12\1B\10\19\02\0B\0F\06\1D\14\15\1C\07\0E\0A\03\18\11" +; CHECK: @.crctable.4 = private constant [256 x i16] [i16 0, i16 4129, i16 8258, i16 12387, i16 16516, i16 20645, i16 24774, i16 28903, i16 -32504, i16 -28375, i16 -24246, i16 -20117, i16 -15988, i16 -11859, i16 -7730, i16 -3601, i16 4657, i16 528, i16 12915, i16 8786, i16 21173, i16 17044, i16 29431, i16 25302, i16 -27847, i16 -31976, i16 -19589, i16 -23718, i16 -11331, i16 -15460, i16 -3073, i16 -7202, i16 9314, i16 13379, 
i16 1056, i16 5121, i16 25830, i16 29895, i16 17572, i16 21637, i16 -23190, i16 -19125, i16 -31448, i16 -27383, i16 -6674, i16 -2609, i16 -14932, i16 -10867, i16 13907, i16 9842, i16 5649, i16 1584, i16 30423, i16 26358, i16 22165, i16 18100, i16 -18597, i16 -22662, i16 -26855, i16 -30920, i16 -2081, i16 -6146, i16 -10339, i16 -14404, i16 18628, i16 22757, i16 26758, i16 30887, i16 2112, i16 6241, i16 10242, i16 14371, i16 -13876, i16 -9747, i16 -5746, i16 -1617, i16 -30392, i16 -26263, i16 -22262, i16 -18133, i16 23285, i16 19156, i16 31415, i16 27286, i16 6769, i16 2640, i16 14899, i16 10770, i16 -9219, i16 -13348, i16 -1089, i16 -5218, i16 -25735, i16 -29864, i16 -17605, i16 -21734, i16 27814, i16 31879, i16 19684, i16 23749, i16 11298, i16 15363, i16 3168, i16 7233, i16 -4690, i16 -625, i16 -12820, i16 -8755, i16 -21206, i16 -17141, i16 -29336, i16 -25271, i16 32407, i16 28342, i16 24277, i16 20212, i16 15891, i16 11826, i16 7761, i16 3696, i16 -97, i16 -4162, i16 -8227, i16 -12292, i16 -16613, i16 -20678, i16 -24743, i16 -28808, i16 -28280, i16 -32343, i16 -20022, i16 -24085, i16 -12020, i16 -16083, i16 -3762, i16 -7825, i16 4224, i16 161, i16 12482, i16 8419, i16 20484, i16 16421, i16 28742, i16 24679, i16 -31815, i16 -27752, i16 -23557, i16 -19494, i16 -15555, i16 -11492, i16 -7297, i16 -3234, i16 689, i16 4752, i16 8947, i16 13010, i16 16949, i16 21012, i16 25207, i16 29270, i16 -18966, i16 -23093, i16 -27224, i16 -31351, i16 -2706, i16 -6833, i16 -10964, i16 -15091, i16 13538, i16 9411, i16 5280, i16 1153, i16 29798, i16 25671, i16 21540, i16 17413, i16 -22565, i16 -18438, i16 -30823, i16 -26696, i16 -6305, i16 -2178, i16 -14563, i16 -10436, i16 9939, i16 14066, i16 1681, i16 5808, i16 26199, i16 30326, i16 17941, i16 22068, i16 -9908, i16 -13971, i16 -1778, i16 -5841, i16 -26168, i16 -30231, i16 -18038, i16 -22101, i16 22596, i16 18533, i16 30726, i16 26663, i16 6336, i16 2273, i16 14466, i16 10403, i16 -13443, i16 -9380, i16 -5313, i16 -1250, i16 -29703, 
i16 -25640, i16 -21573, i16 -17510, i16 19061, i16 23124, i16 27191, i16 31254, i16 2801, i16 6864, i16 10931, i16 14994, i16 -722, i16 -4849, i16 -8852, i16 -12979, i16 -16982, i16 -21109, i16 -25112, i16 -29239, i16 31782, i16 27655, i16 23652, i16 19525, i16 15522, i16 11395, i16 7392, i16 3265, i16 -4321, i16 -194, i16 -12451, i16 -8324, i16 -20581, i16 -16454, i16 -28711, i16 -24584, i16 28183, i16 32310, i16 20053, i16 24180, i16 11923, i16 16050, i16 3793, i16 7920] +; CHECK: @.crctable.5 = private constant [256 x i16] [i16 0, i16 4129, i16 8258, i16 12387, i16 16516, i16 20645, i16 24774, i16 28903, i16 -32504, i16 -28375, i16 -24246, i16 -20117, i16 -15988, i16 -11859, i16 -7730, i16 -3601, i16 4657, i16 528, i16 12915, i16 8786, i16 21173, i16 17044, i16 29431, i16 25302, i16 -27847, i16 -31976, i16 -19589, i16 -23718, i16 -11331, i16 -15460, i16 -3073, i16 -7202, i16 9314, i16 13379, i16 1056, i16 5121, i16 25830, i16 29895, i16 17572, i16 21637, i16 -23190, i16 -19125, i16 -31448, i16 -27383, i16 -6674, i16 -2609, i16 -14932, i16 -10867, i16 13907, i16 9842, i16 5649, i16 1584, i16 30423, i16 26358, i16 22165, i16 18100, i16 -18597, i16 -22662, i16 -26855, i16 -30920, i16 -2081, i16 -6146, i16 -10339, i16 -14404, i16 18628, i16 22757, i16 26758, i16 30887, i16 2112, i16 6241, i16 10242, i16 14371, i16 -13876, i16 -9747, i16 -5746, i16 -1617, i16 -30392, i16 -26263, i16 -22262, i16 -18133, i16 23285, i16 19156, i16 31415, i16 27286, i16 6769, i16 2640, i16 14899, i16 10770, i16 -9219, i16 -13348, i16 -1089, i16 -5218, i16 -25735, i16 -29864, i16 -17605, i16 -21734, i16 27814, i16 31879, i16 19684, i16 23749, i16 11298, i16 15363, i16 3168, i16 7233, i16 -4690, i16 -625, i16 -12820, i16 -8755, i16 -21206, i16 -17141, i16 -29336, i16 -25271, i16 32407, i16 28342, i16 24277, i16 20212, i16 15891, i16 11826, i16 7761, i16 3696, i16 -97, i16 -4162, i16 -8227, i16 -12292, i16 -16613, i16 -20678, i16 -24743, i16 -28808, i16 -28280, i16 -32343, i16 -20022, i16 
-24085, i16 -12020, i16 -16083, i16 -3762, i16 -7825, i16 4224, i16 161, i16 12482, i16 8419, i16 20484, i16 16421, i16 28742, i16 24679, i16 -31815, i16 -27752, i16 -23557, i16 -19494, i16 -15555, i16 -11492, i16 -7297, i16 -3234, i16 689, i16 4752, i16 8947, i16 13010, i16 16949, i16 21012, i16 25207, i16 29270, i16 -18966, i16 -23093, i16 -27224, i16 -31351, i16 -2706, i16 -6833, i16 -10964, i16 -15091, i16 13538, i16 9411, i16 5280, i16 1153, i16 29798, i16 25671, i16 21540, i16 17413, i16 -22565, i16 -18438, i16 -30823, i16 -26696, i16 -6305, i16 -2178, i16 -14563, i16 -10436, i16 9939, i16 14066, i16 1681, i16 5808, i16 26199, i16 30326, i16 17941, i16 22068, i16 -9908, i16 -13971, i16 -1778, i16 -5841, i16 -26168, i16 -30231, i16 -18038, i16 -22101, i16 22596, i16 18533, i16 30726, i16 26663, i16 6336, i16 2273, i16 14466, i16 10403, i16 -13443, i16 -9380, i16 -5313, i16 -1250, i16 -29703, i16 -25640, i16 -21573, i16 -17510, i16 19061, i16 23124, i16 27191, i16 31254, i16 2801, i16 6864, i16 10931, i16 14994, i16 -722, i16 -4849, i16 -8852, i16 -12979, i16 -16982, i16 -21109, i16 -25112, i16 -29239, i16 31782, i16 27655, i16 23652, i16 19525, i16 15522, i16 11395, i16 7392, i16 3265, i16 -4321, i16 -194, i16 -12451, i16 -8324, i16 -20581, i16 -16454, i16 -28711, i16 -24584, i16 28183, i16 32310, i16 20053, i16 24180, i16 11923, i16 16050, i16 3793, i16 7920] +; CHECK: @.crctable.6 = private constant [256 x i16] [i16 0, i16 4129, i16 8258, i16 12387, i16 16516, i16 20645, i16 24774, i16 28903, i16 -32504, i16 -28375, i16 -24246, i16 -20117, i16 -15988, i16 -11859, i16 -7730, i16 -3601, i16 4657, i16 528, i16 12915, i16 8786, i16 21173, i16 17044, i16 29431, i16 25302, i16 -27847, i16 -31976, i16 -19589, i16 -23718, i16 -11331, i16 -15460, i16 -3073, i16 -7202, i16 9314, i16 13379, i16 1056, i16 5121, i16 25830, i16 29895, i16 17572, i16 21637, i16 -23190, i16 -19125, i16 -31448, i16 -27383, i16 -6674, i16 -2609, i16 -14932, i16 -10867, i16 13907, i16 9842, 
i16 5649, i16 1584, i16 30423, i16 26358, i16 22165, i16 18100, i16 -18597, i16 -22662, i16 -26855, i16 -30920, i16 -2081, i16 -6146, i16 -10339, i16 -14404, i16 18628, i16 22757, i16 26758, i16 30887, i16 2112, i16 6241, i16 10242, i16 14371, i16 -13876, i16 -9747, i16 -5746, i16 -1617, i16 -30392, i16 -26263, i16 -22262, i16 -18133, i16 23285, i16 19156, i16 31415, i16 27286, i16 6769, i16 2640, i16 14899, i16 10770, i16 -9219, i16 -13348, i16 -1089, i16 -5218, i16 -25735, i16 -29864, i16 -17605, i16 -21734, i16 27814, i16 31879, i16 19684, i16 23749, i16 11298, i16 15363, i16 3168, i16 7233, i16 -4690, i16 -625, i16 -12820, i16 -8755, i16 -21206, i16 -17141, i16 -29336, i16 -25271, i16 32407, i16 28342, i16 24277, i16 20212, i16 15891, i16 11826, i16 7761, i16 3696, i16 -97, i16 -4162, i16 -8227, i16 -12292, i16 -16613, i16 -20678, i16 -24743, i16 -28808, i16 -28280, i16 -32343, i16 -20022, i16 -24085, i16 -12020, i16 -16083, i16 -3762, i16 -7825, i16 4224, i16 161, i16 12482, i16 8419, i16 20484, i16 16421, i16 28742, i16 24679, i16 -31815, i16 -27752, i16 -23557, i16 -19494, i16 -15555, i16 -11492, i16 -7297, i16 -3234, i16 689, i16 4752, i16 8947, i16 13010, i16 16949, i16 21012, i16 25207, i16 29270, i16 -18966, i16 -23093, i16 -27224, i16 -31351, i16 -2706, i16 -6833, i16 -10964, i16 -15091, i16 13538, i16 9411, i16 5280, i16 1153, i16 29798, i16 25671, i16 21540, i16 17413, i16 -22565, i16 -18438, i16 -30823, i16 -26696, i16 -6305, i16 -2178, i16 -14563, i16 -10436, i16 9939, i16 14066, i16 1681, i16 5808, i16 26199, i16 30326, i16 17941, i16 22068, i16 -9908, i16 -13971, i16 -1778, i16 -5841, i16 -26168, i16 -30231, i16 -18038, i16 -22101, i16 22596, i16 18533, i16 30726, i16 26663, i16 6336, i16 2273, i16 14466, i16 10403, i16 -13443, i16 -9380, i16 -5313, i16 -1250, i16 -29703, i16 -25640, i16 -21573, i16 -17510, i16 19061, i16 23124, i16 27191, i16 31254, i16 2801, i16 6864, i16 10931, i16 14994, i16 -722, i16 -4849, i16 -8852, i16 -12979, i16 -16982, 
i16 -21109, i16 -25112, i16 -29239, i16 31782, i16 27655, i16 23652, i16 19525, i16 15522, i16 11395, i16 7392, i16 3265, i16 -4321, i16 -194, i16 -12451, i16 -8324, i16 -20581, i16 -16454, i16 -28711, i16 -24584, i16 28183, i16 32310, i16 20053, i16 24180, i16 11923, i16 16050, i16 3793, i16 7920] +; CHECK: @.crctable.7 = private constant [256 x i8] c"\00\1D:'tiNS\E8\F5\D2\CF\9C\81\A6\BB\CD\D0\F7\EA\B9\A4\83\9E%8\1F\02QLkv\87\9A\BD\A0\F3\EE\C9\D4orUH\1B\06!#\04\19\A2\BF\98\85\D6\CB\EC\F1\13\0E)4gz]@\FB\E6\C1\DC\8F\92\B5\A8\DE\C3\E4\F9\AA\B7\90\8D6+\0C\11B_xe\94\89\AE\B3\E0\FD\DA\C7|aF[\08\152/YDc~-0\17\0A\B1\AC\8B\96\C5\D8\FF\E2& +; CHECK: @.crctable.8 = private constant [256 x i32] [i32 0, i32 4489, i32 8978, i32 12955, i32 17956, i32 22445, i32 25910, i32 29887, i32 35912, i32 40385, i32 44890, i32 48851, i32 51820, i32 56293, i32 59774, i32 63735, i32 4225, i32 264, i32 13203, i32 8730, i32 22181, i32 18220, i32 30135, i32 25662, i32 40137, i32 36160, i32 49115, i32 44626, i32 56045, i32 52068, i32 63999, i32 59510, i32 8450, i32 12427, i32 528, i32 5017, i32 26406, i32 30383, i32 17460, i32 21949, i32 44362, i32 48323, i32 36440, i32 40913, i32 60270, i32 64231, i32 51324, i32 55797, i32 12675, i32 8202, i32 4753, i32 792, i32 30631, i32 26158, i32 21685, i32 17724, i32 48587, i32 44098, i32 40665, i32 36688, i32 64495, i32 60006, i32 55549, i32 51572, i32 16900, i32 21389, i32 24854, i32 28831, i32 1056, i32 5545, i32 10034, i32 14011, i32 52812, i32 57285, i32 60766, i32 64727, i32 34920, i32 39393, i32 43898, i32 47859, i32 21125, i32 17164, i32 29079, i32 24606, i32 5281, i32 1320, i32 14259, i32 9786, i32 57037, i32 53060, i32 64991, i32 60502, i32 39145, i32 35168, i32 48123, i32 43634, i32 25350, i32 29327, i32 16404, i32 20893, i32 9506, i32 13483, i32 1584, i32 6073, i32 61262, i32 65223, i32 52316, i32 56789, i32 43370, i32 47331, i32 35448, i32 39921, i32 29575, i32 25102, i32 20629, i32 16668, i32 13731, i32 9258, i32 5809, i32 1848, i32 65487, i32 
60998, i32 56541, i32 52564, i32 47595, i32 43106, i32 39673, i32 35696, i32 33800, i32 38273, i32 42778, i32 46739, i32 49708, i32 54181, i32 57662, i32 61623, i32 2112, i32 6601, i32 11090, i32 15067, i32 20068, i32 24557, i32 28022, i32 31999, i32 38025, i32 34048, i32 47003, i32 42514, i32 53933, i32 49956, i32 61887, i32 57398, i32 6337, i32 2376, i32 15315, i32 10842, i32 24293, i32 20332, i32 32247, i32 27774, i32 42250, i32 46211, i32 34328, i32 38801, i32 58158, i32 62119, i32 49212, i32 53685, i32 10562, i32 14539, i32 2640, i32 7129, i32 28518, i32 32495, i32 19572, i32 24061, i32 46475, i32 41986, i32 38553, i32 34576, i32 62383, i32 57894, i32 53437, i32 49460, i32 14787, i32 10314, i32 6865, i32 2904, i32 32743, i32 28270, i32 23797, i32 19836, i32 50700, i32 55173, i32 58654, i32 62615, i32 32808, i32 37281, i32 41786, i32 45747, i32 19012, i32 23501, i32 26966, i32 30943, i32 3168, i32 7657, i32 12146, i32 16123, i32 54925, i32 50948, i32 62879, i32 58390, i32 37033, i32 33056, i32 46011, i32 41522, i32 23237, i32 19276, i32 31191, i32 26718, i32 7393, i32 3432, i32 16371, i32 11898, i32 59150, i32 63111, i32 50204, i32 54677, i32 41258, i32 45219, i32 33336, i32 37809, i32 27462, i32 31439, i32 18516, i32 23005, i32 11618, i32 15595, i32 3696, i32 8185, i32 63375, i32 58886, i32 54429, i32 50452, i32 45483, i32 40994, i32 37561, i32 33584, i32 31687, i32 27214, i32 22741, i32 18780, i32 15843, i32 11370, i32 7921, i32 3960] +;. 
+define i16 @crc16.le.tc8(i8 %msg, i16 %checksum) { ; Right-shifting bit-by-bit loop over an i16 checksum and an i8 message; the expected output replaces the loop body with a load from @.crctable.
+; CHECK-LABEL: define i16 @crc16.le.tc8(
+; CHECK-SAME: i8 [[MSG:%.*]], i16 [[CHECKSUM:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[CRC2:%.*]] = phi i16 [ [[CHECKSUM]], %[[ENTRY]] ], [ [[CRC_NEXT3:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV_BITS:%.*]] = shl i8 [[IV]], 3
+; CHECK-NEXT: [[DATA_INDEXER:%.*]] = lshr i8 [[MSG]], [[IV_BITS]]
+; CHECK-NEXT: [[CRC_INDEXER_CAST:%.*]] = trunc i16 [[CRC2]] to i8
+; CHECK-NEXT: [[CRC_DATA_INDEXER:%.*]] = xor i8 [[DATA_INDEXER]], [[CRC_INDEXER_CAST]]
+; CHECK-NEXT: [[INDEXER_EXT:%.*]] = zext i8 [[CRC_DATA_INDEXER]] to i64
+; CHECK-NEXT: [[TBL_PTRADD:%.*]] = getelementptr inbounds i16, ptr @.crctable, i64 [[INDEXER_EXT]]
+; CHECK-NEXT: [[TBL_LD:%.*]] = load i16, ptr [[TBL_PTRADD]], align 2
+; CHECK-NEXT: [[CRC_LE_SHIFT:%.*]] = lshr i16 [[CRC2]], 8
+; CHECK-NEXT: [[CRC_NEXT3]] = xor i16 [[CRC_LE_SHIFT]], [[TBL_LD]]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i8 [[IV]], 1
+; CHECK-NEXT: [[EXIT_COND1:%.*]] = icmp ne i8 [[IV]], 0
+; CHECK-NEXT: br i1 [[EXIT_COND1]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[CRC_NEXT_LCSSA:%.*]] = phi i16 [ [[CRC_NEXT3]], %[[LOOP]] ]
+; CHECK-NEXT: ret i16 [[CRC_NEXT_LCSSA]]
+;
+entry:
+  br label %loop
+
+loop: ; preds = %loop, %entry
+  %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ]
+  %crc = phi i16 [ %checksum, %entry ], [ %crc.next, %loop ]
+  %data = phi i8 [ %msg, %entry ], [ %data.next, %loop ]
+  %crc.trunc = trunc i16 %crc to i8
+  %xor.data.crc = xor i8 %data, %crc.trunc
+  %and.data.crc = and i8 %xor.data.crc, 1
+  %data.next = lshr i8 %data, 1
+  %check.sb = icmp eq i8 %and.data.crc, 0
+  %crc.lshr = lshr i16 %crc, 1
+  %xor = xor i16 %crc.lshr, -24575 ; -24575 is 0xA001 when read as an unsigned i16
+  %crc.next = select i1 %check.sb, i16 %crc.lshr, i16 %xor
+  %iv.next = add nuw nsw i8 %iv, 1
+  %exit.cond = icmp ult i8 %iv, 7 ; compares the pre-increment %iv, so the body runs for %iv = 0..7
+  br i1 %exit.cond, label %loop, label %exit
+
+exit: ; preds = %loop
+  ret i16 %crc.next
+}
+
+define i16 @crc16.le.tc8.udiv(i8 %msg, i16 %checksum) { ; NOTE(review): this body is identical to @crc16.le.tc8 and contains no udiv despite the name - confirm this is intended. Expected output loads from @.crctable.1.
+; CHECK-LABEL: define i16 @crc16.le.tc8.udiv(
+; CHECK-SAME: i8 [[MSG:%.*]], i16 [[CHECKSUM:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[CRC2:%.*]] = phi i16 [ [[CHECKSUM]], %[[ENTRY]] ], [ [[CRC_NEXT3:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV_BITS:%.*]] = shl i8 [[IV]], 3
+; CHECK-NEXT: [[DATA_INDEXER:%.*]] = lshr i8 [[MSG]], [[IV_BITS]]
+; CHECK-NEXT: [[CRC_INDEXER_CAST:%.*]] = trunc i16 [[CRC2]] to i8
+; CHECK-NEXT: [[CRC_DATA_INDEXER:%.*]] = xor i8 [[DATA_INDEXER]], [[CRC_INDEXER_CAST]]
+; CHECK-NEXT: [[INDEXER_EXT:%.*]] = zext i8 [[CRC_DATA_INDEXER]] to i64
+; CHECK-NEXT: [[TBL_PTRADD:%.*]] = getelementptr inbounds i16, ptr @.crctable.1, i64 [[INDEXER_EXT]]
+; CHECK-NEXT: [[TBL_LD:%.*]] = load i16, ptr [[TBL_PTRADD]], align 2
+; CHECK-NEXT: [[CRC_LE_SHIFT:%.*]] = lshr i16 [[CRC2]], 8
+; CHECK-NEXT: [[CRC_NEXT3]] = xor i16 [[CRC_LE_SHIFT]], [[TBL_LD]]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i8 [[IV]], 1
+; CHECK-NEXT: [[EXIT_COND1:%.*]] = icmp ne i8 [[IV]], 0
+; CHECK-NEXT: br i1 [[EXIT_COND1]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[CRC_NEXT_LCSSA:%.*]] = phi i16 [ [[CRC_NEXT3]], %[[LOOP]] ]
+; CHECK-NEXT: ret i16 [[CRC_NEXT_LCSSA]]
+;
+entry:
+  br label %loop
+
+loop: ; preds = %loop, %entry
+  %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ]
+  %crc = phi i16 [ %checksum, %entry ], [ %crc.next, %loop ]
+  %data = phi i8 [ %msg, %entry ], [ %data.next, %loop ]
+  %crc.trunc = trunc i16 %crc to i8
+  %xor.data.crc = xor i8 %data, %crc.trunc
+  %and.data.crc = and i8 %xor.data.crc, 1
+  %data.next = lshr i8 %data, 1
+  %check.sb = icmp eq i8 %and.data.crc, 0
+  %crc.lshr = lshr i16 %crc, 1
+  %xor = xor i16 %crc.lshr, -24575 ; -24575 is 0xA001 when read as an unsigned i16
+  %crc.next = select i1 %check.sb, i16 %crc.lshr, i16 %xor
+  %iv.next = add nuw nsw i8 %iv, 1
+  %exit.cond = icmp ult i8 %iv, 7 ; compares the pre-increment %iv, so the body runs for %iv = 0..7
+  br i1 %exit.cond, label %loop, label %exit
+
+exit: ; preds = %loop
+  ret i16 %crc.next
+}
+
+define i16 @crc16.le.tc16(i16 %msg, i16 %checksum) { ; Right-shifting loop with an i16 message and i16 checksum running 16 times; the expected output loads from @.crctable.2 and exits once the induction variable is 1.
+; CHECK-LABEL: define i16 @crc16.le.tc16(
+; CHECK-SAME: i16 [[MSG:%.*]], i16 [[CHECKSUM:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[CRC2:%.*]] = phi i16 [ [[CHECKSUM]], %[[ENTRY]] ], [ [[CRC_NEXT3:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV_BITS:%.*]] = shl i8 [[IV]], 3
+; CHECK-NEXT: [[IV_INDEXER:%.*]] = zext i8 [[IV_BITS]] to i16
+; CHECK-NEXT: [[DATA_INDEXER:%.*]] = lshr i16 [[MSG]], [[IV_INDEXER]]
+; CHECK-NEXT: [[CRC_DATA_INDEXER:%.*]] = xor i16 [[DATA_INDEXER]], [[CRC2]]
+; CHECK-NEXT: [[INDEXER_LO:%.*]] = and i16 [[CRC_DATA_INDEXER]], 255
+; CHECK-NEXT: [[INDEXER_EXT:%.*]] = zext i16 [[INDEXER_LO]] to i64
+; CHECK-NEXT: [[TBL_PTRADD:%.*]] = getelementptr inbounds i16, ptr @.crctable.2, i64 [[INDEXER_EXT]]
+; CHECK-NEXT: [[TBL_LD:%.*]] = load i16, ptr [[TBL_PTRADD]], align 2
+; CHECK-NEXT: [[CRC_LE_SHIFT:%.*]] = lshr i16 [[CRC2]], 8
+; CHECK-NEXT: [[CRC_NEXT3]] = xor i16 [[CRC_LE_SHIFT]], [[TBL_LD]]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i8 [[IV]], 1
+; CHECK-NEXT: [[EXIT_COND1:%.*]] = icmp ne i8 [[IV]], 1
+; CHECK-NEXT: br i1 [[EXIT_COND1]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[CRC_NEXT_LCSSA:%.*]] = phi i16 [ [[CRC_NEXT3]], %[[LOOP]] ]
+; CHECK-NEXT: ret i16 [[CRC_NEXT_LCSSA]]
+;
+entry:
+  br label %loop
+
+loop: ; preds = %loop, %entry
+  %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ]
+  %crc = phi i16 [ %checksum, %entry ], [ %crc.next, %loop ]
+  %data = phi i16 [ %msg, %entry ], [ %data.next, %loop ]
+  %xor.crc.data = xor i16 %crc, %data
+  %and.crc.data = and i16 %xor.crc.data, 1
+  %data.next = lshr i16 %data, 1
+  %check.sb = icmp eq i16 %and.crc.data, 0
+  %crc.lshr = lshr i16 %crc, 1
+  %crc.xor = xor i16 %crc.lshr, -24575 ; -24575 is 0xA001 when read as an unsigned i16
+  %crc.next = select i1 %check.sb, i16 %crc.lshr, i16 %crc.xor
+  %iv.next = add nuw nsw i8 %iv, 1
+  %exit.cond = icmp ult i8 %iv, 15 ; compares the pre-increment %iv, so the body runs for %iv = 0..15
+  br i1 %exit.cond, label %loop, label %exit
+
+exit: ; preds = %loop
+  ret i16 %crc.next
+}
+
+define i8 @crc8.le.tc16(i16 %msg, i8 %checksum) { ; Right-shifting loop with an i8 checksum and a wider i16 message; in the expected output the value loaded from @.crctable.3 is itself the next checksum (no shift/xor follows the load).
+; CHECK-LABEL: define i8 @crc8.le.tc16(
+; CHECK-SAME: i16 [[MSG:%.*]], i8 [[CHECKSUM:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[CRC2:%.*]] = phi i8 [ [[CHECKSUM]], %[[ENTRY]] ], [ [[TBL_LD:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV_BITS:%.*]] = shl i8 [[IV]], 3
+; CHECK-NEXT: [[IV_INDEXER:%.*]] = zext i8 [[IV_BITS]] to i16
+; CHECK-NEXT: [[DATA_INDEXER:%.*]] = lshr i16 [[MSG]], [[IV_INDEXER]]
+; CHECK-NEXT: [[CRC_INDEXER_CAST:%.*]] = zext i8 [[CRC2]] to i16
+; CHECK-NEXT: [[CRC_DATA_INDEXER:%.*]] = xor i16 [[DATA_INDEXER]], [[CRC_INDEXER_CAST]]
+; CHECK-NEXT: [[INDEXER_LO:%.*]] = and i16 [[CRC_DATA_INDEXER]], 255
+; CHECK-NEXT: [[INDEXER_EXT:%.*]] = zext i16 [[INDEXER_LO]] to i64
+; CHECK-NEXT: [[TBL_PTRADD:%.*]] = getelementptr inbounds i8, ptr @.crctable.3, i64 [[INDEXER_EXT]]
+; CHECK-NEXT: [[TBL_LD]] = load i8, ptr [[TBL_PTRADD]], align 1
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i8 [[IV]], 1
+; CHECK-NEXT: [[EXIT_COND1:%.*]] = icmp ne i8 [[IV]], 1
+; CHECK-NEXT: br i1 [[EXIT_COND1]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[CRC_NEXT_LCSSA:%.*]] = phi i8 [ [[TBL_LD]], %[[LOOP]] ]
+; CHECK-NEXT: ret i8 [[CRC_NEXT_LCSSA]]
+;
+entry:
+  br label %loop
+
+loop: ; preds = %loop, %entry
+  %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ]
+  %crc = phi i8 [ %checksum, %entry ], [ %crc.next, %loop ]
+  %data = phi i16 [ %msg, %entry ], [ %data.next, %loop ]
+  %data.trunc = trunc i16 %data to i8
+  %xor.crc.data = xor i8 %crc, %data.trunc
+  %and.crc.data = and i8 %xor.crc.data, 1
+  %data.next = lshr i16 %data, 1
+  %check.sb = icmp eq i8 %and.crc.data, 0
+  %crc.lshr = lshr i8 %crc, 1
+  %crc.xor = xor i8 %crc.lshr, 29 ; 29 is 0x1D
+  %crc.next = select i1 %check.sb, i8 %crc.lshr, i8 %crc.xor
+  %iv.next = add nuw nsw i8 %iv, 1
+  %exit.cond = icmp ult i8 %iv, 15 ; compares the pre-increment %iv, so the body runs for %iv = 0..15
+  br i1 %exit.cond, label %loop, label %exit
+
+exit: ; preds = %loop
+  ret i16 %crc.next
+}
+
+define i16 @crc16.be.tc8.crc.init.li(i16 %checksum, i8 %msg) { ; Left-shifting loop whose initial value is computed in the entry block from %msg and %checksum; the expected output keeps that entry computation and loads from @.crctable.4.
+; CHECK-LABEL: define i16 @crc16.be.tc8.crc.init.li(
+; CHECK-SAME: i16 [[CHECKSUM:%.*]], i8 [[MSG:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[MSG_EXT:%.*]] = zext i8 [[MSG]] to i16
+; CHECK-NEXT: [[MSG_SHL:%.*]] = shl nuw i16 [[MSG_EXT]], 8
+; CHECK-NEXT: [[CRC_INIT:%.*]] = xor i16 [[MSG_SHL]], [[CHECKSUM]]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[CRC2:%.*]] = phi i16 [ [[CRC_INIT]], %[[ENTRY]] ], [ [[CRC_NEXT3:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[INDEXER_HI:%.*]] = lshr i16 [[CRC2]], 8
+; CHECK-NEXT: [[INDEXER_HI_LO_BYTE:%.*]] = and i16 [[INDEXER_HI]], 255
+; CHECK-NEXT: [[INDEXER_EXT:%.*]] = zext i16 [[INDEXER_HI_LO_BYTE]] to i64
+; CHECK-NEXT: [[TBL_PTRADD:%.*]] = getelementptr inbounds i16, ptr @.crctable.4, i64 [[INDEXER_EXT]]
+; CHECK-NEXT: [[TBL_LD:%.*]] = load i16, ptr [[TBL_PTRADD]], align 2
+; CHECK-NEXT: [[CRC_BE_SHIFT:%.*]] = shl i16 [[CRC2]], 8
+; CHECK-NEXT: [[CRC_NEXT3]] = xor i16 [[CRC_BE_SHIFT]], [[TBL_LD]]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT: [[EXIT_COND1:%.*]] = icmp ne i32 [[IV]], 0
+; CHECK-NEXT: br i1 [[EXIT_COND1]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[CRC_NEXT_LCSSA:%.*]] = phi i16 [ [[CRC_NEXT3]], %[[LOOP]] ]
+; CHECK-NEXT: ret i16 [[CRC_NEXT_LCSSA]]
+;
+entry:
+  %msg.ext = zext i8 %msg to i16
+  %msg.shl = shl nuw i16 %msg.ext, 8
+  %crc.init = xor i16 %msg.shl, %checksum
+  br label %loop
+
+loop: ; preds = %loop, %entry
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+  %crc = phi i16 [ %crc.init, %entry ], [ %crc.next, %loop ]
+  %crc.shl = shl i16 %crc, 1
+  %crc.xor = xor i16 %crc.shl, 4129 ; 4129 is 0x1021
+  %check.sb = icmp slt i16 %crc, 0 ; true exactly when the top bit of %crc is set
+  %crc.next = select i1 %check.sb, i16 %crc.xor, i16 %crc.shl
+  %iv.next = add nuw nsw i32 %iv, 1
+  %exit.cond = icmp ult i32 %iv, 7 ; compares the pre-increment %iv, so the body runs for %iv = 0..7
+  br i1 %exit.cond, label %loop, label %exit
+
+exit: ; preds = %loop
+  ret i16 %crc.next
+}
+
+define i16 @crc16.be.tc8.crc.init.arg(i16 %crc.init) { ; Left-shifting loop whose initial value is the function argument itself; the expected output loads from @.crctable.5.
+; CHECK-LABEL: define i16 @crc16.be.tc8.crc.init.arg(
+; CHECK-SAME: i16 [[CRC_INIT:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[CRC2:%.*]] = phi i16 [ [[CRC_INIT]], %[[ENTRY]] ], [ [[CRC_NEXT3:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[INDEXER_HI:%.*]] = lshr i16 [[CRC2]], 8
+; CHECK-NEXT: [[INDEXER_HI_LO_BYTE:%.*]] = and i16 [[INDEXER_HI]], 255
+; CHECK-NEXT: [[INDEXER_EXT:%.*]] = zext i16 [[INDEXER_HI_LO_BYTE]] to i64
+; CHECK-NEXT: [[TBL_PTRADD:%.*]] = getelementptr inbounds i16, ptr @.crctable.5, i64 [[INDEXER_EXT]]
+; CHECK-NEXT: [[TBL_LD:%.*]] = load i16, ptr [[TBL_PTRADD]], align 2
+; CHECK-NEXT: [[CRC_BE_SHIFT:%.*]] = shl i16 [[CRC2]], 8
+; CHECK-NEXT: [[CRC_NEXT3]] = xor i16 [[CRC_BE_SHIFT]], [[TBL_LD]]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT: [[EXIT_COND1:%.*]] = icmp ne i32 [[IV]], 0
+; CHECK-NEXT: br i1 [[EXIT_COND1]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[CRC_NEXT_LCSSA:%.*]] = phi i16 [ [[CRC_NEXT3]], %[[LOOP]] ]
+; CHECK-NEXT: ret i16 [[CRC_NEXT_LCSSA]]
+;
+entry:
+  br label %loop
+
+loop: ; preds = %loop, %entry
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+  %crc = phi i16 [ %crc.init, %entry ], [ %crc.next, %loop ]
+  %crc.shl = shl i16 %crc, 1
+  %crc.xor = xor i16 %crc.shl, 4129 ; 4129 is 0x1021
+  %check.sb = icmp slt i16 %crc, 0 ; true exactly when the top bit of %crc is set
+  %crc.next = select i1 %check.sb, i16 %crc.xor, i16 %crc.shl
+  %iv.next = add nuw nsw i32 %iv, 1
+  %exit.cond = icmp ult i32 %iv, 7 ; compares the pre-increment %iv, so the body runs for %iv = 0..7
+  br i1 %exit.cond, label %loop, label %exit
+
+exit: ; preds = %loop
+  ret i16 %crc.next
+}
+
+define i16 @crc16.be.tc8.crc.init.arg.flipped.sb.check(i16 %crc.init) { ; Same loop as @crc16.be.tc8.crc.init.arg, but the bit test is inverted (sge instead of slt) and the select operands are swapped to match; the expected output loads from @.crctable.6.
+; CHECK-LABEL: define i16 @crc16.be.tc8.crc.init.arg.flipped.sb.check(
+; CHECK-SAME: i16 [[CRC_INIT:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[CRC2:%.*]] = phi i16 [ [[CRC_INIT]], %[[ENTRY]] ], [ [[CRC_NEXT3:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[INDEXER_HI:%.*]] = lshr i16 [[CRC2]], 8
+; CHECK-NEXT: [[INDEXER_HI_LO_BYTE:%.*]] = and i16 [[INDEXER_HI]], 255
+; CHECK-NEXT: [[INDEXER_EXT:%.*]] = zext i16 [[INDEXER_HI_LO_BYTE]] to i64
+; CHECK-NEXT: [[TBL_PTRADD:%.*]] = getelementptr inbounds i16, ptr @.crctable.6, i64 [[INDEXER_EXT]]
+; CHECK-NEXT: [[TBL_LD:%.*]] = load i16, ptr [[TBL_PTRADD]], align 2
+; CHECK-NEXT: [[CRC_BE_SHIFT:%.*]] = shl i16 [[CRC2]], 8
+; CHECK-NEXT: [[CRC_NEXT3]] = xor i16 [[CRC_BE_SHIFT]], [[TBL_LD]]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT: [[EXIT_COND1:%.*]] = icmp ne i32 [[IV]], 0
+; CHECK-NEXT: br i1 [[EXIT_COND1]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[CRC_NEXT_LCSSA:%.*]] = phi i16 [ [[CRC_NEXT3]], %[[LOOP]] ]
+; CHECK-NEXT: ret i16 [[CRC_NEXT_LCSSA]]
+;
+entry:
+  br label %loop
+
+loop: ; preds = %loop, %entry
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+  %crc = phi i16 [ %crc.init, %entry ], [ %crc.next, %loop ]
+  %crc.shl = shl i16 %crc, 1
+  %crc.xor = xor i16 %crc.shl, 4129 ; 4129 is 0x1021
+  %check.sb = icmp sge i16 %crc, 0 ; true exactly when the top bit of %crc is clear
+  %crc.next = select i1 %check.sb, i16 %crc.shl, i16 %crc.xor
+  %iv.next = add nuw nsw i32 %iv, 1
+  %exit.cond = icmp ult i32 %iv, 7 ; compares the pre-increment %iv, so the body runs for %iv = 0..7
+  br i1 %exit.cond, label %loop, label %exit
+
+exit: ; preds = %loop
+  ret i16 %crc.next
+}
+
+define i8 @crc8.be.tc8.ptr.nested.loop(ptr %msg, i32 %loop.limit) { ; Left-shifting i8 loop nested inside an outer loop that loads one byte of %msg per iteration; in the expected output only the inner loop changes (it loads from @.crctable.7).
+; CHECK-LABEL: define i8 @crc8.be.tc8.ptr.nested.loop(
+; CHECK-SAME: ptr [[MSG:%.*]], i32 [[LOOP_LIMIT:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[OUTER_LOOP:.*]]
+; CHECK: [[OUTER_LOOP]]:
+; CHECK-NEXT: [[CRC_OUTER:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[CRC_NEXT_LCSSA:%.*]], %[[INNER_EXIT:.*]] ]
+; CHECK-NEXT: [[OUTER_IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[OUTER_IV_NEXT:%.*]], %[[INNER_EXIT]] ]
+; CHECK-NEXT: [[OUTER_EXIT_COND:%.*]] = icmp ult i32 [[OUTER_IV]], [[LOOP_LIMIT]]
+; CHECK-NEXT: br i1 [[OUTER_EXIT_COND]], label %[[PH:.*]], label %[[EXIT:.*]]
+; CHECK: [[PH]]:
+; CHECK-NEXT: [[OUTER_IV_EXT:%.*]] = sext i32 [[OUTER_IV]] to i64
+; CHECK-NEXT: [[MSG_OUTER_IV:%.*]] = getelementptr inbounds i8, ptr [[MSG]], i64 [[OUTER_IV_EXT]]
+; CHECK-NEXT: [[MSG_LOAD:%.*]] = load i8, ptr [[MSG_OUTER_IV]], align 1
+; CHECK-NEXT: [[CRC_INIT:%.*]] = xor i8 [[MSG_LOAD]], [[CRC_OUTER]]
+; CHECK-NEXT: br label %[[INNER_LOOP:.*]]
+; CHECK: [[INNER_LOOP]]:
+; CHECK-NEXT: [[INNER_IV:%.*]] = phi i32 [ 0, %[[PH]] ], [ [[INNER_IV_NEXT:%.*]], %[[INNER_LOOP]] ]
+; CHECK-NEXT: [[CRC2:%.*]] = phi i8 [ [[CRC_INIT]], %[[PH]] ], [ [[TBL_LD:%.*]], %[[INNER_LOOP]] ]
+; CHECK-NEXT: [[INDEXER_EXT:%.*]] = zext i8 [[CRC2]] to i64
+; CHECK-NEXT: [[TBL_PTRADD:%.*]] = getelementptr inbounds i8, ptr @.crctable.7, i64 [[INDEXER_EXT]]
+; CHECK-NEXT: [[TBL_LD]] = load i8, ptr [[TBL_PTRADD]], align 1
+; CHECK-NEXT: [[INNER_IV_NEXT]] = add nuw nsw i32 [[INNER_IV]], 1
+; CHECK-NEXT: [[EXIT_COND1:%.*]] = icmp ne i32 [[INNER_IV]], 0
+; CHECK-NEXT: br i1 [[EXIT_COND1]], label %[[INNER_LOOP]], label %[[INNER_EXIT]]
+; CHECK: [[INNER_EXIT]]:
+; CHECK-NEXT: [[CRC_NEXT_LCSSA]] = phi i8 [ [[TBL_LD]], %[[INNER_LOOP]] ]
+; CHECK-NEXT: [[OUTER_IV_NEXT]] = add i32 [[OUTER_IV]], 1
+; CHECK-NEXT: br label %[[OUTER_LOOP]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[CRC_OUTER_LCSSA:%.*]] = phi i8 [ [[CRC_OUTER]], %[[OUTER_LOOP]] ]
+; CHECK-NEXT: ret i8 [[CRC_OUTER_LCSSA]]
+;
+entry:
+  br label %outer.loop
+
+outer.loop: ; preds = %inner.exit, %entry
+  %crc.outer = phi i8 [ 0, %entry ], [ %crc.next, %inner.exit ]
+  %outer.iv = phi i32 [ 0, %entry ], [ %outer.iv.next, %inner.exit ]
+  %outer.exit.cond = icmp ult i32 %outer.iv, %loop.limit
+  br i1 %outer.exit.cond, label %ph, label %exit
+
+ph: ; preds = %outer.loop
+  %outer.iv.ext = sext i32 %outer.iv to i64
+  %msg.outer.iv = getelementptr inbounds i8, ptr %msg, i64 %outer.iv.ext
+  %msg.load = load i8, ptr %msg.outer.iv, align 1
+  %crc.init = xor i8 %msg.load, %crc.outer
+  br label %inner.loop
+
+inner.loop: ; preds = %inner.loop, %ph
+  %inner.iv = phi i32 [ 0, %ph ], [ %inner.iv.next, %inner.loop ]
+  %crc = phi i8 [ %crc.init, %ph ], [ %crc.next, %inner.loop ]
+  %crc.shl = shl i8 %crc, 1
+  %crc.xor = xor i8 %crc.shl, 29 ; 29 is 0x1D
+  %check.sb = icmp slt i8 %crc, 0 ; true exactly when the top bit of %crc is set
+  %crc.next = select i1 %check.sb, i8 %crc.xor, i8 %crc.shl
+  %inner.iv.next = add nuw nsw i32 %inner.iv, 1
+  %exit.cond = icmp ult i32 %inner.iv, 7 ; compares the pre-increment %inner.iv, so the body runs for %inner.iv = 0..7
+  br i1 %exit.cond, label %inner.loop, label %inner.exit
+
+inner.exit: ; preds = %inner.loop
+  %outer.iv.next = add i32 %outer.iv, 1
+  br label %outer.loop
+
+exit: ; preds = %outer.loop
+  ret i8 %crc.outer
+}
+
+define i32 @crc32.le.tc8.data32(i32 %checksum, i32 %msg) { ; Right-shifting loop with an i32 checksum and i32 message running 8 times; the expected output loads from @.crctable.8.
+; CHECK-LABEL: define i32 @crc32.le.tc8.data32(
+; CHECK-SAME: i32 [[CHECKSUM:%.*]], i32 [[MSG:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[CRC2:%.*]] = phi i32 [ [[CHECKSUM]], %[[ENTRY]] ], [ [[CRC_NEXT3:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV_BITS:%.*]] = shl i8 [[IV]], 3
+; CHECK-NEXT: [[IV_INDEXER:%.*]] = zext i8 [[IV_BITS]] to i32
+; CHECK-NEXT: [[DATA_INDEXER:%.*]] = lshr i32 [[MSG]], [[IV_INDEXER]]
+; CHECK-NEXT: [[CRC_DATA_INDEXER:%.*]] = xor i32 [[DATA_INDEXER]], [[CRC2]]
+; CHECK-NEXT: [[INDEXER_LO:%.*]] = and i32 [[CRC_DATA_INDEXER]], 255
+; CHECK-NEXT: [[INDEXER_EXT:%.*]] = zext i32 [[INDEXER_LO]] to i64
+; CHECK-NEXT: [[TBL_PTRADD:%.*]] = getelementptr inbounds i32, ptr @.crctable.8, i64 [[INDEXER_EXT]]
+; CHECK-NEXT: [[TBL_LD:%.*]] = load i32, ptr [[TBL_PTRADD]], align 4
+; CHECK-NEXT: [[CRC_LE_SHIFT:%.*]] = lshr i32 [[CRC2]], 8
+; CHECK-NEXT: [[CRC_NEXT3]] = xor i32 [[CRC_LE_SHIFT]], [[TBL_LD]]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i8 [[IV]], 1
+; CHECK-NEXT: [[EXIT_COND1:%.*]] = icmp ne i8 [[IV]], 0
+; CHECK-NEXT: br i1 [[EXIT_COND1]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[CRC_NEXT_LCSSA:%.*]] = phi i32 [ [[CRC_NEXT3]], %[[LOOP]] ]
+; CHECK-NEXT: ret i32 [[CRC_NEXT_LCSSA]]
+;
+entry:
+  br label %loop
+
+loop: ; preds = %loop, %entry
+  %crc = phi i32 [ %checksum, %entry ], [ %crc.next, %loop ]
+  %data = phi i32 [ %msg, %entry ], [ %data.next, %loop ]
+  %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ]
+  %xor.crc.data = xor i32 %crc, %data
+  %sb.crc.data = and i32 %xor.crc.data, 1
+  %check.sb = icmp eq i32 %sb.crc.data, 0
+  %crc.lshr = lshr i32 %crc, 1
+  %crc.xor = xor i32 %crc.lshr, 33800 ; 33800 is 0x8408
+  %crc.next = select i1 %check.sb, i32 %crc.lshr, i32 %crc.xor
+  %iv.next = add nuw nsw i8 %iv, 1
+  %data.next = lshr i32 %data, 1
+  %exit.cond = icmp ult i8 %iv, 7 ; compares the pre-increment %iv, so the body runs for %iv = 0..7
+  br i1 %exit.cond, label %loop, label %exit
+
+exit: ; preds = %loop
+  ret i32 %crc.next
+}
+
+define i32 @crc.disabled.optsize(i32 %checksum, i32 %msg) optsize { ; Same loop as @crc32.le.tc8.data32, but the function carries the optsize attribute; the expected output is the bit-by-bit loop left as written, with no table.
+; CHECK-LABEL: define i32 @crc.disabled.optsize(
+; CHECK-SAME: i32 [[CHECKSUM:%.*]], i32 [[MSG:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[CRC:%.*]] = phi i32 [ [[CHECKSUM]], %[[ENTRY]] ], [ [[CRC_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[DATA:%.*]] = phi i32 [ [[MSG]], %[[ENTRY]] ], [ [[DATA_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[XOR_CRC_DATA:%.*]] = xor i32 [[CRC]], [[DATA]]
+; CHECK-NEXT: [[SB_CRC_DATA:%.*]] = and i32 [[XOR_CRC_DATA]], 1
+; CHECK-NEXT: [[CHECK_SB:%.*]] = icmp eq i32 [[SB_CRC_DATA]], 0
+; CHECK-NEXT: [[CRC_LSHR:%.*]] = lshr i32 [[CRC]], 1
+; CHECK-NEXT: [[CRC_XOR:%.*]] = xor i32 [[CRC_LSHR]], 33800
+; CHECK-NEXT: [[CRC_NEXT]] = select i1 [[CHECK_SB]], i32 [[CRC_LSHR]], i32 [[CRC_XOR]]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i8 [[IV]], 1
+; CHECK-NEXT: [[DATA_NEXT]] = lshr i32 [[DATA]], 1
+; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp ult i8 [[IV]], 7
+; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[CRC_NEXT_LCSSA:%.*]] = phi i32 [ [[CRC_NEXT]], %[[LOOP]] ]
+; CHECK-NEXT: ret i32 [[CRC_NEXT_LCSSA]]
+;
+entry:
+  br label %loop
+
+loop: ; preds = %loop, %entry
+  %crc = phi i32 [ %checksum, %entry ], [ %crc.next, %loop ]
+  %data = phi i32 [ %msg, %entry ], [ %data.next, %loop ]
+  %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ]
+  %xor.crc.data = xor i32 %crc, %data
+  %sb.crc.data = and i32 %xor.crc.data, 1
+  %check.sb = icmp eq i32 %sb.crc.data, 0
+  %crc.lshr = lshr i32 %crc, 1
+  %crc.xor = xor i32 %crc.lshr, 33800 ; 33800 is 0x8408
+  %crc.next = select i1 %check.sb, i32 %crc.lshr, i32 %crc.xor
+  %iv.next = add nuw nsw i8 %iv, 1
+  %data.next = lshr i32 %data, 1
+  %exit.cond = icmp ult i8 %iv, 7 ; compares the pre-increment %iv, so the body runs for %iv = 0..7
+  br i1 %exit.cond, label %loop, label %exit
+
+exit: ; preds = %loop
+  ret i32 %crc.next
+}
+
+define i32 @not.crc.unrelated.computation(i32 %checksum, i32 %msg) { ; Same loop as @crc32.le.tc8.data32 plus an extra recurrence (%unrelated) that is combined with the result after the loop; the expected output is the loop left as written, with no table.
+; CHECK-LABEL: define i32 @not.crc.unrelated.computation(
+; CHECK-SAME: i32 [[CHECKSUM:%.*]], i32 [[MSG:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[CRC:%.*]] = phi i32 [ [[CHECKSUM]], %[[ENTRY]] ], [ [[CRC_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[DATA:%.*]] = phi i32 [ [[MSG]], %[[ENTRY]] ], [ [[DATA_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[UNRELATED:%.*]] = phi i32 [ 1, %[[ENTRY]] ], [ [[UNRELATED_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[XOR_CRC_DATA:%.*]] = xor i32 [[CRC]], [[DATA]]
+; CHECK-NEXT: [[SB_CRC_DATA:%.*]] = and i32 [[XOR_CRC_DATA]], 1
+; CHECK-NEXT: [[CHECK_SB:%.*]] = icmp eq i32 [[SB_CRC_DATA]], 0
+; CHECK-NEXT: [[CRC_LSHR:%.*]] = lshr i32 [[CRC]], 1
+; CHECK-NEXT: [[CRC_XOR:%.*]] = xor i32 [[CRC_LSHR]], 33800
+; CHECK-NEXT: [[CRC_NEXT]] = select i1 [[CHECK_SB]], i32 [[CRC_LSHR]], i32 [[CRC_XOR]]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i8 [[IV]], 1
+; CHECK-NEXT: [[DATA_NEXT]] = lshr i32 [[DATA]], 1
+; CHECK-NEXT: [[UNRELATED_NEXT]] = shl i32 [[UNRELATED]], 2
+; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp ult i8 [[IV]], 7
+; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[CRC_NEXT_LCSSA:%.*]] = phi i32 [ [[CRC_NEXT]], %[[LOOP]] ]
+; CHECK-NEXT: [[UNRELATED_NEXT_LCSSA:%.*]] = phi i32 [ [[UNRELATED_NEXT]], %[[LOOP]] ]
+; CHECK-NEXT: [[RET:%.*]] = and i32 [[UNRELATED_NEXT_LCSSA]], [[CRC_NEXT_LCSSA]]
+; CHECK-NEXT: ret i32 [[RET]]
+;
+entry:
+  br label %loop
+
+loop: ; preds = %loop, %entry
+  %crc = phi i32 [ %checksum, %entry ], [ %crc.next, %loop ]
+  %data = phi i32 [ %msg, %entry ], [ %data.next, %loop ]
+  %iv = phi i8 [ 0, %entry ], [ %iv.next, %loop ]
+  %unrelated = phi i32 [ 1, %entry ], [ %unrelated.next, %loop ]
+  %xor.crc.data = xor i32 %crc, %data
+  %sb.crc.data = and i32 %xor.crc.data, 1
+  %check.sb = icmp eq i32 %sb.crc.data, 0
+  %crc.lshr = lshr i32 %crc, 1
+  %crc.xor = xor i32 %crc.lshr, 33800 ; 33800 is 0x8408
+  %crc.next = select i1 %check.sb, i32 %crc.lshr, i32 %crc.xor
+  %iv.next = add nuw nsw i8 %iv, 1
+  %data.next = lshr i32 %data, 1
+  %unrelated.next = shl i32 %unrelated, 2
+  %exit.cond = icmp ult i8 %iv, 7 ; compares the pre-increment %iv, so the body runs for %iv = 0..7
+  br i1 %exit.cond, label %loop, label %exit
+
+exit: ; preds = %loop
+  %ret = and i32 %unrelated.next, %crc.next
+  ret i32 %ret
+}
+;.
+; CHECK: attributes #[[ATTR0]] = { optsize }
+;.