diff --git a/12a.diffs b/12a.diffs
new file mode 100644
index 0000000000000000000000000000000000000000..32081dd77ff32dca684d2a93150dbf9d5b8789f1
--- /dev/null
+++ b/12a.diffs
@@ -0,0 +1,12 @@
+diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp
+index 1b5dab3ca..98a4553d4 100644
+--- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp
++++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp
+@@ -171,6 +171,7 @@ MIRStructType *AArch64CGFunc::GetLmbcStructArgType(BaseNode &stmt, int32 argNo)
+     }
+     ty = GlobalTables::GetTypeTable().GetTypeFromTyIdx(callFunc->GetFormalDefVec()[argNo].formalTyIdx);
+   } else if (stmt.GetOpCode() == OP_icallproto) {
++    argNo--;  /* 1st opnd of icallproto is funcname, skip it relative to param list */
+     IcallNode &icallproto = static_cast<IcallNode&>(stmt);
+     MIRType *type = GlobalTables::GetTypeTable().GetTypeFromTyIdx(icallproto.GetRetTyIdx());
+     MIRFuncType *fType = static_cast<MIRFuncType*>(type);
diff --git a/12b.diffs b/12b.diffs
new file mode 100644
index 0000000000000000000000000000000000000000..9d3af27e773e46b66d1a8e4cc8cb6c4e7eacd550
--- /dev/null
+++ b/12b.diffs
@@ -0,0 +1,278 @@
+diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp
+index 1b5dab3ca..72e396ca1 100644
+--- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp
++++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp
+@@ -171,6 +171,7 @@ MIRStructType *AArch64CGFunc::GetLmbcStructArgType(BaseNode &stmt, int32 argNo)
+     }
+     ty = GlobalTables::GetTypeTable().GetTypeFromTyIdx(callFunc->GetFormalDefVec()[argNo].formalTyIdx);
+   } else if (stmt.GetOpCode() == OP_icallproto) {
++    argNo--;  /* 1st opnd of icallproto is funcname, skip it relative to param list */
+     IcallNode &icallproto = static_cast<IcallNode&>(stmt);
+     MIRType *type = GlobalTables::GetTypeTable().GetTypeFromTyIdx(icallproto.GetRetTyIdx());
+     MIRFuncType *fType = static_cast<MIRFuncType*>(type);
+@@ -3180,47 +3181,12 @@ Operand *AArch64CGFunc::SelectIreadfpoff(const BaseNode &parent, IreadFPoffNode
+   RegType regty = GetRegTyFromPrimTy(primType);
+   RegOperand *result = nullptr;
+   if (offset >= 0) {
+-#if OLD_LMBC // TBD
+-    uint32 bitlen = bytelen * kBitsPerByte;
+-    LmbcFormalParamInfo *info = GetLmbcFormalParamInfo(offset);
+-    if (info->GetPrimType() == PTY_agg) {
+-      if (info->IsOnStack()) {
+-        result = GenLmbcParamLoad(info->GetOnStackOffset(), GetPrimTypeSize(PTY_a64), kRegTyInt, PTY_a64);
+-        regno_t baseRegno = result->GetRegisterNumber();
+-        result = GenLmbcParamLoad(offset - info->GetOffset(), bytelen, regty, primType, (AArch64reg)baseRegno);
+-      } else if (primType == PTY_agg) {
+-        CHECK_FATAL(parent.GetOpCode() == OP_regassign, "SelectIreadfpoff of agg");
+-        result = LmbcStructReturnLoad(offset);
+-      } else {
+-        result = GenLmbcParamLoad(offset, bytelen, regty, primType);
+-      }
+-    } else {
+-      CHECK_FATAL(primType == info->GetPrimType(), "Incorrect primtype");
+-      CHECK_FATAL(offset == info->GetOffset(), "Incorrect offset");
+-      if (info->GetRegNO() == 0 || info->HasRegassign() == false) {
+-        result = GenLmbcParamLoad(offset, bytelen, regty, primType);
+-      } else {
+-        result = &GetOrCreatePhysicalRegisterOperand(static_cast<AArch64reg>(info->GetRegNO()), bitlen, regty);
+-      }
+-    }
+-#else
+     CHECK_FATAL(0, "Invalid ireadfpoff offset");
+-#endif
+   } else {
+     if (primType == PTY_agg) {
+-#if OLD_LMBC // TBD
+-      if (parent.GetOpCode() == OP_call || parent.GetOpCode() == OP_icallproto) {
+-        /* ireadfpoff is now for loading locals under
calls. Large agg arg is handled via +- SelectParmListPreprocess, Hence only small agg is handled here */ +- SelectParmListIreadSmallAggregate(ireadoff, ty, srcOpnds, 0, parmLocator); +- } else { +-#endif +- /* agg return */ +- CHECK_FATAL(parent.GetOpCode() == OP_regassign, "SelectIreadfpoff of agg"); +- result = LmbcStructReturnLoad(offset); +-#if OLD_LMBC // TBD +- } +-#endif ++ /* agg return */ ++ CHECK_FATAL(parent.GetOpCode() == OP_regassign, "SelectIreadfpoff of agg"); ++ result = LmbcStructReturnLoad(offset); + } else { + result = GenLmbcParamLoad(offset, bytelen, regty, primType); + } +@@ -8016,10 +7982,6 @@ void AArch64CGFunc::SelectParmListPreprocessLargeStruct(BaseNode &parent, BaseNo + MIRStructType *ty = GetLmbcStructArgType(parent, argNo); + symSize = GetBecommon().GetTypeSize(ty->GetTypeIndex()); + if (symSize > 16 /*kParmMemcpySize*/) { +-#if OLD_LMBC // TBD +- MemOperand *addrOpnd0 = GenLmbcFpMemOperand(ireadoff.GetOffset(), kSizeOfPtr, RFP); +- RegOperand *addrOpnd1 = &LoadIntoRegister(*addrOpnd0, PTY_i64); +-#endif + RegOperand *rfp = &GetOrCreatePhysicalRegisterOperand(RFP, k64BitSize, kRegTyInt); + RegOperand *addrOpnd = &CreateRegisterOperandOfType(PTY_a64); + ImmOperand &immOpnd = CreateImmOperand(ireadoff.GetOffset(), k32BitSize, true); +@@ -10243,11 +10205,6 @@ void AArch64CGFunc::SelectCVaStart(const IntrinsiccallNode &intrnNode) { + inReg++; + } + } +-#if OLD_LMBC // TBD +- if (GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc) { +- stkSize += (inReg * k8ByteSize); +- } +-#endif + if (CGOptions::IsArm64ilp32()) { + stkSize = static_cast(RoundUp(stkSize, k8ByteSize)); + } else { +diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_memlayout.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_memlayout.cpp +index bd5ad4975..28b33f9d5 100644 +--- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_memlayout.cpp ++++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_memlayout.cpp +@@ -196,20 +196,6 @@ void AArch64MemLayout::LayoutVarargParams() { + } + + void AArch64MemLayout::LayoutFormalParams() { +-#if OLD_LMBC // TBD +- bool isLmbc = (be.GetMIRModule().GetFlavor() == kFlavorLmbc); +- if (isLmbc && mirFunction->GetFormalCount() == 0) { +- /* +- * lmbc : upformalsize - size of formals passed from caller's frame into current function +- * framesize - total frame size of current function used by Maple IR +- * outparmsize - portion of frame size of current function used by call parameters +- */ +- segArgsStkPassed.SetSize(mirFunction->GetOutParmSize()); +- segArgsRegPassed.SetSize(mirFunction->GetOutParmSize()); +- return; +- } +-#endif +- + AArch64CallConvImpl parmLocator(be); + CCLocInfo ploc; + for (size_t i = 0; i < mirFunction->GetFormalCount(); ++i) { +@@ -263,10 +249,6 @@ void AArch64MemLayout::LayoutFormalParams() { + segArgsRegPassed.SetSize(static_cast(RoundUp(segArgsRegPassed.GetSize(), align))); + symLoc->SetOffset(segArgsRegPassed.GetSize()); + segArgsRegPassed.SetSize(segArgsRegPassed.GetSize() + size); +-#if OLD_LMBC // TBD +- } else if (isLmbc) { +- segArgsRegPassed.SetSize(segArgsRegPassed.GetSize() + k8ByteSize); +-#endif + } + } else { /* stack */ + uint32 size; +@@ -381,15 +363,7 @@ void AArch64MemLayout::LayoutReturnRef(std::vector &returnDelays, + symLoc->SetOffset(segRefLocals.GetSize()); + segRefLocals.SetSize(segRefLocals.GetSize() + be.GetTypeSize(tyIdx)); + } +-#if OLD_LMBC // TBD +- if (be.GetMIRModule().GetFlavor() == kFlavorLmbc) { +- segArgsToStkPass.SetSize(mirFunction->GetOutParmSize() + kDivide2 * k8ByteSize); +- } else { +- 
segArgsToStkPass.SetSize(FindLargestActualArea(structCopySize)); +- } +-#else + segArgsToStkPass.SetSize(FindLargestActualArea(structCopySize)); +-#endif + maxParmStackSize = static_cast(segArgsToStkPass.GetSize()); + if (Globals::GetInstance()->GetOptimLevel() == 0) { + AssignSpillLocationsToPseudoRegisters(); +diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_proepilog.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_proepilog.cpp +index 02f27d5e0..1bb2260e5 100644 +--- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_proepilog.cpp ++++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_proepilog.cpp +@@ -1119,11 +1119,6 @@ void AArch64GenProEpilog::AppendInstructionAllocateCallFrameDebug(AArch64reg reg + ipoint = cgFunc.GetCurBB()->GetLastInsn(); + cfiOffset = stackFrameSize; + (void)InsertCFIDefCfaOffset(cfiOffset, *ipoint); +-#if OLD_LMBC //TBD +- if (cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc) { +- argsToStkPassSize -= (kDivide2 * k8ByteSize); +- } +-#endif + ipoint = &CreateAndAppendInstructionForAllocateCallFrame(argsToStkPassSize, reg0, reg1, rty); + CHECK_FATAL(ipoint != nullptr, "ipoint should not be nullptr at this point"); + cfiOffset = GetOffsetFromCFA(); +@@ -1262,15 +1257,10 @@ void AArch64GenProEpilog::GeneratePushRegs() { + AArch64MemLayout *memLayout = static_cast(cgFunc.GetMemlayout()); + int32 offset; + if (cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc) { +-#if OLD_LMBC // TBD +- offset = static_cast(memLayout->RealStackFrameSize() - +- aarchCGFunc.SizeOfCalleeSaved() - memLayout->GetSizeOfLocals()); +-#else + offset = static_cast(memLayout->RealStackFrameSize() - + (aarchCGFunc.SizeOfCalleeSaved() - (kDivide2 * kIntregBytelen)/* FP/LR */) - + memLayout->GetSizeOfLocals() - + memLayout->SizeOfArgsToStackPass()); +-#endif + } else { + offset = static_cast(memLayout->RealStackFrameSize() - + (aarchCGFunc.SizeOfCalleeSaved() - (kDivide2 * kIntregBytelen) /* for FP/LR */) - +@@ -1337,23 +1327,10 @@ void AArch64GenProEpilog::GeneratePushUnnamedVarargRegs() { + size = kSizeOfPtr; + } + uint32 dataSizeBits = size * kBitsPerByte; +- uint32 offset; +-#if OLD_LMBC // TBD +- if (cgFunc.GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc) { +-#endif +- offset = static_cast(memlayout->GetGRSaveAreaBaseLoc()); /* SP reference */ +- if (memlayout->GetSizeOfGRSaveArea() % kAarch64StackPtrAlignment) { +- offset += size; /* End of area should be aligned. Hole between VR and GR area */ +- } +-#if OLD_LMBC // TBD +- } else { +- offset = (UINT32_MAX - memlayout->GetSizeOfGRSaveArea()) + 1; /* FP reference */ +- if (memlayout->GetSizeOfGRSaveArea() % kAarch64StackPtrAlignment) { +- offset -= size; +- } ++ uint32 offset = static_cast(memlayout->GetGRSaveAreaBaseLoc()); /* SP reference */ ++ if (memlayout->GetSizeOfGRSaveArea() % kAarch64StackPtrAlignment) { ++ offset += size; /* End of area should be aligned. 
Hole between VR and GR area */ + } +- uint32 grSize = -offset; +-#endif + uint32 start_regno = k8BitSize - (memlayout->GetSizeOfGRSaveArea() / size); + ASSERT(start_regno <= k8BitSize, "Incorrect starting GR regno for GR Save Area"); + for (uint32 i = start_regno + static_cast(R0); i < static_cast(R8); i++) { +@@ -1363,16 +1340,7 @@ void AArch64GenProEpilog::GeneratePushUnnamedVarargRegs() { + tmpOffset += 8U - (dataSizeBits >> 3); + } + } +- Operand *stackLoc; +-#if OLD_LMBC // TBD +- if (cgFunc.GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc) { +-#endif +- stackLoc = &aarchCGFunc.CreateStkTopOpnd(offset + tmpOffset, dataSizeBits); +-#if OLD_LMBC // TBD +- } else { +- stackLoc = aarchCGFunc.GenLmbcFpMemOperand(offset, size); +- } +-#endif ++ Operand *stackLoc = &aarchCGFunc.CreateStkTopOpnd(offset + tmpOffset, dataSizeBits); + RegOperand ® = + aarchCGFunc.GetOrCreatePhysicalRegisterOperand(static_cast(i), k64BitSize, kRegTyInt); + Insn &inst = +@@ -1381,15 +1349,7 @@ void AArch64GenProEpilog::GeneratePushUnnamedVarargRegs() { + offset += size; + } + if (!CGOptions::UseGeneralRegOnly()) { +-#if OLD_LMBC // TBD +- if (cgFunc.GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc) { +-#endif +- offset = static_cast(memlayout->GetVRSaveAreaBaseLoc()); +-#if OLD_LMBC // TBD +- } else { +- offset = (UINT32_MAX - (memlayout->GetSizeOfVRSaveArea() + grSize)) + 1; +- } +-#endif ++ offset = static_cast(memlayout->GetVRSaveAreaBaseLoc()); + start_regno = k8BitSize - (memlayout->GetSizeOfVRSaveArea() / (size * k2BitSize)); + ASSERT(start_regno <= k8BitSize, "Incorrect starting GR regno for VR Save Area"); + for (uint32 i = start_regno + static_cast(V0); i < static_cast(V8); i++) { +@@ -1399,16 +1359,7 @@ void AArch64GenProEpilog::GeneratePushUnnamedVarargRegs() { + tmpOffset += 16U - (dataSizeBits >> 3); + } + } +- Operand *stackLoc; +-#if OLD_LMBC // TBD +- if (cgFunc.GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc) { +-#endif +- stackLoc = &aarchCGFunc.CreateStkTopOpnd(offset + tmpOffset, dataSizeBits); +-#if OLD_LMBC // TBD +- } else { +- stackLoc = aarchCGFunc.GenLmbcFpMemOperand(offset, size); +- } +-#endif ++ Operand *stackLoc = &aarchCGFunc.CreateStkTopOpnd(offset + tmpOffset, dataSizeBits); + RegOperand ® = + aarchCGFunc.GetOrCreatePhysicalRegisterOperand(static_cast(i), k64BitSize, kRegTyFloat); + Insn &inst = +@@ -1713,11 +1664,7 @@ void AArch64GenProEpilog::AppendInstructionDeallocateCallFrameDebug(AArch64reg r + * ldp/stp's imm should be within -512 and 504; + * if ldp's imm > 504, we fall back to the ldp-add version + */ +-#if OLD_LMBC // TBD +- bool isLmbc = (cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc); +-#else + bool isLmbc = false; +-#endif + if (cgFunc.HasVLAOrAlloca() || argsToStkPassSize == 0 || isLmbc) { + int lmbcOffset = 0; + if (isLmbc == false) { +@@ -1805,15 +1752,8 @@ void AArch64GenProEpilog::GeneratePopRegs() { + AArch64MemLayout *memLayout = static_cast(cgFunc.GetMemlayout()); + int32 offset; + if (cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc) { +-#if OLD_LMBC // TBD + offset = static_cast(memLayout->RealStackFrameSize() - + aarchCGFunc.SizeOfCalleeSaved() - memLayout->GetSizeOfLocals()); +-#else +- offset = static_cast(memLayout->RealStackFrameSize() - +- (aarchCGFunc.SizeOfCalleeSaved() - (kDivide2 * kIntregBytelen)/* FP/LR */) - +- memLayout->GetSizeOfLocals() - +- memLayout->SizeOfArgsToStackPass()); +-#endif + } else { + offset = static_cast(cgFunc.GetMemlayout())->RealStackFrameSize() - + (aarchCGFunc.SizeOfCalleeSaved() - 
(kDivide2 * kIntregBytelen) /* for FP/LR */) - diff --git a/9.diff b/9.diff new file mode 100644 index 0000000000000000000000000000000000000000..a4ed47e320d4f49512dbe565c94356a24f0871e4 --- /dev/null +++ b/9.diff @@ -0,0 +1,112 @@ +diff --git a/src/mapleall/maple_be/include/cg/aarch64/aarch64_cgfunc.h b/src/mapleall/maple_be/include/cg/aarch64/aarch64_cgfunc.h +index 76232802e..178762ac0 100644 +--- a/src/mapleall/maple_be/include/cg/aarch64/aarch64_cgfunc.h ++++ b/src/mapleall/maple_be/include/cg/aarch64/aarch64_cgfunc.h +@@ -865,7 +865,7 @@ class AArch64CGFunc : public CGFunc { + void SelectParmListDreadSmallAggregate(const MIRSymbol &sym, MIRType &structType, + ListOperand &srcOpnds, + int32 offset, AArch64CallConvImpl &parmLocator, FieldID fieldID); +- void SelectParmListIreadSmallAggregate(const IreadNode &iread, MIRType &structType, ListOperand &srcOpnds, ++ void SelectParmListIreadSmallAggregate(BaseNode &parent, BaseNode &iread, MIRType &structType, ListOperand &srcOpnds, + int32 offset, AArch64CallConvImpl &parmLocator); + void SelectParmListDreadLargeAggregate(const MIRSymbol &sym, MIRType &structType, + ListOperand &srcOpnds, +@@ -874,11 +874,11 @@ class AArch64CGFunc : public CGFunc { + AArch64CallConvImpl &parmLocator, int32 &structCopyOffset, int32 fromOffset); + void CreateCallStructMemcpyToParamReg(MIRType &structType, int32 structCopyOffset, AArch64CallConvImpl &parmLocator, + ListOperand &srcOpnds); +- void SelectParmListForAggregate(BaseNode &argExpr, ListOperand &srcOpnds, AArch64CallConvImpl &parmLocator, +- int32 &structCopyOffset); ++ void SelectParmListForAggregate(BaseNode &parent, BaseNode &argExpr, ListOperand &srcOpnds, AArch64CallConvImpl &parmLocator, ++ int32 &structCopyOffset, int argNo); + size_t SelectParmListGetStructReturnSize(StmtNode &naryNode); + bool MarkParmListCall(BaseNode &expr); +- void SelectParmListPreprocessLargeStruct(BaseNode &argExpr, int32 &structCopyOffset); ++ void SelectParmListPreprocessLargeStruct(const BaseNode &parent, BaseNode &argExpr, int32 &structCopyOffset, int argNo); + void SelectParmListPreprocess(const StmtNode &naryNode, size_t start, std::set &specialArgs); + void SelectParmList(StmtNode &naryNode, ListOperand &srcOpnds, bool isCallNative = false); + Operand *SelectClearStackCallParam(const AddrofNode &expr, int64 &offsetValue); +diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp +index b8c045b3f..bf08e4968 100644 +--- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp ++++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp +@@ -7847,8 +7847,9 @@ void AArch64CGFunc::CreateCallStructMemcpyToParamReg(MIRType &structType, int32 + } + } + +-void AArch64CGFunc::SelectParmListForAggregate(BaseNode &argExpr, ListOperand &srcOpnds, +- AArch64CallConvImpl &parmLocator, int32 &structCopyOffset) { ++void AArch64CGFunc::SelectParmListForAggregate(BaseNode &parent, BaseNode &argExpr, ListOperand &srcOpnds, ++ AArch64CallConvImpl &parmLocator, int32 &structCopyOffset, ++ int argNo) { + uint64 symSize; + int32 rhsOffset = 0; + if (argExpr.GetOpCode() == OP_dread) { +@@ -7896,6 +7897,17 @@ void AArch64CGFunc::SelectParmListForAggregate(BaseNode &argExpr, ListOperand &s + } else { + SelectParmListIreadLargeAggregate(iread, *ty, srcOpnds, parmLocator, structCopyOffset, rhsOffset); + } ++ } else if (argExpr.GetOpCode() == OP_ireadfpoff) { ++ //IreadFPoffNode &iread = static_cast(argExpr); ++ if (parent.GetOpCode() == OP_call) { ++ CallNode &callNode = 
static_cast(parent); ++ MIRFunction *callFunc = GlobalTables::GetFunctionTable().GetFunctionFromPuidx(callNode.GetPUIdx()); ++ MIRType *ty = GlobalTables::GetTypeTable().GetTypeFromTyIdx(callFunc->GetFormalDefVec()[argNo].formalTyIdx); ++ CHECK_FATAL(ty->IsStructType(), "lmbc agg arg error"); ++ symSize = GetBecommon().GetTypeSize(ty->GetTypeIndex().GetIdx()); ++ CreateCallStructMemcpyToParamReg(*ty, structCopyOffset, parmLocator, srcOpnds); ++ structCopyOffset += static_cast(RoundUp(symSize, kSizeOfPtr)); ++ } + } else { + CHECK_FATAL(0, "NYI"); + } +@@ -7938,7 +7950,7 @@ size_t AArch64CGFunc::SelectParmListGetStructReturnSize(StmtNode &naryNode) { + return 0; + } + +-void AArch64CGFunc::SelectParmListPreprocessLargeStruct(BaseNode &argExpr, int32 &structCopyOffset) { ++void AArch64CGFunc::SelectParmListPreprocessLargeStruct(const BaseNode &parent, BaseNode &argExpr, int32 &structCopyOffset, int argNo) { + uint64 symSize; + int32 rhsOffset = 0; + if (argExpr.GetOpCode() == OP_dread) { +@@ -7985,6 +7997,22 @@ void AArch64CGFunc::SelectParmListPreprocessLargeStruct(BaseNode &argExpr, int32 + uint32 numMemOp = static_cast(RoundUp(symSize, kSizeOfPtr) / kSizeOfPtr); + structCopyOffset += static_cast(numMemOp * kSizeOfPtr); + } ++ } else if (argExpr.GetOpCode() == OP_ireadfpoff) { ++ IreadFPoffNode &iread = static_cast(argExpr); ++ const CallNode &callNode = static_cast(const_cast(parent)); ++ MIRFunction *callFunc = GlobalTables::GetFunctionTable().GetFunctionFromPuidx(callNode.GetPUIdx()); ++ MIRType *ty = GlobalTables::GetTypeTable().GetTypeFromTyIdx(callFunc->GetFormalDefVec()[argNo].formalTyIdx); ++ CHECK_FATAL(ty->IsStructType(), "lmbc agg arg error"); ++ symSize = GetBecommon().GetTypeSize(ty->GetTypeIndex().GetIdx()); ++ if (symSize > 0 /*TBD: kParmMemcpySize*/) { ++ RegOperand *ireadOpnd = static_cast(SelectIreadfpoff(parent, iread)); ++ RegOperand *addrOpnd = &LoadIntoRegister(*ireadOpnd, iread.Opnd(0)->GetPrimType()); ++ CreateCallStructParamMemcpy(nullptr, addrOpnd, static_cast(symSize), structCopyOffset, 0); ++ structCopyOffset += static_cast(RoundUp(symSize, kSizeOfPtr)); ++ } else if (symSize > k16ByteSize) { ++ uint32 numMemOp = static_cast(RoundUp(symSize, kSizeOfPtr) / kSizeOfPtr); ++ structCopyOffset += static_cast(numMemOp * kSizeOfPtr); ++ } + } + } + +@@ -8029,7 +8057,7 @@ void AArch64CGFunc::SelectParmListPreprocess(const StmtNode &naryNode, size_t st + if (primType != PTY_agg) { + continue; + } +- SelectParmListPreprocessLargeStruct(*argExpr, structCopyOffset); ++ SelectParmListPreprocessLargeStruct(naryNode, *argExpr, structCopyOffset, i); + } + } + +@@ -8123,7 +8151,7 @@ void AArch64CGFunc::SelectParmList(StmtNode &naryNode, ListOperand &srcOpnds, bo + } + /* use alloca */ + if (primType == PTY_agg) { +- SelectParmListForAggregate(*argExpr, srcOpnds, parmLocator, structCopyOffset); ++ SelectParmListForAggregate(naryNode, *argExpr, srcOpnds, parmLocator, structCopyOffset, i); + continue; + } + ty = GlobalTables::GetTypeTable().GetTypeTable()[static_cast(primType)]; diff --git a/Patches/a_abi.cpp b/Patches/a_abi.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1a061c9110431107f12f41cc044293e9b77c6623 --- /dev/null +++ b/Patches/a_abi.cpp @@ -0,0 +1,66 @@ +diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_abi.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_abi.cpp +index 11db2be0..408af8f1 100644 +--- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_abi.cpp ++++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_abi.cpp +@@ -451,6 +451,13 @@ 
int32 ParmLocator::LocateNextParm(MIRType &mirType, PLocInfo &pLoc, bool isFirst + case PTY_f32: + case PTY_f64: + case PTY_c64: ++ case PTY_v2i32: ++ case PTY_v4i16: ++ case PTY_v8i8: ++ case PTY_v2u32: ++ case PTY_v4u16: ++ case PTY_v8u8: ++ case PTY_v2f32: + /* Rule C.1 */ + ASSERT(GetPrimTypeSize(PTY_f64) == k8ByteSize, "unexpected type size"); + typeSize = k8ByteSize; +@@ -462,6 +469,16 @@ int32 ParmLocator::LocateNextParm(MIRType &mirType, PLocInfo &pLoc, bool isFirst + * - callees de-marshall one f128 value into the real and the imaginery part + */ + case PTY_c128: ++ case PTY_v2i64: ++ case PTY_v4i32: ++ case PTY_v8i16: ++ case PTY_v16i8: ++ case PTY_v2u64: ++ case PTY_v4u32: ++ case PTY_v8u16: ++ case PTY_v16u8: ++ case PTY_v2f64: ++ case PTY_v4f32: + /* SIMD-FP registers have 128-bits. */ + pLoc.reg0 = AllocateSIMDFPRegister(); + ASSERT(nextFloatRegNO <= AArch64Abi::kNumFloatParmRegs, "regNO should not be greater than kNumFloatParmRegs"); +@@ -622,6 +639,13 @@ ReturnMechanism::ReturnMechanism(MIRType &retTy, const BECommon &be) + case PTY_f32: + case PTY_f64: + case PTY_c64: ++ case PTY_v2i32: ++ case PTY_v4i16: ++ case PTY_v8i8: ++ case PTY_v2u32: ++ case PTY_v4u16: ++ case PTY_v8u8: ++ case PTY_v2f32: + + /* + * for c128 complex numbers, we assume +@@ -629,6 +653,16 @@ ReturnMechanism::ReturnMechanism(MIRType &retTy, const BECommon &be) + * - callees de-marshall one f128 value into the real and the imaginery part + */ + case PTY_c128: ++ case PTY_v2i64: ++ case PTY_v4i32: ++ case PTY_v8i16: ++ case PTY_v16i8: ++ case PTY_v2u64: ++ case PTY_v4u32: ++ case PTY_v8u16: ++ case PTY_v16u8: ++ case PTY_v2f64: ++ case PTY_v4f32: + regCount = 1; + reg0 = AArch64Abi::floatReturnRegs[0]; + primTypeOfReg0 = pType; diff --git a/Patches/a_args.cpp b/Patches/a_args.cpp new file mode 100644 index 0000000000000000000000000000000000000000..84592047113de0e43cdb8e6b1c11bf6668923dc8 --- /dev/null +++ b/Patches/a_args.cpp @@ -0,0 +1,16 @@ +diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_args.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_args.cpp +index e3d3eb27..464f61c6 100644 +--- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_args.cpp ++++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_args.cpp +@@ -92,7 +92,10 @@ ArgInfo AArch64MoveRegArgs::GetArgInfo(std::map &argsList, s + argInfo.createTwoStores = false; + argInfo.isTwoRegParm = false; + +- if ((argInfo.symSize > k8ByteSize) && (argInfo.symSize <= k16ByteSize)) { ++ if (GetPrimTypeLanes(argInfo.mirTy->GetPrimType()) > 0) { ++ /* vector type */ ++ argInfo.stkSize = argInfo.symSize; ++ } else if ((argInfo.symSize > k8ByteSize) && (argInfo.symSize <= k16ByteSize)) { + argInfo.isTwoRegParm = true; + if (numFpRegs[argIndex] > kOneRegister) { + argInfo.symSize = argInfo.stkSize = fpSize[argIndex]; diff --git a/Patches/a_cg.cpp b/Patches/a_cg.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4dde08b2f4b9e5c998b44072bef9e0467d59e53f --- /dev/null +++ b/Patches/a_cg.cpp @@ -0,0 +1,26 @@ +diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_cg.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_cg.cpp +index ac6050ce..b69775ab 100644 +--- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_cg.cpp ++++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_cg.cpp +@@ -58,11 +58,19 @@ std::array, kIntRegTypeNum> AArch64CG: + "err", "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", + "x15", "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", 
"x29", "x30", + "sp", "xzr", /* x29 is fp */ +- "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", +- "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", ++ "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15", ++ "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23", "q24", "q25", "q26", "q27", "q28", "q29", "q30", "q31", + "errMaxRegNum", "rflag" } + }; + ++std::array AArch64CG::vectorRegNames = { ++ "err", "err0", "err1", "err2", "err3", "err4", "err5", "err6", "err7", "err8", "err9", "err10", ++ "err11", "err12", "err13", "err14", "err15", "err16", "err17", "err18", "err19", "err20", "err21", "err22", ++ "err23", "err24", "err25", "err26", "err27", "err28", "err29", "err30", "errsp", "errzr", /* x29 is fp */ ++ "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", ++ "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", ++ "errMaxRegNum", "rflag" }; ++ + bool AArch64CG::IsExclusiveFunc(MIRFunction &mirFunc) { + const std::string &funcName = mirFunc.GetName(); + for (const auto &it : ehExclusiveNameVec) { diff --git a/Patches/a_cg.h b/Patches/a_cg.h new file mode 100644 index 0000000000000000000000000000000000000000..09c80849e5c088c07a0bb23b67141ac70963d331 --- /dev/null +++ b/Patches/a_cg.h @@ -0,0 +1,19 @@ +diff --git a/src/mapleall/maple_be/include/cg/aarch64/aarch64_cg.h b/src/mapleall/maple_be/include/cg/aarch64/aarch64_cg.h +index f9689e68..61a1606e 100644 +--- a/src/mapleall/maple_be/include/cg/aarch64/aarch64_cg.h ++++ b/src/mapleall/maple_be/include/cg/aarch64/aarch64_cg.h +@@ -146,6 +146,7 @@ class AArch64CG : public CG { + kV64List + }; + static std::array, kIntRegTypeNum> intRegNames; ++ static std::array vectorRegNames; + + private: + const std::vector &ehExclusiveNameVec; +@@ -155,4 +156,4 @@ class AArch64CG : public CG { + }; + } /* namespace maplebe */ + +-#endif /* MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_CG_H */ +\ No newline at end of file ++#endif /* MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_CG_H */ diff --git a/Patches/a_cgfunc.cpp b/Patches/a_cgfunc.cpp new file mode 100644 index 0000000000000000000000000000000000000000..93bceaba3ead0ef110bd39f8afc44cb093b27493 --- /dev/null +++ b/Patches/a_cgfunc.cpp @@ -0,0 +1,365 @@ +diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp +index 600c89a6..64349f11 100644 +--- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp ++++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp +@@ -37,7 +37,7 @@ CondOperand AArch64CGFunc::ccOperands[kCcLast] = { + namespace { + constexpr int32 kSignedDimension = 2; /* signed and unsigned */ + constexpr int32 kIntByteSizeDimension = 4; /* 1 byte, 2 byte, 4 bytes, 8 bytes */ +-constexpr int32 kFloatByteSizeDimension = 2; /* 4 bytes, 8 bytes */ ++constexpr int32 kFloatByteSizeDimension = 3; /* 4 bytes, 8 bytes, 16 bytes(vector) */ + constexpr int32 kShiftAmount12 = 12; /* for instruction that can use shift, shift amount must be 0 or 12 */ + + MOperator ldIs[kSignedDimension][kIntByteSizeDimension] = { +@@ -68,11 +68,11 @@ MOperator stIsRel[kSignedDimension][kIntByteSizeDimension] = { + { MOP_wstlrb, MOP_wstlrh, MOP_wstlr, MOP_xstlr } + }; + +-MOperator ldFs[kFloatByteSizeDimension] = { MOP_sldr, MOP_dldr }; +-MOperator stFs[kFloatByteSizeDimension] = { MOP_sstr, 
MOP_dstr }; ++MOperator ldFs[kFloatByteSizeDimension] = { MOP_sldr, MOP_dldr, MOP_qldr }; ++MOperator stFs[kFloatByteSizeDimension] = { MOP_sstr, MOP_dstr, MOP_qstr }; + +-MOperator ldFsAcq[kFloatByteSizeDimension] = { MOP_undef, MOP_undef }; +-MOperator stFsRel[kFloatByteSizeDimension] = { MOP_undef, MOP_undef }; ++MOperator ldFsAcq[kFloatByteSizeDimension] = { MOP_undef, MOP_undef, MOP_undef }; ++MOperator stFsRel[kFloatByteSizeDimension] = { MOP_undef, MOP_undef, MOP_undef }; + + /* extended to unsigned ints */ + MOperator uextIs[kIntByteSizeDimension][kIntByteSizeDimension] = { +@@ -107,7 +107,7 @@ MOperator PickLdStInsn(bool isLoad, uint32 bitSize, PrimType primType, AArch64is + } + + /* __builtin_ffs(x) returns: 0 -> 0, 1 -> 1, 2 -> 2, 4 -> 3, 8 -> 4 */ +- if (IsPrimitiveInteger(primType) || primType == PTY_agg) { ++ if ((IsPrimitiveInteger(primType) || primType == PTY_agg) && !IsPrimitiveVector(primType)) { + MOperator(*table)[kIntByteSizeDimension]; + if (isLoad) { + table = (memOrd == AArch64isa::kMoAcquire) ? ldIsAcq : ldIs; +@@ -134,9 +134,9 @@ MOperator PickLdStInsn(bool isLoad, uint32 bitSize, PrimType primType, AArch64is + table = (memOrd == AArch64isa::kMoRelease) ? stFsRel : stFs; + } + +- /* __builtin_ffs(x) returns: 32 -> 6, 64 -> 7 */ ++ /* __builtin_ffs(x) returns: 32 -> 6, 64 -> 7, 128 -> 8 */ + uint32 size = static_cast(__builtin_ffs(static_cast(bitSize))) - 6; +- ASSERT(size <= 1, "size must be 0 or 1"); ++ ASSERT(size <= 2, "size must be 0 to 2"); + return table[size]; + } + } +@@ -214,12 +214,12 @@ MOperator AArch64CGFunc::PickMovInsn(RegOperand &lhs, RegOperand &rhs) { + } + + MOperator AArch64CGFunc::PickMovInsn(uint32 bitLen, RegType regType) { +- ASSERT((bitLen == k32BitSize) || (bitLen == k64BitSize), "size check"); ++ ASSERT((bitLen == k32BitSize) || (bitLen == k64BitSize || bitLen == k128BitSize), "size check"); + ASSERT((regType == kRegTyInt) || (regType == kRegTyFloat), "type check"); + if (regType == kRegTyInt) { + return (bitLen == k32BitSize) ? MOP_wmovrr : MOP_xmovrr; + } +- return (bitLen == k32BitSize) ? MOP_xvmovs : MOP_xvmovd; ++ return bitLen == k128BitSize ? MOP_vmovvv : ((bitLen == k32BitSize) ? MOP_xvmovs : MOP_xvmovd); + } + + void AArch64CGFunc::SelectLoadAcquire(Operand &dest, PrimType dtype, Operand &src, PrimType stype, +@@ -470,7 +470,7 @@ void AArch64CGFunc::SelectCopyMemOpnd(Operand &dest, PrimType dtype, uint32 dsiz + PrimType regTy = PTY_void; + RegOperand *loadReg = nullptr; + MOperator mop = MOP_undef; +- if (IsPrimitiveFloat(stype)) { ++ if (IsPrimitiveFloat(stype) || IsPrimitiveVector(stype)) { + CHECK_FATAL(dsize == ssize, "dsize %u expect equals ssize %u", dtype, ssize); + insn = &GetCG()->BuildInstruction(PickLdInsn(ssize, stype), dest, src); + } else { +@@ -710,6 +710,19 @@ void AArch64CGFunc::SelectCopy(Operand &dest, PrimType dtype, Operand &src, Prim + dest, AArch64RegOperand::GetZeroRegister(dsize))); + break; + case Operand::kOpdRegister: ++ if (opnd0Type == Operand::kOpdRegister && GetPrimTypeLanes(stype) > 0) { ++ /* check vector reg to vector reg move */ ++ CHECK_FATAL(GetPrimTypeLanes(dtype) > 0, "invalid vectreg to vectreg move"); ++ VectorRegSpec *vecSpecSrc = GetMemoryPool()->New(); ++ vecSpecSrc->vecLaneMax = k16BitSize; ++ VectorRegSpec *vecSpecDest = GetMemoryPool()->New(); ++ vecSpecDest->vecLaneMax = k16BitSize; ++ Insn *insn = &GetCG()->BuildInstruction(dsize <= k64BitSize ? 
MOP_vmovuu : MOP_vmovvv, dest, src); ++ static_cast(insn)->PushRegSpecEntry(vecSpecDest); ++ static_cast(insn)->PushRegSpecEntry(vecSpecSrc); ++ GetCurBB()->AppendInsn(*insn); ++ break; ++ } + SelectCopyRegOpnd(dest, dtype, opnd0Type, dsize, src, stype); + break; + default: +@@ -739,7 +752,7 @@ RegOperand &AArch64CGFunc::SelectCopy(Operand &src, PrimType stype, PrimType dty + */ + bool AArch64CGFunc::IsImmediateOffsetOutOfRange(AArch64MemOperand &memOpnd, uint32 bitLen) { + ASSERT(bitLen >= k8BitSize, "bitlen error"); +- ASSERT(bitLen <= k64BitSize, "bitlen error"); ++ ASSERT(bitLen <= k128BitSize, "bitlen error"); + + if (bitLen >= k8BitSize) { + bitLen = RoundUp(bitLen, k8BitSize); +@@ -973,6 +986,11 @@ void AArch64CGFunc::SelectDassign(StIdx stIdx, FieldID fieldId, PrimType rhsPTyp + AArch64MemOperand &archMemOperand = *static_cast(memOpnd); + if ((memOpnd->GetMemVaryType() == kNotVary) && IsImmediateOffsetOutOfRange(archMemOperand, dataSize)) { + memOpnd = &SplitOffsetWithAddInstruction(archMemOperand, dataSize); ++#if 0 ++ uint32 dSize = dataSize > k64BitSize ? k64BitSize : dataSize; /* TBD */ ++ if ((memOpnd->GetMemVaryType() == kNotVary) && IsImmediateOffsetOutOfRange(archMemOperand, dSize)) { ++ memOpnd = &SplitOffsetWithAddInstruction(archMemOperand, dSize); ++#endif + } + + /* In bpl mode, a func symbol's type is represented as a MIRFuncType instead of a MIRPtrType (pointing to +@@ -1392,6 +1410,9 @@ void AArch64CGFunc::SelectIassign(IassignNode &stmt) { + if (destType == PTY_agg) { + destType = PTY_a64; + } ++ if (GetPrimTypeLanes(styp) != 0) { /* a vector type */ ++ destType = styp; ++ } + ASSERT(stmt.Opnd(0) != nullptr, "null ptr check"); + MemOperand &memOpnd = CreateMemOpnd(destType, stmt, *stmt.Opnd(0), offset); + if (isVolStore && static_cast(memOpnd).GetAddrMode() == AArch64MemOperand::kAddrModeBOi) { +@@ -4766,12 +4787,13 @@ Operand &AArch64CGFunc::GetTargetRetOperand(PrimType primType, int32 sReg) { + uint32 bitSize = GetPrimTypeBitSize(primType) < k32BitSize ? k32BitSize : GetPrimTypeBitSize(primType); + AArch64reg pReg; + if (sReg < 0) { +- return GetOrCreatePhysicalRegisterOperand(IsPrimitiveFloat(primType) ? S0 : R0, bitSize, +- GetRegTyFromPrimTy(primType)); ++ return GetOrCreatePhysicalRegisterOperand( ++ IsPrimitiveFloat(primType) || (GetPrimTypeLanes(primType) > 0) ? S0 : R0, ++ bitSize, GetRegTyFromPrimTy(primType)); + } else { + switch (sReg) { + case kSregRetval0: +- pReg = IsPrimitiveFloat(primType) ? S0 : R0; ++ pReg = IsPrimitiveFloat(primType) || (GetPrimTypeLanes(primType) > 0) ? S0 : R0; + break; + case kSregRetval1: + pReg = R1; +@@ -6494,7 +6516,7 @@ AArch64RegOperand &AArch64CGFunc::GetOrCreatePhysicalRegisterOperand(AArch64reg + size = k32BitSize; + aarch64PhyRegIdx = aarch64PhyRegIdx << 1; + } else { +- size = k64BitSize; ++ size = size == k128BitSize ? 
k128BitSize : k64BitSize; + aarch64PhyRegIdx = (aarch64PhyRegIdx << 1) + 1; + } + ASSERT(aarch64PhyRegIdx < k256BitSize, "phyRegOperandTable index out of range"); +@@ -6960,6 +6982,9 @@ int32 AArch64CGFunc::GetBaseOffset(const SymbolAlloc &sa) { + return offset; + } else if (sgKind == kMsArgsRegPassed) { + int32 baseOffset = memLayout->GetSizeOfLocals() + symAlloc->GetOffset() + memLayout->GetSizeOfRefLocals(); ++ if (symAlloc->GetMemSegment()->ContainVector()) { ++ baseOffset = RoundUp(baseOffset, k16ByteSize); ++ } + return baseOffset + sizeofFplr; + } else if (sgKind == kMsRefLocals) { + int32 baseOffset = symAlloc->GetOffset() + memLayout->GetSizeOfLocals(); +@@ -7927,4 +7952,203 @@ void AArch64CGFunc::InsertJumpPad(Insn *insn) { + fallthruBB->PushBackPreds(*brBB); + } + ++RegOperand *AArch64CGFunc::SelectVectorFromScalar(IntrinsicopNode &intrnNode) { ++ PrimType pType = intrnNode.GetPrimType(); ++ ++ RegOperand *res = &CreateRegisterOperandOfType(pType); ++ BaseNode *argExpr = intrnNode.Opnd(0); ++ Operand *opnd0 = HandleExpr(intrnNode, *argExpr); ++ VectorRegSpec *vecSpec = GetMemoryPool()->New(); ++ vecSpec->vecLaneMax = GetPrimTypeLanes(pType); ++ ++ Operand *reg = opnd0; ++ if (opnd0->IsConstImmediate()) { ++ ImmOperand *immOpnd = static_cast(opnd0); ++ ConstvalNode *constvalNode = static_cast(argExpr); ++ MIRConst *mirConst = constvalNode->GetConstVal(); ++ int32 val = safe_cast(mirConst)->GetValue(); ++ if (val >= -128 && val <= 255) { ++ Insn *insn = &GetCG()->BuildInstruction(MOP_vmovvi, *res, *immOpnd); ++ static_cast(insn)->PushRegSpecEntry(vecSpec); ++ GetCurBB()->AppendInsn(*insn); ++ return res; ++ } ++ reg = &CreateRegisterOperandOfType(argExpr->GetPrimType()); ++ SelectCopyImm(*reg, *immOpnd, argExpr->GetPrimType()); ++ } ++ Insn *insn = &GetCG()->BuildInstruction(MOP_vdupvr, *res, *reg); ++ static_cast(insn)->PushRegSpecEntry(vecSpec); ++ GetCurBB()->AppendInsn(*insn); ++ return res; ++} ++ ++Operand *AArch64CGFunc::SelectVectorStore(IntrinsicopNode &intrnNode) { ++ BaseNode &argExpr = *intrnNode.Opnd(1); ++ Operand &srcOpnd = *HandleExpr(intrnNode, argExpr); ++ ++ PrimType dType = argExpr.GetPrimType(); ++ MemOperand &memOpnd = CreateMemOpnd(dType, intrnNode, *intrnNode.Opnd(0), 0); ++ SelectCopy(memOpnd, dType, srcOpnd, dType); ++ return &srcOpnd; ++} ++ ++RegOperand *AArch64CGFunc::SelectVectorGetHigh(IntrinsicopNode &intrnNode) { ++ BaseNode &argExpr = *intrnNode.Opnd(0); ++ PrimType srcType = argExpr.GetPrimType(); ++ Operand &srcOpnd = *HandleExpr(intrnNode, argExpr); ++ VectorRegSpec *vecSpecSrc = GetMemoryPool()->New(); ++ vecSpecSrc->vecLaneMax = GetPrimTypeLanes(srcType); ++ vecSpecSrc->vecLane = 1; ++ ++ PrimType resType = intrnNode.GetPrimType(); ++ RegOperand *res = &CreateRegisterOperandOfType(resType); ++ ++ Insn *insn = &GetCG()->BuildInstruction(MOP_vduprv, *res, srcOpnd); ++ static_cast(insn)->PushRegSpecEntry(vecSpecSrc); ++ GetCurBB()->AppendInsn(*insn); ++ return res; ++} ++ ++RegOperand *AArch64CGFunc::SelectVectorGetLow(IntrinsicopNode &intrnNode) { ++ BaseNode &argExpr = *intrnNode.Opnd(0); ++ PrimType srcType = argExpr.GetPrimType(); ++ Operand &srcOpnd = *HandleExpr(intrnNode, argExpr); ++ VectorRegSpec *vecSpecSrc = GetMemoryPool()->New(); ++ vecSpecSrc->vecLaneMax = GetPrimTypeLanes(srcType); ++ vecSpecSrc->vecLane = 0; ++ ++ PrimType resType = intrnNode.GetPrimType(); ++ RegOperand *res = &CreateRegisterOperandOfType(resType); ++ ++ Insn *insn = &GetCG()->BuildInstruction(MOP_vduprv, *res, srcOpnd); ++ 
static_cast(insn)->PushRegSpecEntry(vecSpecSrc); ++ GetCurBB()->AppendInsn(*insn); ++ return res; ++} ++ ++RegOperand *AArch64CGFunc::SelectVectorGetElement(IntrinsicopNode &intrnNode) { ++ PrimType resType = intrnNode.GetPrimType(); /* uint32_t result */ ++ RegOperand *res = &CreateRegisterOperandOfType(resType); ++ ++ BaseNode *argExpr = intrnNode.Opnd(0); /* vector operand */ ++ Operand *opndSrc = HandleExpr(intrnNode, *argExpr); ++ PrimType srcType = argExpr->GetPrimType(); ++ VectorRegSpec *vecSpecSrc = GetMemoryPool()->New(); ++ vecSpecSrc->vecLaneMax = GetPrimTypeLanes(srcType); ++ ++ BaseNode *argLane = intrnNode.Opnd(1); /* lane const operand */ ++ Operand *opndLane = HandleExpr(intrnNode, *argLane); ++ if (opndLane->IsConstImmediate()) { ++ ConstvalNode *constvalNode = static_cast(argLane); ++ MIRConst *mirConst = constvalNode->GetConstVal(); ++ vecSpecSrc->vecLane = safe_cast(mirConst)->GetValue(); ++ } else { ++ CHECK_FATAL(0, "VectorGetElement does not have lane const"); ++ } ++ ++ MOperator mop = GetPrimTypeBitSize(srcType) <= k64BitSize ? MOP_vwmovru : MOP_vwmovrv; ++ Insn *insn = &GetCG()->BuildInstruction(mop, *res, *opndSrc); ++ static_cast(insn)->PushRegSpecEntry(vecSpecSrc); ++ GetCurBB()->AppendInsn(*insn); ++ return res; ++} ++ ++RegOperand *AArch64CGFunc::SelectVectorPairwiseAdd(IntrinsicopNode &intrnNode) { ++ PrimType resType = intrnNode.GetPrimType(); ++ RegOperand *res = &CreateRegisterOperandOfType(resType); ++ VectorRegSpec *vecSpecDest = GetMemoryPool()->New(); ++ vecSpecDest->vecLaneMax = GetPrimTypeLanes(resType); ++ ++ BaseNode *argExpr = intrnNode.Opnd(0); ++ Operand *opnd = HandleExpr(intrnNode, *argExpr); ++ PrimType srcType = argExpr->GetPrimType(); ++ VectorRegSpec *vecSpecSrc = GetMemoryPool()->New(); ++ vecSpecSrc->vecLaneMax = GetPrimTypeLanes(srcType); ++ ++ Insn *insn = &GetCG()->BuildInstruction(MOP_vpaddvv, *res, *opnd); ++ static_cast(insn)->PushRegSpecEntry(vecSpecDest); /* dest pushed first, popped first */ ++ static_cast(insn)->PushRegSpecEntry(vecSpecSrc); ++ GetCurBB()->AppendInsn(*insn); ++ return res; ++} ++ ++RegOperand *AArch64CGFunc::SelectVectorSetElement(IntrinsicopNode &intrnNode) { ++ BaseNode *arg0 = intrnNode.Opnd(0); /* uint32_t operand */ ++ Operand *opnd0 = HandleExpr(intrnNode, *arg0); ++ ASSERT(GetPrimTypeBitSize(arg0->GetPrimType()) <= k32BitSize, "VectorSetElement: invalid opnd0"); ++ ++ BaseNode *arg1 = intrnNode.Opnd(1); /* vector operand == result */ ++ Operand *opnd1 = HandleExpr(intrnNode, *arg1); ++ PrimType vType = arg1->GetPrimType(); ++ VectorRegSpec *vecSpecSrc = GetMemoryPool()->New(); ++ vecSpecSrc->vecLaneMax = GetPrimTypeLanes(vType); ++ ++ BaseNode *arg2 = intrnNode.Opnd(2); /* lane const operand */ ++ Operand *opnd2 = HandleExpr(intrnNode, *arg2); ++ if (opnd2->IsConstImmediate()) { ++ ConstvalNode *constvalNode = static_cast(arg2); ++ MIRConst *mirConst = constvalNode->GetConstVal(); ++ vecSpecSrc->vecLane = safe_cast(mirConst)->GetValue(); ++ } else { ++ CHECK_FATAL(0, "VectorSetElement does not have lane const"); ++ } ++ ++ Insn *insn = &GetCG()->BuildInstruction(MOP_vinsvr, *opnd1, *opnd0); ++ static_cast(insn)->PushRegSpecEntry(vecSpecSrc); ++ GetCurBB()->AppendInsn(*insn); ++ return static_cast(opnd1); ++} ++ ++RegOperand *AArch64CGFunc::SelectVectorMerge(IntrinsicopNode &intrnNode) { ++ PrimType resType = intrnNode.GetPrimType(); /* result operand */ ++ RegOperand *res = &CreateRegisterOperandOfType(resType); ++ VectorRegSpec *vecSpecDest = GetMemoryPool()->New(); ++ vecSpecDest->vecLaneMax = 
GetPrimTypeLanes(resType); ++ ++ BaseNode *arg1 = intrnNode.Opnd(0); /* vector operand1 */ ++ Operand *opnd1 = HandleExpr(intrnNode, *arg1); ++ PrimType o1Type = arg1->GetPrimType(); ++ VectorRegSpec *vecSpecOpd1 = GetMemoryPool()->New(); ++ vecSpecOpd1->vecLaneMax = GetPrimTypeLanes(o1Type); ++ ++ BaseNode *arg2 = intrnNode.Opnd(1); /* vector operand2 */ ++ Operand *opnd2 = HandleExpr(intrnNode, *arg1); ++ PrimType o2Type = arg2->GetPrimType(); ++ VectorRegSpec *vecSpecOpd2 = GetMemoryPool()->New(); ++ vecSpecOpd2->vecLaneMax = GetPrimTypeLanes(o2Type); ++ ++ BaseNode *arg3 = intrnNode.Opnd(2); /* lane const operand */ ++ Operand *opnd3 = HandleExpr(intrnNode, *arg3); ++ if (!opnd3->IsConstImmediate()) { ++ CHECK_FATAL(0, "VectorSetElement does not have lane const"); ++ } ++ ++ Insn *insn = &GetCG()->BuildInstruction(MOP_vextvvv, *res, *opnd1, *opnd2, *opnd3); ++ static_cast(insn)->PushRegSpecEntry(vecSpecDest); ++ static_cast(insn)->PushRegSpecEntry(vecSpecOpd1); ++ static_cast(insn)->PushRegSpecEntry(vecSpecOpd2); ++ GetCurBB()->AppendInsn(*insn); ++ return res; ++} ++ ++RegOperand *AArch64CGFunc::SelectVectorReverse(IntrinsicopNode &intrnNode, uint32 size) { ++ PrimType resType = intrnNode.GetPrimType(); /* result operand */ ++ RegOperand *res = &CreateRegisterOperandOfType(resType); ++ VectorRegSpec *vecSpecDest = GetMemoryPool()->New(); ++ vecSpecDest->vecLaneMax = GetPrimTypeLanes(resType); ++ ++ BaseNode &argExpr = *intrnNode.Opnd(0); ++ PrimType srcType = argExpr.GetPrimType(); ++ Operand &srcOpnd = *HandleExpr(intrnNode, argExpr); ++ VectorRegSpec *vecSpecSrc = GetMemoryPool()->New(); ++ vecSpecSrc->vecLaneMax = GetPrimTypeLanes(srcType); ++ ++ MOperator mOp = size >= 64 ? MOP_vrev64vv : (size >= 32 ? MOP_vrev32vv : MOP_vrev16vv); ++ Insn *insn = &GetCG()->BuildInstruction(mOp, *res, srcOpnd); ++ static_cast(insn)->PushRegSpecEntry(vecSpecDest); ++ static_cast(insn)->PushRegSpecEntry(vecSpecSrc); ++ GetCurBB()->AppendInsn(*insn); ++ return res; ++} ++ + } /* namespace maplebe */ diff --git a/Patches/a_cgfunc.h b/Patches/a_cgfunc.h new file mode 100644 index 0000000000000000000000000000000000000000..64bf520fe8de814dba415ac3b4f559dee3647925 --- /dev/null +++ b/Patches/a_cgfunc.h @@ -0,0 +1,21 @@ +diff --git a/src/mapleall/maple_be/include/cg/aarch64/aarch64_cgfunc.h b/src/mapleall/maple_be/include/cg/aarch64/aarch64_cgfunc.h +index c544a206..e7410750 100644 +--- a/src/mapleall/maple_be/include/cg/aarch64/aarch64_cgfunc.h ++++ b/src/mapleall/maple_be/include/cg/aarch64/aarch64_cgfunc.h +@@ -229,6 +229,16 @@ class AArch64CGFunc : public CGFunc { + LabelOperand &CreateFuncLabelOperand(const MIRSymbol &func); + uint32 GetAggCopySize(uint32 offset1, uint32 offset2, uint32 alignment); + ++ RegOperand *SelectVectorFromScalar(IntrinsicopNode &intrnNode) override; ++ Operand *SelectVectorStore(IntrinsicopNode &intrnNode) override; ++ RegOperand *SelectVectorMerge(IntrinsicopNode &intrnNode) override; ++ RegOperand *SelectVectorGetHigh(IntrinsicopNode &intrnNode) override; ++ RegOperand *SelectVectorGetLow(IntrinsicopNode &intrnNode) override; ++ RegOperand *SelectVectorGetElement(IntrinsicopNode &intrnNode) override; ++ RegOperand *SelectVectorPairwiseAdd(IntrinsicopNode &intrnNode) override; ++ RegOperand *SelectVectorSetElement(IntrinsicopNode &intrnNode) override; ++ RegOperand *SelectVectorReverse(IntrinsicopNode &intrnNode, uint32 size) override; ++ + AArch64ImmOperand &CreateImmOperand(PrimType ptyp, int64 val) override { + return CreateImmOperand(val, GetPrimTypeBitSize(ptyp), 
IsSignedInteger(ptyp)); + } diff --git a/Patches/a_insn.cpp b/Patches/a_insn.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0ed04039bda2e896a4f638b9a47360424c970969 --- /dev/null +++ b/Patches/a_insn.cpp @@ -0,0 +1,45 @@ +diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_insn.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_insn.cpp +index a6d60424..90ec93b5 100644 +--- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_insn.cpp ++++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_insn.cpp +@@ -1075,6 +1075,18 @@ void AArch64Insn::Emit(const CG &cg, Emitter &emitter) const { + emitter.Emit(nameOpnd->GetName() + emitter.HugeSoPostFix()); + break; + } ++ AArch64RegOperand *regOpnd = static_cast(opnds[seq[i]]); ++ if (regOpnd != nullptr && static_cast(md->operand[seq[i]])->IsVectorOperand()) { ++ regOpnd->SetVecLanePosition(-1); ++ regOpnd->SetVecLaneSize(0); ++ if (IsVectorOp()) { ++ AArch64Insn *insn = const_cast(this); ++ AArch64VectorInsn *vInsn = static_cast(insn); ++ VectorRegSpec* vecSpec = vInsn->GetAndRemoveRegSpecFromList(); ++ regOpnd->SetVecLanePosition(vecSpec->vecLane); ++ regOpnd->SetVecLaneSize(vecSpec->vecLaneMax); ++ } ++ } + opnds[seq[i]]->Emit(emitter, md->operand[seq[i]]); + /* reset opnd0 ref-field flag, so following instruction has correct register */ + if (isRefField && (i == 0)) { +@@ -1168,8 +1180,10 @@ uint8 AArch64Insn::GetLoadStoreSize() const { + case MOP_xldp: + case MOP_xldpsw: + case MOP_dldp: ++ case MOP_qldr: + case MOP_xstp: + case MOP_dstp: ++ case MOP_qstr: + return k16ByteSize; + + default: +@@ -1290,6 +1304,10 @@ bool AArch64Insn::IsCall() const { + return AArch64CG::kMd[mOp].IsCall(); + } + ++bool AArch64Insn::IsVectorOp() const { ++ return AArch64CG::kMd[mOp].IsVectorOp(); ++} ++ + bool AArch64Insn::HasLoop() const { + return AArch64CG::kMd[mOp].HasLoop(); + } diff --git a/Patches/a_insn.h b/Patches/a_insn.h new file mode 100644 index 0000000000000000000000000000000000000000..a510b8711a89acb33d74c8ee59e6132c306866de --- /dev/null +++ b/Patches/a_insn.h @@ -0,0 +1,59 @@ +diff --git a/src/mapleall/maple_be/include/cg/aarch64/aarch64_insn.h b/src/mapleall/maple_be/include/cg/aarch64/aarch64_insn.h +index 620158ea..0362bc5c 100644 +--- a/src/mapleall/maple_be/include/cg/aarch64/aarch64_insn.h ++++ b/src/mapleall/maple_be/include/cg/aarch64/aarch64_insn.h +@@ -124,6 +124,7 @@ class AArch64Insn : public Insn { + } + bool IsMemAccessBar() const override; + bool IsMemAccess() const override; ++ bool IsVectorOp() const final; + + Operand *GetCallTargetOperand() const override { + ASSERT(IsCall(), "should be call"); +@@ -184,6 +185,46 @@ class AArch64Insn : public Insn { + void EmitCounter(const CG&, Emitter&) const; + }; + ++struct VectorRegSpec { ++ VectorRegSpec() : vecLane(-1), vecLaneMax(0) {} ++ ++ int16 vecLane; /* -1 for whole reg, 0 to 15 to specify individual lane */ ++ uint16 vecLaneMax; /* Maximum number of lanes for this vregister */ ++}; ++ ++class AArch64VectorInsn : public AArch64Insn { ++ public: ++ AArch64VectorInsn(MemPool &memPool, MOperator opc) ++ : AArch64Insn(memPool, opc), ++ regSpecList(localAlloc.Adapter()) { ++ regSpecList.clear(); ++ } ++ ++ ~AArch64VectorInsn() override = default; ++ ++ void ClearRegSpecList() { ++ regSpecList.clear(); ++ } ++ ++ VectorRegSpec *GetAndRemoveRegSpecFromList() { ++ //ASSERT(regSpecList.size() > 0, "regSpecList empty"); ++ if (regSpecList.size() == 0) { ++ VectorRegSpec *vecSpec = CG::GetCurCGFuncNoConst()->GetMemoryPool()->New(); ++ return vecSpec; ++ } ++ VectorRegSpec 
*ret = regSpecList.back(); ++ regSpecList.pop_back(); ++ return ret; ++ } ++ ++ void PushRegSpecEntry(VectorRegSpec *v) { ++ regSpecList.emplace(regSpecList.begin(), v); /* add at front */ ++ } ++ ++ private: ++ MapleVector regSpecList; ++}; ++ + class AArch64cleancallInsn : public AArch64Insn { + public: + AArch64cleancallInsn(MemPool &memPool, MOperator opc) diff --git a/Patches/a_isa.h b/Patches/a_isa.h new file mode 100644 index 0000000000000000000000000000000000000000..4573815f950716463cf549db8af9350b963980ba --- /dev/null +++ b/Patches/a_isa.h @@ -0,0 +1,34 @@ +diff --git a/src/mapleall/maple_be/include/cg/aarch64/aarch64_isa.h b/src/mapleall/maple_be/include/cg/aarch64/aarch64_isa.h +index e1e5fd7f..a1b3a603 100644 +--- a/src/mapleall/maple_be/include/cg/aarch64/aarch64_isa.h ++++ b/src/mapleall/maple_be/include/cg/aarch64/aarch64_isa.h +@@ -166,6 +166,7 @@ constexpr uint32 kLiteralLow12 = kMemLow12; + constexpr uint32 kPreInc = 0x20; + constexpr uint32 kPostInc = 0x40; + constexpr uint32 kLoadLiteral = 0x80; ++constexpr uint32 kVector = 0x100; + + class RegProp { + public: +@@ -242,6 +243,10 @@ class AArch64OpndProp : public OpndProp { + return static_cast(size); + } + ++ bool IsVectorOperand() const { ++ return regProp.GetDefUse() & kVector; ++ } ++ + void SetContainImm() { + isContainImm = true; + } +@@ -396,6 +401,10 @@ struct AArch64MD { + return properties & ISPARTDEF; + } + ++ bool IsVectorOp() const { ++ return properties & ISVECTOR; ++ } ++ + LatencyType GetLatencyType() const { + return latencyType; + } diff --git a/Patches/a_md.def b/Patches/a_md.def new file mode 100644 index 0000000000000000000000000000000000000000..7133a5dd09f5be821e2bb5ee63bc4e9baa206913 --- /dev/null +++ b/Patches/a_md.def @@ -0,0 +1,44 @@ +diff --git a/src/mapleall/maple_be/include/cg/aarch64/aarch64_md.def b/src/mapleall/maple_be/include/cg/aarch64/aarch64_md.def +index af949c9b..b9c5b94d 100644 +--- a/src/mapleall/maple_be/include/cg/aarch64/aarch64_md.def ++++ b/src/mapleall/maple_be/include/cg/aarch64/aarch64_md.def +@@ -416,6 +416,8 @@ DEFINE_MOP(MOP_hldr, {mopdReg16FD,mopdMem16S},ISLOAD|CANTHROW,kLtFLoad64,"ldr"," + DEFINE_MOP(MOP_sldr, {mopdReg32FD,mopdMem32S},ISLOAD|CANTHROW,kLtFLoadMany,"ldr","0,1",1) + /* MOP_dldr */ + DEFINE_MOP(MOP_dldr, {mopdReg64FD,mopdMem64S},ISLOAD|CANTHROW,kLtFLoadMany,"ldr","0,1",1) ++/* MOP_qldr */ ++DEFINE_MOP(MOP_qldr, {mopdReg128VD,mopdMem128S},ISLOAD|CANTHROW,kLtFLoadMany,"ldr","0,1",1) + + /* AArch64 LDP/LDPSW */ + /* MOP_wldp */ +@@ -604,6 +606,8 @@ DEFINE_MOP(MOP_xstr, {mopdReg64IS,mopdMem64D},ISSTORE|CANTHROW,kLtStore2,"str"," + DEFINE_MOP(MOP_sstr, {mopdReg32FS,mopdMem32D},ISSTORE|CANTHROW,kLtStore2,"str","0,1",1) + /* MOP_dstr -- Store Register SIMD/FP Double */ + DEFINE_MOP(MOP_dstr, {mopdReg64FS,mopdMem64D},ISSTORE|CANTHROW,kLtStore3plus,"str","0,1",1) ++/* MOP_qstr -- Store Register SIMD/FP Double */ ++DEFINE_MOP(MOP_qstr, {mopdReg128VS,mopdMem128D},ISSTORE|CANTHROW,kLtStore3plus,"str","0,1",1) + + /* AArch64 STP. 
*/
+ /* MOP_wstp */
+@@ -648,6 +652,21 @@ DEFINE_MOP(MOP_dmb_ishst, {}, HASRELEASE|ISDMB,kLtBranch, "dmb\tishst", "",1)
+ /* MOP_dmb_ish */
+ DEFINE_MOP(MOP_dmb_ish, {}, HASACQUIRE|HASRELEASE|ISDMB,kLtBranch, "dmb\tish", "",1)
+ 
++/* Neon simd, r-nonvector reg, u-64b vector reg, v-128b vector reg */
++DEFINE_MOP(MOP_vmovvi, {mopdReg128VD,mopdImm8},ISMOVE|ISVECTOR,kLtFpalu,"movi","0,1",1)
++DEFINE_MOP(MOP_vdupvr, {mopdReg128VD,mopdReg32IS},ISMOVE|ISVECTOR,kLtFpalu,"dup","0,1",1)
++DEFINE_MOP(MOP_vduprv, {mopdReg64FD,mopdReg128VS},ISMOVE|ISVECTOR,kLtFpalu,"dup","0,1",1)
++DEFINE_MOP(MOP_vextvvv, {mopdReg128VD,mopdReg128VS,mopdReg128VS,mopdImm8},ISVECTOR,kLtFpalu,"ext","0,1,2,3",1)
++DEFINE_MOP(MOP_vpaddvv, {mopdReg128VD,mopdReg128VS},ISVECTOR,kLtAlu,"uaddlp","0,1",1)
++DEFINE_MOP(MOP_vwmovru, {mopdReg32ID,mopdReg64VS},ISMOVE|ISVECTOR,kLtFpalu,"umov","0,1",1)
++DEFINE_MOP(MOP_vwmovrv, {mopdReg32ID,mopdReg128VS},ISMOVE|ISVECTOR,kLtFpalu,"umov","0,1",1)
++DEFINE_MOP(MOP_vinsvr, {mopdReg128VDS,mopdReg32IS},ISVECTOR,kLtFpalu,"ins","0,1",1)
++DEFINE_MOP(MOP_vmovuu, {mopdReg64VD,mopdReg64VS},ISMOVE|ISVECTOR,kLtFpalu,"mov","0,1",1)
++DEFINE_MOP(MOP_vmovvv, {mopdReg128VD,mopdReg128VS},ISMOVE|ISVECTOR,kLtFpalu,"mov","0,1",1)
++DEFINE_MOP(MOP_vrev16vv,{mopdReg128VD,mopdReg128VS},ISVECTOR,kLtFpalu,"rev16","0,1",1)
++DEFINE_MOP(MOP_vrev32vv,{mopdReg128VD,mopdReg128VS},ISVECTOR,kLtFpalu,"rev32","0,1",1)
++DEFINE_MOP(MOP_vrev64vv,{mopdReg128VD,mopdReg128VS},ISVECTOR,kLtFpalu,"rev64","0,1",1)
++
+ /*
+  * MOP_clinit
+  * will be emit to four instructions in a row:
diff --git a/Patches/a_memlayout.cpp b/Patches/a_memlayout.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a5a231710ccc5de9fffc810119138df897b1a9ab
--- /dev/null
+++ b/Patches/a_memlayout.cpp
@@ -0,0 +1,14 @@
+diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_memlayout.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_memlayout.cpp
+index ff138448..e96b5742 100644
+--- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_memlayout.cpp
++++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_memlayout.cpp
+@@ -194,6 +194,9 @@ void AArch64MemLayout::LayoutFormalParams() {
+     bool noStackPara = false;
+     MIRType *ty = mirFunction->GetNthParamType(i);
+     uint32 ptyIdx = ty->GetTypeIndex();
++    if (GetPrimTypeLanes(ty->GetPrimType()) > 0) {
++      segArgsRegPassed.SetContainVector();
++    }
+     parmLocator.LocateNextParm(*ty, ploc, i == 0);
+     if (ploc.reg0 != kRinvalid) {  /* register */
+       symLoc->SetRegisters(ploc.reg0, ploc.reg1, ploc.reg2, ploc.reg3);
diff --git a/Patches/a_operand.cpp b/Patches/a_operand.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d12f79b9bf17bbe0bb439203f934c46a843be2f7
--- /dev/null
+++ b/Patches/a_operand.cpp
@@ -0,0 +1,47 @@
+diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_operand.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_operand.cpp
+index f77bbea1..99643da9 100644
+--- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_operand.cpp
++++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_operand.cpp
+@@ -84,11 +84,28 @@ void AArch64RegOperand::Emit(Emitter &emitter, const OpndProp *opndProp) const {
+       break;
+     }
+     case kRegTyFloat: {
+-      ASSERT((opndSize == k8BitSize || opndSize == k16BitSize || opndSize == k32BitSize || opndSize == k64BitSize),
+-             "illegal register size");
+-      /* FP reg cannot be reffield. 8~0, 16~1, 32~2, 64~3. 8 is 1000b, has 3 zero.
*/ +- uint32 regSet = __builtin_ctz(opndSize) - 3; +- emitter.Emit(AArch64CG::intRegNames[regSet][regNO]); ++ ASSERT((opndSize == k8BitSize || opndSize == k16BitSize || opndSize == k32BitSize || opndSize == k64BitSize ++ || opndSize == k128BitSize), "illegal register size"); ++ int32 laneSize = GetVecLaneSize(); ++ if (static_cast(opndProp)->IsVectorOperand() && laneSize != 0) { ++ std::string width; ++ if (opndSize == k128BitSize) { ++ width = laneSize == 16 ? "b" : (laneSize == 8 ? "h" : (laneSize == 4 ? "s" : "d")); ++ } else if (opndSize == k64BitSize) { ++ width = laneSize == 8 ? "b" : (laneSize == 4 ? "h" : "s"); ++ } ++ int16 lanePos = GetVecLanePosition(); ++ emitter.Emit(AArch64CG::vectorRegNames[regNO]); ++ if (lanePos == -1) { ++ emitter.Emit("." + std::to_string(laneSize) + width); ++ } else { ++ emitter.Emit("." + width + "[" + std::to_string(lanePos) + "]"); ++ } ++ } else { ++ /* FP reg cannot be reffield. 8~0, 16~1, 32~2, 64~3. 8 is 1000b, has 3 zero. */ ++ uint32 regSet = __builtin_ctz(opndSize) - 3; ++ emitter.Emit(AArch64CG::intRegNames[regSet][regNO]); ++ } + break; + } + default: +@@ -213,7 +230,7 @@ void AArch64MemOperand::Emit(Emitter &emitter, const OpndProp *opndProp) const { + #if DEBUG + const AArch64MD *md = &AArch64CG::kMd[emitter.GetCurrentMOP()]; + bool isLDSTpair = md->IsLoadStorePair(); +- ASSERT(md->Is64Bit() || md->GetOperandSize() <= k32BitSize, "unexpected opnd size"); ++ ASSERT(md->Is64Bit() || md->GetOperandSize() <= k32BitSize || md->GetOperandSize() == k128BitSize, "unexpected opnd size"); + #endif + if (addressMode == AArch64MemOperand::kAddrModeBOi) { + emitter.Emit("["); diff --git a/Patches/a_operand.h b/Patches/a_operand.h new file mode 100644 index 0000000000000000000000000000000000000000..621d77f1640ee7cb9993d591b7dc88d7de26946a --- /dev/null +++ b/Patches/a_operand.h @@ -0,0 +1,77 @@ +diff --git a/src/mapleall/maple_be/include/cg/aarch64/aarch64_operand.h b/src/mapleall/maple_be/include/cg/aarch64/aarch64_operand.h +index a8fbf05a..6d79c939 100644 +--- a/src/mapleall/maple_be/include/cg/aarch64/aarch64_operand.h ++++ b/src/mapleall/maple_be/include/cg/aarch64/aarch64_operand.h +@@ -89,6 +89,23 @@ class AArch64RegOperand : public RegOperand { + return memPool.Clone(*this); + } + ++ void SetVecLanePosition(int32 pos) { ++ vecLane = pos; ++ } ++ ++ int32 GetVecLanePosition() const { ++ return vecLane; ++ } ++ ++ void SetVecLaneSize(uint32 size) { ++ vecLaneSize = size; ++ } ++ ++ uint32 GetVecLaneSize() const { ++ return vecLaneSize; ++ } ++ ++ + bool operator==(const AArch64RegOperand &opnd) const; + + bool operator<(const AArch64RegOperand &opnd) const; +@@ -110,8 +127,11 @@ class AArch64RegOperand : public RegOperand { + private: + static AArch64RegOperand zero64; + static AArch64RegOperand zero32; ++ + bool isRefField = false; + uint32 flag; ++ int16 vecLane = -1; /* -1 for whole reg, 0 to 15 to specify each lane one at a time */ ++ uint16 vecLaneSize = 0; /* Number of lanes */ + }; + + /* +@@ -664,15 +684,16 @@ class AArch64MemOperand : public MemOperand { + /* Returns N where alignment == 2^N */ + static int32 GetImmediateOffsetAlignment(uint32 dSize) { + ASSERT(dSize >= k8BitSize, "error val:dSize"); +- ASSERT(dSize <= k64BitSize, "error val:dSize"); ++ ASSERT(dSize <= k128BitSize, "error val:dSize"); + ASSERT((dSize & (dSize - 1)) == 0, "error val:dSize"); + /* dSize==8: 0, dSize==16 : 1, dSize==32: 2, dSize==64: 3 */ + return __builtin_ctz(dSize) - kBaseOffsetAlignment; + } + + static int32 GetMaxPIMM(uint32 dSize) { ++ dSize = dSize > 
k64BitSize ? k64BitSize : dSize; + ASSERT(dSize >= k8BitSize, "error val:dSize"); +- ASSERT(dSize <= k64BitSize, "error val:dSize"); ++ ASSERT(dSize <= k128BitSize, "error val:dSize"); + ASSERT((dSize & (dSize - 1)) == 0, "error val:dSize"); + int32 alignment = GetImmediateOffsetAlignment(dSize); + /* alignment is between kAlignmentOf8Bit and kAlignmentOf64Bit */ +@@ -683,7 +704,7 @@ class AArch64MemOperand : public MemOperand { + + bool IsOffsetMisaligned(uint32 dSize) const { + ASSERT(dSize >= k8BitSize, "error val:dSize"); +- ASSERT(dSize <= k64BitSize, "error val:dSize"); ++ ASSERT(dSize <= k128BitSize, "error val:dSize"); + ASSERT((dSize & (dSize - 1)) == 0, "error val:dSize"); + if (dSize == k8BitSize || addrMode != kAddrModeBOi) { + return false; +@@ -705,7 +726,7 @@ class AArch64MemOperand : public MemOperand { + + static bool IsPIMMOffsetOutOfRange(int32 offset, uint32 dSize) { + ASSERT(dSize >= k8BitSize, "error val:dSize"); +- ASSERT(dSize <= k64BitSize, "error val:dSize"); ++ ASSERT(dSize <= k128BitSize, "error val:dSize"); + ASSERT((dSize & (dSize - 1)) == 0, "error val:dSize"); + return (offset < 0 || offset > GetMaxPIMM(dSize)); + } diff --git a/Patches/a_opnd.def b/Patches/a_opnd.def new file mode 100644 index 0000000000000000000000000000000000000000..daafffea2dad1bcd9fc1360d504c28b14a92569e --- /dev/null +++ b/Patches/a_opnd.def @@ -0,0 +1,61 @@ +diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_opnd.def b/src/mapleall/maple_be/src/cg/aarch64/aarch64_opnd.def +index 1c638228..74693422 100644 +--- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_opnd.def ++++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_opnd.def +@@ -28,6 +28,12 @@ AArch64OpndProp mopdF32RegDestSrc = {Operand::kOpdRegister, {kRegTyFloat, kAllRe + AArch64OpndProp mopdF64RegSrc = {Operand::kOpdRegister, {kRegTyFloat, kAllRegNum, kRegPropUse}, 64}; + AArch64OpndProp mopdF64RegDest = {Operand::kOpdRegister, {kRegTyFloat, kAllRegNum, kRegPropDef}, 64}; + AArch64OpndProp mopdF64RegDestSrc = {Operand::kOpdRegister, {kRegTyFloat, kAllRegNum, kRegPropDef | kRegPropUse}, 64}; ++AArch64OpndProp mopdV128RegSrc = {Operand::kOpdRegister, {kRegTyFloat, kAllRegNum, kRegPropUse | kVector}, 128}; ++AArch64OpndProp mopdV128RegDest = {Operand::kOpdRegister, {kRegTyFloat, kAllRegNum, kRegPropDef | kVector}, 128}; ++AArch64OpndProp mopdV128RegDestSrc = {Operand::kOpdRegister, {kRegTyFloat, kAllRegNum, kRegPropDef | kRegPropUse | kVector}, 128}; ++AArch64OpndProp mopdV64RegSrc = {Operand::kOpdRegister, {kRegTyFloat, kAllRegNum, kRegPropUse | kVector}, 64}; ++AArch64OpndProp mopdV64RegDest = {Operand::kOpdRegister, {kRegTyFloat, kAllRegNum, kRegPropDef | kVector}, 64}; ++AArch64OpndProp mopdV64RegDestSrc = {Operand::kOpdRegister, {kRegTyFloat, kAllRegNum, kRegPropDef | kVector}, 64}; + AArch64OpndProp mopdIntImm4Src = {Operand::kOpdImmediate, {kRegTyUndef, kAllRegNum, kRegPropUse}, 4}; + AArch64OpndProp mopdIntImm5Src = {Operand::kOpdImmediate, {kRegTyUndef, kAllRegNum, kRegPropUse}, 5}; + AArch64OpndProp mopdIntImm6Src = {Operand::kOpdImmediate, {kRegTyUndef, kAllRegNum, kRegPropUse}, 6}; +@@ -135,6 +141,7 @@ AArch64ImmOpndProp mopdMemPair32Src = {Operand::kOpdMem, {kRegTyUndef, kAllRegNu + AArch64OpndProp mopdMem32SrcH = {Operand::kOpdMem, {kRegTyUndef, kAllRegNum, kRegPropUse | kMemLow12}, 16}; + AArch64OpndProp mopdMem32SrcL = {Operand::kOpdMem, {kRegTyUndef, kAllRegNum, kRegPropUse | kMemLow12}, 16}; + AArch64ImmOpndProp mopdMem64Src = {Operand::kOpdMem, {kRegTyUndef, kAllRegNum, kRegPropUse}, 64, StrLdr64ImmValid}; 
++AArch64OpndProp mopdMem128Src = {Operand::kOpdMem, {kRegTyUndef, kAllRegNum, kRegPropUse}, 128}; + AArch64ImmOpndProp mopdMemPair64Src = {Operand::kOpdMem, {kRegTyUndef, kAllRegNum, kRegPropUse}, 64, StrLdr64PairImmValid}; + AArch64OpndProp mopdMem64SrcL = {Operand::kOpdMem, {kRegTyUndef, kAllRegNum, kRegPropUse | kMemLow12}, 12}; + +@@ -142,6 +149,7 @@ AArch64ImmOpndProp mopdMem8Dest = {Operand::kOpdMem, {kRegTyUndef, kAllRegNum, + AArch64ImmOpndProp mopdMem16Dest = {Operand::kOpdMem, {kRegTyUndef, kAllRegNum, kRegPropDef }, 16, StrLdr16ImmValid}; + AArch64ImmOpndProp mopdMem32Dest = {Operand::kOpdMem, {kRegTyUndef, kAllRegNum, kRegPropDef }, 32, StrLdr32ImmValid}; + AArch64ImmOpndProp mopdMem64Dest = {Operand::kOpdMem, {kRegTyUndef, kAllRegNum, kRegPropDef }, 64, StrLdr64ImmValid}; ++AArch64OpndProp mopdMem128Dest = {Operand::kOpdMem, {kRegTyUndef, kAllRegNum, kRegPropDef}, 128}; + AArch64ImmOpndProp mopdMemPair32Dest = {Operand::kOpdMem, {kRegTyUndef, kAllRegNum, kRegPropDef }, 32, StrLdr32PairImmValid}; + AArch64ImmOpndProp mopdMemPair64Dest = {Operand::kOpdMem, {kRegTyUndef, kAllRegNum, kRegPropDef }, 64, StrLdr64PairImmValid}; + +@@ -193,6 +201,12 @@ AArch64OpndProp *mopdReg32FDS = &mopdF32RegDestSrc; + AArch64OpndProp *mopdReg64FS = &mopdF64RegSrc; + AArch64OpndProp *mopdReg64FD = &mopdF64RegDest; + AArch64OpndProp *mopdReg64FDS = &mopdF64RegDestSrc; ++AArch64OpndProp *mopdReg128VS = &mopdV128RegSrc; ++AArch64OpndProp *mopdReg128VD = &mopdV128RegDest; ++AArch64OpndProp *mopdReg128VDS = &mopdV128RegDestSrc; ++AArch64OpndProp *mopdReg64VS = &mopdV64RegSrc; ++AArch64OpndProp *mopdReg64VD = &mopdV64RegDest; ++AArch64OpndProp *mopdReg64VDS = &mopdV64RegDestSrc; + AArch64OpndProp *mopdMem = &mopdMem32Src; + AArch64OpndProp *mopdMem8S = &mopdMem8Src; + AArch64OpndProp *mopdMem16S = &mopdMem16Src; +@@ -202,12 +216,14 @@ AArch64OpndProp *mopdMem32SL = &mopdMem32SrcL; + AArch64OpndProp *mopdMem32SH = &mopdMem32SrcH; + AArch64OpndProp *mopdMem64PS = &mopdMemPair64Src; + AArch64OpndProp *mopdMem64S = &mopdMem64Src; ++AArch64OpndProp *mopdMem128S = &mopdMem128Src; + AArch64OpndProp *mopdMem64SL = &mopdMem64SrcL; + AArch64OpndProp *mopdMem8D = &mopdMem8Dest; + AArch64OpndProp *mopdMem16D = &mopdMem16Dest; + AArch64OpndProp *mopdMem32D = &mopdMem32Dest; + AArch64OpndProp *mopdMem32PD = &mopdMemPair32Dest; + AArch64OpndProp *mopdMem64D = &mopdMem64Dest; ++AArch64OpndProp *mopdMem128D = &mopdMem128Dest; + AArch64OpndProp *mopdMem64PD = &mopdMemPair64Dest; + AArch64OpndProp *mopdMem32SPRE = &mopdMem32SrcPre; + AArch64OpndProp *mopdMem32SPOST = &mopdMem32SrcPost; diff --git a/Patches/a_peep.cpp b/Patches/a_peep.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e1bc0fd077dd5a26ed6a5c0cbf247b8f97ef826d --- /dev/null +++ b/Patches/a_peep.cpp @@ -0,0 +1,15 @@ +diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_peep.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_peep.cpp +index 81cc38f8..ca22f12a 100644 +--- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_peep.cpp ++++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_peep.cpp +@@ -75,7 +75,9 @@ void AArch64PeepHole::Run(BB &bb, Insn &insn) { + case MOP_wmovrr: + case MOP_xmovrr: + case MOP_xvmovs: +- case MOP_xvmovd: { ++ case MOP_xvmovd: ++ case MOP_vmovuu: ++ case MOP_vmovvv: { + (static_cast(optimizations[kRemoveMovingtoSameRegOpt]))->Run(bb, insn); + break; + } diff --git a/Patches/cgfunc.cpp b/Patches/cgfunc.cpp new file mode 100644 index 
0000000000000000000000000000000000000000..3009fb4884519e8b0f5089257e257f7fe10cab73 --- /dev/null +++ b/Patches/cgfunc.cpp @@ -0,0 +1,33 @@ +diff --git a/src/mapleall/maple_be/src/cg/cgfunc.cpp b/src/mapleall/maple_be/src/cg/cgfunc.cpp +index 33225735..ca51b564 100644 +--- a/src/mapleall/maple_be/src/cg/cgfunc.cpp ++++ b/src/mapleall/maple_be/src/cg/cgfunc.cpp +@@ -375,6 +375,28 @@ Operand *HandleIntrinOp(const BaseNode &parent, BaseNode &expr, CGFunc &cgFunc) + case INTRN_C_ctz32: + case INTRN_C_ctz64: + return cgFunc.SelectCctz(intrinsicopNode); ++ case INTRN_vector_from_scalar_v4i32: ++ case INTRN_vector_from_scalar_v16u8: ++ return cgFunc.SelectVectorFromScalar(intrinsicopNode); ++ case INTRN_vector_merge_v16u8: ++ return cgFunc.SelectVectorMerge(intrinsicopNode); ++ case INTRN_vector_store_v4i32: ++ case INTRN_vector_store_v16u8: ++ return cgFunc.SelectVectorStore(intrinsicopNode); ++ case INTRN_vector_get_high_v2u64: ++ return cgFunc.SelectVectorGetHigh(intrinsicopNode); ++ case INTRN_vector_get_low_v2u64: ++ return cgFunc.SelectVectorGetLow(intrinsicopNode); ++ case INTRN_vector_get_element_v2u32: ++ case INTRN_vector_get_element_v4u32: ++ return cgFunc.SelectVectorGetElement(intrinsicopNode); ++ case INTRN_vector_pairwise_add_v8u16: ++ case INTRN_vector_pairwise_add_v4u32: ++ return cgFunc.SelectVectorPairwiseAdd(intrinsicopNode); ++ case INTRN_vector_set_element_v4u32: ++ return cgFunc.SelectVectorSetElement(intrinsicopNode); ++ case INTRN_vector_reverse_v16u8: ++ return cgFunc.SelectVectorReverse(intrinsicopNode, 32); + default: + ASSERT(false, "Should not reach here."); + return nullptr; diff --git a/Patches/cgfunc.h b/Patches/cgfunc.h new file mode 100644 index 0000000000000000000000000000000000000000..080947802f0f62fcdcebf1a68b070f935f83da83 --- /dev/null +++ b/Patches/cgfunc.h @@ -0,0 +1,45 @@ +diff --git a/src/mapleall/maple_be/include/cg/cgfunc.h b/src/mapleall/maple_be/include/cg/cgfunc.h +index c1ed5141..11178972 100644 +--- a/src/mapleall/maple_be/include/cg/cgfunc.h ++++ b/src/mapleall/maple_be/include/cg/cgfunc.h +@@ -259,6 +259,18 @@ class CGFunc { + virtual Operand *CreateZeroOperand(PrimType primType) = 0; + + virtual bool IsFrameReg(const RegOperand &opnd) const = 0; ++ ++ /* For Neon intrinsics */ ++ virtual RegOperand *SelectVectorFromScalar(IntrinsicopNode &intrnNode) = 0; ++ virtual Operand *SelectVectorStore(IntrinsicopNode &intrnNode) = 0; ++ virtual RegOperand *SelectVectorMerge(IntrinsicopNode &intrnNode) = 0; ++ virtual RegOperand *SelectVectorGetHigh(IntrinsicopNode &intrnNode) = 0; ++ virtual RegOperand *SelectVectorGetLow(IntrinsicopNode &intrnNode) = 0; ++ virtual RegOperand *SelectVectorGetElement(IntrinsicopNode &intrnNode) = 0; ++ virtual RegOperand *SelectVectorPairwiseAdd(IntrinsicopNode &intrnNode) = 0; ++ virtual RegOperand *SelectVectorSetElement(IntrinsicopNode &intrnNode) = 0; ++ virtual RegOperand *SelectVectorReverse(IntrinsicopNode &intrnNode, uint32 size) = 0; ++ + /* For ebo issue. 
*/ + virtual Operand *GetTrueOpnd() { + return nullptr; +@@ -303,7 +315,7 @@ class CGFunc { + if (size < k4ByteSize) { + size = k4ByteSize; + } +- ASSERT(size == k4ByteSize || size == k8ByteSize, "check size"); ++ ASSERT(size == k4ByteSize || size == k8ByteSize || size == k16ByteSize, "check size"); + #endif + new (&vRegTable[vRegCount]) VirtualRegNode(regType, size); + return vRegCount++; +@@ -331,6 +343,12 @@ class CGFunc { + return kRegTyInt; + case PTY_f32: + case PTY_f64: ++ case PTY_v2u32: ++ case PTY_v4i32: ++ case PTY_v4u32: ++ case PTY_v16u8: ++ case PTY_v2u64: ++ case PTY_v8u16: + return kRegTyFloat; + default: + ASSERT(false, "Unexpected pty"); diff --git a/Patches/insn.h b/Patches/insn.h new file mode 100644 index 0000000000000000000000000000000000000000..8471123a35e49a4c1ae9f4b7ff824eb8aa05e718 --- /dev/null +++ b/Patches/insn.h @@ -0,0 +1,15 @@ +diff --git a/src/mapleall/maple_be/include/cg/insn.h b/src/mapleall/maple_be/include/cg/insn.h +index 90d17ac0..d5d5e0df 100644 +--- a/src/mapleall/maple_be/include/cg/insn.h ++++ b/src/mapleall/maple_be/include/cg/insn.h +@@ -361,6 +361,10 @@ class Insn { + return false; + } + ++ virtual bool IsVectorOp() const { ++ return false; ++ } ++ + virtual Operand *GetCallTargetOperand() const { + return nullptr; + } diff --git a/Patches/isa.h b/Patches/isa.h new file mode 100644 index 0000000000000000000000000000000000000000..df28b14f7f9edeab02779ebe4355c82afef1309e --- /dev/null +++ b/Patches/isa.h @@ -0,0 +1,20 @@ +diff --git a/src/mapleall/maple_be/include/cg/isa.h b/src/mapleall/maple_be/include/cg/isa.h +index b826464a..f04c87b1 100644 +--- a/src/mapleall/maple_be/include/cg/isa.h ++++ b/src/mapleall/maple_be/include/cg/isa.h +@@ -56,6 +56,7 @@ enum MopProperty : maple::uint8 { + kPropIsUnCondBr, + kPropIsCondBr, + kPropHasLoop, ++ kPropIsVectorOp, + }; + + #define ISMOVE (1ULL << kPropIsMove) +@@ -82,6 +83,7 @@ enum MopProperty : maple::uint8 { + #define ISUNCONDBRANCH (1ULL << kPropIsUnCondBr) + #define ISCONDBRANCH (1ULL << kPropIsCondBr) + #define HASLOOP (1ULL << kPropHasLoop) ++#define ISVECTOR (1ULL << kPropIsVectorOp) + + using regno_t = uint32_t; + diff --git a/Patches/lower.cpp b/Patches/lower.cpp new file mode 100644 index 0000000000000000000000000000000000000000..615316713ac03f74737dea0172997cbfda9add6a --- /dev/null +++ b/Patches/lower.cpp @@ -0,0 +1,14 @@ +diff --git a/src/mapleall/maple_be/src/be/lower.cpp b/src/mapleall/maple_be/src/be/lower.cpp +index 3187ec95..2a812481 100644 +--- a/src/mapleall/maple_be/src/be/lower.cpp ++++ b/src/mapleall/maple_be/src/be/lower.cpp +@@ -2897,6 +2897,9 @@ BaseNode *CGLowerer::LowerIntrinsicop(const BaseNode &parent, IntrinsicopNode &i + opnd->op == OP_conststr || opnd->op == OP_conststr16) ? 
1 : 0; + return mirModule.GetMIRBuilder()->CreateIntConst(val, PTY_i32); + } ++ if (intrinDesc.IsVectorOp()) { ++ return &intrinNode; ++ } + CHECK_FATAL(false, "unexpected intrinsic type in CGLowerer::LowerIntrinsicop"); + return &intrinNode; + } diff --git a/Patches/m20.patches b/Patches/m20.patches new file mode 100644 index 0000000000000000000000000000000000000000..e8444af35b5edcf7456b4b4544a2e0c74a39c467 --- /dev/null +++ b/Patches/m20.patches @@ -0,0 +1,1119 @@ +diff --git a/src/mapleall/maple_be/include/cg/aarch64/aarch64_cg.h b/src/mapleall/maple_be/include/cg/aarch64/aarch64_cg.h +index f9689e68..61a1606e 100644 +--- a/src/mapleall/maple_be/include/cg/aarch64/aarch64_cg.h ++++ b/src/mapleall/maple_be/include/cg/aarch64/aarch64_cg.h +@@ -146,6 +146,7 @@ class AArch64CG : public CG { + kV64List + }; + static std::array, kIntRegTypeNum> intRegNames; ++ static std::array vectorRegNames; + + private: + const std::vector &ehExclusiveNameVec; +@@ -155,4 +156,4 @@ class AArch64CG : public CG { + }; + } /* namespace maplebe */ + +-#endif /* MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_CG_H */ +\ No newline at end of file ++#endif /* MAPLEBE_INCLUDE_CG_AARCH64_AARCH64_CG_H */ +diff --git a/src/mapleall/maple_be/include/cg/aarch64/aarch64_cgfunc.h b/src/mapleall/maple_be/include/cg/aarch64/aarch64_cgfunc.h +index c544a206..e7410750 100644 +--- a/src/mapleall/maple_be/include/cg/aarch64/aarch64_cgfunc.h ++++ b/src/mapleall/maple_be/include/cg/aarch64/aarch64_cgfunc.h +@@ -229,6 +229,16 @@ class AArch64CGFunc : public CGFunc { + LabelOperand &CreateFuncLabelOperand(const MIRSymbol &func); + uint32 GetAggCopySize(uint32 offset1, uint32 offset2, uint32 alignment); + ++ RegOperand *SelectVectorFromScalar(IntrinsicopNode &intrnNode) override; ++ Operand *SelectVectorStore(IntrinsicopNode &intrnNode) override; ++ RegOperand *SelectVectorMerge(IntrinsicopNode &intrnNode) override; ++ RegOperand *SelectVectorGetHigh(IntrinsicopNode &intrnNode) override; ++ RegOperand *SelectVectorGetLow(IntrinsicopNode &intrnNode) override; ++ RegOperand *SelectVectorGetElement(IntrinsicopNode &intrnNode) override; ++ RegOperand *SelectVectorPairwiseAdd(IntrinsicopNode &intrnNode) override; ++ RegOperand *SelectVectorSetElement(IntrinsicopNode &intrnNode) override; ++ RegOperand *SelectVectorReverse(IntrinsicopNode &intrnNode, uint32 size) override; ++ + AArch64ImmOperand &CreateImmOperand(PrimType ptyp, int64 val) override { + return CreateImmOperand(val, GetPrimTypeBitSize(ptyp), IsSignedInteger(ptyp)); + } +diff --git a/src/mapleall/maple_be/include/cg/aarch64/aarch64_insn.h b/src/mapleall/maple_be/include/cg/aarch64/aarch64_insn.h +index 620158ea..0362bc5c 100644 +--- a/src/mapleall/maple_be/include/cg/aarch64/aarch64_insn.h ++++ b/src/mapleall/maple_be/include/cg/aarch64/aarch64_insn.h +@@ -124,6 +124,7 @@ class AArch64Insn : public Insn { + } + bool IsMemAccessBar() const override; + bool IsMemAccess() const override; ++ bool IsVectorOp() const final; + + Operand *GetCallTargetOperand() const override { + ASSERT(IsCall(), "should be call"); +@@ -184,6 +185,46 @@ class AArch64Insn : public Insn { + void EmitCounter(const CG&, Emitter&) const; + }; + ++struct VectorRegSpec { ++ VectorRegSpec() : vecLane(-1), vecLaneMax(0) {} ++ ++ int16 vecLane; /* -1 for whole reg, 0 to 15 to specify individual lane */ ++ uint16 vecLaneMax; /* Maximum number of lanes for this vregister */ ++}; ++ ++class AArch64VectorInsn : public AArch64Insn { ++ public: ++ AArch64VectorInsn(MemPool &memPool, MOperator opc) ++ : AArch64Insn(memPool, opc), 
++ regSpecList(localAlloc.Adapter()) { ++ regSpecList.clear(); ++ } ++ ++ ~AArch64VectorInsn() override = default; ++ ++ void ClearRegSpecList() { ++ regSpecList.clear(); ++ } ++ ++ VectorRegSpec *GetAndRemoveRegSpecFromList() { ++ //ASSERT(regSpecList.size() > 0, "regSpecList empty"); ++ if (regSpecList.size() == 0) { ++ VectorRegSpec *vecSpec = CG::GetCurCGFuncNoConst()->GetMemoryPool()->New(); ++ return vecSpec; ++ } ++ VectorRegSpec *ret = regSpecList.back(); ++ regSpecList.pop_back(); ++ return ret; ++ } ++ ++ void PushRegSpecEntry(VectorRegSpec *v) { ++ regSpecList.emplace(regSpecList.begin(), v); /* add at front */ ++ } ++ ++ private: ++ MapleVector regSpecList; ++}; ++ + class AArch64cleancallInsn : public AArch64Insn { + public: + AArch64cleancallInsn(MemPool &memPool, MOperator opc) +diff --git a/src/mapleall/maple_be/include/cg/aarch64/aarch64_isa.h b/src/mapleall/maple_be/include/cg/aarch64/aarch64_isa.h +index e1e5fd7f..a1b3a603 100644 +--- a/src/mapleall/maple_be/include/cg/aarch64/aarch64_isa.h ++++ b/src/mapleall/maple_be/include/cg/aarch64/aarch64_isa.h +@@ -166,6 +166,7 @@ constexpr uint32 kLiteralLow12 = kMemLow12; + constexpr uint32 kPreInc = 0x20; + constexpr uint32 kPostInc = 0x40; + constexpr uint32 kLoadLiteral = 0x80; ++constexpr uint32 kVector = 0x100; + + class RegProp { + public: +@@ -242,6 +243,10 @@ class AArch64OpndProp : public OpndProp { + return static_cast(size); + } + ++ bool IsVectorOperand() const { ++ return regProp.GetDefUse() & kVector; ++ } ++ + void SetContainImm() { + isContainImm = true; + } +@@ -396,6 +401,10 @@ struct AArch64MD { + return properties & ISPARTDEF; + } + ++ bool IsVectorOp() const { ++ return properties & ISVECTOR; ++ } ++ + LatencyType GetLatencyType() const { + return latencyType; + } +diff --git a/src/mapleall/maple_be/include/cg/aarch64/aarch64_md.def b/src/mapleall/maple_be/include/cg/aarch64/aarch64_md.def +index af949c9b..b9c5b94d 100644 +--- a/src/mapleall/maple_be/include/cg/aarch64/aarch64_md.def ++++ b/src/mapleall/maple_be/include/cg/aarch64/aarch64_md.def +@@ -416,6 +416,8 @@ DEFINE_MOP(MOP_hldr, {mopdReg16FD,mopdMem16S},ISLOAD|CANTHROW,kLtFLoad64,"ldr"," + DEFINE_MOP(MOP_sldr, {mopdReg32FD,mopdMem32S},ISLOAD|CANTHROW,kLtFLoadMany,"ldr","0,1",1) + /* MOP_dldr */ + DEFINE_MOP(MOP_dldr, {mopdReg64FD,mopdMem64S},ISLOAD|CANTHROW,kLtFLoadMany,"ldr","0,1",1) ++/* MOP_qldr */ ++DEFINE_MOP(MOP_qldr, {mopdReg128VD,mopdMem128S},ISLOAD|CANTHROW,kLtFLoadMany,"ldr","0,1",1) + + /* AArch64 LDP/LDPSW */ + /* MOP_wldp */ +@@ -604,6 +606,8 @@ DEFINE_MOP(MOP_xstr, {mopdReg64IS,mopdMem64D},ISSTORE|CANTHROW,kLtStore2,"str"," + DEFINE_MOP(MOP_sstr, {mopdReg32FS,mopdMem32D},ISSTORE|CANTHROW,kLtStore2,"str","0,1",1) + /* MOP_dstr -- Store Register SIMD/FP Double */ + DEFINE_MOP(MOP_dstr, {mopdReg64FS,mopdMem64D},ISSTORE|CANTHROW,kLtStore3plus,"str","0,1",1) ++/* MOP_qstr -- Store Register SIMD/FP Double */ ++DEFINE_MOP(MOP_qstr, {mopdReg128VS,mopdMem128D},ISSTORE|CANTHROW,kLtStore3plus,"str","0,1",1) + + /* AArch64 STP. 
*/ + /* MOP_wstp */ +@@ -648,6 +652,21 @@ DEFINE_MOP(MOP_dmb_ishst, {}, HASRELEASE|ISDMB,kLtBranch, "dmb\tishst", "",1) + /* MOP_dmb_ish */ + DEFINE_MOP(MOP_dmb_ish, {}, HASACQUIRE|HASRELEASE|ISDMB,kLtBranch, "dmb\tish", "",1) + ++/* Neon simd, r-nonvector reg, u-64b vector reg, v-128b vector reg */ ++DEFINE_MOP(MOP_vmovvi, {mopdReg128VD,mopdImm8},ISMOVE|ISVECTOR,kLtFpalu,"movi","0,1",1) ++DEFINE_MOP(MOP_vdupvr, {mopdReg128VD,mopdReg32IS},ISMOVE|ISVECTOR,kLtFpalu,"dup","0,1",1) ++DEFINE_MOP(MOP_vduprv, {mopdReg64FD,mopdReg128VS},ISMOVE|ISVECTOR,kLtFpalu,"dup","0,1",1) ++DEFINE_MOP(MOP_vextvvv, {mopdReg128VD,mopdReg128VS,mopdReg128VS,mopdImm8},ISVECTOR,kLtFpalu,"ext","0,1,2,3",1) ++DEFINE_MOP(MOP_vpaddvv, {mopdReg128VD,mopdReg128VS},ISVECTOR,kLtAlu,"uaddlp","0,1",1) ++DEFINE_MOP(MOP_vwmovru, {mopdReg32ID,mopdReg64VS},ISMOVE|ISVECTOR,kLtFpalu,"umov","0,1",1) ++DEFINE_MOP(MOP_vwmovrv, {mopdReg32ID,mopdReg128VS},ISMOVE|ISVECTOR,kLtFpalu,"umov","0,1",1) ++DEFINE_MOP(MOP_vinsvr, {mopdReg128VDS,mopdReg32IS},ISVECTOR,kLtFpalu,"ins","0,1",1) ++DEFINE_MOP(MOP_vmovuu, {mopdReg64VD,mopdReg64VS},ISMOVE|ISVECTOR,kLtFpalu,"mov","0,1",1) ++DEFINE_MOP(MOP_vmovvv, {mopdReg128VD,mopdReg128VS},ISMOVE|ISVECTOR,kLtFpalu,"mov","0,1",1) ++DEFINE_MOP(MOP_vrev16vv,{mopdReg128VD,mopdReg128VS},ISVECTOR,kLtFpalu,"rev16","0,1",1) ++DEFINE_MOP(MOP_vrev32vv,{mopdReg128VD,mopdReg128VS},ISVECTOR,kLtFpalu,"rev32","0,1",1) ++DEFINE_MOP(MOP_vrev64vv,{mopdReg128VD,mopdReg128VS},ISVECTOR,kLtFpalu,"rev64","0,1",1) ++ + /* + * MOP_clinit + * will be emit to four instructions in a row: +diff --git a/src/mapleall/maple_be/include/cg/aarch64/aarch64_operand.h b/src/mapleall/maple_be/include/cg/aarch64/aarch64_operand.h +index a8fbf05a..6d79c939 100644 +--- a/src/mapleall/maple_be/include/cg/aarch64/aarch64_operand.h ++++ b/src/mapleall/maple_be/include/cg/aarch64/aarch64_operand.h +@@ -89,6 +89,23 @@ class AArch64RegOperand : public RegOperand { + return memPool.Clone(*this); + } + ++ void SetVecLanePosition(int32 pos) { ++ vecLane = pos; ++ } ++ ++ int32 GetVecLanePosition() const { ++ return vecLane; ++ } ++ ++ void SetVecLaneSize(uint32 size) { ++ vecLaneSize = size; ++ } ++ ++ uint32 GetVecLaneSize() const { ++ return vecLaneSize; ++ } ++ ++ + bool operator==(const AArch64RegOperand &opnd) const; + + bool operator<(const AArch64RegOperand &opnd) const; +@@ -110,8 +127,11 @@ class AArch64RegOperand : public RegOperand { + private: + static AArch64RegOperand zero64; + static AArch64RegOperand zero32; ++ + bool isRefField = false; + uint32 flag; ++ int16 vecLane = -1; /* -1 for whole reg, 0 to 15 to specify each lane one at a time */ ++ uint16 vecLaneSize = 0; /* Number of lanes */ + }; + + /* +@@ -664,15 +684,16 @@ class AArch64MemOperand : public MemOperand { + /* Returns N where alignment == 2^N */ + static int32 GetImmediateOffsetAlignment(uint32 dSize) { + ASSERT(dSize >= k8BitSize, "error val:dSize"); +- ASSERT(dSize <= k64BitSize, "error val:dSize"); ++ ASSERT(dSize <= k128BitSize, "error val:dSize"); + ASSERT((dSize & (dSize - 1)) == 0, "error val:dSize"); + /* dSize==8: 0, dSize==16 : 1, dSize==32: 2, dSize==64: 3 */ + return __builtin_ctz(dSize) - kBaseOffsetAlignment; + } + + static int32 GetMaxPIMM(uint32 dSize) { ++ dSize = dSize > k64BitSize ? 
k64BitSize : dSize; + ASSERT(dSize >= k8BitSize, "error val:dSize"); +- ASSERT(dSize <= k64BitSize, "error val:dSize"); ++ ASSERT(dSize <= k128BitSize, "error val:dSize"); + ASSERT((dSize & (dSize - 1)) == 0, "error val:dSize"); + int32 alignment = GetImmediateOffsetAlignment(dSize); + /* alignment is between kAlignmentOf8Bit and kAlignmentOf64Bit */ +@@ -683,7 +704,7 @@ class AArch64MemOperand : public MemOperand { + + bool IsOffsetMisaligned(uint32 dSize) const { + ASSERT(dSize >= k8BitSize, "error val:dSize"); +- ASSERT(dSize <= k64BitSize, "error val:dSize"); ++ ASSERT(dSize <= k128BitSize, "error val:dSize"); + ASSERT((dSize & (dSize - 1)) == 0, "error val:dSize"); + if (dSize == k8BitSize || addrMode != kAddrModeBOi) { + return false; +@@ -705,7 +726,7 @@ class AArch64MemOperand : public MemOperand { + + static bool IsPIMMOffsetOutOfRange(int32 offset, uint32 dSize) { + ASSERT(dSize >= k8BitSize, "error val:dSize"); +- ASSERT(dSize <= k64BitSize, "error val:dSize"); ++ ASSERT(dSize <= k128BitSize, "error val:dSize"); + ASSERT((dSize & (dSize - 1)) == 0, "error val:dSize"); + return (offset < 0 || offset > GetMaxPIMM(dSize)); + } +diff --git a/src/mapleall/maple_be/include/cg/cgfunc.h b/src/mapleall/maple_be/include/cg/cgfunc.h +index c1ed5141..11178972 100644 +--- a/src/mapleall/maple_be/include/cg/cgfunc.h ++++ b/src/mapleall/maple_be/include/cg/cgfunc.h +@@ -259,6 +259,18 @@ class CGFunc { + virtual Operand *CreateZeroOperand(PrimType primType) = 0; + + virtual bool IsFrameReg(const RegOperand &opnd) const = 0; ++ ++ /* For Neon intrinsics */ ++ virtual RegOperand *SelectVectorFromScalar(IntrinsicopNode &intrnNode) = 0; ++ virtual Operand *SelectVectorStore(IntrinsicopNode &intrnNode) = 0; ++ virtual RegOperand *SelectVectorMerge(IntrinsicopNode &intrnNode) = 0; ++ virtual RegOperand *SelectVectorGetHigh(IntrinsicopNode &intrnNode) = 0; ++ virtual RegOperand *SelectVectorGetLow(IntrinsicopNode &intrnNode) = 0; ++ virtual RegOperand *SelectVectorGetElement(IntrinsicopNode &intrnNode) = 0; ++ virtual RegOperand *SelectVectorPairwiseAdd(IntrinsicopNode &intrnNode) = 0; ++ virtual RegOperand *SelectVectorSetElement(IntrinsicopNode &intrnNode) = 0; ++ virtual RegOperand *SelectVectorReverse(IntrinsicopNode &intrnNode, uint32 size) = 0; ++ + /* For ebo issue. 
*/ + virtual Operand *GetTrueOpnd() { + return nullptr; +@@ -303,7 +315,7 @@ class CGFunc { + if (size < k4ByteSize) { + size = k4ByteSize; + } +- ASSERT(size == k4ByteSize || size == k8ByteSize, "check size"); ++ ASSERT(size == k4ByteSize || size == k8ByteSize || size == k16ByteSize, "check size"); + #endif + new (&vRegTable[vRegCount]) VirtualRegNode(regType, size); + return vRegCount++; +@@ -331,6 +343,12 @@ class CGFunc { + return kRegTyInt; + case PTY_f32: + case PTY_f64: ++ case PTY_v2u32: ++ case PTY_v4i32: ++ case PTY_v4u32: ++ case PTY_v16u8: ++ case PTY_v2u64: ++ case PTY_v8u16: + return kRegTyFloat; + default: + ASSERT(false, "Unexpected pty"); +diff --git a/src/mapleall/maple_be/include/cg/insn.h b/src/mapleall/maple_be/include/cg/insn.h +index 90d17ac0..d5d5e0df 100644 +--- a/src/mapleall/maple_be/include/cg/insn.h ++++ b/src/mapleall/maple_be/include/cg/insn.h +@@ -361,6 +361,10 @@ class Insn { + return false; + } + ++ virtual bool IsVectorOp() const { ++ return false; ++ } ++ + virtual Operand *GetCallTargetOperand() const { + return nullptr; + } +diff --git a/src/mapleall/maple_be/include/cg/isa.h b/src/mapleall/maple_be/include/cg/isa.h +index b826464a..f04c87b1 100644 +--- a/src/mapleall/maple_be/include/cg/isa.h ++++ b/src/mapleall/maple_be/include/cg/isa.h +@@ -56,6 +56,7 @@ enum MopProperty : maple::uint8 { + kPropIsUnCondBr, + kPropIsCondBr, + kPropHasLoop, ++ kPropIsVectorOp, + }; + + #define ISMOVE (1ULL << kPropIsMove) +@@ -82,6 +83,7 @@ enum MopProperty : maple::uint8 { + #define ISUNCONDBRANCH (1ULL << kPropIsUnCondBr) + #define ISCONDBRANCH (1ULL << kPropIsCondBr) + #define HASLOOP (1ULL << kPropHasLoop) ++#define ISVECTOR (1ULL << kPropIsVectorOp) + + using regno_t = uint32_t; + +diff --git a/src/mapleall/maple_be/include/cg/memlayout.h b/src/mapleall/maple_be/include/cg/memlayout.h +index 018150e8..616a7761 100644 +--- a/src/mapleall/maple_be/include/cg/memlayout.h ++++ b/src/mapleall/maple_be/include/cg/memlayout.h +@@ -73,7 +73,7 @@ class CGFunc; + /* keeps track of the allocation of a memory segment */ + class MemSegment { + public: +- explicit MemSegment(MemSegmentKind memSegKind) : kind(memSegKind), size(0) {} ++ explicit MemSegment(MemSegmentKind memSegKind) : kind(memSegKind), size(0), containVector(false) {} + + ~MemSegment() = default; + +@@ -89,9 +89,18 @@ class MemSegment { + return kind; + } + ++ void SetContainVector() { ++ containVector = true; ++ } ++ ++ bool ContainVector() const { ++ return containVector; ++ } ++ + private: + MemSegmentKind kind; + int32 size; /* size is negative if allocated offsets are negative */ ++ bool containVector; + }; /* class MemSegment */ + + /* describes where a symbol is allocated */ +diff --git a/src/mapleall/maple_be/src/be/lower.cpp b/src/mapleall/maple_be/src/be/lower.cpp +index 3187ec95..2a812481 100644 +--- a/src/mapleall/maple_be/src/be/lower.cpp ++++ b/src/mapleall/maple_be/src/be/lower.cpp +@@ -2897,6 +2897,9 @@ BaseNode *CGLowerer::LowerIntrinsicop(const BaseNode &parent, IntrinsicopNode &i + opnd->op == OP_conststr || opnd->op == OP_conststr16) ? 
1 : 0; + return mirModule.GetMIRBuilder()->CreateIntConst(val, PTY_i32); + } ++ if (intrinDesc.IsVectorOp()) { ++ return &intrinNode; ++ } + CHECK_FATAL(false, "unexpected intrinsic type in CGLowerer::LowerIntrinsicop"); + return &intrinNode; + } +diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_abi.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_abi.cpp +index 11db2be0..408af8f1 100644 +--- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_abi.cpp ++++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_abi.cpp +@@ -451,6 +451,13 @@ int32 ParmLocator::LocateNextParm(MIRType &mirType, PLocInfo &pLoc, bool isFirst + case PTY_f32: + case PTY_f64: + case PTY_c64: ++ case PTY_v2i32: ++ case PTY_v4i16: ++ case PTY_v8i8: ++ case PTY_v2u32: ++ case PTY_v4u16: ++ case PTY_v8u8: ++ case PTY_v2f32: + /* Rule C.1 */ + ASSERT(GetPrimTypeSize(PTY_f64) == k8ByteSize, "unexpected type size"); + typeSize = k8ByteSize; +@@ -462,6 +469,16 @@ int32 ParmLocator::LocateNextParm(MIRType &mirType, PLocInfo &pLoc, bool isFirst + * - callees de-marshall one f128 value into the real and the imaginery part + */ + case PTY_c128: ++ case PTY_v2i64: ++ case PTY_v4i32: ++ case PTY_v8i16: ++ case PTY_v16i8: ++ case PTY_v2u64: ++ case PTY_v4u32: ++ case PTY_v8u16: ++ case PTY_v16u8: ++ case PTY_v2f64: ++ case PTY_v4f32: + /* SIMD-FP registers have 128-bits. */ + pLoc.reg0 = AllocateSIMDFPRegister(); + ASSERT(nextFloatRegNO <= AArch64Abi::kNumFloatParmRegs, "regNO should not be greater than kNumFloatParmRegs"); +@@ -622,6 +639,13 @@ ReturnMechanism::ReturnMechanism(MIRType &retTy, const BECommon &be) + case PTY_f32: + case PTY_f64: + case PTY_c64: ++ case PTY_v2i32: ++ case PTY_v4i16: ++ case PTY_v8i8: ++ case PTY_v2u32: ++ case PTY_v4u16: ++ case PTY_v8u8: ++ case PTY_v2f32: + + /* + * for c128 complex numbers, we assume +@@ -629,6 +653,16 @@ ReturnMechanism::ReturnMechanism(MIRType &retTy, const BECommon &be) + * - callees de-marshall one f128 value into the real and the imaginery part + */ + case PTY_c128: ++ case PTY_v2i64: ++ case PTY_v4i32: ++ case PTY_v8i16: ++ case PTY_v16i8: ++ case PTY_v2u64: ++ case PTY_v4u32: ++ case PTY_v8u16: ++ case PTY_v16u8: ++ case PTY_v2f64: ++ case PTY_v4f32: + regCount = 1; + reg0 = AArch64Abi::floatReturnRegs[0]; + primTypeOfReg0 = pType; +diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_args.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_args.cpp +index e3d3eb27..464f61c6 100644 +--- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_args.cpp ++++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_args.cpp +@@ -92,7 +92,10 @@ ArgInfo AArch64MoveRegArgs::GetArgInfo(std::map &argsList, s + argInfo.createTwoStores = false; + argInfo.isTwoRegParm = false; + +- if ((argInfo.symSize > k8ByteSize) && (argInfo.symSize <= k16ByteSize)) { ++ if (GetPrimTypeLanes(argInfo.mirTy->GetPrimType()) > 0) { ++ /* vector type */ ++ argInfo.stkSize = argInfo.symSize; ++ } else if ((argInfo.symSize > k8ByteSize) && (argInfo.symSize <= k16ByteSize)) { + argInfo.isTwoRegParm = true; + if (numFpRegs[argIndex] > kOneRegister) { + argInfo.symSize = argInfo.stkSize = fpSize[argIndex]; +diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_cg.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_cg.cpp +index ac6050ce..b69775ab 100644 +--- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_cg.cpp ++++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_cg.cpp +@@ -58,11 +58,19 @@ std::array, kIntRegTypeNum> AArch64CG: + "err", "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", 
"x13", "x14", + "x15", "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "x29", "x30", + "sp", "xzr", /* x29 is fp */ +- "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", +- "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", ++ "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15", ++ "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23", "q24", "q25", "q26", "q27", "q28", "q29", "q30", "q31", + "errMaxRegNum", "rflag" } + }; + ++std::array AArch64CG::vectorRegNames = { ++ "err", "err0", "err1", "err2", "err3", "err4", "err5", "err6", "err7", "err8", "err9", "err10", ++ "err11", "err12", "err13", "err14", "err15", "err16", "err17", "err18", "err19", "err20", "err21", "err22", ++ "err23", "err24", "err25", "err26", "err27", "err28", "err29", "err30", "errsp", "errzr", /* x29 is fp */ ++ "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", ++ "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", ++ "errMaxRegNum", "rflag" }; ++ + bool AArch64CG::IsExclusiveFunc(MIRFunction &mirFunc) { + const std::string &funcName = mirFunc.GetName(); + for (const auto &it : ehExclusiveNameVec) { +diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp +index 600c89a6..64349f11 100644 +--- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp ++++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp +@@ -37,7 +37,7 @@ CondOperand AArch64CGFunc::ccOperands[kCcLast] = { + namespace { + constexpr int32 kSignedDimension = 2; /* signed and unsigned */ + constexpr int32 kIntByteSizeDimension = 4; /* 1 byte, 2 byte, 4 bytes, 8 bytes */ +-constexpr int32 kFloatByteSizeDimension = 2; /* 4 bytes, 8 bytes */ ++constexpr int32 kFloatByteSizeDimension = 3; /* 4 bytes, 8 bytes, 16 bytes(vector) */ + constexpr int32 kShiftAmount12 = 12; /* for instruction that can use shift, shift amount must be 0 or 12 */ + + MOperator ldIs[kSignedDimension][kIntByteSizeDimension] = { +@@ -68,11 +68,11 @@ MOperator stIsRel[kSignedDimension][kIntByteSizeDimension] = { + { MOP_wstlrb, MOP_wstlrh, MOP_wstlr, MOP_xstlr } + }; + +-MOperator ldFs[kFloatByteSizeDimension] = { MOP_sldr, MOP_dldr }; +-MOperator stFs[kFloatByteSizeDimension] = { MOP_sstr, MOP_dstr }; ++MOperator ldFs[kFloatByteSizeDimension] = { MOP_sldr, MOP_dldr, MOP_qldr }; ++MOperator stFs[kFloatByteSizeDimension] = { MOP_sstr, MOP_dstr, MOP_qstr }; + +-MOperator ldFsAcq[kFloatByteSizeDimension] = { MOP_undef, MOP_undef }; +-MOperator stFsRel[kFloatByteSizeDimension] = { MOP_undef, MOP_undef }; ++MOperator ldFsAcq[kFloatByteSizeDimension] = { MOP_undef, MOP_undef, MOP_undef }; ++MOperator stFsRel[kFloatByteSizeDimension] = { MOP_undef, MOP_undef, MOP_undef }; + + /* extended to unsigned ints */ + MOperator uextIs[kIntByteSizeDimension][kIntByteSizeDimension] = { +@@ -107,7 +107,7 @@ MOperator PickLdStInsn(bool isLoad, uint32 bitSize, PrimType primType, AArch64is + } + + /* __builtin_ffs(x) returns: 0 -> 0, 1 -> 1, 2 -> 2, 4 -> 3, 8 -> 4 */ +- if (IsPrimitiveInteger(primType) || primType == PTY_agg) { ++ if ((IsPrimitiveInteger(primType) || primType == PTY_agg) && !IsPrimitiveVector(primType)) { + MOperator(*table)[kIntByteSizeDimension]; + if (isLoad) { + table = (memOrd == 
AArch64isa::kMoAcquire) ? ldIsAcq : ldIs; +@@ -134,9 +134,9 @@ MOperator PickLdStInsn(bool isLoad, uint32 bitSize, PrimType primType, AArch64is + table = (memOrd == AArch64isa::kMoRelease) ? stFsRel : stFs; + } + +- /* __builtin_ffs(x) returns: 32 -> 6, 64 -> 7 */ ++ /* __builtin_ffs(x) returns: 32 -> 6, 64 -> 7, 128 -> 8 */ + uint32 size = static_cast(__builtin_ffs(static_cast(bitSize))) - 6; +- ASSERT(size <= 1, "size must be 0 or 1"); ++ ASSERT(size <= 2, "size must be 0 to 2"); + return table[size]; + } + } +@@ -214,12 +214,12 @@ MOperator AArch64CGFunc::PickMovInsn(RegOperand &lhs, RegOperand &rhs) { + } + + MOperator AArch64CGFunc::PickMovInsn(uint32 bitLen, RegType regType) { +- ASSERT((bitLen == k32BitSize) || (bitLen == k64BitSize), "size check"); ++ ASSERT((bitLen == k32BitSize) || (bitLen == k64BitSize || bitLen == k128BitSize), "size check"); + ASSERT((regType == kRegTyInt) || (regType == kRegTyFloat), "type check"); + if (regType == kRegTyInt) { + return (bitLen == k32BitSize) ? MOP_wmovrr : MOP_xmovrr; + } +- return (bitLen == k32BitSize) ? MOP_xvmovs : MOP_xvmovd; ++ return bitLen == k128BitSize ? MOP_vmovvv : ((bitLen == k32BitSize) ? MOP_xvmovs : MOP_xvmovd); + } + + void AArch64CGFunc::SelectLoadAcquire(Operand &dest, PrimType dtype, Operand &src, PrimType stype, +@@ -470,7 +470,7 @@ void AArch64CGFunc::SelectCopyMemOpnd(Operand &dest, PrimType dtype, uint32 dsiz + PrimType regTy = PTY_void; + RegOperand *loadReg = nullptr; + MOperator mop = MOP_undef; +- if (IsPrimitiveFloat(stype)) { ++ if (IsPrimitiveFloat(stype) || IsPrimitiveVector(stype)) { + CHECK_FATAL(dsize == ssize, "dsize %u expect equals ssize %u", dtype, ssize); + insn = &GetCG()->BuildInstruction(PickLdInsn(ssize, stype), dest, src); + } else { +@@ -710,6 +710,19 @@ void AArch64CGFunc::SelectCopy(Operand &dest, PrimType dtype, Operand &src, Prim + dest, AArch64RegOperand::GetZeroRegister(dsize))); + break; + case Operand::kOpdRegister: ++ if (opnd0Type == Operand::kOpdRegister && GetPrimTypeLanes(stype) > 0) { ++ /* check vector reg to vector reg move */ ++ CHECK_FATAL(GetPrimTypeLanes(dtype) > 0, "invalid vectreg to vectreg move"); ++ VectorRegSpec *vecSpecSrc = GetMemoryPool()->New(); ++ vecSpecSrc->vecLaneMax = k16BitSize; ++ VectorRegSpec *vecSpecDest = GetMemoryPool()->New(); ++ vecSpecDest->vecLaneMax = k16BitSize; ++ Insn *insn = &GetCG()->BuildInstruction(dsize <= k64BitSize ? MOP_vmovuu : MOP_vmovvv, dest, src); ++ static_cast(insn)->PushRegSpecEntry(vecSpecDest); ++ static_cast(insn)->PushRegSpecEntry(vecSpecSrc); ++ GetCurBB()->AppendInsn(*insn); ++ break; ++ } + SelectCopyRegOpnd(dest, dtype, opnd0Type, dsize, src, stype); + break; + default: +@@ -739,7 +752,7 @@ RegOperand &AArch64CGFunc::SelectCopy(Operand &src, PrimType stype, PrimType dty + */ + bool AArch64CGFunc::IsImmediateOffsetOutOfRange(AArch64MemOperand &memOpnd, uint32 bitLen) { + ASSERT(bitLen >= k8BitSize, "bitlen error"); +- ASSERT(bitLen <= k64BitSize, "bitlen error"); ++ ASSERT(bitLen <= k128BitSize, "bitlen error"); + + if (bitLen >= k8BitSize) { + bitLen = RoundUp(bitLen, k8BitSize); +@@ -973,6 +986,11 @@ void AArch64CGFunc::SelectDassign(StIdx stIdx, FieldID fieldId, PrimType rhsPTyp + AArch64MemOperand &archMemOperand = *static_cast(memOpnd); + if ((memOpnd->GetMemVaryType() == kNotVary) && IsImmediateOffsetOutOfRange(archMemOperand, dataSize)) { + memOpnd = &SplitOffsetWithAddInstruction(archMemOperand, dataSize); ++#if 0 ++ uint32 dSize = dataSize > k64BitSize ? 
k64BitSize : dataSize; /* TBD */ ++ if ((memOpnd->GetMemVaryType() == kNotVary) && IsImmediateOffsetOutOfRange(archMemOperand, dSize)) { ++ memOpnd = &SplitOffsetWithAddInstruction(archMemOperand, dSize); ++#endif + } + + /* In bpl mode, a func symbol's type is represented as a MIRFuncType instead of a MIRPtrType (pointing to +@@ -1392,6 +1410,9 @@ void AArch64CGFunc::SelectIassign(IassignNode &stmt) { + if (destType == PTY_agg) { + destType = PTY_a64; + } ++ if (GetPrimTypeLanes(styp) != 0) { /* a vector type */ ++ destType = styp; ++ } + ASSERT(stmt.Opnd(0) != nullptr, "null ptr check"); + MemOperand &memOpnd = CreateMemOpnd(destType, stmt, *stmt.Opnd(0), offset); + if (isVolStore && static_cast(memOpnd).GetAddrMode() == AArch64MemOperand::kAddrModeBOi) { +@@ -4766,12 +4787,13 @@ Operand &AArch64CGFunc::GetTargetRetOperand(PrimType primType, int32 sReg) { + uint32 bitSize = GetPrimTypeBitSize(primType) < k32BitSize ? k32BitSize : GetPrimTypeBitSize(primType); + AArch64reg pReg; + if (sReg < 0) { +- return GetOrCreatePhysicalRegisterOperand(IsPrimitiveFloat(primType) ? S0 : R0, bitSize, +- GetRegTyFromPrimTy(primType)); ++ return GetOrCreatePhysicalRegisterOperand( ++ IsPrimitiveFloat(primType) || (GetPrimTypeLanes(primType) > 0) ? S0 : R0, ++ bitSize, GetRegTyFromPrimTy(primType)); + } else { + switch (sReg) { + case kSregRetval0: +- pReg = IsPrimitiveFloat(primType) ? S0 : R0; ++ pReg = IsPrimitiveFloat(primType) || (GetPrimTypeLanes(primType) > 0) ? S0 : R0; + break; + case kSregRetval1: + pReg = R1; +@@ -6494,7 +6516,7 @@ AArch64RegOperand &AArch64CGFunc::GetOrCreatePhysicalRegisterOperand(AArch64reg + size = k32BitSize; + aarch64PhyRegIdx = aarch64PhyRegIdx << 1; + } else { +- size = k64BitSize; ++ size = size == k128BitSize ? k128BitSize : k64BitSize; + aarch64PhyRegIdx = (aarch64PhyRegIdx << 1) + 1; + } + ASSERT(aarch64PhyRegIdx < k256BitSize, "phyRegOperandTable index out of range"); +@@ -6960,6 +6982,9 @@ int32 AArch64CGFunc::GetBaseOffset(const SymbolAlloc &sa) { + return offset; + } else if (sgKind == kMsArgsRegPassed) { + int32 baseOffset = memLayout->GetSizeOfLocals() + symAlloc->GetOffset() + memLayout->GetSizeOfRefLocals(); ++ if (symAlloc->GetMemSegment()->ContainVector()) { ++ baseOffset = RoundUp(baseOffset, k16ByteSize); ++ } + return baseOffset + sizeofFplr; + } else if (sgKind == kMsRefLocals) { + int32 baseOffset = symAlloc->GetOffset() + memLayout->GetSizeOfLocals(); +@@ -7927,4 +7952,203 @@ void AArch64CGFunc::InsertJumpPad(Insn *insn) { + fallthruBB->PushBackPreds(*brBB); + } + ++RegOperand *AArch64CGFunc::SelectVectorFromScalar(IntrinsicopNode &intrnNode) { ++ PrimType pType = intrnNode.GetPrimType(); ++ ++ RegOperand *res = &CreateRegisterOperandOfType(pType); ++ BaseNode *argExpr = intrnNode.Opnd(0); ++ Operand *opnd0 = HandleExpr(intrnNode, *argExpr); ++ VectorRegSpec *vecSpec = GetMemoryPool()->New(); ++ vecSpec->vecLaneMax = GetPrimTypeLanes(pType); ++ ++ Operand *reg = opnd0; ++ if (opnd0->IsConstImmediate()) { ++ ImmOperand *immOpnd = static_cast(opnd0); ++ ConstvalNode *constvalNode = static_cast(argExpr); ++ MIRConst *mirConst = constvalNode->GetConstVal(); ++ int32 val = safe_cast(mirConst)->GetValue(); ++ if (val >= -128 && val <= 255) { ++ Insn *insn = &GetCG()->BuildInstruction(MOP_vmovvi, *res, *immOpnd); ++ static_cast(insn)->PushRegSpecEntry(vecSpec); ++ GetCurBB()->AppendInsn(*insn); ++ return res; ++ } ++ reg = &CreateRegisterOperandOfType(argExpr->GetPrimType()); ++ SelectCopyImm(*reg, *immOpnd, argExpr->GetPrimType()); ++ } ++ Insn *insn = 
&GetCG()->BuildInstruction(MOP_vdupvr, *res, *reg); ++ static_cast(insn)->PushRegSpecEntry(vecSpec); ++ GetCurBB()->AppendInsn(*insn); ++ return res; ++} ++ ++Operand *AArch64CGFunc::SelectVectorStore(IntrinsicopNode &intrnNode) { ++ BaseNode &argExpr = *intrnNode.Opnd(1); ++ Operand &srcOpnd = *HandleExpr(intrnNode, argExpr); ++ ++ PrimType dType = argExpr.GetPrimType(); ++ MemOperand &memOpnd = CreateMemOpnd(dType, intrnNode, *intrnNode.Opnd(0), 0); ++ SelectCopy(memOpnd, dType, srcOpnd, dType); ++ return &srcOpnd; ++} ++ ++RegOperand *AArch64CGFunc::SelectVectorGetHigh(IntrinsicopNode &intrnNode) { ++ BaseNode &argExpr = *intrnNode.Opnd(0); ++ PrimType srcType = argExpr.GetPrimType(); ++ Operand &srcOpnd = *HandleExpr(intrnNode, argExpr); ++ VectorRegSpec *vecSpecSrc = GetMemoryPool()->New(); ++ vecSpecSrc->vecLaneMax = GetPrimTypeLanes(srcType); ++ vecSpecSrc->vecLane = 1; ++ ++ PrimType resType = intrnNode.GetPrimType(); ++ RegOperand *res = &CreateRegisterOperandOfType(resType); ++ ++ Insn *insn = &GetCG()->BuildInstruction(MOP_vduprv, *res, srcOpnd); ++ static_cast(insn)->PushRegSpecEntry(vecSpecSrc); ++ GetCurBB()->AppendInsn(*insn); ++ return res; ++} ++ ++RegOperand *AArch64CGFunc::SelectVectorGetLow(IntrinsicopNode &intrnNode) { ++ BaseNode &argExpr = *intrnNode.Opnd(0); ++ PrimType srcType = argExpr.GetPrimType(); ++ Operand &srcOpnd = *HandleExpr(intrnNode, argExpr); ++ VectorRegSpec *vecSpecSrc = GetMemoryPool()->New(); ++ vecSpecSrc->vecLaneMax = GetPrimTypeLanes(srcType); ++ vecSpecSrc->vecLane = 0; ++ ++ PrimType resType = intrnNode.GetPrimType(); ++ RegOperand *res = &CreateRegisterOperandOfType(resType); ++ ++ Insn *insn = &GetCG()->BuildInstruction(MOP_vduprv, *res, srcOpnd); ++ static_cast(insn)->PushRegSpecEntry(vecSpecSrc); ++ GetCurBB()->AppendInsn(*insn); ++ return res; ++} ++ ++RegOperand *AArch64CGFunc::SelectVectorGetElement(IntrinsicopNode &intrnNode) { ++ PrimType resType = intrnNode.GetPrimType(); /* uint32_t result */ ++ RegOperand *res = &CreateRegisterOperandOfType(resType); ++ ++ BaseNode *argExpr = intrnNode.Opnd(0); /* vector operand */ ++ Operand *opndSrc = HandleExpr(intrnNode, *argExpr); ++ PrimType srcType = argExpr->GetPrimType(); ++ VectorRegSpec *vecSpecSrc = GetMemoryPool()->New(); ++ vecSpecSrc->vecLaneMax = GetPrimTypeLanes(srcType); ++ ++ BaseNode *argLane = intrnNode.Opnd(1); /* lane const operand */ ++ Operand *opndLane = HandleExpr(intrnNode, *argLane); ++ if (opndLane->IsConstImmediate()) { ++ ConstvalNode *constvalNode = static_cast(argLane); ++ MIRConst *mirConst = constvalNode->GetConstVal(); ++ vecSpecSrc->vecLane = safe_cast(mirConst)->GetValue(); ++ } else { ++ CHECK_FATAL(0, "VectorGetElement does not have lane const"); ++ } ++ ++ MOperator mop = GetPrimTypeBitSize(srcType) <= k64BitSize ? 
MOP_vwmovru : MOP_vwmovrv; ++ Insn *insn = &GetCG()->BuildInstruction(mop, *res, *opndSrc); ++ static_cast(insn)->PushRegSpecEntry(vecSpecSrc); ++ GetCurBB()->AppendInsn(*insn); ++ return res; ++} ++ ++RegOperand *AArch64CGFunc::SelectVectorPairwiseAdd(IntrinsicopNode &intrnNode) { ++ PrimType resType = intrnNode.GetPrimType(); ++ RegOperand *res = &CreateRegisterOperandOfType(resType); ++ VectorRegSpec *vecSpecDest = GetMemoryPool()->New(); ++ vecSpecDest->vecLaneMax = GetPrimTypeLanes(resType); ++ ++ BaseNode *argExpr = intrnNode.Opnd(0); ++ Operand *opnd = HandleExpr(intrnNode, *argExpr); ++ PrimType srcType = argExpr->GetPrimType(); ++ VectorRegSpec *vecSpecSrc = GetMemoryPool()->New(); ++ vecSpecSrc->vecLaneMax = GetPrimTypeLanes(srcType); ++ ++ Insn *insn = &GetCG()->BuildInstruction(MOP_vpaddvv, *res, *opnd); ++ static_cast(insn)->PushRegSpecEntry(vecSpecDest); /* dest pushed first, popped first */ ++ static_cast(insn)->PushRegSpecEntry(vecSpecSrc); ++ GetCurBB()->AppendInsn(*insn); ++ return res; ++} ++ ++RegOperand *AArch64CGFunc::SelectVectorSetElement(IntrinsicopNode &intrnNode) { ++ BaseNode *arg0 = intrnNode.Opnd(0); /* uint32_t operand */ ++ Operand *opnd0 = HandleExpr(intrnNode, *arg0); ++ ASSERT(GetPrimTypeBitSize(arg0->GetPrimType()) <= k32BitSize, "VectorSetElement: invalid opnd0"); ++ ++ BaseNode *arg1 = intrnNode.Opnd(1); /* vector operand == result */ ++ Operand *opnd1 = HandleExpr(intrnNode, *arg1); ++ PrimType vType = arg1->GetPrimType(); ++ VectorRegSpec *vecSpecSrc = GetMemoryPool()->New(); ++ vecSpecSrc->vecLaneMax = GetPrimTypeLanes(vType); ++ ++ BaseNode *arg2 = intrnNode.Opnd(2); /* lane const operand */ ++ Operand *opnd2 = HandleExpr(intrnNode, *arg2); ++ if (opnd2->IsConstImmediate()) { ++ ConstvalNode *constvalNode = static_cast(arg2); ++ MIRConst *mirConst = constvalNode->GetConstVal(); ++ vecSpecSrc->vecLane = safe_cast(mirConst)->GetValue(); ++ } else { ++ CHECK_FATAL(0, "VectorSetElement does not have lane const"); ++ } ++ ++ Insn *insn = &GetCG()->BuildInstruction(MOP_vinsvr, *opnd1, *opnd0); ++ static_cast(insn)->PushRegSpecEntry(vecSpecSrc); ++ GetCurBB()->AppendInsn(*insn); ++ return static_cast(opnd1); ++} ++ ++RegOperand *AArch64CGFunc::SelectVectorMerge(IntrinsicopNode &intrnNode) { ++ PrimType resType = intrnNode.GetPrimType(); /* result operand */ ++ RegOperand *res = &CreateRegisterOperandOfType(resType); ++ VectorRegSpec *vecSpecDest = GetMemoryPool()->New(); ++ vecSpecDest->vecLaneMax = GetPrimTypeLanes(resType); ++ ++ BaseNode *arg1 = intrnNode.Opnd(0); /* vector operand1 */ ++ Operand *opnd1 = HandleExpr(intrnNode, *arg1); ++ PrimType o1Type = arg1->GetPrimType(); ++ VectorRegSpec *vecSpecOpd1 = GetMemoryPool()->New(); ++ vecSpecOpd1->vecLaneMax = GetPrimTypeLanes(o1Type); ++ ++ BaseNode *arg2 = intrnNode.Opnd(1); /* vector operand2 */ ++ Operand *opnd2 = HandleExpr(intrnNode, *arg1); ++ PrimType o2Type = arg2->GetPrimType(); ++ VectorRegSpec *vecSpecOpd2 = GetMemoryPool()->New(); ++ vecSpecOpd2->vecLaneMax = GetPrimTypeLanes(o2Type); ++ ++ BaseNode *arg3 = intrnNode.Opnd(2); /* lane const operand */ ++ Operand *opnd3 = HandleExpr(intrnNode, *arg3); ++ if (!opnd3->IsConstImmediate()) { ++ CHECK_FATAL(0, "VectorSetElement does not have lane const"); ++ } ++ ++ Insn *insn = &GetCG()->BuildInstruction(MOP_vextvvv, *res, *opnd1, *opnd2, *opnd3); ++ static_cast(insn)->PushRegSpecEntry(vecSpecDest); ++ static_cast(insn)->PushRegSpecEntry(vecSpecOpd1); ++ static_cast(insn)->PushRegSpecEntry(vecSpecOpd2); ++ GetCurBB()->AppendInsn(*insn); ++ return 
res; ++} ++ ++RegOperand *AArch64CGFunc::SelectVectorReverse(IntrinsicopNode &intrnNode, uint32 size) { ++ PrimType resType = intrnNode.GetPrimType(); /* result operand */ ++ RegOperand *res = &CreateRegisterOperandOfType(resType); ++ VectorRegSpec *vecSpecDest = GetMemoryPool()->New(); ++ vecSpecDest->vecLaneMax = GetPrimTypeLanes(resType); ++ ++ BaseNode &argExpr = *intrnNode.Opnd(0); ++ PrimType srcType = argExpr.GetPrimType(); ++ Operand &srcOpnd = *HandleExpr(intrnNode, argExpr); ++ VectorRegSpec *vecSpecSrc = GetMemoryPool()->New(); ++ vecSpecSrc->vecLaneMax = GetPrimTypeLanes(srcType); ++ ++ MOperator mOp = size >= 64 ? MOP_vrev64vv : (size >= 32 ? MOP_vrev32vv : MOP_vrev16vv); ++ Insn *insn = &GetCG()->BuildInstruction(mOp, *res, srcOpnd); ++ static_cast(insn)->PushRegSpecEntry(vecSpecDest); ++ static_cast(insn)->PushRegSpecEntry(vecSpecSrc); ++ GetCurBB()->AppendInsn(*insn); ++ return res; ++} ++ + } /* namespace maplebe */ +diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_insn.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_insn.cpp +index a6d60424..90ec93b5 100644 +--- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_insn.cpp ++++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_insn.cpp +@@ -1075,6 +1075,18 @@ void AArch64Insn::Emit(const CG &cg, Emitter &emitter) const { + emitter.Emit(nameOpnd->GetName() + emitter.HugeSoPostFix()); + break; + } ++ AArch64RegOperand *regOpnd = static_cast(opnds[seq[i]]); ++ if (regOpnd != nullptr && static_cast(md->operand[seq[i]])->IsVectorOperand()) { ++ regOpnd->SetVecLanePosition(-1); ++ regOpnd->SetVecLaneSize(0); ++ if (IsVectorOp()) { ++ AArch64Insn *insn = const_cast(this); ++ AArch64VectorInsn *vInsn = static_cast(insn); ++ VectorRegSpec* vecSpec = vInsn->GetAndRemoveRegSpecFromList(); ++ regOpnd->SetVecLanePosition(vecSpec->vecLane); ++ regOpnd->SetVecLaneSize(vecSpec->vecLaneMax); ++ } ++ } + opnds[seq[i]]->Emit(emitter, md->operand[seq[i]]); + /* reset opnd0 ref-field flag, so following instruction has correct register */ + if (isRefField && (i == 0)) { +@@ -1168,8 +1180,10 @@ uint8 AArch64Insn::GetLoadStoreSize() const { + case MOP_xldp: + case MOP_xldpsw: + case MOP_dldp: ++ case MOP_qldr: + case MOP_xstp: + case MOP_dstp: ++ case MOP_qstr: + return k16ByteSize; + + default: +@@ -1290,6 +1304,10 @@ bool AArch64Insn::IsCall() const { + return AArch64CG::kMd[mOp].IsCall(); + } + ++bool AArch64Insn::IsVectorOp() const { ++ return AArch64CG::kMd[mOp].IsVectorOp(); ++} ++ + bool AArch64Insn::HasLoop() const { + return AArch64CG::kMd[mOp].HasLoop(); + } +diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_memlayout.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_memlayout.cpp +index ff138448..e96b5742 100644 +--- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_memlayout.cpp ++++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_memlayout.cpp +@@ -194,6 +194,9 @@ void AArch64MemLayout::LayoutFormalParams() { + bool noStackPara = false; + MIRType *ty = mirFunction->GetNthParamType(i); + uint32 ptyIdx = ty->GetTypeIndex(); ++ if (GetPrimTypeLanes(ty->GetPrimType()) > 0) { ++ segArgsRegPassed.SetContainVector(); ++ } + parmLocator.LocateNextParm(*ty, ploc, i == 0); + if (ploc.reg0 != kRinvalid) { /* register */ + symLoc->SetRegisters(ploc.reg0, ploc.reg1, ploc.reg2, ploc.reg3); +diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_operand.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_operand.cpp +index f77bbea1..99643da9 100644 +--- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_operand.cpp ++++ 
b/src/mapleall/maple_be/src/cg/aarch64/aarch64_operand.cpp +@@ -84,11 +84,28 @@ void AArch64RegOperand::Emit(Emitter &emitter, const OpndProp *opndProp) const { + break; + } + case kRegTyFloat: { +- ASSERT((opndSize == k8BitSize || opndSize == k16BitSize || opndSize == k32BitSize || opndSize == k64BitSize), +- "illegal register size"); +- /* FP reg cannot be reffield. 8~0, 16~1, 32~2, 64~3. 8 is 1000b, has 3 zero. */ +- uint32 regSet = __builtin_ctz(opndSize) - 3; +- emitter.Emit(AArch64CG::intRegNames[regSet][regNO]); ++ ASSERT((opndSize == k8BitSize || opndSize == k16BitSize || opndSize == k32BitSize || opndSize == k64BitSize ++ || opndSize == k128BitSize), "illegal register size"); ++ int32 laneSize = GetVecLaneSize(); ++ if (static_cast(opndProp)->IsVectorOperand() && laneSize != 0) { ++ std::string width; ++ if (opndSize == k128BitSize) { ++ width = laneSize == 16 ? "b" : (laneSize == 8 ? "h" : (laneSize == 4 ? "s" : "d")); ++ } else if (opndSize == k64BitSize) { ++ width = laneSize == 8 ? "b" : (laneSize == 4 ? "h" : "s"); ++ } ++ int16 lanePos = GetVecLanePosition(); ++ emitter.Emit(AArch64CG::vectorRegNames[regNO]); ++ if (lanePos == -1) { ++ emitter.Emit("." + std::to_string(laneSize) + width); ++ } else { ++ emitter.Emit("." + width + "[" + std::to_string(lanePos) + "]"); ++ } ++ } else { ++ /* FP reg cannot be reffield. 8~0, 16~1, 32~2, 64~3. 8 is 1000b, has 3 zero. */ ++ uint32 regSet = __builtin_ctz(opndSize) - 3; ++ emitter.Emit(AArch64CG::intRegNames[regSet][regNO]); ++ } + break; + } + default: +@@ -213,7 +230,7 @@ void AArch64MemOperand::Emit(Emitter &emitter, const OpndProp *opndProp) const { + #if DEBUG + const AArch64MD *md = &AArch64CG::kMd[emitter.GetCurrentMOP()]; + bool isLDSTpair = md->IsLoadStorePair(); +- ASSERT(md->Is64Bit() || md->GetOperandSize() <= k32BitSize, "unexpected opnd size"); ++ ASSERT(md->Is64Bit() || md->GetOperandSize() <= k32BitSize || md->GetOperandSize() == k128BitSize, "unexpected opnd size"); + #endif + if (addressMode == AArch64MemOperand::kAddrModeBOi) { + emitter.Emit("["); +diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_opnd.def b/src/mapleall/maple_be/src/cg/aarch64/aarch64_opnd.def +index 1c638228..74693422 100644 +--- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_opnd.def ++++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_opnd.def +@@ -28,6 +28,12 @@ AArch64OpndProp mopdF32RegDestSrc = {Operand::kOpdRegister, {kRegTyFloat, kAllRe + AArch64OpndProp mopdF64RegSrc = {Operand::kOpdRegister, {kRegTyFloat, kAllRegNum, kRegPropUse}, 64}; + AArch64OpndProp mopdF64RegDest = {Operand::kOpdRegister, {kRegTyFloat, kAllRegNum, kRegPropDef}, 64}; + AArch64OpndProp mopdF64RegDestSrc = {Operand::kOpdRegister, {kRegTyFloat, kAllRegNum, kRegPropDef | kRegPropUse}, 64}; ++AArch64OpndProp mopdV128RegSrc = {Operand::kOpdRegister, {kRegTyFloat, kAllRegNum, kRegPropUse | kVector}, 128}; ++AArch64OpndProp mopdV128RegDest = {Operand::kOpdRegister, {kRegTyFloat, kAllRegNum, kRegPropDef | kVector}, 128}; ++AArch64OpndProp mopdV128RegDestSrc = {Operand::kOpdRegister, {kRegTyFloat, kAllRegNum, kRegPropDef | kRegPropUse | kVector}, 128}; ++AArch64OpndProp mopdV64RegSrc = {Operand::kOpdRegister, {kRegTyFloat, kAllRegNum, kRegPropUse | kVector}, 64}; ++AArch64OpndProp mopdV64RegDest = {Operand::kOpdRegister, {kRegTyFloat, kAllRegNum, kRegPropDef | kVector}, 64}; ++AArch64OpndProp mopdV64RegDestSrc = {Operand::kOpdRegister, {kRegTyFloat, kAllRegNum, kRegPropDef | kVector}, 64}; + AArch64OpndProp mopdIntImm4Src = {Operand::kOpdImmediate, {kRegTyUndef, 
kAllRegNum, kRegPropUse}, 4}; + AArch64OpndProp mopdIntImm5Src = {Operand::kOpdImmediate, {kRegTyUndef, kAllRegNum, kRegPropUse}, 5}; + AArch64OpndProp mopdIntImm6Src = {Operand::kOpdImmediate, {kRegTyUndef, kAllRegNum, kRegPropUse}, 6}; +@@ -135,6 +141,7 @@ AArch64ImmOpndProp mopdMemPair32Src = {Operand::kOpdMem, {kRegTyUndef, kAllRegNu + AArch64OpndProp mopdMem32SrcH = {Operand::kOpdMem, {kRegTyUndef, kAllRegNum, kRegPropUse | kMemLow12}, 16}; + AArch64OpndProp mopdMem32SrcL = {Operand::kOpdMem, {kRegTyUndef, kAllRegNum, kRegPropUse | kMemLow12}, 16}; + AArch64ImmOpndProp mopdMem64Src = {Operand::kOpdMem, {kRegTyUndef, kAllRegNum, kRegPropUse}, 64, StrLdr64ImmValid}; ++AArch64OpndProp mopdMem128Src = {Operand::kOpdMem, {kRegTyUndef, kAllRegNum, kRegPropUse}, 128}; + AArch64ImmOpndProp mopdMemPair64Src = {Operand::kOpdMem, {kRegTyUndef, kAllRegNum, kRegPropUse}, 64, StrLdr64PairImmValid}; + AArch64OpndProp mopdMem64SrcL = {Operand::kOpdMem, {kRegTyUndef, kAllRegNum, kRegPropUse | kMemLow12}, 12}; + +@@ -142,6 +149,7 @@ AArch64ImmOpndProp mopdMem8Dest = {Operand::kOpdMem, {kRegTyUndef, kAllRegNum, + AArch64ImmOpndProp mopdMem16Dest = {Operand::kOpdMem, {kRegTyUndef, kAllRegNum, kRegPropDef }, 16, StrLdr16ImmValid}; + AArch64ImmOpndProp mopdMem32Dest = {Operand::kOpdMem, {kRegTyUndef, kAllRegNum, kRegPropDef }, 32, StrLdr32ImmValid}; + AArch64ImmOpndProp mopdMem64Dest = {Operand::kOpdMem, {kRegTyUndef, kAllRegNum, kRegPropDef }, 64, StrLdr64ImmValid}; ++AArch64OpndProp mopdMem128Dest = {Operand::kOpdMem, {kRegTyUndef, kAllRegNum, kRegPropDef}, 128}; + AArch64ImmOpndProp mopdMemPair32Dest = {Operand::kOpdMem, {kRegTyUndef, kAllRegNum, kRegPropDef }, 32, StrLdr32PairImmValid}; + AArch64ImmOpndProp mopdMemPair64Dest = {Operand::kOpdMem, {kRegTyUndef, kAllRegNum, kRegPropDef }, 64, StrLdr64PairImmValid}; + +@@ -193,6 +201,12 @@ AArch64OpndProp *mopdReg32FDS = &mopdF32RegDestSrc; + AArch64OpndProp *mopdReg64FS = &mopdF64RegSrc; + AArch64OpndProp *mopdReg64FD = &mopdF64RegDest; + AArch64OpndProp *mopdReg64FDS = &mopdF64RegDestSrc; ++AArch64OpndProp *mopdReg128VS = &mopdV128RegSrc; ++AArch64OpndProp *mopdReg128VD = &mopdV128RegDest; ++AArch64OpndProp *mopdReg128VDS = &mopdV128RegDestSrc; ++AArch64OpndProp *mopdReg64VS = &mopdV64RegSrc; ++AArch64OpndProp *mopdReg64VD = &mopdV64RegDest; ++AArch64OpndProp *mopdReg64VDS = &mopdV64RegDestSrc; + AArch64OpndProp *mopdMem = &mopdMem32Src; + AArch64OpndProp *mopdMem8S = &mopdMem8Src; + AArch64OpndProp *mopdMem16S = &mopdMem16Src; +@@ -202,12 +216,14 @@ AArch64OpndProp *mopdMem32SL = &mopdMem32SrcL; + AArch64OpndProp *mopdMem32SH = &mopdMem32SrcH; + AArch64OpndProp *mopdMem64PS = &mopdMemPair64Src; + AArch64OpndProp *mopdMem64S = &mopdMem64Src; ++AArch64OpndProp *mopdMem128S = &mopdMem128Src; + AArch64OpndProp *mopdMem64SL = &mopdMem64SrcL; + AArch64OpndProp *mopdMem8D = &mopdMem8Dest; + AArch64OpndProp *mopdMem16D = &mopdMem16Dest; + AArch64OpndProp *mopdMem32D = &mopdMem32Dest; + AArch64OpndProp *mopdMem32PD = &mopdMemPair32Dest; + AArch64OpndProp *mopdMem64D = &mopdMem64Dest; ++AArch64OpndProp *mopdMem128D = &mopdMem128Dest; + AArch64OpndProp *mopdMem64PD = &mopdMemPair64Dest; + AArch64OpndProp *mopdMem32SPRE = &mopdMem32SrcPre; + AArch64OpndProp *mopdMem32SPOST = &mopdMem32SrcPost; +diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_peep.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_peep.cpp +index 81cc38f8..ca22f12a 100644 +--- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_peep.cpp ++++ 
b/src/mapleall/maple_be/src/cg/aarch64/aarch64_peep.cpp +@@ -75,7 +75,9 @@ void AArch64PeepHole::Run(BB &bb, Insn &insn) { + case MOP_wmovrr: + case MOP_xmovrr: + case MOP_xvmovs: +- case MOP_xvmovd: { ++ case MOP_xvmovd: ++ case MOP_vmovuu: ++ case MOP_vmovvv: { + (static_cast(optimizations[kRemoveMovingtoSameRegOpt]))->Run(bb, insn); + break; + } +diff --git a/src/mapleall/maple_be/src/cg/cgfunc.cpp b/src/mapleall/maple_be/src/cg/cgfunc.cpp +index 33225735..ca51b564 100644 +--- a/src/mapleall/maple_be/src/cg/cgfunc.cpp ++++ b/src/mapleall/maple_be/src/cg/cgfunc.cpp +@@ -375,6 +375,28 @@ Operand *HandleIntrinOp(const BaseNode &parent, BaseNode &expr, CGFunc &cgFunc) + case INTRN_C_ctz32: + case INTRN_C_ctz64: + return cgFunc.SelectCctz(intrinsicopNode); ++ case INTRN_vector_from_scalar_v4i32: ++ case INTRN_vector_from_scalar_v16u8: ++ return cgFunc.SelectVectorFromScalar(intrinsicopNode); ++ case INTRN_vector_merge_v16u8: ++ return cgFunc.SelectVectorMerge(intrinsicopNode); ++ case INTRN_vector_store_v4i32: ++ case INTRN_vector_store_v16u8: ++ return cgFunc.SelectVectorStore(intrinsicopNode); ++ case INTRN_vector_get_high_v2u64: ++ return cgFunc.SelectVectorGetHigh(intrinsicopNode); ++ case INTRN_vector_get_low_v2u64: ++ return cgFunc.SelectVectorGetLow(intrinsicopNode); ++ case INTRN_vector_get_element_v2u32: ++ case INTRN_vector_get_element_v4u32: ++ return cgFunc.SelectVectorGetElement(intrinsicopNode); ++ case INTRN_vector_pairwise_add_v8u16: ++ case INTRN_vector_pairwise_add_v4u32: ++ return cgFunc.SelectVectorPairwiseAdd(intrinsicopNode); ++ case INTRN_vector_set_element_v4u32: ++ return cgFunc.SelectVectorSetElement(intrinsicopNode); ++ case INTRN_vector_reverse_v16u8: ++ return cgFunc.SelectVectorReverse(intrinsicopNode, 32); + default: + ASSERT(false, "Should not reach here."); + return nullptr; +diff --git a/src/mapleall/maple_ir/include/mir_type.h b/src/mapleall/maple_ir/include/mir_type.h +index 3a483326..f4c384aa 100644 +--- a/src/mapleall/maple_ir/include/mir_type.h ++++ b/src/mapleall/maple_ir/include/mir_type.h +@@ -40,6 +40,7 @@ extern bool VerifyPrimType(PrimType primType1, PrimType primType2); // ver + extern uint32 GetPrimTypeSize(PrimType primType); // answer in bytes; 0 if unknown + extern uint32 GetPrimTypeP2Size(PrimType primType); // answer in bytes in power-of-two. 
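A sketch, not part of any patch in this set: the vector branch added to AArch64RegOperand::Emit() above maps an operand size (64 or 128 bits) plus a lane count to the AArch64 arrangement it prints, e.g. "v0.4s" for a whole register or "v2.s[3]" for a single lane, and GetPrimTypeLanes() declared just below supplies that lane count (v4i32 -> 4, v16u8 -> 16). The helper names here (ArrangementSuffix, FormatVectorReg) are illustrative only and do not exist in the source.

#include <cstdint>
#include <iostream>
#include <string>

// Mirrors the width selection in the patched Emit():
// 128-bit: 16 lanes -> b, 8 -> h, 4 -> s, otherwise d; 64-bit: 8 -> b, 4 -> h, otherwise s.
static std::string ArrangementSuffix(uint32_t opndSizeBits, int32_t laneCount) {
  if (opndSizeBits == 128) {
    return laneCount == 16 ? "b" : (laneCount == 8 ? "h" : (laneCount == 4 ? "s" : "d"));
  }
  return laneCount == 8 ? "b" : (laneCount == 4 ? "h" : "s");  // 64-bit case
}

// lanePos == -1 means the whole register (v0.4s); otherwise a single lane (v0.s[2]).
static std::string FormatVectorReg(uint32_t regNo, uint32_t opndSizeBits,
                                   int32_t laneCount, int16_t lanePos) {
  std::string suffix = ArrangementSuffix(opndSizeBits, laneCount);
  std::string name = "v" + std::to_string(regNo);
  if (lanePos == -1) {
    return name + "." + std::to_string(laneCount) + suffix;
  }
  return name + "." + suffix + "[" + std::to_string(lanePos) + "]";
}

int main() {
  std::cout << FormatVectorReg(0, 128, 4, -1) << "\n";   // v0.4s
  std::cout << FormatVectorReg(1, 128, 16, -1) << "\n";  // v1.16b
  std::cout << FormatVectorReg(2, 128, 4, 3) << "\n";    // v2.s[3]
  std::cout << FormatVectorReg(3, 64, 2, -1) << "\n";    // v3.2s
  return 0;
}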
+ extern PrimType GetSignedPrimType(PrimType pty); // return signed version ++extern uint32 GetPrimTypeLanes(PrimType pty); // lane size if vector + extern const char *GetPrimTypeName(PrimType primType); + extern const char *GetPrimTypeJavaName(PrimType primType); + +diff --git a/src/mapleall/maple_ir/src/mir_type.cpp b/src/mapleall/maple_ir/src/mir_type.cpp +index c8aff17d..f0960147 100644 +--- a/src/mapleall/maple_ir/src/mir_type.cpp ++++ b/src/mapleall/maple_ir/src/mir_type.cpp +@@ -296,6 +296,34 @@ uint32 GetPrimTypeP2Size(PrimType primType) { + } + } + ++uint32 GetPrimTypeLanes(PrimType pty) { ++ switch (pty) { ++ case PTY_v2i32: ++ case PTY_v2u32: ++ case PTY_v2f32: ++ case PTY_v2i64: ++ case PTY_v2u64: ++ case PTY_v2f64: ++ return 2; ++ case PTY_v4i16: ++ case PTY_v4u16: ++ case PTY_v4i32: ++ case PTY_v4u32: ++ case PTY_v4f32: ++ return 4; ++ case PTY_v8i8: ++ case PTY_v8u8: ++ case PTY_v8i16: ++ case PTY_v8u16: ++ return 8; ++ case PTY_v16i8: ++ case PTY_v16u8: ++ return 16; ++ default: ++ return 0; ++ } ++} ++ + // return the signed version that has the same size + PrimType GetSignedPrimType(PrimType pty) { + switch (pty) { diff --git a/Patches/memlayout.h b/Patches/memlayout.h new file mode 100644 index 0000000000000000000000000000000000000000..d440f518c321cc4de01a064d44eb8e281769cb33 --- /dev/null +++ b/Patches/memlayout.h @@ -0,0 +1,32 @@ +diff --git a/src/mapleall/maple_be/include/cg/memlayout.h b/src/mapleall/maple_be/include/cg/memlayout.h +index 018150e8..616a7761 100644 +--- a/src/mapleall/maple_be/include/cg/memlayout.h ++++ b/src/mapleall/maple_be/include/cg/memlayout.h +@@ -73,7 +73,7 @@ class CGFunc; + /* keeps track of the allocation of a memory segment */ + class MemSegment { + public: +- explicit MemSegment(MemSegmentKind memSegKind) : kind(memSegKind), size(0) {} ++ explicit MemSegment(MemSegmentKind memSegKind) : kind(memSegKind), size(0), containVector(false) {} + + ~MemSegment() = default; + +@@ -89,9 +89,18 @@ class MemSegment { + return kind; + } + ++ void SetContainVector() { ++ containVector = true; ++ } ++ ++ bool ContainVector() const { ++ return containVector; ++ } ++ + private: + MemSegmentKind kind; + int32 size; /* size is negative if allocated offsets are negative */ ++ bool containVector; + }; /* class MemSegment */ + + /* describes where a symbol is allocated */ diff --git a/Patches/mir_type.cpp b/Patches/mir_type.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2819fd4c30a60b74ae5d1ac1f8ffacdd7121a890 --- /dev/null +++ b/Patches/mir_type.cpp @@ -0,0 +1,39 @@ +diff --git a/src/mapleall/maple_ir/src/mir_type.cpp b/src/mapleall/maple_ir/src/mir_type.cpp +index c8aff17d..f0960147 100644 +--- a/src/mapleall/maple_ir/src/mir_type.cpp ++++ b/src/mapleall/maple_ir/src/mir_type.cpp +@@ -296,6 +296,34 @@ uint32 GetPrimTypeP2Size(PrimType primType) { + } + } + ++uint32 GetPrimTypeLanes(PrimType pty) { ++ switch (pty) { ++ case PTY_v2i32: ++ case PTY_v2u32: ++ case PTY_v2f32: ++ case PTY_v2i64: ++ case PTY_v2u64: ++ case PTY_v2f64: ++ return 2; ++ case PTY_v4i16: ++ case PTY_v4u16: ++ case PTY_v4i32: ++ case PTY_v4u32: ++ case PTY_v4f32: ++ return 4; ++ case PTY_v8i8: ++ case PTY_v8u8: ++ case PTY_v8i16: ++ case PTY_v8u16: ++ return 8; ++ case PTY_v16i8: ++ case PTY_v16u8: ++ return 16; ++ default: ++ return 0; ++ } ++} ++ + // return the signed version that has the same size + PrimType GetSignedPrimType(PrimType pty) { + switch (pty) { diff --git a/Patches/mir_type.h b/Patches/mir_type.h new file mode 100644 index 
0000000000000000000000000000000000000000..3377205d3c9935bc30904de6f499dca9f96cb506 --- /dev/null +++ b/Patches/mir_type.h @@ -0,0 +1,12 @@ +diff --git a/src/mapleall/maple_ir/include/mir_type.h b/src/mapleall/maple_ir/include/mir_type.h +index 3a483326..f4c384aa 100644 +--- a/src/mapleall/maple_ir/include/mir_type.h ++++ b/src/mapleall/maple_ir/include/mir_type.h +@@ -40,6 +40,7 @@ extern bool VerifyPrimType(PrimType primType1, PrimType primType2); // ver + extern uint32 GetPrimTypeSize(PrimType primType); // answer in bytes; 0 if unknown + extern uint32 GetPrimTypeP2Size(PrimType primType); // answer in bytes in power-of-two. + extern PrimType GetSignedPrimType(PrimType pty); // return signed version ++extern uint32 GetPrimTypeLanes(PrimType pty); // lane size if vector + extern const char *GetPrimTypeName(PrimType primType); + extern const char *GetPrimTypeJavaName(PrimType primType); + diff --git a/a b/a new file mode 100644 index 0000000000000000000000000000000000000000..cafa280c985b7c89b8fc079391e69c1b6b2c2ff2 --- /dev/null +++ b/a @@ -0,0 +1,549 @@ +commit 78bf941857556cd9755c5d9aa42d714cf503de23 +Author: Alfred Huang +Date: Mon Jun 20 15:49:32 2022 -0700 + + Updating to "new" lmbc + +diff --git a/src/mapleall/maple_be/include/cg/aarch64/aarch64_cgfunc.h b/src/mapleall/maple_be/include/cg/aarch64/aarch64_cgfunc.h +index 525a62031..6e6e1dd88 100644 +--- a/src/mapleall/maple_be/include/cg/aarch64/aarch64_cgfunc.h ++++ b/src/mapleall/maple_be/include/cg/aarch64/aarch64_cgfunc.h +@@ -104,6 +104,7 @@ class AArch64CGFunc : public CGFunc { + + MIRType *LmbcGetAggTyFromCallSite(StmtNode *stmt, std::vector **parmList); + RegOperand &GetOrCreateResOperand(const BaseNode &parent, PrimType primType); ++ MIRStructType *GetLmbcStructArgType(BaseNode &stmt, int32 argNo); + + void IntrinsifyGetAndAddInt(ListOperand &srcOpnds, PrimType pty); + void IntrinsifyGetAndSetInt(ListOperand &srcOpnds, PrimType pty); +@@ -191,7 +192,8 @@ class AArch64CGFunc : public CGFunc { + Operand *SelectIread(const BaseNode &parent, IreadNode &expr, + int extraOffset = 0, PrimType finalBitFieldDestType = kPtyInvalid) override; + Operand *SelectIreadoff(const BaseNode &parent, IreadoffNode &ireadoff) override; +- Operand *SelectIreadfpoff(const BaseNode &parent, IreadFPoffNode &ireadoff) override; ++ Operand *SelectIreadfpoff(const BaseNode &parent, IreadFPoffNode &ireadoff /*, MIRType &ty, ++ ListOperand &srcOpnds, AArch64CallConvImpl &parmLocator */) override; + Operand *SelectIntConst(MIRIntConst &intConst) override; + Operand *HandleFmovImm(PrimType stype, int64 val, MIRConst &mirConst, const BaseNode &parent); + Operand *SelectFloatConst(MIRFloatConst &floatConst, const BaseNode &parent) override; +@@ -866,7 +868,7 @@ class AArch64CGFunc : public CGFunc { + void SelectParmListDreadSmallAggregate(const MIRSymbol &sym, MIRType &structType, + ListOperand &srcOpnds, + int32 offset, AArch64CallConvImpl &parmLocator, FieldID fieldID); +- void SelectParmListIreadSmallAggregate(const IreadNode &iread, MIRType &structType, ListOperand &srcOpnds, ++ void SelectParmListIreadSmallAggregate(BaseNode &iread, MIRType &structType, ListOperand &srcOpnds, + int32 offset, AArch64CallConvImpl &parmLocator); + void SelectParmListDreadLargeAggregate(const MIRSymbol &sym, MIRType &structType, + ListOperand &srcOpnds, +@@ -875,12 +877,12 @@ class AArch64CGFunc : public CGFunc { + AArch64CallConvImpl &parmLocator, int32 &structCopyOffset, int32 fromOffset); + void CreateCallStructMemcpyToParamReg(MIRType &structType, int32 structCopyOffset, 
AArch64CallConvImpl &parmLocator, + ListOperand &srcOpnds); +- void SelectParmListForAggregate(BaseNode &argExpr, ListOperand &srcOpnds, AArch64CallConvImpl &parmLocator, +- int32 &structCopyOffset); ++ void SelectParmListForAggregate(BaseNode &parent, BaseNode &argExpr, ListOperand &srcOpnds, ++ AArch64CallConvImpl &parmLocator, int32 &structCopyOffset, int32 argNo); + size_t SelectParmListGetStructReturnSize(StmtNode &naryNode); + bool MarkParmListCall(BaseNode &expr); +- void SelectParmListPreprocessLargeStruct(BaseNode &argExpr, int32 &structCopyOffset); +- void SelectParmListPreprocess(const StmtNode &naryNode, size_t start, std::set &specialArgs); ++ void SelectParmListPreprocessLargeStruct(BaseNode &parent, BaseNode &argExpr, int32 &structCopyOffset, int32 argNo); ++ void SelectParmListPreprocess(StmtNode &naryNode, size_t start, std::set &specialArgs); + void SelectParmList(StmtNode &naryNode, ListOperand &srcOpnds, bool isCallNative = false); + Operand *SelectClearStackCallParam(const AddrofNode &expr, int64 &offsetValue); + void SelectClearStackCallParmList(const StmtNode &naryNode, ListOperand &srcOpnds, +diff --git a/src/mapleall/maple_be/include/cg/cgfunc.h b/src/mapleall/maple_be/include/cg/cgfunc.h +index a4aed235e..43cc353ec 100644 +--- a/src/mapleall/maple_be/include/cg/cgfunc.h ++++ b/src/mapleall/maple_be/include/cg/cgfunc.h +@@ -1154,8 +1154,8 @@ class CGFunc { + #endif + MapleMap vregsToPregsMap; + uint32 totalInsns = 0; +- int32 structCopySize; +- int32 maxParamStackSize; ++ int32 structCopySize = 0; ++ int32 maxParamStackSize = 0; + static constexpr int kRegIncrStepLen = 80; /* reg number increate step length */ + + bool hasVLAOrAlloca = false; +diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp +index ee0b7293b..47c814fb7 100644 +--- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp ++++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp +@@ -161,6 +161,28 @@ bool IsBlkassignForPush(BlkassignoffNode &bNode) { + return spBased; + } + ++MIRStructType *AArch64CGFunc::GetLmbcStructArgType(BaseNode &stmt, int32 argNo) { ++ MIRType *ty = nullptr; ++ if (stmt.GetOpCode() == OP_call) { ++ CallNode &callNode = static_cast(stmt); ++ MIRFunction *callFunc = GlobalTables::GetFunctionTable().GetFunctionFromPuidx(callNode.GetPUIdx()); ++ if (callFunc->GetFormalCount() < (argNo + 1)) { ++ return nullptr; /* formals less than actuals */ ++ } ++ ty = GlobalTables::GetTypeTable().GetTypeFromTyIdx(callFunc->GetFormalDefVec()[argNo].formalTyIdx); ++ } else if (stmt.GetOpCode() == OP_icallproto) { ++ IcallNode &icallproto = static_cast(stmt); ++ MIRType *type = GlobalTables::GetTypeTable().GetTypeFromTyIdx(icallproto.GetRetTyIdx()); ++ MIRFuncType *fType = static_cast(type); ++ if (fType->GetParamTypeList().size() < (argNo + 1)) { ++ return nullptr; ++ } ++ ty = GlobalTables::GetTypeTable().GetTypeFromTyIdx(fType->GetNthParamType(argNo)); ++ } ++ CHECK_FATAL(ty && ty->IsStructType(), "lmbc agg arg error"); ++ return static_cast(ty); ++} ++ + RegOperand &AArch64CGFunc::GetOrCreateResOperand(const BaseNode &parent, PrimType primType) { + RegOperand *resOpnd = nullptr; + if (parent.GetOpCode() == OP_regassign) { +@@ -3090,14 +3112,18 @@ RegOperand *AArch64CGFunc::LmbcStructReturnLoad(int32 offset) { + return result; + } + +-Operand *AArch64CGFunc::SelectIreadfpoff(const BaseNode &parent, IreadFPoffNode &ireadoff) { ++Operand *AArch64CGFunc::SelectIreadfpoff(const BaseNode &parent, IreadFPoffNode 
&ireadoff /*, MIRType &ty, ++ ListOperand &srcOpnds, AArch64CallConvImpl &parmLocator*/) { + int32 offset = ireadoff.GetOffset(); + PrimType primType = ireadoff.GetPrimType(); + uint32 bytelen = GetPrimTypeSize(primType); ++#if 1 + uint32 bitlen = bytelen * kBitsPerByte; ++#endif + RegType regty = GetRegTyFromPrimTy(primType); + RegOperand *result = nullptr; + if (offset >= 0) { ++#if 1 + LmbcFormalParamInfo *info = GetLmbcFormalParamInfo(offset); + if (info->GetPrimType() == PTY_agg) { + if (info->IsOnStack()) { +@@ -3119,10 +3145,23 @@ Operand *AArch64CGFunc::SelectIreadfpoff(const BaseNode &parent, IreadFPoffNode + result = &GetOrCreatePhysicalRegisterOperand((AArch64reg)(info->GetRegNO()), bitlen, regty); + } + } ++#endif ++ CHECK_FATAL(0, "Invalid ireadfpoff offset"); + } else { + if (primType == PTY_agg) { +- CHECK_FATAL(parent.GetOpCode() == OP_regassign, "SelectIreadfpoff of agg"); +- result = LmbcStructReturnLoad(offset); ++#if OLD_LMBC // TBD ++ if (parent.GetOpCode() == OP_call || parent.GetOpCode() == OP_icallproto) { ++ /* ireadfpoff is now for loading locals under calls. Large agg arg is handled via ++ SelectParmListPreprocess, Hence only small agg is handled here */ ++ SelectParmListIreadSmallAggregate(ireadoff, ty, srcOpnds, 0, parmLocator); ++ } else { ++#endif ++ /* agg return */ ++ CHECK_FATAL(parent.GetOpCode() == OP_regassign, "SelectIreadfpoff of agg"); ++ result = LmbcStructReturnLoad(offset); ++#if OLD_LMBC // TBD ++ } ++#endif + } else { + result = GenLmbcParamLoad(offset, bytelen, regty, primType); + } +@@ -7355,12 +7394,26 @@ void AArch64CGFunc::SelectParmListDreadSmallAggregate(const MIRSymbol &sym, MIRT + } + } + +-void AArch64CGFunc::SelectParmListIreadSmallAggregate(const IreadNode &iread, MIRType &structType, ++void AArch64CGFunc::SelectParmListIreadSmallAggregate(BaseNode &iread, MIRType &structType, + ListOperand &srcOpnds, int32 offset, + AArch64CallConvImpl &parmLocator) { + int32 symSize = GetBecommon().GetTypeSize(structType.GetTypeIndex().GetIdx()); +- RegOperand *addrOpnd0 = static_cast(HandleExpr(iread, *(iread.Opnd(0)))); +- RegOperand *addrOpnd1 = &LoadIntoRegister(*addrOpnd0, iread.Opnd(0)->GetPrimType()); ++ RegOperand *addrOpnd1; ++ if (iread.GetOpCode() == OP_iread) { ++ RegOperand *addrOpnd0 = static_cast(HandleExpr(iread, *(iread.Opnd(0)))); ++ addrOpnd1 = &LoadIntoRegister(*addrOpnd0, iread.Opnd(0)->GetPrimType()); ++ } else if (iread.GetOpCode() == OP_ireadfpoff) { ++ IreadFPoffNode &ireadoff = static_cast(iread); ++ RegOperand *rfp = &GetOrCreatePhysicalRegisterOperand(RFP, k64BitSize, kRegTyInt); ++ RegOperand *addrOpnd0 = &CreateRegisterOperandOfType(PTY_a64); ++ ImmOperand &immOpnd = CreateImmOperand(ireadoff.GetOffset(), k32BitSize, true); ++ GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_xaddrri12, *addrOpnd0, *rfp, immOpnd)); ++ addrOpnd1 = &LoadIntoRegister(*addrOpnd0, PTY_i64); ++ } else if (iread.GetOpCode() == OP_ireadoff) { ++ IreadoffNode &ireadoff = static_cast(iread); ++ RegOperand *addrOpnd0 = static_cast(HandleExpr(ireadoff, *(ireadoff.Opnd(0)))); ++ addrOpnd1 = &LoadIntoRegister(*addrOpnd0, PTY_i64); ++ } + CCLocInfo ploc; + parmLocator.LocateNextParm(structType, ploc); + if (ploc.reg0 == 0) { +@@ -7734,8 +7787,8 @@ void AArch64CGFunc::CreateCallStructMemcpyToParamReg(MIRType &structType, int32 + } + } + +-void AArch64CGFunc::SelectParmListForAggregate(BaseNode &argExpr, ListOperand &srcOpnds, +- AArch64CallConvImpl &parmLocator, int32 &structCopyOffset) { ++void AArch64CGFunc::SelectParmListForAggregate(BaseNode 
&parent, BaseNode &argExpr, ListOperand &srcOpnds, ++ AArch64CallConvImpl &parmLocator, int32 &structCopyOffset, int32 argNo) { + uint64 symSize; + int32 rhsOffset = 0; + if (argExpr.GetOpCode() == OP_dread) { +@@ -7783,6 +7836,32 @@ void AArch64CGFunc::SelectParmListForAggregate(BaseNode &argExpr, ListOperand &s + } else { + SelectParmListIreadLargeAggregate(iread, *ty, srcOpnds, parmLocator, structCopyOffset, rhsOffset); + } ++ } else if (argExpr.GetOpCode() == OP_ireadfpoff) { ++ IreadFPoffNode &iread = static_cast(argExpr); ++ MIRStructType *ty = GetLmbcStructArgType(parent, argNo); ++ if (ty == nullptr) { /* param < arg */ ++ return; ++ } ++ symSize = GetBecommon().GetTypeSize(ty->GetTypeIndex().GetIdx()); ++ if (symSize <= k16ByteSize) { ++ SelectParmListIreadSmallAggregate(iread, *ty, srcOpnds, rhsOffset, parmLocator); ++ } else { ++ CreateCallStructMemcpyToParamReg(*ty, structCopyOffset, parmLocator, srcOpnds); ++ structCopyOffset += static_cast(RoundUp(symSize, kSizeOfPtr)); ++ } ++ } else if (argExpr.GetOpCode() == OP_ireadoff) { ++ IreadoffNode &iread = static_cast(argExpr); ++ MIRStructType *ty = GetLmbcStructArgType(parent, argNo); ++ if (ty == nullptr) { ++ return; ++ } ++ symSize = GetBecommon().GetTypeSize(ty->GetTypeIndex().GetIdx()); ++ if (symSize <= k16ByteSize) { ++ SelectParmListIreadSmallAggregate(iread, *ty, srcOpnds, rhsOffset, parmLocator); ++ } else { ++ CreateCallStructMemcpyToParamReg(*ty, structCopyOffset, parmLocator, srcOpnds); ++ structCopyOffset += static_cast(RoundUp(symSize, kSizeOfPtr)); ++ } + } else { + CHECK_FATAL(0, "NYI"); + } +@@ -7825,7 +7904,7 @@ size_t AArch64CGFunc::SelectParmListGetStructReturnSize(StmtNode &naryNode) { + return 0; + } + +-void AArch64CGFunc::SelectParmListPreprocessLargeStruct(BaseNode &argExpr, int32 &structCopyOffset) { ++void AArch64CGFunc::SelectParmListPreprocessLargeStruct(BaseNode &parent, BaseNode &argExpr, int32 &structCopyOffset, int32 argNo) { + uint64 symSize; + int32 rhsOffset = 0; + if (argExpr.GetOpCode() == OP_dread) { +@@ -7872,6 +7951,22 @@ void AArch64CGFunc::SelectParmListPreprocessLargeStruct(BaseNode &argExpr, int32 + uint32 numMemOp = static_cast(RoundUp(symSize, kSizeOfPtr) / kSizeOfPtr); + structCopyOffset += static_cast(numMemOp * kSizeOfPtr); + } ++ } else if (argExpr.GetOpCode() == OP_ireadfpoff) { ++ IreadFPoffNode &ireadoff = static_cast(argExpr); ++ MIRStructType *ty = GetLmbcStructArgType(parent, argNo); ++ symSize = GetBecommon().GetTypeSize(ty->GetTypeIndex()); ++ if (symSize > 16 /*kParmMemcpySize*/) { ++#if OLD_LMBC // TBD ++ MemOperand *addrOpnd0 = GenLmbcFpMemOperand(ireadoff.GetOffset(), kSizeOfPtr, RFP); ++ RegOperand *addrOpnd1 = &LoadIntoRegister(*addrOpnd0, PTY_i64); ++#endif ++ RegOperand *rfp = &GetOrCreatePhysicalRegisterOperand(RFP, k64BitSize, kRegTyInt); ++ RegOperand *addrOpnd = &CreateRegisterOperandOfType(PTY_a64); ++ ImmOperand &immOpnd = CreateImmOperand(ireadoff.GetOffset(), k32BitSize, true); ++ GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_xaddrri12, *addrOpnd, *rfp, immOpnd)); ++ CreateCallStructParamMemcpy(nullptr, addrOpnd, static_cast(symSize), structCopyOffset, 0); ++ structCopyOffset += static_cast(RoundUp(symSize, kSizeOfPtr)); ++ } + } + } + +@@ -7903,7 +7998,7 @@ bool AArch64CGFunc::MarkParmListCall(BaseNode &expr) { + return false; + } + +-void AArch64CGFunc::SelectParmListPreprocess(const StmtNode &naryNode, size_t start, std::set &specialArgs) { ++void AArch64CGFunc::SelectParmListPreprocess(StmtNode &naryNode, size_t start, std::set &specialArgs) { 
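A sketch, not part of the patches: the decision the new OP_ireadfpoff / OP_ireadoff branches of SelectParmListForAggregate() above make for an lmbc aggregate actual. The struct type is resolved at the call site (nullptr when there are more actuals than declared parameters), a struct of at most 16 bytes goes through the small-aggregate register path, and anything larger is copied into the outgoing-argument area with the copy offset advanced by the pointer-aligned size. StructInfo, PassSmallAggInRegs, and CopyLargeAggToArgArea are stand-ins for the MIR and CG helpers used in the source, not real APIs.

#include <cstdint>

struct StructInfo { uint64_t sizeInBytes; };            // stand-in for MIRStructType + its becommon size

constexpr uint64_t kSmallAggLimit = 16;                  // k16ByteSize: at most two 8-byte registers
constexpr uint64_t kPtrSize = 8;                         // kSizeOfPtr on AArch64

static uint64_t RoundUpTo(uint64_t v, uint64_t a) { return (v + a - 1) / a * a; }

static void PassSmallAggInRegs(const StructInfo &) {}    // ~ SelectParmListIreadSmallAggregate
static void CopyLargeAggToArgArea(const StructInfo &) {} // ~ CreateCallStructMemcpyToParamReg

// ty == nullptr corresponds to "more actuals than formals": the argument is skipped quietly.
static void LowerLmbcAggArg(const StructInfo *ty, int64_t &structCopyOffset) {
  if (ty == nullptr) {
    return;
  }
  if (ty->sizeInBytes <= kSmallAggLimit) {
    PassSmallAggInRegs(*ty);
  } else {
    CopyLargeAggToArgArea(*ty);
    structCopyOffset += static_cast<int64_t>(RoundUpTo(ty->sizeInBytes, kPtrSize));
  }
}

int main() {
  int64_t off = 0;
  StructInfo small{12};
  StructInfo large{48};
  LowerLmbcAggArg(&small, off);  // register path, off stays 0
  LowerLmbcAggArg(&large, off);  // memcpy path, off becomes 48
  return 0;
}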
+ size_t i = start; + int32 structCopyOffset = GetMaxParamStackSize() - GetStructCopySize(); + for (; i < naryNode.NumOpnds(); ++i) { +@@ -7916,7 +8011,7 @@ void AArch64CGFunc::SelectParmListPreprocess(const StmtNode &naryNode, size_t st + if (primType != PTY_agg) { + continue; + } +- SelectParmListPreprocessLargeStruct(*argExpr, structCopyOffset); ++ SelectParmListPreprocessLargeStruct(naryNode, *argExpr, structCopyOffset, i); + } + } + +@@ -8016,7 +8111,7 @@ void AArch64CGFunc::SelectParmList(StmtNode &naryNode, ListOperand &srcOpnds, bo + } + /* use alloca */ + if (primType == PTY_agg) { +- SelectParmListForAggregate(*argExpr, srcOpnds, parmLocator, structCopyOffset); ++ SelectParmListForAggregate(naryNode, *argExpr, srcOpnds, parmLocator, structCopyOffset, i); + continue; + } + ty = GlobalTables::GetTypeTable().GetTypeTable()[static_cast(primType)]; +@@ -9434,7 +9529,11 @@ Operand *AArch64CGFunc::GetBaseReg(const AArch64SymbolAlloc &symAlloc) { + } + + if (fsp == nullptr) { +- fsp = &GetOrCreatePhysicalRegisterOperand(RFP, kSizeOfPtr * kBitsPerByte, kRegTyInt); ++ if (GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc) { ++ fsp = &GetOrCreatePhysicalRegisterOperand(RSP, kSizeOfPtr * kBitsPerByte, kRegTyInt); ++ } else { ++ fsp = &GetOrCreatePhysicalRegisterOperand(RFP, kSizeOfPtr * kBitsPerByte, kRegTyInt); ++ } + } + return fsp; + } +diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_memlayout.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_memlayout.cpp +index 48054ebe8..16d4884f2 100644 +--- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_memlayout.cpp ++++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_memlayout.cpp +@@ -58,7 +58,9 @@ uint32 AArch64MemLayout::ComputeStackSpaceRequirementForCall(StmtNode &stmt, in + ty = GlobalTables::GetTypeTable().GetTypeTable()[static_cast(opnd->GetPrimType())]; + } else { + Opcode opndOpcode = opnd->GetOpCode(); +- ASSERT(opndOpcode == OP_dread || opndOpcode == OP_iread, "opndOpcode should be OP_dread or OP_iread"); ++ if (be.GetMIRModule().GetFlavor() != kFlavorLmbc) { ++ ASSERT(opndOpcode == OP_dread || opndOpcode == OP_iread, "opndOpcode should be OP_dread or OP_iread"); ++ } + if (opndOpcode == OP_dread) { + DreadNode *dread = static_cast(opnd); + MIRSymbol *sym = be.GetMIRModule().CurFunction()->GetLocalOrGlobalSymbol(dread->GetStIdx()); +@@ -72,8 +74,7 @@ uint32 AArch64MemLayout::ComputeStackSpaceRequirementForCall(StmtNode &stmt, in + ty = static_cast(ty)->GetFieldType(dread->GetFieldID()); + } + } +- } else { +- /* OP_iread */ ++ } else if (opndOpcode == OP_iread) { + IreadNode *iread = static_cast(opnd); + ty = GlobalTables::GetTypeTable().GetTypeFromTyIdx(iread->GetTyIdx()); + ASSERT(ty->GetKind() == kTypePointer, "expect pointer"); +@@ -87,6 +88,11 @@ uint32 AArch64MemLayout::ComputeStackSpaceRequirementForCall(StmtNode &stmt, in + ty = static_cast(ty)->GetFieldType(iread->GetFieldID()); + } + } ++ } else if ((opndOpcode == OP_ireadfpoff || opndOpcode == OP_ireadoff || opndOpcode == OP_dreadoff) && opnd->GetPrimType() == PTY_agg) { ++ ty = static_cast(cgFunc)->GetLmbcStructArgType(stmt, i); ++ } ++ if (ty == nullptr) { /* type mismatch */ ++ continue; + } + } + CCLocInfo ploc; +@@ -190,6 +196,7 @@ void AArch64MemLayout::LayoutVarargParams() { + } + + void AArch64MemLayout::LayoutFormalParams() { ++#if OLD_LMBC // TBD + bool isLmbc = (be.GetMIRModule().GetFlavor() == kFlavorLmbc); + if (isLmbc && mirFunction->GetFormalCount() == 0) { + /* +@@ -201,6 +208,7 @@ void AArch64MemLayout::LayoutFormalParams() { + 
segArgsRegPassed.SetSize(mirFunction->GetOutParmSize()); + return; + } ++#endif + + AArch64CallConvImpl parmLocator(be); + CCLocInfo ploc; +@@ -255,8 +263,10 @@ void AArch64MemLayout::LayoutFormalParams() { + segArgsRegPassed.SetSize(static_cast(RoundUp(segArgsRegPassed.GetSize(), align))); + symLoc->SetOffset(segArgsRegPassed.GetSize()); + segArgsRegPassed.SetSize(segArgsRegPassed.GetSize() + size); ++#if OLD_LMBC // TBD + } else if (isLmbc) { + segArgsRegPassed.SetSize(segArgsRegPassed.GetSize() + k8ByteSize); ++#endif + } + } else { /* stack */ + uint32 size; +@@ -371,11 +381,15 @@ void AArch64MemLayout::LayoutReturnRef(std::vector &returnDelays, + symLoc->SetOffset(segRefLocals.GetSize()); + segRefLocals.SetSize(segRefLocals.GetSize() + be.GetTypeSize(tyIdx)); + } ++#if OLD_LMBC // TBD + if (be.GetMIRModule().GetFlavor() == kFlavorLmbc) { + segArgsToStkPass.SetSize(mirFunction->GetOutParmSize() + kDivide2 * k8ByteSize); + } else { + segArgsToStkPass.SetSize(FindLargestActualArea(structCopySize)); + } ++#else ++ segArgsToStkPass.SetSize(FindLargestActualArea(structCopySize)); ++#endif + maxParmStackSize = static_cast(segArgsToStkPass.GetSize()); + if (Globals::GetInstance()->GetOptimLevel() == 0) { + AssignSpillLocationsToPseudoRegisters(); +@@ -527,14 +541,18 @@ uint64 AArch64MemLayout::StackFrameSize() const { + uint64 total = segArgsRegPassed.GetSize() + static_cast(cgFunc)->SizeOfCalleeSaved() + + GetSizeOfRefLocals() + locals().GetSize() + GetSizeOfSpillReg(); + ++#if OLD_LMBC // TBD + if (cgFunc->GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc) { ++#endif + if (GetSizeOfGRSaveArea() > 0) { + total += RoundUp(GetSizeOfGRSaveArea(), kAarch64StackPtrAlignment); + } + if (GetSizeOfVRSaveArea() > 0) { + total += RoundUp(GetSizeOfVRSaveArea(), kAarch64StackPtrAlignment); + } ++#if OLD_LMBC // TBD + } ++#endif + + /* + * if the function does not have VLA nor alloca, +diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_proepilog.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_proepilog.cpp +index 7e6a33f40..d6208e2f4 100644 +--- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_proepilog.cpp ++++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_proepilog.cpp +@@ -1115,9 +1115,11 @@ void AArch64GenProEpilog::AppendInstructionAllocateCallFrameDebug(AArch64reg reg + ipoint = cgFunc.GetCurBB()->GetLastInsn(); + cfiOffset = stackFrameSize; + (void)InsertCFIDefCfaOffset(cfiOffset, *ipoint); ++#if OLD_LMBC //TBD + if (cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc) { + argsToStkPassSize -= (kDivide2 * k8ByteSize); + } ++#endif + ipoint = &CreateAndAppendInstructionForAllocateCallFrame(argsToStkPassSize, reg0, reg1, rty); + CHECK_FATAL(ipoint != nullptr, "ipoint should not be nullptr at this point"); + cfiOffset = GetOffsetFromCFA(); +@@ -1217,7 +1219,9 @@ void AArch64GenProEpilog::GeneratePushRegs() { + Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt); + Operand &fpOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(stackBaseReg, k64BitSize, kRegTyInt); + int64 argsToStkPassSize = cgFunc.GetMemlayout()->SizeOfArgsToStackPass(); ++#if 1 + bool isLmbc = cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc; ++#endif + if ((argsToStkPassSize > 0) || isLmbc) { + Operand *immOpnd; + if (isLmbc) { +@@ -1255,14 +1259,18 @@ void AArch64GenProEpilog::GeneratePushRegs() { + + AArch64MemLayout *memLayout = static_cast(cgFunc.GetMemlayout()); + int32 offset; ++#if OLD_LMBC // TBD + if (cgFunc.GetMirModule().GetFlavor() == 
MIRFlavor::kFlavorLmbc) { + offset = static_cast(memLayout->RealStackFrameSize() - + aarchCGFunc.SizeOfCalleeSaved() - memLayout->GetSizeOfLocals()); + } else { ++#endif + offset = static_cast(memLayout->RealStackFrameSize() - + (aarchCGFunc.SizeOfCalleeSaved() - (kDivide2 * kIntregBytelen) /* for FP/LR */) - + memLayout->SizeOfArgsToStackPass()); ++#if OLD_LMBC // TBD + } ++#endif + + if (cgFunc.GetCG()->IsStackProtectorStrong() || cgFunc.GetCG()->IsStackProtectorAll()) { + offset -= static_cast(kAarch64StackPtrAlignment); +@@ -1323,11 +1331,14 @@ void AArch64GenProEpilog::GeneratePushUnnamedVarargRegs() { + } + uint32 dataSizeBits = size * kBitsPerByte; + uint32 offset; ++#if OLD_LMBC // TBD + if (cgFunc.GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc) { ++#endif + offset = static_cast(memlayout->GetGRSaveAreaBaseLoc()); /* SP reference */ + if (memlayout->GetSizeOfGRSaveArea() % kAarch64StackPtrAlignment) { + offset += size; /* End of area should be aligned. Hole between VR and GR area */ + } ++#if OLD_LMBC // TBD + } else { + offset = -memlayout->GetSizeOfGRSaveArea(); /* FP reference */ + if (memlayout->GetSizeOfGRSaveArea() % kAarch64StackPtrAlignment) { +@@ -1335,6 +1346,7 @@ void AArch64GenProEpilog::GeneratePushUnnamedVarargRegs() { + } + } + uint32 grSize = -offset; ++#endif + uint32 start_regno = k8BitSize - (memlayout->GetSizeOfGRSaveArea() / size); + ASSERT(start_regno <= k8BitSize, "Incorrect starting GR regno for GR Save Area"); + for (uint32 i = start_regno + static_cast(R0); i < static_cast(R8); i++) { +@@ -1345,11 +1357,15 @@ void AArch64GenProEpilog::GeneratePushUnnamedVarargRegs() { + } + } + Operand *stackLoc; ++#if OLD_LMBC // TBD + if (cgFunc.GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc) { ++#endif + stackLoc = &aarchCGFunc.CreateStkTopOpnd(offset + tmpOffset, dataSizeBits); ++#if OLD_LMBC // TBD + } else { + stackLoc = aarchCGFunc.GenLmbcFpMemOperand(offset, size); + } ++#endif + RegOperand ® = + aarchCGFunc.GetOrCreatePhysicalRegisterOperand(static_cast(i), k64BitSize, kRegTyInt); + Insn &inst = +@@ -1358,11 +1374,15 @@ void AArch64GenProEpilog::GeneratePushUnnamedVarargRegs() { + offset += size; + } + if (!CGOptions::UseGeneralRegOnly()) { ++#if OLD_LMBC // TBD + if (cgFunc.GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc) { ++#endif + offset = static_cast(memlayout->GetVRSaveAreaBaseLoc()); ++#if OLD_LMBC // TBD + } else { + offset = -(memlayout->GetSizeOfVRSaveArea() + grSize); + } ++#endif + start_regno = k8BitSize - (memlayout->GetSizeOfVRSaveArea() / (size * k2BitSize)); + ASSERT(start_regno <= k8BitSize, "Incorrect starting GR regno for VR Save Area"); + for (uint32 i = start_regno + static_cast(V0); i < static_cast(V8); i++) { +@@ -1373,11 +1393,15 @@ void AArch64GenProEpilog::GeneratePushUnnamedVarargRegs() { + } + } + Operand *stackLoc; ++#if OLD_LMBC // TBD + if (cgFunc.GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc) { ++#endif + stackLoc = &aarchCGFunc.CreateStkTopOpnd(offset + tmpOffset, dataSizeBits); ++#if OLD_LMBC // TBD + } else { + stackLoc = aarchCGFunc.GenLmbcFpMemOperand(offset, size); + } ++#endif + RegOperand ® = + aarchCGFunc.GetOrCreatePhysicalRegisterOperand(static_cast(i), k64BitSize, kRegTyFloat); + Insn &inst = +@@ -1479,7 +1503,9 @@ void AArch64GenProEpilog::GenerateProlog(BB &bb) { + } + if (useFP) { + Operand &fpOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(stackBaseReg, k64BitSize, kRegTyInt); ++#if 1 + bool isLmbc = cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc; ++#endif + int64 
argsToStkPassSize = cgFunc.GetMemlayout()->SizeOfArgsToStackPass(); + if ((argsToStkPassSize > 0) || isLmbc) { + Operand *immOpnd; +@@ -1682,7 +1708,10 @@ void AArch64GenProEpilog::AppendInstructionDeallocateCallFrameDebug(AArch64reg r + * ldp/stp's imm should be within -512 and 504; + * if ldp's imm > 504, we fall back to the ldp-add version + */ ++#if OLD_LMBC // TBD + bool isLmbc = (cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc); ++#endif ++ bool isLmbc = false; + if (cgFunc.HasVLAOrAlloca() || argsToStkPassSize == 0 || isLmbc) { + int lmbcOffset = 0; + if (isLmbc == false) { +@@ -1769,14 +1798,18 @@ void AArch64GenProEpilog::GeneratePopRegs() { + + AArch64MemLayout *memLayout = static_cast(cgFunc.GetMemlayout()); + int32 offset; ++#if OLD_LMBC // TBD + if (cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc) { + offset = static_cast(memLayout->RealStackFrameSize() - + aarchCGFunc.SizeOfCalleeSaved() - memLayout->GetSizeOfLocals()); + } else { ++#endif + offset = static_cast(cgFunc.GetMemlayout())->RealStackFrameSize() - + (aarchCGFunc.SizeOfCalleeSaved() - (kDivide2 * kIntregBytelen) /* for FP/LR */) - + memLayout->SizeOfArgsToStackPass(); ++#if OLD_LMBC // TBD + } ++#endif + + if (cgFunc.GetCG()->IsStackProtectorStrong() || cgFunc.GetCG()->IsStackProtectorAll()) { + offset -= static_cast(kAarch64StackPtrAlignment); diff --git a/a.diff b/a.diff new file mode 100644 index 0000000000000000000000000000000000000000..1140cde277450b373b5ac7fd59acc6169dd3205d --- /dev/null +++ b/a.diff @@ -0,0 +1,86 @@ +diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp +index 1b5dab3ca..72e396ca1 100644 +--- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp ++++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp +@@ -171,6 +171,7 @@ MIRStructType *AArch64CGFunc::GetLmbcStructArgType(BaseNode &stmt, int32 argNo) + } + ty = GlobalTables::GetTypeTable().GetTypeFromTyIdx(callFunc->GetFormalDefVec()[argNo].formalTyIdx); + } else if (stmt.GetOpCode() == OP_icallproto) { ++ argNo--; /* 1st opnd of icallproto is funcname, skip it relative to param list */ + IcallNode &icallproto = static_cast(stmt); + MIRType *type = GlobalTables::GetTypeTable().GetTypeFromTyIdx(icallproto.GetRetTyIdx()); + MIRFuncType *fType = static_cast(type); +@@ -3180,47 +3181,12 @@ Operand *AArch64CGFunc::SelectIreadfpoff(const BaseNode &parent, IreadFPoffNode + RegType regty = GetRegTyFromPrimTy(primType); + RegOperand *result = nullptr; + if (offset >= 0) { +-#if OLD_LMBC // TBD +- uint32 bitlen = bytelen * kBitsPerByte; +- LmbcFormalParamInfo *info = GetLmbcFormalParamInfo(offset); +- if (info->GetPrimType() == PTY_agg) { +- if (info->IsOnStack()) { +- result = GenLmbcParamLoad(info->GetOnStackOffset(), GetPrimTypeSize(PTY_a64), kRegTyInt, PTY_a64); +- regno_t baseRegno = result->GetRegisterNumber(); +- result = GenLmbcParamLoad(offset - info->GetOffset(), bytelen, regty, primType, (AArch64reg)baseRegno); +- } else if (primType == PTY_agg) { +- CHECK_FATAL(parent.GetOpCode() == OP_regassign, "SelectIreadfpoff of agg"); +- result = LmbcStructReturnLoad(offset); +- } else { +- result = GenLmbcParamLoad(offset, bytelen, regty, primType); +- } +- } else { +- CHECK_FATAL(primType == info->GetPrimType(), "Incorrect primtype"); +- CHECK_FATAL(offset == info->GetOffset(), "Incorrect offset"); +- if (info->GetRegNO() == 0 || info->HasRegassign() == false) { +- result = GenLmbcParamLoad(offset, bytelen, regty, primType); +- } else { +- 
result = &GetOrCreatePhysicalRegisterOperand(static_cast(info->GetRegNO()), bitlen, regty); +- } +- } +-#else + CHECK_FATAL(0, "Invalid ireadfpoff offset"); +-#endif + } else { + if (primType == PTY_agg) { +-#if OLD_LMBC // TBD +- if (parent.GetOpCode() == OP_call || parent.GetOpCode() == OP_icallproto) { +- /* ireadfpoff is now for loading locals under calls. Large agg arg is handled via +- SelectParmListPreprocess, Hence only small agg is handled here */ +- SelectParmListIreadSmallAggregate(ireadoff, ty, srcOpnds, 0, parmLocator); +- } else { +-#endif +- /* agg return */ +- CHECK_FATAL(parent.GetOpCode() == OP_regassign, "SelectIreadfpoff of agg"); +- result = LmbcStructReturnLoad(offset); +-#if OLD_LMBC // TBD +- } +-#endif ++ /* agg return */ ++ CHECK_FATAL(parent.GetOpCode() == OP_regassign, "SelectIreadfpoff of agg"); ++ result = LmbcStructReturnLoad(offset); + } else { + result = GenLmbcParamLoad(offset, bytelen, regty, primType); + } +@@ -8016,10 +7982,6 @@ void AArch64CGFunc::SelectParmListPreprocessLargeStruct(BaseNode &parent, BaseNo + MIRStructType *ty = GetLmbcStructArgType(parent, argNo); + symSize = GetBecommon().GetTypeSize(ty->GetTypeIndex()); + if (symSize > 16 /*kParmMemcpySize*/) { +-#if OLD_LMBC // TBD +- MemOperand *addrOpnd0 = GenLmbcFpMemOperand(ireadoff.GetOffset(), kSizeOfPtr, RFP); +- RegOperand *addrOpnd1 = &LoadIntoRegister(*addrOpnd0, PTY_i64); +-#endif + RegOperand *rfp = &GetOrCreatePhysicalRegisterOperand(RFP, k64BitSize, kRegTyInt); + RegOperand *addrOpnd = &CreateRegisterOperandOfType(PTY_a64); + ImmOperand &immOpnd = CreateImmOperand(ireadoff.GetOffset(), k32BitSize, true); +@@ -10243,11 +10205,6 @@ void AArch64CGFunc::SelectCVaStart(const IntrinsiccallNode &intrnNode) { + inReg++; + } + } +-#if OLD_LMBC // TBD +- if (GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc) { +- stkSize += (inReg * k8ByteSize); +- } +-#endif + if (CGOptions::IsArm64ilp32()) { + stkSize = static_cast(RoundUp(stkSize, k8ByteSize)); + } else { diff --git a/a1 b/a1 new file mode 100644 index 0000000000000000000000000000000000000000..bdeb5c5d56bd0ce6d7c9b315d177d069d03ea540 --- /dev/null +++ b/a1 @@ -0,0 +1,205 @@ +commit 891d4c177de1737371d3f5bbc4e29ee19e6f1687 +Author: Alfred Huang +Date: Mon Jun 27 14:41:54 2022 -0700 + + Update for agg ret, caller/callee save offset. 
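A sketch, not part of the patches: the argument-type lookup GetLmbcStructArgType() performs, including the adjustment the a.diff hunk above adds for icallproto. Operand 0 of an icallproto is the called address, so value argument k lines up with prototype parameter k-1, while a direct call indexes the callee's formals with k unchanged; in both cases a missing parameter (more actuals than formals) yields nullptr. CallSite and ResolveAggArgType below are illustrative stand-ins for the MIR tables used in the source.

#include <cstddef>
#include <string>
#include <vector>

enum class Op { Call, IcallProto };

struct CallSite {
  Op op;
  std::vector<std::string> paramTypes;  // callee formals (OP_call) or prototype params (OP_icallproto)
};

// Returns nullptr when the actual has no matching parameter, mirroring the
// "formals less than actuals" early return in GetLmbcStructArgType().
static const std::string *ResolveAggArgType(const CallSite &site, int argNo) {
  if (site.op == Op::IcallProto) {
    --argNo;  // skip operand 0: the function address is not in the prototype's param list
  }
  if (argNo < 0 || static_cast<size_t>(argNo) >= site.paramTypes.size()) {
    return nullptr;
  }
  return &site.paramTypes[argNo];
}

int main() {
  CallSite icall{Op::IcallProto, {"structA", "structB"}};
  const std::string *ty = ResolveAggArgType(icall, 2);  // 3rd operand -> 2nd prototype param
  return (ty != nullptr && *ty == "structB") ? 0 : 1;
}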
+ +diff --git a/src/mapleall/maple_be/include/cg/aarch64/aarch64_cgfunc.h b/src/mapleall/maple_be/include/cg/aarch64/aarch64_cgfunc.h +index 3485c2efe..26765eb1a 100644 +--- a/src/mapleall/maple_be/include/cg/aarch64/aarch64_cgfunc.h ++++ b/src/mapleall/maple_be/include/cg/aarch64/aarch64_cgfunc.h +@@ -130,7 +130,7 @@ class AArch64CGFunc : public CGFunc { + uint32 LmbcFindTotalStkUsed(std::vector* paramList); + uint32 LmbcTotalRegsUsed(); + void LmbcSelectParmList(ListOperand *srcOpnds, bool isArgReturn); +- bool LmbcSmallAggForRet(BlkassignoffNode &bNode, Operand *src); ++ bool LmbcSmallAggForRet(BaseNode &bNode, Operand *src); + bool LmbcSmallAggForCall(BlkassignoffNode &bNode, Operand *src, std::vector **parmList); + void SelectAggDassign(DassignNode &stmt) override; + void SelectIassign(IassignNode &stmt) override; +diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp +index 36ae40010..5fb274ba8 100644 +--- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp ++++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp +@@ -1482,6 +1482,12 @@ void AArch64CGFunc::SelectRegassign(RegassignNode &stmt, Operand &opnd0) { + prev->SetMOP(MOP_xldrsw); + } + } ++ if (lhsSize == PTY_agg && (stmt.Opnd(0)->GetOpCode() == OP_ireadoff || stmt.Opnd(0)->GetOpCode() == OP_ireadfpoff)) { ++ PregIdx pregIdx = stmt.GetRegIdx(); ++ if ((-pregIdx) == kSregRetval0) { ++ return; // Already loaded to correct return registers ++ } ++ } + } + RegOperand *regOpnd = nullptr; + PregIdx pregIdx = stmt.GetRegIdx(); +@@ -2128,7 +2134,7 @@ MIRType *AArch64CGFunc::LmbcGetAggTyFromCallSite(StmtNode *stmt, std::vector(static_cast(src)->GetRegisterNumber()); +@@ -2138,9 +2144,9 @@ bool AArch64CGFunc::LmbcSmallAggForRet(BlkassignoffNode &bNode, Operand *src) { + /* This blkassignoff is for struct return? 
*/ + uint32 loadSize; + uint32 numRegs = 0; +- if (bNode.GetNext()->GetOpCode() == OP_return) { +- MIRStructType *ty = static_cast( +- GlobalTables::GetTypeTable().GetTypeFromTyIdx(func->GetFuncRetStructTyIdx())); ++ if (static_cast(bNode).GetNext()->GetOpCode() == OP_return) { ++ MIRStructType *ty = static_cast(func->GetReturnType()); ++ uint32 tySize = GetBecommon().GetTypeSize(ty->GetTypeIndex()); + uint32 fpregs = FloatParamRegRequired(ty, size); + if (fpregs > 0) { + /* pure floating point in agg */ +@@ -2159,7 +2165,7 @@ bool AArch64CGFunc::LmbcSmallAggForRet(BlkassignoffNode &bNode, Operand *src) { + numRegs = 2; + pTy = PTY_i64; + size = k4ByteSize; +- switch (bNode.blockSize) { ++ switch (tySize) { + case 1: + pTy = PTY_i8; + break; +@@ -2177,7 +2183,7 @@ bool AArch64CGFunc::LmbcSmallAggForRet(BlkassignoffNode &bNode, Operand *src) { + MemOperand &mem = CreateMemOpnd(regno, 0, size * kBitsPerByte); + RegOperand *res = &GetOrCreatePhysicalRegisterOperand(R0, loadSize, kRegTyInt); + SelectCopy(*res, pTy, mem, pTy); +- if (bNode.blockSize > static_cast(k8ByteSize)) { ++ if (tySize > static_cast(k8ByteSize)) { + MemOperand &newMem = CreateMemOpnd(regno, k8ByteSize, size * kBitsPerByte); + res = &GetOrCreatePhysicalRegisterOperand(R1, loadSize, kRegTyInt); + SelectCopy(*res, pTy, newMem, pTy); +@@ -3097,9 +3103,20 @@ Operand *AArch64CGFunc::SelectIreadoff(const BaseNode &parent, IreadoffNode &ire + auto *baseAddr = ireadoff.Opnd(0); + auto *result = &CreateRegisterOperandOfType(primType); + auto *addrOpnd = HandleExpr(ireadoff, *baseAddr); +- auto &memOpnd = CreateMemOpnd(LoadIntoRegister(*addrOpnd, PTY_a64), offset, bitSize); +- auto mop = PickLdInsn(bitSize, primType); +- GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mop, *result, memOpnd)); ++ if (primType == PTY_agg && parent.GetOpCode() == OP_regassign) { ++ auto &memOpnd = CreateMemOpnd(LoadIntoRegister(*addrOpnd, PTY_a64), offset, bitSize); ++ auto mop = PickLdInsn(64, PTY_a64); ++ GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mop, *result, memOpnd)); ++ auto ®AssignNode = static_cast(parent); ++ PregIdx pIdx = regAssignNode.GetRegIdx(); ++ CHECK_FATAL(IsSpecialPseudoRegister(pIdx), "SelectIreadfpoff of agg"); ++ LmbcSmallAggForRet(const_cast(parent), addrOpnd); ++ // result not used ++ } else { ++ auto &memOpnd = CreateMemOpnd(LoadIntoRegister(*addrOpnd, PTY_a64), offset, bitSize); ++ auto mop = PickLdInsn(bitSize, primType); ++ GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mop, *result, memOpnd)); ++ } + return result; + } + +@@ -3152,13 +3169,11 @@ Operand *AArch64CGFunc::SelectIreadfpoff(const BaseNode &parent, IreadFPoffNode + int32 offset = ireadoff.GetOffset(); + PrimType primType = ireadoff.GetPrimType(); + uint32 bytelen = GetPrimTypeSize(primType); +-#if 1 +- uint32 bitlen = bytelen * kBitsPerByte; +-#endif + RegType regty = GetRegTyFromPrimTy(primType); + RegOperand *result = nullptr; + if (offset >= 0) { +-#if 1 ++#if OLD_LMBC // TBD ++ uint32 bitlen = bytelen * kBitsPerByte; + LmbcFormalParamInfo *info = GetLmbcFormalParamInfo(offset); + if (info->GetPrimType() == PTY_agg) { + if (info->IsOnStack()) { +@@ -3180,8 +3195,9 @@ Operand *AArch64CGFunc::SelectIreadfpoff(const BaseNode &parent, IreadFPoffNode + result = &GetOrCreatePhysicalRegisterOperand((AArch64reg)(info->GetRegNO()), bitlen, regty); + } + } +-#endif ++#else + CHECK_FATAL(0, "Invalid ireadfpoff offset"); ++#endif + } else { + if (primType == PTY_agg) { + #if OLD_LMBC // TBD +diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_memlayout.cpp 
b/src/mapleall/maple_be/src/cg/aarch64/aarch64_memlayout.cpp +index 9612ce2c9..c4c99f610 100644 +--- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_memlayout.cpp ++++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_memlayout.cpp +@@ -17,8 +17,6 @@ + #include "becommon.h" + #include "mir_nodes.h" + +-#define OLD_LMBC 1 +- + namespace maplebe { + using namespace maple; + +diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_proepilog.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_proepilog.cpp +index 2a1996031..040857f33 100644 +--- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_proepilog.cpp ++++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_proepilog.cpp +@@ -16,8 +16,6 @@ + #include "cg_option.h" + #include "cgfunc.h" + +-#define OLD_LMBC 1 +- + namespace maplebe { + using namespace maple; + +@@ -1261,18 +1259,21 @@ void AArch64GenProEpilog::GeneratePushRegs() { + + AArch64MemLayout *memLayout = static_cast(cgFunc.GetMemlayout()); + int32 offset; +-#if OLD_LMBC // TBD + if (cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc) { ++#if OLD_LMBC // TBD + offset = static_cast(memLayout->RealStackFrameSize() - + aarchCGFunc.SizeOfCalleeSaved() - memLayout->GetSizeOfLocals()); +- } else { ++#else ++ offset = static_cast(memLayout->RealStackFrameSize() - ++ (aarchCGFunc.SizeOfCalleeSaved() - (kDivide2 * kIntregBytelen)/* FP/LR */) - ++ memLayout->GetSizeOfLocals() - ++ memLayout->SizeOfArgsToStackPass()); + #endif ++ } else { + offset = static_cast(memLayout->RealStackFrameSize() - + (aarchCGFunc.SizeOfCalleeSaved() - (kDivide2 * kIntregBytelen) /* for FP/LR */) - + memLayout->SizeOfArgsToStackPass()); +-#if OLD_LMBC // TBD + } +-#endif + + if (cgFunc.GetCG()->IsStackProtectorStrong() || cgFunc.GetCG()->IsStackProtectorAll()) { + offset -= kAarch64StackPtrAlignment; +@@ -1801,18 +1802,21 @@ void AArch64GenProEpilog::GeneratePopRegs() { + + AArch64MemLayout *memLayout = static_cast(cgFunc.GetMemlayout()); + int32 offset; +-#if OLD_LMBC // TBD + if (cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc) { ++#if OLD_LMBC // TBD + offset = static_cast(memLayout->RealStackFrameSize() - + aarchCGFunc.SizeOfCalleeSaved() - memLayout->GetSizeOfLocals()); +- } else { ++#else ++ offset = static_cast(memLayout->RealStackFrameSize() - ++ (aarchCGFunc.SizeOfCalleeSaved() - (kDivide2 * kIntregBytelen)/* FP/LR */) - ++ memLayout->GetSizeOfLocals() - ++ memLayout->SizeOfArgsToStackPass()); + #endif ++ } else { + offset = static_cast(cgFunc.GetMemlayout())->RealStackFrameSize() - + (aarchCGFunc.SizeOfCalleeSaved() - (kDivide2 * kIntregBytelen) /* for FP/LR */) - + memLayout->SizeOfArgsToStackPass(); +-#if OLD_LMBC // TBD + } +-#endif + + if (cgFunc.GetCG()->IsStackProtectorStrong() || cgFunc.GetCG()->IsStackProtectorAll()) { + offset -= kAarch64StackPtrAlignment; diff --git a/b b/b new file mode 100644 index 0000000000000000000000000000000000000000..87c9f9af138d8bd344055b27d3527f9daa8d1bc9 --- /dev/null +++ b/b @@ -0,0 +1,552 @@ +commit c4ab3ada2a8f9d388af26fd036063868f7589eb1 +Author: Alfred Huang +Date: Mon Jun 20 15:49:32 2022 -0700 + + Updating to "new" lmbc + +diff --git a/src/mapleall/maple_be/include/cg/aarch64/aarch64_cgfunc.h b/src/mapleall/maple_be/include/cg/aarch64/aarch64_cgfunc.h +index bf154e765..be3a3bae6 100644 +--- a/src/mapleall/maple_be/include/cg/aarch64/aarch64_cgfunc.h ++++ b/src/mapleall/maple_be/include/cg/aarch64/aarch64_cgfunc.h +@@ -104,6 +104,7 @@ class AArch64CGFunc : public CGFunc { + + MIRType *LmbcGetAggTyFromCallSite(StmtNode *stmt, std::vector **parmList) 
const; + RegOperand &GetOrCreateResOperand(const BaseNode &parent, PrimType primType); ++ MIRStructType *GetLmbcStructArgType(BaseNode &stmt, int32 argNo); + + void IntrinsifyGetAndAddInt(ListOperand &srcOpnds, PrimType pty); + void IntrinsifyGetAndSetInt(ListOperand &srcOpnds, PrimType pty); +@@ -191,7 +192,8 @@ class AArch64CGFunc : public CGFunc { + Operand *SelectIread(const BaseNode &parent, IreadNode &expr, + int extraOffset = 0, PrimType finalBitFieldDestType = kPtyInvalid) override; + Operand *SelectIreadoff(const BaseNode &parent, IreadoffNode &ireadoff) override; +- Operand *SelectIreadfpoff(const BaseNode &parent, IreadFPoffNode &ireadoff) override; ++ Operand *SelectIreadfpoff(const BaseNode &parent, IreadFPoffNode &ireadoff /*, MIRType &ty, ++ ListOperand &srcOpnds, AArch64CallConvImpl &parmLocator */) override; + Operand *SelectIntConst(MIRIntConst &intConst) override; + Operand *HandleFmovImm(PrimType stype, int64 val, MIRConst &mirConst, const BaseNode &parent); + Operand *SelectFloatConst(MIRFloatConst &floatConst, const BaseNode &parent) override; +@@ -869,7 +871,7 @@ class AArch64CGFunc : public CGFunc { + void SelectParmListDreadSmallAggregate(const MIRSymbol &sym, MIRType &structType, + ListOperand &srcOpnds, + int32 offset, AArch64CallConvImpl &parmLocator, FieldID fieldID); +- void SelectParmListIreadSmallAggregate(const IreadNode &iread, MIRType &structType, ListOperand &srcOpnds, ++ void SelectParmListIreadSmallAggregate(BaseNode &iread, MIRType &structType, ListOperand &srcOpnds, + int32 offset, AArch64CallConvImpl &parmLocator); + void SelectParmListDreadLargeAggregate(const MIRSymbol &sym, MIRType &structType, + ListOperand &srcOpnds, +@@ -878,12 +880,12 @@ class AArch64CGFunc : public CGFunc { + AArch64CallConvImpl &parmLocator, int32 &structCopyOffset, int32 fromOffset); + void CreateCallStructMemcpyToParamReg(MIRType &structType, int32 structCopyOffset, AArch64CallConvImpl &parmLocator, + ListOperand &srcOpnds); +- void SelectParmListForAggregate(BaseNode &argExpr, ListOperand &srcOpnds, AArch64CallConvImpl &parmLocator, +- int32 &structCopyOffset); ++ void SelectParmListForAggregate(BaseNode &parent, BaseNode &argExpr, ListOperand &srcOpnds, ++ AArch64CallConvImpl &parmLocator, int32 &structCopyOffset, int32 argNo); + size_t SelectParmListGetStructReturnSize(StmtNode &naryNode); + bool MarkParmListCall(BaseNode &expr); +- void SelectParmListPreprocessLargeStruct(BaseNode &argExpr, int32 &structCopyOffset); +- void SelectParmListPreprocess(const StmtNode &naryNode, size_t start, std::set &specialArgs); ++ void SelectParmListPreprocessLargeStruct(BaseNode &parent, BaseNode &argExpr, int32 &structCopyOffset, int32 argNo); ++ void SelectParmListPreprocess(StmtNode &naryNode, size_t start, std::set &specialArgs); + void SelectParmList(StmtNode &naryNode, ListOperand &srcOpnds, bool isCallNative = false); + Operand *SelectClearStackCallParam(const AddrofNode &expr, int64 &offsetValue); + void SelectClearStackCallParmList(const StmtNode &naryNode, ListOperand &srcOpnds, +diff --git a/src/mapleall/maple_be/include/cg/cgfunc.h b/src/mapleall/maple_be/include/cg/cgfunc.h +index 0a29d3b5a..21e5e938e 100644 +--- a/src/mapleall/maple_be/include/cg/cgfunc.h ++++ b/src/mapleall/maple_be/include/cg/cgfunc.h +@@ -1150,8 +1150,8 @@ class CGFunc { + #endif + MapleMap vregsToPregsMap; + uint32 totalInsns = 0; +- int32 structCopySize; +- int32 maxParamStackSize; ++ int32 structCopySize = 0; ++ int32 maxParamStackSize = 0; + static constexpr int kRegIncrStepLen = 80; /* reg 
number increate step length */ + + bool hasVLAOrAlloca = false; +diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp +index c2f1a470e..305c9f033 100644 +--- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp ++++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp +@@ -161,6 +161,28 @@ bool IsBlkassignForPush(const BlkassignoffNode &bNode) { + return spBased; + } + ++MIRStructType *AArch64CGFunc::GetLmbcStructArgType(BaseNode &stmt, int32 argNo) { ++ MIRType *ty = nullptr; ++ if (stmt.GetOpCode() == OP_call) { ++ CallNode &callNode = static_cast(stmt); ++ MIRFunction *callFunc = GlobalTables::GetFunctionTable().GetFunctionFromPuidx(callNode.GetPUIdx()); ++ if (callFunc->GetFormalCount() < (argNo + 1)) { ++ return nullptr; /* formals less than actuals */ ++ } ++ ty = GlobalTables::GetTypeTable().GetTypeFromTyIdx(callFunc->GetFormalDefVec()[argNo].formalTyIdx); ++ } else if (stmt.GetOpCode() == OP_icallproto) { ++ IcallNode &icallproto = static_cast(stmt); ++ MIRType *type = GlobalTables::GetTypeTable().GetTypeFromTyIdx(icallproto.GetRetTyIdx()); ++ MIRFuncType *fType = static_cast(type); ++ if (fType->GetParamTypeList().size() < (argNo + 1)) { ++ return nullptr; ++ } ++ ty = GlobalTables::GetTypeTable().GetTypeFromTyIdx(fType->GetNthParamType(argNo)); ++ } ++ CHECK_FATAL(ty && ty->IsStructType(), "lmbc agg arg error"); ++ return static_cast(ty); ++} ++ + RegOperand &AArch64CGFunc::GetOrCreateResOperand(const BaseNode &parent, PrimType primType) { + RegOperand *resOpnd = nullptr; + if (parent.GetOpCode() == OP_regassign) { +@@ -3095,15 +3117,19 @@ RegOperand *AArch64CGFunc::LmbcStructReturnLoad(int32 offset) { + return result; + } + +-Operand *AArch64CGFunc::SelectIreadfpoff(const BaseNode &parent, IreadFPoffNode &ireadoff) { ++Operand *AArch64CGFunc::SelectIreadfpoff(const BaseNode &parent, IreadFPoffNode &ireadoff /*, MIRType &ty, ++ ListOperand &srcOpnds, AArch64CallConvImpl &parmLocator*/) { + int32 offset = ireadoff.GetOffset(); + PrimType primType = ireadoff.GetPrimType(); + uint32 bytelen = GetPrimTypeSize(primType); ++#if 1 + uint32 bitlen = bytelen * kBitsPerByte; ++#endif + RegType regty = GetRegTyFromPrimTy(primType); + RegOperand *result = nullptr; + if (offset >= 0) { +- LmbcFormalParamInfo *info = GetLmbcFormalParamInfo(static_cast(offset)); ++#if 1 ++ LmbcFormalParamInfo *info = GetLmbcFormalParamInfo(offset); + if (info->GetPrimType() == PTY_agg) { + if (info->IsOnStack()) { + result = GenLmbcParamLoad(info->GetOnStackOffset(), GetPrimTypeSize(PTY_a64), kRegTyInt, PTY_a64); +@@ -3124,10 +3150,23 @@ Operand *AArch64CGFunc::SelectIreadfpoff(const BaseNode &parent, IreadFPoffNode + result = &GetOrCreatePhysicalRegisterOperand(static_cast(info->GetRegNO()), bitlen, regty); + } + } ++#endif ++ CHECK_FATAL(0, "Invalid ireadfpoff offset"); + } else { + if (primType == PTY_agg) { +- CHECK_FATAL(parent.GetOpCode() == OP_regassign, "SelectIreadfpoff of agg"); +- result = LmbcStructReturnLoad(offset); ++#if OLD_LMBC // TBD ++ if (parent.GetOpCode() == OP_call || parent.GetOpCode() == OP_icallproto) { ++ /* ireadfpoff is now for loading locals under calls. 
Large agg arg is handled via ++ SelectParmListPreprocess, Hence only small agg is handled here */ ++ SelectParmListIreadSmallAggregate(ireadoff, ty, srcOpnds, 0, parmLocator); ++ } else { ++#endif ++ /* agg return */ ++ CHECK_FATAL(parent.GetOpCode() == OP_regassign, "SelectIreadfpoff of agg"); ++ result = LmbcStructReturnLoad(offset); ++#if OLD_LMBC // TBD ++ } ++#endif + } else { + result = GenLmbcParamLoad(offset, bytelen, regty, primType); + } +@@ -7361,12 +7400,26 @@ void AArch64CGFunc::SelectParmListDreadSmallAggregate(const MIRSymbol &sym, MIRT + } + } + +-void AArch64CGFunc::SelectParmListIreadSmallAggregate(const IreadNode &iread, MIRType &structType, ++void AArch64CGFunc::SelectParmListIreadSmallAggregate(BaseNode &iread, MIRType &structType, + ListOperand &srcOpnds, int32 offset, + AArch64CallConvImpl &parmLocator) { + int32 symSize = GetBecommon().GetTypeSize(structType.GetTypeIndex().GetIdx()); +- RegOperand *addrOpnd0 = static_cast(HandleExpr(iread, *(iread.Opnd(0)))); +- RegOperand *addrOpnd1 = &LoadIntoRegister(*addrOpnd0, iread.Opnd(0)->GetPrimType()); ++ RegOperand *addrOpnd1; ++ if (iread.GetOpCode() == OP_iread) { ++ RegOperand *addrOpnd0 = static_cast(HandleExpr(iread, *(iread.Opnd(0)))); ++ addrOpnd1 = &LoadIntoRegister(*addrOpnd0, iread.Opnd(0)->GetPrimType()); ++ } else if (iread.GetOpCode() == OP_ireadfpoff) { ++ IreadFPoffNode &ireadoff = static_cast(iread); ++ RegOperand *rfp = &GetOrCreatePhysicalRegisterOperand(RFP, k64BitSize, kRegTyInt); ++ RegOperand *addrOpnd0 = &CreateRegisterOperandOfType(PTY_a64); ++ ImmOperand &immOpnd = CreateImmOperand(ireadoff.GetOffset(), k32BitSize, true); ++ GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_xaddrri12, *addrOpnd0, *rfp, immOpnd)); ++ addrOpnd1 = &LoadIntoRegister(*addrOpnd0, PTY_i64); ++ } else if (iread.GetOpCode() == OP_ireadoff) { ++ IreadoffNode &ireadoff = static_cast(iread); ++ RegOperand *addrOpnd0 = static_cast(HandleExpr(ireadoff, *(ireadoff.Opnd(0)))); ++ addrOpnd1 = &LoadIntoRegister(*addrOpnd0, PTY_i64); ++ } + CCLocInfo ploc; + parmLocator.LocateNextParm(structType, ploc); + if (ploc.reg0 == 0) { +@@ -7740,8 +7793,8 @@ void AArch64CGFunc::CreateCallStructMemcpyToParamReg(MIRType &structType, int32 + } + } + +-void AArch64CGFunc::SelectParmListForAggregate(BaseNode &argExpr, ListOperand &srcOpnds, +- AArch64CallConvImpl &parmLocator, int32 &structCopyOffset) { ++void AArch64CGFunc::SelectParmListForAggregate(BaseNode &parent, BaseNode &argExpr, ListOperand &srcOpnds, ++ AArch64CallConvImpl &parmLocator, int32 &structCopyOffset, int32 argNo) { + uint64 symSize; + int32 rhsOffset = 0; + if (argExpr.GetOpCode() == OP_dread) { +@@ -7789,6 +7842,32 @@ void AArch64CGFunc::SelectParmListForAggregate(BaseNode &argExpr, ListOperand &s + } else { + SelectParmListIreadLargeAggregate(iread, *ty, srcOpnds, parmLocator, structCopyOffset, rhsOffset); + } ++ } else if (argExpr.GetOpCode() == OP_ireadfpoff) { ++ IreadFPoffNode &iread = static_cast(argExpr); ++ MIRStructType *ty = GetLmbcStructArgType(parent, argNo); ++ if (ty == nullptr) { /* param < arg */ ++ return; ++ } ++ symSize = GetBecommon().GetTypeSize(ty->GetTypeIndex().GetIdx()); ++ if (symSize <= k16ByteSize) { ++ SelectParmListIreadSmallAggregate(iread, *ty, srcOpnds, rhsOffset, parmLocator); ++ } else { ++ CreateCallStructMemcpyToParamReg(*ty, structCopyOffset, parmLocator, srcOpnds); ++ structCopyOffset += static_cast(RoundUp(symSize, kSizeOfPtr)); ++ } ++ } else if (argExpr.GetOpCode() == OP_ireadoff) { ++ IreadoffNode &iread = static_cast(argExpr); 
++ MIRStructType *ty = GetLmbcStructArgType(parent, argNo); ++ if (ty == nullptr) { ++ return; ++ } ++ symSize = GetBecommon().GetTypeSize(ty->GetTypeIndex().GetIdx()); ++ if (symSize <= k16ByteSize) { ++ SelectParmListIreadSmallAggregate(iread, *ty, srcOpnds, rhsOffset, parmLocator); ++ } else { ++ CreateCallStructMemcpyToParamReg(*ty, structCopyOffset, parmLocator, srcOpnds); ++ structCopyOffset += static_cast(RoundUp(symSize, kSizeOfPtr)); ++ } + } else { + CHECK_FATAL(0, "NYI"); + } +@@ -7831,7 +7910,7 @@ size_t AArch64CGFunc::SelectParmListGetStructReturnSize(StmtNode &naryNode) { + return 0; + } + +-void AArch64CGFunc::SelectParmListPreprocessLargeStruct(BaseNode &argExpr, int32 &structCopyOffset) { ++void AArch64CGFunc::SelectParmListPreprocessLargeStruct(BaseNode &parent, BaseNode &argExpr, int32 &structCopyOffset, int32 argNo) { + uint64 symSize; + int32 rhsOffset = 0; + if (argExpr.GetOpCode() == OP_dread) { +@@ -7878,6 +7957,22 @@ void AArch64CGFunc::SelectParmListPreprocessLargeStruct(BaseNode &argExpr, int32 + uint32 numMemOp = static_cast(RoundUp(symSize, kSizeOfPtr) / kSizeOfPtr); + structCopyOffset += static_cast(numMemOp * kSizeOfPtr); + } ++ } else if (argExpr.GetOpCode() == OP_ireadfpoff) { ++ IreadFPoffNode &ireadoff = static_cast(argExpr); ++ MIRStructType *ty = GetLmbcStructArgType(parent, argNo); ++ symSize = GetBecommon().GetTypeSize(ty->GetTypeIndex()); ++ if (symSize > 16 /*kParmMemcpySize*/) { ++#if OLD_LMBC // TBD ++ MemOperand *addrOpnd0 = GenLmbcFpMemOperand(ireadoff.GetOffset(), kSizeOfPtr, RFP); ++ RegOperand *addrOpnd1 = &LoadIntoRegister(*addrOpnd0, PTY_i64); ++#endif ++ RegOperand *rfp = &GetOrCreatePhysicalRegisterOperand(RFP, k64BitSize, kRegTyInt); ++ RegOperand *addrOpnd = &CreateRegisterOperandOfType(PTY_a64); ++ ImmOperand &immOpnd = CreateImmOperand(ireadoff.GetOffset(), k32BitSize, true); ++ GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_xaddrri12, *addrOpnd, *rfp, immOpnd)); ++ CreateCallStructParamMemcpy(nullptr, addrOpnd, static_cast(symSize), structCopyOffset, 0); ++ structCopyOffset += static_cast(RoundUp(symSize, kSizeOfPtr)); ++ } + } + } + +@@ -7909,7 +8004,7 @@ bool AArch64CGFunc::MarkParmListCall(BaseNode &expr) { + return false; + } + +-void AArch64CGFunc::SelectParmListPreprocess(const StmtNode &naryNode, size_t start, std::set &specialArgs) { ++void AArch64CGFunc::SelectParmListPreprocess(StmtNode &naryNode, size_t start, std::set &specialArgs) { + size_t i = start; + int32 structCopyOffset = GetMaxParamStackSize() - GetStructCopySize(); + for (; i < naryNode.NumOpnds(); ++i) { +@@ -7922,7 +8017,7 @@ void AArch64CGFunc::SelectParmListPreprocess(const StmtNode &naryNode, size_t st + if (primType != PTY_agg) { + continue; + } +- SelectParmListPreprocessLargeStruct(*argExpr, structCopyOffset); ++ SelectParmListPreprocessLargeStruct(naryNode, *argExpr, structCopyOffset, i); + } + } + +@@ -8022,7 +8117,7 @@ void AArch64CGFunc::SelectParmList(StmtNode &naryNode, ListOperand &srcOpnds, bo + } + /* use alloca */ + if (primType == PTY_agg) { +- SelectParmListForAggregate(*argExpr, srcOpnds, parmLocator, structCopyOffset); ++ SelectParmListForAggregate(naryNode, *argExpr, srcOpnds, parmLocator, structCopyOffset, i); + continue; + } + ty = GlobalTables::GetTypeTable().GetTypeTable()[static_cast(primType)]; +@@ -9428,7 +9523,11 @@ Operand *AArch64CGFunc::GetBaseReg(const AArch64SymbolAlloc &symAlloc) { + } + + if (fsp == nullptr) { +- fsp = &GetOrCreatePhysicalRegisterOperand(RFP, kSizeOfPtr * kBitsPerByte, kRegTyInt); ++ if 
(GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc) { ++ fsp = &GetOrCreatePhysicalRegisterOperand(RSP, kSizeOfPtr * kBitsPerByte, kRegTyInt); ++ } else { ++ fsp = &GetOrCreatePhysicalRegisterOperand(RFP, kSizeOfPtr * kBitsPerByte, kRegTyInt); ++ } + } + return fsp; + } +diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_memlayout.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_memlayout.cpp +index b7403f414..b7ef7ae1e 100644 +--- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_memlayout.cpp ++++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_memlayout.cpp +@@ -58,7 +58,9 @@ uint32 AArch64MemLayout::ComputeStackSpaceRequirementForCall(StmtNode &stmt, in + ty = GlobalTables::GetTypeTable().GetTypeTable()[static_cast(opnd->GetPrimType())]; + } else { + Opcode opndOpcode = opnd->GetOpCode(); +- ASSERT(opndOpcode == OP_dread || opndOpcode == OP_iread, "opndOpcode should be OP_dread or OP_iread"); ++ if (be.GetMIRModule().GetFlavor() != kFlavorLmbc) { ++ ASSERT(opndOpcode == OP_dread || opndOpcode == OP_iread, "opndOpcode should be OP_dread or OP_iread"); ++ } + if (opndOpcode == OP_dread) { + DreadNode *dread = static_cast(opnd); + MIRSymbol *sym = be.GetMIRModule().CurFunction()->GetLocalOrGlobalSymbol(dread->GetStIdx()); +@@ -72,8 +74,7 @@ uint32 AArch64MemLayout::ComputeStackSpaceRequirementForCall(StmtNode &stmt, in + ty = static_cast(ty)->GetFieldType(dread->GetFieldID()); + } + } +- } else { +- /* OP_iread */ ++ } else if (opndOpcode == OP_iread) { + IreadNode *iread = static_cast(opnd); + ty = GlobalTables::GetTypeTable().GetTypeFromTyIdx(iread->GetTyIdx()); + ASSERT(ty->GetKind() == kTypePointer, "expect pointer"); +@@ -87,6 +88,11 @@ uint32 AArch64MemLayout::ComputeStackSpaceRequirementForCall(StmtNode &stmt, in + ty = static_cast(ty)->GetFieldType(iread->GetFieldID()); + } + } ++ } else if ((opndOpcode == OP_ireadfpoff || opndOpcode == OP_ireadoff || opndOpcode == OP_dreadoff) && opnd->GetPrimType() == PTY_agg) { ++ ty = static_cast(cgFunc)->GetLmbcStructArgType(stmt, i); ++ } ++ if (ty == nullptr) { /* type mismatch */ ++ continue; + } + } + CCLocInfo ploc; +@@ -190,6 +196,7 @@ void AArch64MemLayout::LayoutVarargParams() { + } + + void AArch64MemLayout::LayoutFormalParams() { ++#if OLD_LMBC // TBD + bool isLmbc = (be.GetMIRModule().GetFlavor() == kFlavorLmbc); + if (isLmbc && mirFunction->GetFormalCount() == 0) { + /* +@@ -201,6 +208,7 @@ void AArch64MemLayout::LayoutFormalParams() { + segArgsRegPassed.SetSize(mirFunction->GetOutParmSize()); + return; + } ++#endif + + AArch64CallConvImpl parmLocator(be); + CCLocInfo ploc; +@@ -255,8 +263,10 @@ void AArch64MemLayout::LayoutFormalParams() { + segArgsRegPassed.SetSize(static_cast(RoundUp(segArgsRegPassed.GetSize(), align))); + symLoc->SetOffset(segArgsRegPassed.GetSize()); + segArgsRegPassed.SetSize(segArgsRegPassed.GetSize() + size); ++#if OLD_LMBC // TBD + } else if (isLmbc) { + segArgsRegPassed.SetSize(segArgsRegPassed.GetSize() + k8ByteSize); ++#endif + } + } else { /* stack */ + uint32 size; +@@ -371,11 +381,15 @@ void AArch64MemLayout::LayoutReturnRef(std::vector &returnDelays, + symLoc->SetOffset(segRefLocals.GetSize()); + segRefLocals.SetSize(segRefLocals.GetSize() + be.GetTypeSize(tyIdx)); + } ++#if OLD_LMBC // TBD + if (be.GetMIRModule().GetFlavor() == kFlavorLmbc) { + segArgsToStkPass.SetSize(mirFunction->GetOutParmSize() + kDivide2 * k8ByteSize); + } else { + segArgsToStkPass.SetSize(FindLargestActualArea(structCopySize)); + } ++#else ++ segArgsToStkPass.SetSize(FindLargestActualArea(structCopySize)); ++#endif + 
maxParmStackSize = static_cast(segArgsToStkPass.GetSize()); + if (Globals::GetInstance()->GetOptimLevel() == 0) { + AssignSpillLocationsToPseudoRegisters(); +@@ -527,14 +541,18 @@ uint64 AArch64MemLayout::StackFrameSize() const { + uint64 total = segArgsRegPassed.GetSize() + static_cast(cgFunc)->SizeOfCalleeSaved() + + GetSizeOfRefLocals() + locals().GetSize() + GetSizeOfSpillReg(); + ++#if OLD_LMBC // TBD + if (cgFunc->GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc) { ++#endif + if (GetSizeOfGRSaveArea() > 0) { + total += RoundUp(GetSizeOfGRSaveArea(), kAarch64StackPtrAlignment); + } + if (GetSizeOfVRSaveArea() > 0) { + total += RoundUp(GetSizeOfVRSaveArea(), kAarch64StackPtrAlignment); + } ++#if OLD_LMBC // TBD + } ++#endif + + /* + * if the function does not have VLA nor alloca, +diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_proepilog.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_proepilog.cpp +index 546c386c7..9a0a3456b 100644 +--- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_proepilog.cpp ++++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_proepilog.cpp +@@ -1115,9 +1115,11 @@ void AArch64GenProEpilog::AppendInstructionAllocateCallFrameDebug(AArch64reg reg + ipoint = cgFunc.GetCurBB()->GetLastInsn(); + cfiOffset = stackFrameSize; + (void)InsertCFIDefCfaOffset(cfiOffset, *ipoint); ++#if OLD_LMBC //TBD + if (cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc) { + argsToStkPassSize -= (kDivide2 * k8ByteSize); + } ++#endif + ipoint = &CreateAndAppendInstructionForAllocateCallFrame(argsToStkPassSize, reg0, reg1, rty); + CHECK_FATAL(ipoint != nullptr, "ipoint should not be nullptr at this point"); + cfiOffset = GetOffsetFromCFA(); +@@ -1217,7 +1219,9 @@ void AArch64GenProEpilog::GeneratePushRegs() { + Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt); + Operand &fpOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(stackBaseReg, k64BitSize, kRegTyInt); + int64 argsToStkPassSize = cgFunc.GetMemlayout()->SizeOfArgsToStackPass(); ++#if 1 + bool isLmbc = cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc; ++#endif + if ((argsToStkPassSize > 0) || isLmbc) { + Operand *immOpnd; + if (isLmbc) { +@@ -1255,14 +1259,18 @@ void AArch64GenProEpilog::GeneratePushRegs() { + + AArch64MemLayout *memLayout = static_cast(cgFunc.GetMemlayout()); + int32 offset; ++#if OLD_LMBC // TBD + if (cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc) { + offset = static_cast(memLayout->RealStackFrameSize() - + aarchCGFunc.SizeOfCalleeSaved() - memLayout->GetSizeOfLocals()); + } else { ++#endif + offset = static_cast(memLayout->RealStackFrameSize() - + (aarchCGFunc.SizeOfCalleeSaved() - (kDivide2 * kIntregBytelen) /* for FP/LR */) - + memLayout->SizeOfArgsToStackPass()); ++#if OLD_LMBC // TBD + } ++#endif + + if (cgFunc.GetCG()->IsStackProtectorStrong() || cgFunc.GetCG()->IsStackProtectorAll()) { + offset -= kAarch64StackPtrAlignment; +@@ -1323,18 +1331,22 @@ void AArch64GenProEpilog::GeneratePushUnnamedVarargRegs() { + } + uint32 dataSizeBits = size * kBitsPerByte; + uint32 offset; ++#if OLD_LMBC // TBD + if (cgFunc.GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc) { ++#endif + offset = static_cast(memlayout->GetGRSaveAreaBaseLoc()); /* SP reference */ + if (memlayout->GetSizeOfGRSaveArea() % kAarch64StackPtrAlignment) { + offset += size; /* End of area should be aligned. 
Hole between VR and GR area */ + } ++#if OLD_LMBC // TBD + } else { + offset = (UINT32_MAX - memlayout->GetSizeOfGRSaveArea()) + 1; /* FP reference */ + if (memlayout->GetSizeOfGRSaveArea() % kAarch64StackPtrAlignment) { + offset -= size; + } + } +- uint32 grSize = (UINT32_MAX - offset) + 1; ++ uint32 grSize = -offset; ++#endif + uint32 start_regno = k8BitSize - (memlayout->GetSizeOfGRSaveArea() / size); + ASSERT(start_regno <= k8BitSize, "Incorrect starting GR regno for GR Save Area"); + for (uint32 i = start_regno + static_cast(R0); i < static_cast(R8); i++) { +@@ -1345,11 +1357,15 @@ void AArch64GenProEpilog::GeneratePushUnnamedVarargRegs() { + } + } + Operand *stackLoc; ++#if OLD_LMBC // TBD + if (cgFunc.GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc) { ++#endif + stackLoc = &aarchCGFunc.CreateStkTopOpnd(offset + tmpOffset, dataSizeBits); ++#if OLD_LMBC // TBD + } else { + stackLoc = aarchCGFunc.GenLmbcFpMemOperand(offset, size); + } ++#endif + RegOperand ® = + aarchCGFunc.GetOrCreatePhysicalRegisterOperand(static_cast(i), k64BitSize, kRegTyInt); + Insn &inst = +@@ -1358,11 +1374,15 @@ void AArch64GenProEpilog::GeneratePushUnnamedVarargRegs() { + offset += size; + } + if (!CGOptions::UseGeneralRegOnly()) { ++#if OLD_LMBC // TBD + if (cgFunc.GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc) { ++#endif + offset = static_cast(memlayout->GetVRSaveAreaBaseLoc()); ++#if OLD_LMBC // TBD + } else { + offset = (UINT32_MAX - (memlayout->GetSizeOfVRSaveArea() + grSize)) + 1; + } ++#endif + start_regno = k8BitSize - (memlayout->GetSizeOfVRSaveArea() / (size * k2BitSize)); + ASSERT(start_regno <= k8BitSize, "Incorrect starting GR regno for VR Save Area"); + for (uint32 i = start_regno + static_cast(V0); i < static_cast(V8); i++) { +@@ -1373,11 +1393,15 @@ void AArch64GenProEpilog::GeneratePushUnnamedVarargRegs() { + } + } + Operand *stackLoc; ++#if OLD_LMBC // TBD + if (cgFunc.GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc) { ++#endif + stackLoc = &aarchCGFunc.CreateStkTopOpnd(offset + tmpOffset, dataSizeBits); ++#if OLD_LMBC // TBD + } else { + stackLoc = aarchCGFunc.GenLmbcFpMemOperand(offset, size); + } ++#endif + RegOperand ® = + aarchCGFunc.GetOrCreatePhysicalRegisterOperand(static_cast(i), k64BitSize, kRegTyFloat); + Insn &inst = +@@ -1479,7 +1503,9 @@ void AArch64GenProEpilog::GenerateProlog(BB &bb) { + } + if (useFP) { + Operand &fpOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(stackBaseReg, k64BitSize, kRegTyInt); ++#if 1 + bool isLmbc = cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc; ++#endif + int64 argsToStkPassSize = cgFunc.GetMemlayout()->SizeOfArgsToStackPass(); + if ((argsToStkPassSize > 0) || isLmbc) { + Operand *immOpnd; +@@ -1682,7 +1708,10 @@ void AArch64GenProEpilog::AppendInstructionDeallocateCallFrameDebug(AArch64reg r + * ldp/stp's imm should be within -512 and 504; + * if ldp's imm > 504, we fall back to the ldp-add version + */ ++#if OLD_LMBC // TBD + bool isLmbc = (cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc); ++#endif ++ bool isLmbc = false; + if (cgFunc.HasVLAOrAlloca() || argsToStkPassSize == 0 || isLmbc) { + int lmbcOffset = 0; + if (isLmbc == false) { +@@ -1769,14 +1798,18 @@ void AArch64GenProEpilog::GeneratePopRegs() { + + AArch64MemLayout *memLayout = static_cast(cgFunc.GetMemlayout()); + int32 offset; ++#if OLD_LMBC // TBD + if (cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc) { + offset = static_cast(memLayout->RealStackFrameSize() - + aarchCGFunc.SizeOfCalleeSaved() - memLayout->GetSizeOfLocals()); 
+ } else { ++#endif + offset = static_cast(cgFunc.GetMemlayout())->RealStackFrameSize() - + (aarchCGFunc.SizeOfCalleeSaved() - (kDivide2 * kIntregBytelen) /* for FP/LR */) - + memLayout->SizeOfArgsToStackPass(); ++#if OLD_LMBC // TBD + } ++#endif + + if (cgFunc.GetCG()->IsStackProtectorStrong() || cgFunc.GetCG()->IsStackProtectorAll()) { + offset -= kAarch64StackPtrAlignment; diff --git a/b.diff b/b.diff new file mode 100644 index 0000000000000000000000000000000000000000..ce0da53021f5a60b864c35a6d0fd3d91ddc9691b --- /dev/null +++ b/b.diff @@ -0,0 +1,52 @@ +diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_memlayout.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_memlayout.cpp +index bd5ad4975..28b33f9d5 100644 +--- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_memlayout.cpp ++++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_memlayout.cpp +@@ -196,20 +196,6 @@ void AArch64MemLayout::LayoutVarargParams() { + } + + void AArch64MemLayout::LayoutFormalParams() { +-#if OLD_LMBC // TBD +- bool isLmbc = (be.GetMIRModule().GetFlavor() == kFlavorLmbc); +- if (isLmbc && mirFunction->GetFormalCount() == 0) { +- /* +- * lmbc : upformalsize - size of formals passed from caller's frame into current function +- * framesize - total frame size of current function used by Maple IR +- * outparmsize - portion of frame size of current function used by call parameters +- */ +- segArgsStkPassed.SetSize(mirFunction->GetOutParmSize()); +- segArgsRegPassed.SetSize(mirFunction->GetOutParmSize()); +- return; +- } +-#endif +- + AArch64CallConvImpl parmLocator(be); + CCLocInfo ploc; + for (size_t i = 0; i < mirFunction->GetFormalCount(); ++i) { +@@ -263,10 +249,6 @@ void AArch64MemLayout::LayoutFormalParams() { + segArgsRegPassed.SetSize(static_cast(RoundUp(segArgsRegPassed.GetSize(), align))); + symLoc->SetOffset(segArgsRegPassed.GetSize()); + segArgsRegPassed.SetSize(segArgsRegPassed.GetSize() + size); +-#if OLD_LMBC // TBD +- } else if (isLmbc) { +- segArgsRegPassed.SetSize(segArgsRegPassed.GetSize() + k8ByteSize); +-#endif + } + } else { /* stack */ + uint32 size; +@@ -381,15 +363,7 @@ void AArch64MemLayout::LayoutReturnRef(std::vector &returnDelays, + symLoc->SetOffset(segRefLocals.GetSize()); + segRefLocals.SetSize(segRefLocals.GetSize() + be.GetTypeSize(tyIdx)); + } +-#if OLD_LMBC // TBD +- if (be.GetMIRModule().GetFlavor() == kFlavorLmbc) { +- segArgsToStkPass.SetSize(mirFunction->GetOutParmSize() + kDivide2 * k8ByteSize); +- } else { +- segArgsToStkPass.SetSize(FindLargestActualArea(structCopySize)); +- } +-#else + segArgsToStkPass.SetSize(FindLargestActualArea(structCopySize)); +-#endif + maxParmStackSize = static_cast(segArgsToStkPass.GetSize()); + if (Globals::GetInstance()->GetOptimLevel() == 0) { + AssignSpillLocationsToPseudoRegisters(); diff --git a/build/envsetup.sh+ b/build/envsetup.sh+ new file mode 100644 index 0000000000000000000000000000000000000000..17553e9f77a6e959f5d5166369e2a13bf6919544 --- /dev/null +++ b/build/envsetup.sh+ @@ -0,0 +1,118 @@ +#!/bin/bash +# +# Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. +# +# OpenArkCompiler is licensed under Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# +# http://license.coscl.org.cn/MulanPSL2 +# +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +# FIT FOR A PARTICULAR PURPOSE. 
+# See the Mulan PSL v2 for more details. +# + +function print_usage { + echo " " + echo "usage: source envsetup.sh arm/ark/engine/riscv release/debug" + echo " " +} + +if [ "$#" -lt 2 ]; then + print_usage +# return +fi + +curdir=$(pwd) +export MAPLE_ROOT=${curdir} +export SPEC=${MAPLE_ROOT}/testsuite/c_test/spec_test +export LD_LIBRARY_PATH=${MAPLE_ROOT}/tools/gcc-linaro-7.5.0/aarch64-linux-gnu/libc/lib:${MAPLE_ROOT}/tools/clang+llvm-12.0.0-x86_64-linux-gnu-ubuntu-18.04/lib:${LD_LIBRARY_PATH} +export SPECPERLLIB=${SPEC}/bin/lib:${SPEC}/bin:${SPEC}/SPEC500-perlbench_r/data/all/input/lib:${SPEC}/SPEC500-perlbench_r/t/lib +export CASE_ROOT=${curdir}/testsuite +export OUT_ROOT=${curdir}/output +export ANDROID_ROOT=${curdir}/android +export MAPLE_BUILD_CORE=${MAPLE_ROOT}/build/core +if [ -d ${MAPLE_ROOT}/src/ast2mpl ]; then + export IS_AST2MPL_EXISTS=1 +else + export IS_AST2MPL_EXISTS=0 +fi +export GCOV_PREFIX=${MAPLE_ROOT}/report/gcda +export GCOV_PREFIX_STRIP=7 + +# display OS version +lsb_release -d + +export TOOL_BIN_PATH=${MAPLE_ROOT}/tools/bin +if [ -d ${MAPLE_ROOT}/testsuite/driver/.config ];then + rm -rf ${MAPLE_ROOT}/testsuite/driver/config + rm -rf ${MAPLE_ROOT}/testsuite/driver/src/api + rm -rf ${MAPLE_ROOT}/testsuite/driver/src/mode + cd ${MAPLE_ROOT}/testsuite/driver + ln -s -f .config config + cd ${MAPLE_ROOT}/testsuite/driver/src + ln -s -f .api api + ln -s -f .mode mode +fi + +cd ${MAPLE_ROOT} + +OS_VERSION=`lsb_release -r | sed -e "s/^[^0-9]*//" -e "s/\..*//"` +if [ "$OS_VERSION" = "16" ] || [ "$OS_VERSION" = "18" ]; then + export OLD_OS=1 +else + export OLD_OS=0 +fi + +# support multiple ARCH and BUILD_TYPE + +if [ $1 = "arm" ]; then + PLATFORM=aarch64 + USEOJ=0 +elif [ $1 = "riscv" ]; then + PLATFORM=riscv64 + USEOJ=0 +elif [ $1 = "engine" ]; then + PLATFORM=ark + USEOJ=1 +elif [ $1 = "ark" ]; then + PLATFORM=ark + USEOJ=1 +else + print_usage + return +fi + +if [ "$2" = "release" ]; then + TYPE=release + DEBUG=0 +elif [ "$2" = "debug" ]; then + TYPE=debug + DEBUG=1 +else + print_usage + return +fi + +export MAPLE_DEBUG=${DEBUG} +export TARGET_PROCESSOR=${PLATFORM} +export TARGET_SCOPE=${TYPE} +export USE_OJ_LIBCORE=${USEOJ} +export TARGET_TOOLCHAIN=clang +export MAPLE_BUILD_TYPE=${TARGET_PROCESSOR}-${TARGET_TOOLCHAIN}-${TARGET_SCOPE} +echo "Build: $MAPLE_BUILD_TYPE" +export MAPLE_BUILD_OUTPUT=${MAPLE_ROOT}/output/${MAPLE_BUILD_TYPE} +export MAPLE_EXECUTE_BIN=${MAPLE_ROOT}/output/${MAPLE_BUILD_TYPE}/bin +export TEST_BIN=${CASE_ROOT}/driver/script +export PATH=$PATH:${MAPLE_EXECUTE_BIN}:${TEST_BIN} + +if [ ! -f $MAPLE_ROOT/tools/qemu/usr/bin/qemu-aarch64 ] && [ "$OLD_OS" = "0" ]; then + echo " " + echo "!!! 
please run \"make setup\" to get proper qemu-aarch64" + echo " " +fi + + + diff --git a/c b/c new file mode 100644 index 0000000000000000000000000000000000000000..ed4b1123f0000559d84b951573207f968d994e72 --- /dev/null +++ b/c @@ -0,0 +1,124 @@ +1c1 +< commit 78bf941857556cd9755c5d9aa42d714cf503de23 +--- +> commit c4ab3ada2a8f9d388af26fd036063868f7589eb1 +8c8 +< index 525a62031..6e6e1dd88 100644 +--- +> index bf154e765..be3a3bae6 100644 +13c13 +< MIRType *LmbcGetAggTyFromCallSite(StmtNode *stmt, std::vector **parmList); +--- +> MIRType *LmbcGetAggTyFromCallSite(StmtNode *stmt, std::vector **parmList) const; +29c29 +< @@ -866,7 +868,7 @@ class AArch64CGFunc : public CGFunc { +--- +> @@ -869,7 +871,7 @@ class AArch64CGFunc : public CGFunc { +38c38 +< @@ -875,12 +877,12 @@ class AArch64CGFunc : public CGFunc { +--- +> @@ -878,12 +880,12 @@ class AArch64CGFunc : public CGFunc { +56c56 +< index a4aed235e..43cc353ec 100644 +--- +> index 0a29d3b5a..21e5e938e 100644 +59c59 +< @@ -1154,8 +1154,8 @@ class CGFunc { +--- +> @@ -1150,8 +1150,8 @@ class CGFunc { +71c71 +< index ee0b7293b..47c814fb7 100644 +--- +> index c2f1a470e..305c9f033 100644 +74c74 +< @@ -161,6 +161,28 @@ bool IsBlkassignForPush(BlkassignoffNode &bNode) { +--- +> @@ -161,6 +161,28 @@ bool IsBlkassignForPush(const BlkassignoffNode &bNode) { +103c103 +< @@ -3090,14 +3112,18 @@ RegOperand *AArch64CGFunc::LmbcStructReturnLoad(int32 offset) { +--- +> @@ -3095,15 +3117,19 @@ RegOperand *AArch64CGFunc::LmbcStructReturnLoad(int32 offset) { +118a119 +> - LmbcFormalParamInfo *info = GetLmbcFormalParamInfo(static_cast(offset)); +120c121 +< LmbcFormalParamInfo *info = GetLmbcFormalParamInfo(offset); +--- +> + LmbcFormalParamInfo *info = GetLmbcFormalParamInfo(offset); +123,124c124,126 +< @@ -3119,10 +3145,23 @@ Operand *AArch64CGFunc::SelectIreadfpoff(const BaseNode &parent, IreadFPoffNode +< result = &GetOrCreatePhysicalRegisterOperand((AArch64reg)(info->GetRegNO()), bitlen, regty); +--- +> result = GenLmbcParamLoad(info->GetOnStackOffset(), GetPrimTypeSize(PTY_a64), kRegTyInt, PTY_a64); +> @@ -3124,10 +3150,23 @@ Operand *AArch64CGFunc::SelectIreadfpoff(const BaseNode &parent, IreadFPoffNode +> result = &GetOrCreatePhysicalRegisterOperand(static_cast(info->GetRegNO()), bitlen, regty); +149c151 +< @@ -7355,12 +7394,26 @@ void AArch64CGFunc::SelectParmListDreadSmallAggregate(const MIRSymbol &sym, MIRT +--- +> @@ -7361,12 +7400,26 @@ void AArch64CGFunc::SelectParmListDreadSmallAggregate(const MIRSymbol &sym, MIRT +179c181 +< @@ -7734,8 +7787,8 @@ void AArch64CGFunc::CreateCallStructMemcpyToParamReg(MIRType &structType, int32 +--- +> @@ -7740,8 +7793,8 @@ void AArch64CGFunc::CreateCallStructMemcpyToParamReg(MIRType &structType, int32 +190c192 +< @@ -7783,6 +7836,32 @@ void AArch64CGFunc::SelectParmListForAggregate(BaseNode &argExpr, ListOperand &s +--- +> @@ -7789,6 +7842,32 @@ void AArch64CGFunc::SelectParmListForAggregate(BaseNode &argExpr, ListOperand &s +223c225 +< @@ -7825,7 +7904,7 @@ size_t AArch64CGFunc::SelectParmListGetStructReturnSize(StmtNode &naryNode) { +--- +> @@ -7831,7 +7910,7 @@ size_t AArch64CGFunc::SelectParmListGetStructReturnSize(StmtNode &naryNode) { +232c234 +< @@ -7872,6 +7951,22 @@ void AArch64CGFunc::SelectParmListPreprocessLargeStruct(BaseNode &argExpr, int32 +--- +> @@ -7878,6 +7957,22 @@ void AArch64CGFunc::SelectParmListPreprocessLargeStruct(BaseNode &argExpr, int32 +255c257 +< @@ -7903,7 +7998,7 @@ bool AArch64CGFunc::MarkParmListCall(BaseNode &expr) { +--- +> @@ -7909,7 +8004,7 @@ bool 
AArch64CGFunc::MarkParmListCall(BaseNode &expr) { +264c266 +< @@ -7916,7 +8011,7 @@ void AArch64CGFunc::SelectParmListPreprocess(const StmtNode &naryNode, size_t st +--- +> @@ -7922,7 +8017,7 @@ void AArch64CGFunc::SelectParmListPreprocess(const StmtNode &naryNode, size_t st +273c275 +< @@ -8016,7 +8111,7 @@ void AArch64CGFunc::SelectParmList(StmtNode &naryNode, ListOperand &srcOpnds, bo +--- +> @@ -8022,7 +8117,7 @@ void AArch64CGFunc::SelectParmList(StmtNode &naryNode, ListOperand &srcOpnds, bo +282c284 +< @@ -9434,7 +9529,11 @@ Operand *AArch64CGFunc::GetBaseReg(const AArch64SymbolAlloc &symAlloc) { +--- +> @@ -9428,7 +9523,11 @@ Operand *AArch64CGFunc::GetBaseReg(const AArch64SymbolAlloc &symAlloc) { +296c298 +< index 48054ebe8..16d4884f2 100644 +--- +> index b7403f414..b7ef7ae1e 100644 +395c397 +< index 7e6a33f40..d6208e2f4 100644 +--- +> index 546c386c7..9a0a3456b 100644 +438,439c440,441 +< offset -= static_cast(kAarch64StackPtrAlignment); +< @@ -1323,11 +1331,14 @@ void AArch64GenProEpilog::GeneratePushUnnamedVarargRegs() { +--- +> offset -= kAarch64StackPtrAlignment; +> @@ -1323,18 +1331,22 @@ void AArch64GenProEpilog::GeneratePushUnnamedVarargRegs() { +452c454 +< offset = -memlayout->GetSizeOfGRSaveArea(); /* FP reference */ +--- +> offset = (UINT32_MAX - memlayout->GetSizeOfGRSaveArea()) + 1; /* FP reference */ +454c456 +< @@ -1335,6 +1346,7 @@ void AArch64GenProEpilog::GeneratePushUnnamedVarargRegs() { +--- +> offset -= size; +457c459,460 +< uint32 grSize = -offset; +--- +> - uint32 grSize = (UINT32_MAX - offset) + 1; +> + uint32 grSize = -offset; +488c491 +< offset = -(memlayout->GetSizeOfVRSaveArea() + grSize); +--- +> offset = (UINT32_MAX - (memlayout->GetSizeOfVRSaveArea() + grSize)) + 1; +549c552 +< offset -= static_cast(kAarch64StackPtrAlignment); +--- +> offset -= kAarch64StackPtrAlignment; diff --git a/c.diff b/c.diff new file mode 100644 index 0000000000000000000000000000000000000000..3d7e5e313a42f4c0a5dd26b896055b491a0d802f --- /dev/null +++ b/c.diff @@ -0,0 +1,140 @@ +diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_proepilog.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_proepilog.cpp +index 02f27d5e0..1bb2260e5 100644 +--- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_proepilog.cpp ++++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_proepilog.cpp +@@ -1119,11 +1119,6 @@ void AArch64GenProEpilog::AppendInstructionAllocateCallFrameDebug(AArch64reg reg + ipoint = cgFunc.GetCurBB()->GetLastInsn(); + cfiOffset = stackFrameSize; + (void)InsertCFIDefCfaOffset(cfiOffset, *ipoint); +-#if OLD_LMBC //TBD +- if (cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc) { +- argsToStkPassSize -= (kDivide2 * k8ByteSize); +- } +-#endif + ipoint = &CreateAndAppendInstructionForAllocateCallFrame(argsToStkPassSize, reg0, reg1, rty); + CHECK_FATAL(ipoint != nullptr, "ipoint should not be nullptr at this point"); + cfiOffset = GetOffsetFromCFA(); +@@ -1262,15 +1257,10 @@ void AArch64GenProEpilog::GeneratePushRegs() { + AArch64MemLayout *memLayout = static_cast(cgFunc.GetMemlayout()); + int32 offset; + if (cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc) { +-#if OLD_LMBC // TBD +- offset = static_cast(memLayout->RealStackFrameSize() - +- aarchCGFunc.SizeOfCalleeSaved() - memLayout->GetSizeOfLocals()); +-#else + offset = static_cast(memLayout->RealStackFrameSize() - + (aarchCGFunc.SizeOfCalleeSaved() - (kDivide2 * kIntregBytelen)/* FP/LR */) - + memLayout->GetSizeOfLocals() - + memLayout->SizeOfArgsToStackPass()); +-#endif + } else { + offset = 
static_cast(memLayout->RealStackFrameSize() - + (aarchCGFunc.SizeOfCalleeSaved() - (kDivide2 * kIntregBytelen) /* for FP/LR */) - +@@ -1337,23 +1327,10 @@ void AArch64GenProEpilog::GeneratePushUnnamedVarargRegs() { + size = kSizeOfPtr; + } + uint32 dataSizeBits = size * kBitsPerByte; +- uint32 offset; +-#if OLD_LMBC // TBD +- if (cgFunc.GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc) { +-#endif +- offset = static_cast(memlayout->GetGRSaveAreaBaseLoc()); /* SP reference */ +- if (memlayout->GetSizeOfGRSaveArea() % kAarch64StackPtrAlignment) { +- offset += size; /* End of area should be aligned. Hole between VR and GR area */ +- } +-#if OLD_LMBC // TBD +- } else { +- offset = (UINT32_MAX - memlayout->GetSizeOfGRSaveArea()) + 1; /* FP reference */ +- if (memlayout->GetSizeOfGRSaveArea() % kAarch64StackPtrAlignment) { +- offset -= size; +- } ++ uint32 offset = static_cast(memlayout->GetGRSaveAreaBaseLoc()); /* SP reference */ ++ if (memlayout->GetSizeOfGRSaveArea() % kAarch64StackPtrAlignment) { ++ offset += size; /* End of area should be aligned. Hole between VR and GR area */ + } +- uint32 grSize = -offset; +-#endif + uint32 start_regno = k8BitSize - (memlayout->GetSizeOfGRSaveArea() / size); + ASSERT(start_regno <= k8BitSize, "Incorrect starting GR regno for GR Save Area"); + for (uint32 i = start_regno + static_cast(R0); i < static_cast(R8); i++) { +@@ -1363,16 +1340,7 @@ void AArch64GenProEpilog::GeneratePushUnnamedVarargRegs() { + tmpOffset += 8U - (dataSizeBits >> 3); + } + } +- Operand *stackLoc; +-#if OLD_LMBC // TBD +- if (cgFunc.GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc) { +-#endif +- stackLoc = &aarchCGFunc.CreateStkTopOpnd(offset + tmpOffset, dataSizeBits); +-#if OLD_LMBC // TBD +- } else { +- stackLoc = aarchCGFunc.GenLmbcFpMemOperand(offset, size); +- } +-#endif ++ Operand *stackLoc = &aarchCGFunc.CreateStkTopOpnd(offset + tmpOffset, dataSizeBits); + RegOperand ® = + aarchCGFunc.GetOrCreatePhysicalRegisterOperand(static_cast(i), k64BitSize, kRegTyInt); + Insn &inst = +@@ -1381,15 +1349,7 @@ void AArch64GenProEpilog::GeneratePushUnnamedVarargRegs() { + offset += size; + } + if (!CGOptions::UseGeneralRegOnly()) { +-#if OLD_LMBC // TBD +- if (cgFunc.GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc) { +-#endif +- offset = static_cast(memlayout->GetVRSaveAreaBaseLoc()); +-#if OLD_LMBC // TBD +- } else { +- offset = (UINT32_MAX - (memlayout->GetSizeOfVRSaveArea() + grSize)) + 1; +- } +-#endif ++ offset = static_cast(memlayout->GetVRSaveAreaBaseLoc()); + start_regno = k8BitSize - (memlayout->GetSizeOfVRSaveArea() / (size * k2BitSize)); + ASSERT(start_regno <= k8BitSize, "Incorrect starting GR regno for VR Save Area"); + for (uint32 i = start_regno + static_cast(V0); i < static_cast(V8); i++) { +@@ -1399,16 +1359,7 @@ void AArch64GenProEpilog::GeneratePushUnnamedVarargRegs() { + tmpOffset += 16U - (dataSizeBits >> 3); + } + } +- Operand *stackLoc; +-#if OLD_LMBC // TBD +- if (cgFunc.GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc) { +-#endif +- stackLoc = &aarchCGFunc.CreateStkTopOpnd(offset + tmpOffset, dataSizeBits); +-#if OLD_LMBC // TBD +- } else { +- stackLoc = aarchCGFunc.GenLmbcFpMemOperand(offset, size); +- } +-#endif ++ Operand *stackLoc = &aarchCGFunc.CreateStkTopOpnd(offset + tmpOffset, dataSizeBits); + RegOperand ® = + aarchCGFunc.GetOrCreatePhysicalRegisterOperand(static_cast(i), k64BitSize, kRegTyFloat); + Insn &inst = +@@ -1713,11 +1664,7 @@ void AArch64GenProEpilog::AppendInstructionDeallocateCallFrameDebug(AArch64reg r + * ldp/stp's imm should 
be within -512 and 504; + * if ldp's imm > 504, we fall back to the ldp-add version + */ +-#if OLD_LMBC // TBD +- bool isLmbc = (cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc); +-#else + bool isLmbc = false; +-#endif + if (cgFunc.HasVLAOrAlloca() || argsToStkPassSize == 0 || isLmbc) { + int lmbcOffset = 0; + if (isLmbc == false) { +@@ -1805,15 +1752,8 @@ void AArch64GenProEpilog::GeneratePopRegs() { + AArch64MemLayout *memLayout = static_cast(cgFunc.GetMemlayout()); + int32 offset; + if (cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc) { +-#if OLD_LMBC // TBD + offset = static_cast(memLayout->RealStackFrameSize() - + aarchCGFunc.SizeOfCalleeSaved() - memLayout->GetSizeOfLocals()); +-#else +- offset = static_cast(memLayout->RealStackFrameSize() - +- (aarchCGFunc.SizeOfCalleeSaved() - (kDivide2 * kIntregBytelen)/* FP/LR */) - +- memLayout->GetSizeOfLocals() - +- memLayout->SizeOfArgsToStackPass()); +-#endif + } else { + offset = static_cast(cgFunc.GetMemlayout())->RealStackFrameSize() - + (aarchCGFunc.SizeOfCalleeSaved() - (kDivide2 * kIntregBytelen) /* for FP/LR */) - diff --git a/src/mapleall/maple_be/include/be/common_utils.h+ b/src/mapleall/maple_be/include/be/common_utils.h+ new file mode 100644 index 0000000000000000000000000000000000000000..f2f0cd9b107c1948bbf93f1e5535871512c7da49 --- /dev/null +++ b/src/mapleall/maple_be/include/be/common_utils.h+ @@ -0,0 +1,201 @@ +/* + * Copyright (c) [2020-2021] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + */ +#ifndef MAPLEBE_INCLUDE_BE_COMMON_UTILS_H +#define MAPLEBE_INCLUDE_BE_COMMON_UTILS_H +#include +#include "types_def.h" +#include "mpl_logging.h" + +namespace maplebe { +using namespace maple; +constexpr uint32 kOffsetAlignmentOf8Bit = 0; +constexpr uint32 kOffsetAlignmentOf16Bit = 1; +constexpr uint32 kOffsetAlignmentOf32Bit = 2; +constexpr uint32 kOffsetAlignmentOf64Bit = 3; +constexpr uint32 kOffsetAlignmentOf128Bit = 4; +constexpr uint32 kBaseOffsetAlignment = 3; +/* + * The constexpr implementations, without assertions. Suitable for using in + * constants. 
+ */ +constexpr uint32 k1FConst = 31; +constexpr uint32 k0BitSize = 0; +constexpr uint32 k1BitSize = 1; +constexpr uint32 k2BitSize = 2; +constexpr uint32 k3BitSize = 3; +constexpr uint32 k4BitSize = 4; +constexpr uint32 k5BitSize = 5; +constexpr uint32 k6BitSize = 6; +constexpr uint32 k7BitSize = 7; +constexpr uint32 k8BitSize = 8; +constexpr uint32 k16BitSize = 16; +constexpr uint32 k24BitSize = 24; +constexpr uint32 k32BitSize = 32; +constexpr uint32 k48BitSize = 48; +constexpr uint32 k56BitSize = 56; +constexpr uint32 k64BitSize = 64; +constexpr uint32 k128BitSize = 128; +constexpr uint32 k256BitSize = 256; +constexpr uint32 k512BitSize = 512; +constexpr uint32 k1024BitSize = 1024; + +constexpr int32 kNegative256BitSize = -256; +constexpr int32 kNegative512BitSize = -512; +constexpr int32 kNegative1024BitSize = -1024; + +constexpr uint32 k1ByteSize = 1; +constexpr uint32 k2ByteSize = 2; +constexpr uint32 k3ByteSize = 3; +constexpr uint32 k4ByteSize = 4; +constexpr uint32 k8ByteSize = 8; +constexpr uint32 k9ByteSize = 9; +constexpr uint32 k12ByteSize = 12; +constexpr uint32 k14ByteSize = 14; +constexpr uint32 k15ByteSize = 15; +constexpr uint32 k16ByteSize = 16; + +constexpr uint32 k4BitShift = 2; /* 4 is 1 << 2; */ +constexpr uint32 k8BitShift = 3; /* 8 is 1 << 3; */ +constexpr uint32 k16BitShift = 4; /* 16 is 1 << 4 */ + +constexpr uint32 kDwordSizeTwo = 2; + +constexpr uint32 k4ByteFloatSize = 4; +constexpr uint32 k8ByteDoubleSize = 8; + +/* Storage location of operands in one insn */ +constexpr int32 kInsnFirstOpnd = 0; +constexpr int32 kInsnSecondOpnd = 1; +constexpr int32 kInsnThirdOpnd = 2; +constexpr int32 kInsnFourthOpnd = 3; +constexpr int32 kInsnFifthOpnd = 4; +constexpr int32 kInsnSixthOpnd = 5; +constexpr int32 kInsnSeventhOpnd = 6; +constexpr int32 kInsnEighthOpnd = 7; +constexpr int32 kInsnMaxOpnd = 8; + +/* inline asm operand designations */ +constexpr uint32 kAsmStringOpnd = 0; +constexpr uint32 kAsmOutputListOpnd = 1; +constexpr uint32 kAsmClobberListOpnd = 2; +constexpr uint32 kAsmInputListOpnd = 3; +constexpr uint32 kAsmOutputConstraintOpnd = 4; +constexpr uint32 kAsmInputConstraintOpnd = 5; +constexpr uint32 kAsmOutputRegPrefixOpnd = 6; +constexpr uint32 kAsmInputRegPrefixOpnd = 7; + +/* Number of registers */ +constexpr uint32 kOneRegister = 1; +constexpr uint32 kTwoRegister = 2; +constexpr uint32 kThreeRegister = 3; +constexpr uint32 kFourRegister = 4; + +/* position of an operand within an instruction */ +constexpr uint32 kOperandPosition0 = 0; +constexpr uint32 kOperandPosition1 = 1; +constexpr uint32 kOperandPosition2 = 2; + +/* Size of struct for memcpy */ +constexpr uint32 kParmMemcpySize = 40; + +/* Check whether the value is an even number. */ +constexpr int32 kDivide2 = 2; +constexpr int32 kRegNum2 = 2; +constexpr int32 kStepNum2 = 2; +constexpr int32 kSign4ByteSize = 4; + +/* + * if the number of local refvar is less than 12, use stp or str to init local refvar + * else call function MCC_InitializeLocalStackRef to init. 
+ */
+constexpr int32 kRefNum12 = 12;
+
+/* mod function max argument size */
+constexpr uint32 kMaxModFuncArgSize = 8;
+
+/* string length of special name "__EARetTemp__" */
+constexpr int32 kEARetTempNameSize = 10;
+
+/*
+ * Aarch64 data processing instructions have 12 bits of space for values in their instruction word
+ * This is arranged as a four-bit rotate value and an eight-bit immediate value:
+ */
+constexpr uint32 kMaxImmVal5Bits = 5;
+constexpr uint32 kMaxImmVal6Bits = 6;
+constexpr uint32 kMaxImmVal8Bits = 8;
+constexpr uint32 kMaxImmVal12Bits = 12;
+constexpr uint32 kMaxImmVal13Bits = 13;
+constexpr uint32 kMaxImmVal16Bits = 16;
+
+constexpr int32 kMaxPimm8 = 4095;
+constexpr int32 kMaxPimm16 = 8190;
+constexpr int32 kMaxPimm32 = 16380;
+constexpr int32 kMaxPimm64 = 32760;
+constexpr int32 kMaxPimm128 = 65520;
+
+constexpr int32 kMaxPimm[k5BitSize] = {kMaxPimm8, kMaxPimm16, kMaxPimm32, kMaxPimm64, kMaxPimm128};
+constexpr int32 kMaxPairPimm[k3BitSize] = {k256BitSize, k512BitSize, k512BitSize};
+
+constexpr int32 kMaxSimm32 = 255;
+constexpr int32 kMaxSimm32Pair = 252;
+constexpr int32 kMinSimm32 = kNegative256BitSize;
+constexpr int32 kMaxSimm64Pair = 504;
+constexpr int32 kMinSimm64 = kNegative512BitSize;
+
+constexpr int32 kMax12UnsignedImm = 4096;
+constexpr int32 kMax13UnsignedImm = 8192;
+constexpr int32 kMax16UnsignedImm = 65535;
+
+/* Dedicated for Vector */
+constexpr int32 kMinImmVal = -128;
+constexpr int32 kMaxImmVal = 255;
+
+/* aarch64 assembly takes up to 24-bits */
+constexpr uint32 kMaxImmVal24Bits = 24;
+
+constexpr uint32 kDecimalMax = 10;
+
+constexpr double kMicroSecPerMilliSec = 1000.0;
+
+constexpr double kPercent = 100.0;
+
+inline bool IsPowerOf2Const(uint64 i) {
+ return (i & (i - 1)) == 0;
+}
+
+inline uint64 RoundUpConst(uint64 offset, uint64 align) {
+ return (-align) & (offset + align - 1);
+}
+
+inline bool IsPowerOf2(uint64 i) {
+ return IsPowerOf2Const(i);
+}
+
+/* align must be a power of 2 */
+inline uint64 RoundUp(uint64 offset, uint64 align) {
+ if (align == 0) {
+ return offset;
+ }
+ ASSERT(IsPowerOf2(align), "align must be power of 2!");
+ return RoundUpConst(offset, align);
+}
+
+inline bool IsAlignedTo(uint64 offset, uint64 align) {
+ ASSERT(IsPowerOf2(align), "align must be power of 2!");
+ return (offset & (align - 1)) == 0;
+}
+} /* namespace maplebe */
+
+#endif /* MAPLEBE_INCLUDE_BE_COMMON_UTILS_H */
diff --git a/src/mapleall/maple_be/include/cg/aarch64/aarch64_cgfunc.h b/src/mapleall/maple_be/include/cg/aarch64/aarch64_cgfunc.h
index fd12a8307cb7e08227d5cb7b0889dc3abc89712c..8835a97bd9ad4122ed17d19024b719713a88eaa4 100644
--- a/src/mapleall/maple_be/include/cg/aarch64/aarch64_cgfunc.h
+++ b/src/mapleall/maple_be/include/cg/aarch64/aarch64_cgfunc.h
@@ -104,6 +104,7 @@ class AArch64CGFunc : public CGFunc {
 MIRType *LmbcGetAggTyFromCallSite(StmtNode *stmt, std::vector **parmList) const;
 RegOperand &GetOrCreateResOperand(const BaseNode &parent, PrimType primType);
+ MIRStructType *GetLmbcStructArgType(BaseNode &stmt, int32 argNo);
 void IntrinsifyGetAndAddInt(ListOperand &srcOpnds, PrimType pty);
 void IntrinsifyGetAndSetInt(ListOperand &srcOpnds, PrimType pty);
@@ -128,7 +129,7 @@ class AArch64CGFunc : public CGFunc {
 MemOperand *FixLargeMemOpnd(MOperator mOp, MemOperand &memOpnd, uint32 dSize, uint32 opndIdx);
 uint32 LmbcFindTotalStkUsed(std::vector *paramList);
 uint32 LmbcTotalRegsUsed();
- bool LmbcSmallAggForRet(const BlkassignoffNode &bNode, const Operand *src);
+ bool LmbcSmallAggForRet(BaseNode &bNode, Operand *src);
 bool
LmbcSmallAggForCall(BlkassignoffNode &bNode, const Operand *src, std::vector **parmList); void SelectAggDassign(DassignNode &stmt) override; void SelectIassign(IassignNode &stmt) override; @@ -874,7 +875,7 @@ class AArch64CGFunc : public CGFunc { void SelectParmListDreadSmallAggregate(const MIRSymbol &sym, MIRType &structType, ListOperand &srcOpnds, int32 offset, AArch64CallConvImpl &parmLocator, FieldID fieldID); - void SelectParmListIreadSmallAggregate(const IreadNode &iread, MIRType &structType, ListOperand &srcOpnds, + void SelectParmListIreadSmallAggregate(BaseNode &iread, MIRType &structType, ListOperand &srcOpnds, int32 offset, AArch64CallConvImpl &parmLocator); void SelectParmListDreadLargeAggregate(const MIRSymbol &sym, MIRType &structType, ListOperand &srcOpnds, @@ -883,12 +884,12 @@ class AArch64CGFunc : public CGFunc { AArch64CallConvImpl &parmLocator, int32 &structCopyOffset, int32 fromOffset); void CreateCallStructMemcpyToParamReg(MIRType &structType, int32 structCopyOffset, AArch64CallConvImpl &parmLocator, ListOperand &srcOpnds); - void SelectParmListForAggregate(BaseNode &argExpr, ListOperand &srcOpnds, AArch64CallConvImpl &parmLocator, - int32 &structCopyOffset); + void SelectParmListForAggregate(BaseNode &parent, BaseNode &argExpr, ListOperand &srcOpnds, + AArch64CallConvImpl &parmLocator, int32 &structCopyOffset, int32 argNo); size_t SelectParmListGetStructReturnSize(StmtNode &naryNode); bool MarkParmListCall(BaseNode &expr); - void SelectParmListPreprocessLargeStruct(BaseNode &argExpr, int32 &structCopyOffset); - void SelectParmListPreprocess(const StmtNode &naryNode, size_t start, std::set &specialArgs); + void SelectParmListPreprocessLargeStruct(BaseNode &parent, BaseNode &argExpr, int32 &structCopyOffset, int32 argNo); + void SelectParmListPreprocess(StmtNode &naryNode, size_t start, std::set &specialArgs); void SelectParmList(StmtNode &naryNode, ListOperand &srcOpnds, bool isCallNative = false); Operand *SelectClearStackCallParam(const AddrofNode &expr, int64 &offsetValue); void SelectClearStackCallParmList(const StmtNode &naryNode, ListOperand &srcOpnds, diff --git a/src/mapleall/maple_be/include/cg/cg_ssa_pre.h+ b/src/mapleall/maple_be/include/cg/cg_ssa_pre.h+ new file mode 100644 index 0000000000000000000000000000000000000000..2effcdd651eb868fc01484ec750bab0fac353f70 --- /dev/null +++ b/src/mapleall/maple_be/include/cg/cg_ssa_pre.h+ @@ -0,0 +1,210 @@ +/* + * Copyright (c) [2021] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + */ +#ifndef MAPLEBE_CG_INCLUDE_CG_SSU_PRE_H +#define MAPLEBE_CG_INCLUDE_CG_SSU_PRE_H +#include +#include "mempool.h" +#include "mempool_allocator.h" +#include "cg_dominance.h" + +// Use SSAPRE to determine where to insert saves for callee-saved registers. +// The external interface is DoSavePlacementOpt(). Class SsaPreWorkCand is used +// as input/output interface. + +namespace maplebe { + +typedef uint32 BBId; + +// This must have been constructed by the caller of DoSavePlacementOpt() and +// passed to it as parameter. 
The caller of DoSavePlacementOpt() describes +// the problem via occBBs. DoSavePlacementOpt()'s outputs are returned to the +// caller by setting saveAtEntryBBs. +class SsaPreWorkCand { + public: + explicit SsaPreWorkCand(MapleAllocator *alloc): + occBBs(alloc->Adapter()), + saveAtEntryBBs(alloc->Adapter()) {} + // inputs + MapleSet occBBs; // Id's of BBs with appearances of the callee-saved reg + // outputs + MapleSet saveAtEntryBBs; // Id's of BBs to insert saves of the register at BB entry + bool saveAtProlog = false; // if true, no shrinkwrapping can be done and + // the other outputs can be ignored +}; + +extern void DoSavePlacementOpt(CGFunc *f, DomAnalysis *dom, SsaPreWorkCand *workCand); + +enum AOccType { + kAOccUndef, + kAOccReal, + kAOccPhi, + kAOccPhiOpnd, + kAOccExit, +}; + +class Occ { + public: + Occ(AOccType ty, BB *bb) : occTy(ty), cgbb(bb) {} + virtual ~Occ() = default; + + virtual void Dump() const = 0; + bool IsDominate(DomAnalysis *dom, const Occ *occ) const { + return dom->Dominate(*cgbb, *occ->cgbb); + } + + AOccType occTy; + uint32 classId = 0; + BB *cgbb; // the BB it occurs in + Occ *def = nullptr; // points to its single def +}; + +class RealOcc : public Occ { + public: + RealOcc(BB *bb): Occ(kAOccReal, bb) {} + virtual ~RealOcc() = default; + + void Dump() const { + LogInfo::MapleLogger() << "RealOcc at bb" << cgbb->GetId(); + LogInfo::MapleLogger() << " classId" << classId; + } + + bool redundant = true; +}; + +class PhiOcc; + +class PhiOpndOcc : public Occ { + public: + explicit PhiOpndOcc(BB *bb): Occ(kAOccPhiOpnd, bb) {} + virtual ~PhiOpndOcc() = default; + + void Dump() const { + LogInfo::MapleLogger() << "PhiOpndOcc at bb" << cgbb->GetId() << " classId" << classId; + } + + + PhiOcc *defPhiOcc = nullptr; // its lhs definition + bool hasRealUse = false; + bool insertHere = false; +}; + +class PhiOcc : public Occ { + public: + PhiOcc(BB *bb, MapleAllocator &alloc) + : Occ(kAOccPhi, bb), phiOpnds(alloc.Adapter()) {} + virtual ~PhiOcc() = default; + + bool WillBeAvail() const { + return isCanBeAvail && !isLater; + } + + void Dump() const { + LogInfo::MapleLogger() << "PhiOcc at bb" << cgbb->GetId() << " classId" << classId << " Phi["; + for (size_t i = 0; i < phiOpnds.size(); i++) { + phiOpnds[i]->Dump(); + if (i != phiOpnds.size() - 1) { + LogInfo::MapleLogger() << ", "; + } + } + LogInfo::MapleLogger() << "]"; + } + + + bool isDownsafe = true; + bool isCanBeAvail = true; + bool isLater = true; + MapleVector phiOpnds; +}; + +class ExitOcc : public Occ { + public: + explicit ExitOcc(BB *bb) : Occ(kAOccExit, bb) {} + virtual ~ExitOcc() = default; + + void Dump() const { + LogInfo::MapleLogger() << "ExitOcc at bb" << cgbb->GetId(); + } +}; + +class SSAPre { + public: + SSAPre(CGFunc *cgfunc, DomAnalysis *dm, MemPool *memPool, SsaPreWorkCand *wkcand, bool enDebug) + : cgFunc(cgfunc), + dom(dm), + preMp(memPool), + preAllocator(memPool), + workCand(wkcand), + fullyAntBBs(cgfunc->GetAllBBs().size(), true, preAllocator.Adapter()), + phiDfns(std::less(), preAllocator.Adapter()), + classCount(0), + realOccs(preAllocator.Adapter()), + allOccs(preAllocator.Adapter()), + phiOccs(preAllocator.Adapter()), + exitOccs(preAllocator.Adapter()), + enabledDebug(enDebug) {} + ~SSAPre() = default; + + void ApplySSAPre(); + + private: + // step 6 methods + void CodeMotion(); + // step 5 methods + void Finalize(); + // step 4 methods + void ResetCanBeAvail(PhiOcc *phi) const; + void ComputeCanBeAvail() const; + void ResetLater(PhiOcc *phi) const; + void ComputeLater() const; + // 
step 3 methods
+ void ResetDownsafe(const PhiOpndOcc *phiOpnd) const;
+ void ComputeDownsafe() const;
+ // step 2 methods
+ void Rename();
+ // step 1 methods
+ void GetIterDomFrontier(const BB *bb, MapleSet *dfset) const {
+ for (BBId bbid : dom->GetIdomFrontier(bb->GetId())) {
+ (void)dfset->insert(dom->GetDtDfnItem(bbid));
+ }
+ }
+ void FormPhis();
+ void CreateSortedOccs();
+ // step 0 methods
+ void PropagateNotAnt(BB *bb, std::set *visitedBBs);
+ void FormRealsNExits();
+
+ CGFunc *cgFunc;
+ DomAnalysis *dom;
+ MemPool *preMp;
+ MapleAllocator preAllocator;
+ SsaPreWorkCand *workCand;
+ // step 0
+ MapleVector fullyAntBBs; // index is BBid; true if occ is fully anticipated at BB entry
+ // step 1 phi insertion data structures:
+ MapleSet phiDfns; // set by FormPhis(); set of BBs in terms of their
+ // dfn's; index into dominance->dt_preorder to get
+ // their bbid's
+ // step 2 renaming
+ uint32 classCount; // for assigning new class id
+ // the following 4 lists are all maintained in order of dt_preorder
+ MapleVector realOccs;
+ MapleVector allOccs;
+ MapleVector phiOccs;
+ MapleVector exitOccs;
+ bool enabledDebug;
+};
+
+}; // namespace maplebe
+#endif // MAPLEBE_CG_INCLUDE_CG_SSA_PRE_H
diff --git a/src/mapleall/maple_be/include/cg/cg_ssu_pre.h+ b/src/mapleall/maple_be/include/cg/cg_ssu_pre.h+
new file mode 100644
index 0000000000000000000000000000000000000000..d2bc7c65d0a3ee4ee3fba9a174b43cdd4483778a
--- /dev/null
+++ b/src/mapleall/maple_be/include/cg/cg_ssu_pre.h+
@@ -0,0 +1,234 @@
+/*
+ * Copyright (c) [2021] Huawei Technologies Co.,Ltd.All rights reserved.
+ *
+ * OpenArkCompiler is licensed under Mulan PSL v2.
+ * You can use this software according to the terms and conditions of the Mulan PSL v2.
+ * You may obtain a copy of Mulan PSL v2 at:
+ *
+ * http://license.coscl.org.cn/MulanPSL2
+ *
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR
+ * FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PSL v2 for more details.
+ */
+#ifndef MAPLEBE_CG_INCLUDE_CGSSUPRE_H
+#define MAPLEBE_CG_INCLUDE_CGSSUPRE_H
+#include
+#include "mempool.h"
+#include "mempool_allocator.h"
+#include "cg_dominance.h"
+
+// Use SSUPRE to determine where to insert restores for callee-saved registers.
+// The external interface is DoRestorePlacementOpt(). Class SPreWorkCand is used
+// as input/output interface.
+
+namespace maplebe {
+
+typedef uint32 BBId;
+
+// This must have been constructed by the caller of DoRestorePlacementOpt() and
+// passed to it as parameter. The caller of DoRestorePlacementOpt() describes
+// the problem via occBBs and saveBBs. DoRestorePlacementOpt()'s outputs are
+// returned to the caller by setting restoreAtEntryBBs and restoreAtExitBBs.
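+//
+// A possible calling sequence (sketch only; cgFunc, postDom, tmpAlloc, useBB and saveBB are
+// placeholder names for the caller's own objects, not part of this interface):
+//   SPreWorkCand cand(&tmpAlloc);
+//   cand.occBBs.insert(useBB->GetId());    // every BB that references the callee-saved register
+//   cand.saveBBs.insert(saveBB->GetId());  // every BB that contains a save of that register
+//   DoRestorePlacementOpt(cgFunc, postDom, &cand);
+//   if (cand.restoreAtEpilog) {
+//     // no shrinkwrapping possible: keep the restores in the epilog
+//   } else {
+//     // insert restores at entry of restoreAtEntryBBs and at exit of restoreAtExitBBs
+//   }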
+class SPreWorkCand { + public: + explicit SPreWorkCand(MapleAllocator *alloc): + occBBs(alloc->Adapter()), saveBBs(alloc->Adapter()), + restoreAtEntryBBs(alloc->Adapter()), restoreAtExitBBs(alloc->Adapter()) {} + // inputs + MapleSet occBBs; // Id's of BBs with appearances of the callee-saved reg + MapleSet saveBBs; // Id's of BBs with saves of the callee-saved reg + // outputs + MapleSet restoreAtEntryBBs; // Id's of BBs to insert restores of the register at BB entry + MapleSet restoreAtExitBBs; // Id's of BBs to insert restores of the register at BB exit + bool restoreAtEpilog = false; // if true, no shrinkwrapping can be done and + // the other outputs can be ignored +}; + +extern void DoRestorePlacementOpt(CGFunc *f, PostDomAnalysis *pdom, SPreWorkCand *workCand); + +enum SOccType { + kSOccUndef, + kSOccReal, + kSOccLambda, + kSOccLambdaRes, + kSOccEntry, + kSOccKill, +}; + +class SOcc { + public: + SOcc(SOccType ty, BB *bb) : occTy(ty), cgbb(bb) {} + virtual ~SOcc() = default; + + virtual void Dump() const = 0; + bool IsPostDominate(PostDomAnalysis *pdom, const SOcc *occ) const { + return pdom->PostDominate(*cgbb, *occ->cgbb); + } + + public: + SOccType occTy; + uint32 classId = 0; + BB *cgbb; // the BB it occurs in + SOcc *use = nullptr; // points to its single use +}; + +class SRealOcc : public SOcc { + public: + SRealOcc(BB *bb): SOcc(kSOccReal, bb) {} + virtual ~SRealOcc() = default; + + void Dump() const { + LogInfo::MapleLogger() << "RealOcc at bb" << cgbb->GetId(); + LogInfo::MapleLogger() << " classId" << classId; + } + + public: + bool redundant = true; +}; + +class SLambdaOcc; + +class SLambdaResOcc : public SOcc { + public: + explicit SLambdaResOcc(BB *bb): SOcc(kSOccLambdaRes, bb) {} + virtual ~SLambdaResOcc() = default; + + void Dump() const { + LogInfo::MapleLogger() << "LambdaResOcc at bb" << cgbb->GetId() << " classId" << classId; + } + + public: + SLambdaOcc *useLambdaOcc = nullptr; // its rhs use + bool hasRealUse = false; + bool insertHere = false; +}; + +class SLambdaOcc : public SOcc { + public: + SLambdaOcc(BB *bb, MapleAllocator &alloc) + : SOcc(kSOccLambda, bb), lambdaRes(alloc.Adapter()) {} + virtual ~SLambdaOcc() = default; + + bool WillBeAnt() const { + return isCanBeAnt && !isEarlier; + } + + void Dump() const { + LogInfo::MapleLogger() << "LambdaOcc at bb" << cgbb->GetId() << " classId" << classId << " Lambda["; + for (size_t i = 0; i < lambdaRes.size(); i++) { + lambdaRes[i]->Dump(); + if (i != lambdaRes.size() - 1) { + LogInfo::MapleLogger() << ", "; + } + } + LogInfo::MapleLogger() << "]"; + } + + public: + bool isUpsafe = true; + bool isCanBeAnt = true; + bool isEarlier = true; + MapleVector lambdaRes; +}; + +class SEntryOcc : public SOcc { + public: + explicit SEntryOcc(BB *bb) : SOcc(kSOccEntry, bb) {} + virtual ~SEntryOcc() = default; + + void Dump() const { + LogInfo::MapleLogger() << "EntryOcc at bb" << cgbb->GetId(); + } +}; + +class SKillOcc : public SOcc { + public: + explicit SKillOcc(BB *bb) : SOcc(kSOccKill, bb) {} + virtual ~SKillOcc() = default; + + void Dump() const { + LogInfo::MapleLogger() << "KillOcc at bb" << cgbb->GetId(); + } +}; + +class SSUPre { + public: + SSUPre(CGFunc *cgfunc, PostDomAnalysis *pd, MemPool *memPool, SPreWorkCand *wkcand, bool enDebug) + : cgFunc(cgfunc), + pdom(pd), + spreMp(memPool), + spreAllocator(memPool), + workCand(wkcand), + fullyAvailBBs(cgfunc->GetAllBBs().size(), true, spreAllocator.Adapter()), + realOccDfns(std::less(), spreAllocator.Adapter()), + lambdaDfns(std::less(), spreAllocator.Adapter()), + 
classCount(0), + realOccs(spreAllocator.Adapter()), + allOccs(spreAllocator.Adapter()), + lambdaOccs(spreAllocator.Adapter()), + entryOccs(spreAllocator.Adapter()), + enabledDebug(enDebug) { + CreateEntryOcc(cgfunc->GetFirstBB()); + } + ~SSUPre() = default; + + void ApplySSUPre(); + + private: + // step 6 methods + void CodeMotion(); + // step 5 methods + void Finalize(); + // step 4 methods + void ResetCanBeAnt(SLambdaOcc *lambda) const; + void ComputeCanBeAnt() const; + void ResetEarlier(SLambdaOcc *lambda) const; + void ComputeEarlier() const; + // step 3 methods + void ResetUpsafe(const SLambdaResOcc *lambdaRes) const; + void ComputeUpsafe() const; + // step 2 methods + void Rename(); + // step 1 methods + void GetIterPdomFrontier(const BB *bb, MapleSet *pdfset) const { + for (BBId bbid : pdom->GetIpdomFrontier(bb->GetId())) { + (void)pdfset->insert(pdom->GetPdtDfnItem(bbid)); + } + } + void FormLambdas(); + void FormLambdaRes(); + void CreateSortedOccs(); + // step 0 methods + void CreateEntryOcc(BB *bb) { + SEntryOcc *entryOcc = spreMp->New(bb); + entryOccs.push_back(entryOcc); + } + void PropagateNotAvail(BB *bb, std::set *visitedBBs); + void FormReals(); + + CGFunc *cgFunc; + PostDomAnalysis *pdom; + MemPool *spreMp; + MapleAllocator spreAllocator; + SPreWorkCand *workCand; + // following are set of BBs in terms of their dfn's; index into + // dominance->pdt_preorder to get their bbid's + // step 0 + MapleVector fullyAvailBBs; // index is BBid; true if occ is fully available at BB exit + MapleSet realOccDfns; // set by FormReals() + // step 1 lambda insertion data structures: + MapleSet lambdaDfns; // set by FormLambdas() + // step 2 renaming + uint32 classCount; // for assigning new class id + // the following 4 lists are all maintained in order of pdt_preorder + MapleVector realOccs; // both real and kill occurrences + MapleVector allOccs; + MapleVector lambdaOccs; + MapleVector entryOccs; + bool enabledDebug; +}; + +}; // namespace maplabe +#endif // MAPLEBE_CG_INCLUDE_CGSSUPRE_H diff --git a/src/mapleall/maple_be/include/cg/cgfunc.h b/src/mapleall/maple_be/include/cg/cgfunc.h index 039656f5079b6da72c4cfa447142899509696c40..d36a7dceb63abbffd1dfa8af8f6f65716fb89b1c 100644 --- a/src/mapleall/maple_be/include/cg/cgfunc.h +++ b/src/mapleall/maple_be/include/cg/cgfunc.h @@ -1163,7 +1163,7 @@ class CGFunc { MapleMap vregsToPregsMap; uint32 totalInsns = 0; int32 structCopySize = 0; - int32 maxParamStackSize; + int32 maxParamStackSize = 0; static constexpr int kRegIncrStepLen = 80; /* reg number increate step length */ bool hasVLAOrAlloca = false; diff --git a/src/mapleall/maple_be/include/cg/cgfunc.h+ b/src/mapleall/maple_be/include/cg/cgfunc.h+ new file mode 100644 index 0000000000000000000000000000000000000000..ff3b2c8766e37bff130851f5d9193b03af40489c --- /dev/null +++ b/src/mapleall/maple_be/include/cg/cgfunc.h+ @@ -0,0 +1,1075 @@ +/* + * Copyright (c) [2020-2021] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ */ +#ifndef MAPLEBE_INCLUDE_CG_CGFUNC_H +#define MAPLEBE_INCLUDE_CG_CGFUNC_H + +#include "becommon.h" +#include "operand.h" +#include "eh_func.h" +#include "memlayout.h" +#include "cgbb.h" +#include "reg_alloc.h" +#include "cfi.h" +#include "dbg.h" +#include "reaching.h" +#include "cg_cfg.h" +/* MapleIR headers. */ +#include "mir_parser.h" +#include "mir_function.h" +#include "debug_info.h" + +/* Maple MP header */ +#include "mempool_allocator.h" + +namespace maplebe { +constexpr int32 kBBLimit = 10000; +constexpr int32 kFreqBase = 10000; +struct MemOpndCmp { + bool operator()(const MemOperand *lhs, const MemOperand *rhs) const { + CHECK_FATAL(lhs != nullptr, "null ptr check"); + CHECK_FATAL(rhs != nullptr, "null ptr check"); + if (lhs == rhs) { + return false; + } + return (lhs->Less(*rhs)); + } +}; + +class SpillMemOperandSet { + public: + explicit SpillMemOperandSet(MapleAllocator &mallocator) : reuseSpillLocMem(mallocator.Adapter()) {} + + virtual ~SpillMemOperandSet() = default; + + void Add(MemOperand &op) { + (void)reuseSpillLocMem.insert(&op); + } + + void Remove(MemOperand &op) { + reuseSpillLocMem.erase(&op); + } + + MemOperand *GetOne() { + if (!reuseSpillLocMem.empty()) { + MemOperand *res = *reuseSpillLocMem.begin(); + reuseSpillLocMem.erase(res); + return res; + } + return nullptr; + } + + private: + MapleSet reuseSpillLocMem; +}; + +#if TARGARM32 +class LiveRange; +#endif /* TARGARM32 */ +constexpr uint32 kVRegisterNumber = 80; +class CGFunc { + public: + enum ShiftDirection : uint8 { + kShiftLeft, + kShiftAright, + kShiftLright + }; + + CGFunc(MIRModule &mod, CG &cg, MIRFunction &mirFunc, BECommon &beCommon, MemPool &memPool, + StackMemPool &stackMp, MapleAllocator &mallocator, uint32 funcId); + virtual ~CGFunc(); + + const std::string &GetName() const { + return func.GetName(); + } + + const MapleMap &GetLabelAndValueMap() const { + return labelMap; + } + + void InsertLabelMap(LabelIdx idx, uint64 value) { + ASSERT(labelMap.find(idx) == labelMap.end(), "idx already exist"); + labelMap[idx] = value; + } + + void LayoutStackFrame() { + CHECK_FATAL(memLayout != nullptr, "memLayout should has been initialized in constructor"); + memLayout->LayoutStackFrame(structCopySize, maxParamStackSize); + } + + bool HasCall() const { + return func.HasCall(); + } + + bool HasVLAOrAlloca() const { + return hasVLAOrAlloca; + } + + void SetRD(ReachingDefinition *paramRd) { + reachingDef = paramRd; + } + + bool GetRDStatus() const { + return (reachingDef != nullptr); + } + + ReachingDefinition *GetRD() { + return reachingDef; + } + + EHFunc *BuildEHFunc(); + virtual void GenSaveMethodInfoCode(BB &bb) = 0; + virtual void GenerateCleanupCode(BB &bb) = 0; + virtual bool NeedCleanup() = 0; + virtual void GenerateCleanupCodeForExtEpilog(BB &bb) = 0; + + void GenerateLoc(StmtNode *stmt, unsigned &lastSrcLoc, unsigned &lastMplLoc); + void GenerateInstruction(); + bool MemBarOpt(StmtNode &membar); + void UpdateCallBBFrequency(); + void HandleFunction(); + void ProcessExitBBVec(); + virtual void MergeReturn() = 0; + void TraverseAndClearCatchMark(BB &bb); + void MarkCatchBBs(); + void MarkCleanupEntryBB(); + void SetCleanupLabel(BB &cleanupEntry); + bool ExitbbNotInCleanupArea(const BB &bb) const; + uint32 GetMaxRegNum() const { + return maxRegCount; + }; + void DumpCFG() const; + void DumpCGIR() const; + void DumpLoop() const; + void ClearLoopInfo(); + Operand *HandleExpr(const BaseNode &parent, BaseNode &expr); + virtual void DetermineReturnTypeofCall() = 0; + /* handle rc reset */ + virtual void 
HandleRCCall(bool begin, const MIRSymbol *retRef = nullptr) = 0; + virtual void HandleRetCleanup(NaryStmtNode &retNode) = 0; + /* select stmt */ + virtual void SelectDassign(DassignNode &stmt, Operand &opnd0) = 0; + virtual void SelectRegassign(RegassignNode &stmt, Operand &opnd0) = 0; + virtual void SelectAssertNull(UnaryStmtNode &stmt) = 0; + virtual void SelectAsm(AsmNode &node) = 0; + virtual void SelectAggDassign(DassignNode &stmt) = 0; + virtual void SelectIassign(IassignNode &stmt) = 0; + virtual void SelectAggIassign(IassignNode &stmt, Operand &lhsAddrOpnd) = 0; + virtual void SelectReturn(Operand *opnd) = 0; + virtual void SelectIgoto(Operand *opnd0) = 0; + virtual void SelectCondGoto(CondGotoNode &stmt, Operand &opnd0, Operand &opnd1) = 0; + virtual void SelectCondSpecialCase1(CondGotoNode &stmt, BaseNode &opnd0) = 0; + virtual void SelectCondSpecialCase2(const CondGotoNode &stmt, BaseNode &opnd0) = 0; + virtual void SelectGoto(GotoNode &stmt) = 0; + virtual void SelectCall(CallNode &callNode) = 0; + virtual void SelectIcall(IcallNode &icallNode, Operand &fptrOpnd) = 0; + virtual void SelectIntrinCall(IntrinsiccallNode &intrinsiccallNode) = 0; + virtual Operand *SelectIntrinsicOpWithOneParam(IntrinsicopNode &intrinopNode, std::string name) = 0; + virtual Operand *SelectCclz(IntrinsicopNode &intrinopNode) = 0; + virtual Operand *SelectCctz(IntrinsicopNode &intrinopNode) = 0; + virtual Operand *SelectCpopcount(IntrinsicopNode &intrinopNode) = 0; + virtual Operand *SelectCparity(IntrinsicopNode &intrinopNode) = 0; + virtual Operand *SelectCclrsb(IntrinsicopNode &intrinopNode) = 0; + virtual Operand *SelectCisaligned(IntrinsicopNode &intrinopNode) = 0; + virtual Operand *SelectCalignup(IntrinsicopNode &intrinopNode) = 0; + virtual Operand *SelectCaligndown(IntrinsicopNode &intrinopNode) = 0; + virtual void SelectMembar(StmtNode &membar) = 0; + virtual void SelectComment(CommentNode &comment) = 0; + virtual void HandleCatch() = 0; + + /* select expr */ + virtual Operand *SelectDread(const BaseNode &parent, AddrofNode &expr) = 0; + virtual RegOperand *SelectRegread(RegreadNode &expr) = 0; + virtual Operand *SelectAddrof(AddrofNode &expr) = 0; + virtual Operand &SelectAddrofFunc(AddroffuncNode &expr) = 0; + virtual Operand &SelectAddrofLabel(AddroflabelNode &expr) = 0; + virtual Operand *SelectIread(const BaseNode &parent, IreadNode &expr) = 0; + virtual Operand *SelectIntConst(MIRIntConst &intConst) = 0; + virtual Operand *SelectFloatConst(MIRFloatConst &floatConst) = 0; + virtual Operand *SelectDoubleConst(MIRDoubleConst &doubleConst) = 0; + virtual Operand *SelectStrConst(MIRStrConst &strConst) = 0; + virtual Operand *SelectStr16Const(MIRStr16Const &strConst) = 0; + virtual void SelectAdd(Operand &resOpnd, Operand &opnd0, Operand &opnd1, PrimType primType) = 0; + virtual Operand *SelectAdd(BinaryNode &node, Operand &opnd0, Operand &opnd1, const BaseNode &parent) = 0; + virtual void SelectMadd(Operand &resOpnd, Operand &opndM0, Operand &opndM1, Operand &opnd1, PrimType primType) = 0; + virtual Operand *SelectMadd(BinaryNode &node, Operand &opndM0, Operand &opndM1, Operand &opnd1) = 0; + virtual Operand &SelectCGArrayElemAdd(BinaryNode &node) = 0; + virtual Operand *SelectShift(BinaryNode &node, Operand &opnd0, Operand &opnd1) = 0; + virtual void SelectMpy(Operand &resOpnd, Operand &opnd0, Operand &opnd1, PrimType primType) = 0; + virtual Operand *SelectMpy(BinaryNode &node, Operand &opnd0, Operand &opnd1, const BaseNode &parent) = 0; + virtual Operand *SelectRem(BinaryNode &node, 
Operand &opnd0, Operand &opnd1) = 0; + virtual void SelectDiv(Operand &resOpnd, Operand &opnd0, Operand &opnd1, PrimType primType) = 0; + virtual Operand *SelectDiv(BinaryNode &node, Operand &opnd0, Operand &opnd1) = 0; + virtual Operand *SelectSub(BinaryNode &node, Operand &opnd0, Operand &opnd1, const BaseNode &parent) = 0; + virtual void SelectSub(Operand &resOpnd, Operand &opnd0, Operand &opnd1, PrimType primType) = 0; + virtual Operand *SelectBand(BinaryNode &node, Operand &opnd0, Operand &opnd1) = 0; + virtual void SelectBand(Operand &resOpnd, Operand &opnd0, Operand &opnd1, PrimType primType) = 0; + virtual Operand *SelectLand(BinaryNode &node, Operand &opnd0, Operand &opnd1) = 0; + virtual Operand *SelectLor(BinaryNode &node, Operand &opnd0, Operand &opnd1, bool parentIsBr = false) = 0; + virtual void SelectMin(Operand &resOpnd, Operand &opnd0, Operand &opnd1, PrimType primType) = 0; + virtual Operand *SelectMin(BinaryNode &node, Operand &opnd0, Operand &opnd1) = 0; + virtual void SelectMax(Operand &resOpnd, Operand &opnd0, Operand &opnd1, PrimType primType) = 0; + virtual Operand *SelectMax(BinaryNode &node, Operand &opnd0, Operand &opnd1) = 0; + virtual Operand *SelectCmpOp(CompareNode &node, Operand &opnd0, Operand &opnd1, const BaseNode &parent) = 0; + virtual Operand *SelectBior(BinaryNode &node, Operand &opnd0, Operand &opnd1) = 0; + virtual void SelectBior(Operand &resOpnd, Operand &opnd0, Operand &opnd1, PrimType primType) = 0; + virtual Operand *SelectBxor(BinaryNode &node, Operand &opnd0, Operand &opnd1) = 0; + virtual void SelectBxor(Operand &resOpnd, Operand &opnd0, Operand &opnd1, PrimType primType) = 0; + virtual Operand *SelectAbs(UnaryNode &node, Operand &opnd0) = 0; + virtual Operand *SelectBnot(UnaryNode &node, Operand &opnd0) = 0; + virtual Operand *SelectExtractbits(ExtractbitsNode &node, Operand &opnd0, const BaseNode &parent) = 0; + virtual Operand *SelectDepositBits(DepositbitsNode &node, Operand &opnd0, Operand &opnd1) = 0; + virtual Operand *SelectLnot(UnaryNode &node, Operand &opnd0) = 0; + virtual Operand *SelectNeg(UnaryNode &node, Operand &opnd0) = 0; + virtual Operand *SelectRecip(UnaryNode &node, Operand &opnd0) = 0; + virtual Operand *SelectSqrt(UnaryNode &node, Operand &opnd0) = 0; + virtual Operand *SelectCeil(TypeCvtNode &node, Operand &opnd0) = 0; + virtual Operand *SelectFloor(TypeCvtNode &node, Operand &opnd0) = 0; + virtual Operand *SelectRetype(TypeCvtNode &node, Operand &opnd0) = 0; + virtual Operand *SelectRound(TypeCvtNode &node, Operand &opnd0) = 0; + virtual Operand *SelectCvt(const BaseNode &parent, TypeCvtNode &node, Operand &opnd0) = 0; + virtual Operand *SelectTrunc(TypeCvtNode &node, Operand &opnd0) = 0; + virtual Operand *SelectSelect(TernaryNode &node, Operand &opnd0, Operand &opnd1, Operand &opnd2, + bool isCompare = false) = 0; + virtual Operand *SelectMalloc(UnaryNode &call, Operand &opnd0) = 0; + virtual RegOperand &SelectCopy(Operand &src, PrimType srcType, PrimType dstType) = 0; + virtual Operand *SelectAlloca(UnaryNode &call, Operand &opnd0) = 0; + virtual Operand *SelectGCMalloc(GCMallocNode &call) = 0; + virtual Operand *SelectJarrayMalloc(JarrayMallocNode &call, Operand &opnd0) = 0; + virtual void SelectRangeGoto(RangeGotoNode &rangeGotoNode, Operand &opnd0) = 0; + virtual Operand *SelectLazyLoad(Operand &opnd0, PrimType primType) = 0; + virtual Operand *SelectLazyLoadStatic(MIRSymbol &st, int64 offset, PrimType primType) = 0; + virtual Operand *SelectLoadArrayClassCache(MIRSymbol &st, int64 offset, PrimType primType) = 
0; + virtual void GenerateYieldpoint(BB &bb) = 0; + virtual Operand &ProcessReturnReg(PrimType primType, int32 sReg) = 0; + + virtual Operand &GetOrCreateRflag() = 0; + virtual const Operand *GetRflag() const = 0; + virtual const Operand *GetFloatRflag() const = 0; + virtual const LabelOperand *GetLabelOperand(LabelIdx labIdx) const = 0; + virtual LabelOperand &GetOrCreateLabelOperand(LabelIdx labIdx) = 0; + virtual LabelOperand &GetOrCreateLabelOperand(BB &bb) = 0; + virtual RegOperand &CreateVirtualRegisterOperand(regno_t vRegNO) = 0; + virtual RegOperand &GetOrCreateVirtualRegisterOperand(regno_t vRegNO) = 0; + virtual RegOperand &GetOrCreateFramePointerRegOperand() = 0; + virtual RegOperand &GetOrCreateStackBaseRegOperand() = 0; + virtual int32 GetBaseOffset(const SymbolAlloc &symbolAlloc) = 0; + virtual Operand &GetZeroOpnd(uint32 size) = 0; + virtual Operand &CreateCfiRegOperand(uint32 reg, uint32 size) = 0; + virtual Operand &GetTargetRetOperand(PrimType primType, int32 sReg) = 0; + virtual Operand &CreateImmOperand(PrimType primType, int64 val) = 0; + virtual Operand *CreateZeroOperand(PrimType primType) = 0; + + virtual bool IsFrameReg(const RegOperand &opnd) const = 0; + + /* For Neon intrinsics */ + virtual RegOperand *SelectVectorBinOp(PrimType rType, Operand *o1, PrimType oTyp1, Operand *o2, + PrimType oTyp2, Opcode opc) = 0; + virtual RegOperand *SelectVectorBitwiseOp(PrimType rType, Operand *o1, PrimType oty1, Operand *o2, + PrimType oty2, Opcode opc) = 0;; + virtual RegOperand *SelectVectorCompareZero(Operand *o1, PrimType oty1, Operand *o2, Opcode opc) = 0; + virtual RegOperand *SelectVectorCompare(Operand *o1, PrimType oty1, Operand *o2, PrimType oty2, Opcode opc) = 0; + virtual RegOperand *SelectVectorFromScalar(PrimType pType, Operand *opnd, PrimType sType) = 0; + virtual RegOperand *SelectVectorGetHigh(PrimType rType, Operand *src) = 0; + virtual RegOperand *SelectVectorGetLow(PrimType rType, Operand *src) = 0; + virtual RegOperand *SelectVectorGetElement(PrimType rType, Operand *src, PrimType sType, int32 lane) = 0; + virtual RegOperand *SelectVectorMadd(Operand *o1, PrimType oTyp1, Operand *o2, PrimType oTyp2, Operand *o3, + PrimType oTyp3) = 0; + virtual RegOperand *SelectVectorMerge(PrimType rTyp, Operand *o1, Operand *o2, int32 iNum) = 0; + virtual RegOperand *SelectVectorMull(PrimType rType, Operand *o1, PrimType oTyp1, Operand *o2, PrimType oTyp2) = 0; + virtual RegOperand *SelectVectorNarrow(PrimType rType, Operand *o1, PrimType otyp, bool isLow) = 0; + virtual RegOperand *SelectVectorNeg(PrimType rType, Operand *o1) = 0; + virtual RegOperand *SelectVectorNot(PrimType rType, Operand *o1) = 0; + virtual RegOperand *SelectVectorPairwiseAdd(PrimType rType, Operand *src, PrimType sType) = 0; + virtual RegOperand *SelectVectorReverse(PrimType rtype, Operand *src, PrimType stype, uint32 size) = 0; + virtual RegOperand *SelectVectorSetElement(Operand *eOp, PrimType eTyp, Operand *vOpd, PrimType vTyp, int32 lane) = 0; + virtual RegOperand *SelectVectorShift(PrimType rType, Operand *o1, Operand *o2, Opcode opc) = 0; + virtual RegOperand *SelectVectorShiftImm(PrimType rType, Operand *o1, Operand *imm, int32 sVal, bool isLeft) = 0; + virtual RegOperand *SelectVectorShiftRNarrow(PrimType rType, Operand *o1, PrimType oType, + Operand *o2, bool isLow) = 0; + virtual RegOperand *SelectVectorSum(PrimType rtype, Operand *o1, PrimType oType) = 0; + virtual RegOperand *SelectVectorTableLookup(PrimType rType, Operand *o1, Operand *o2) = 0; + + /* For ebo issue. 
*/ + virtual Operand *GetTrueOpnd() { + return nullptr; + } + virtual void ClearUnreachableGotInfos(BB &bb) { + (void)bb; + }; + virtual void ClearUnreachableConstInfos(BB &bb) { + (void)bb; + }; + virtual void SplitStrLdrPair() {} + LabelIdx CreateLabel(); + + virtual Operand &CreateFPImmZero(PrimType primType) = 0; + + RegOperand *GetVirtualRegisterOperand(regno_t vRegNO) { + auto it = vRegOperandTable.find(vRegNO); + ASSERT(it != vRegOperandTable.end(), ""); + return it->second; + } + + Operand &CreateCfiImmOperand(int64 val, uint32 size) { + return *memPool->New(val, size); + } + + Operand &CreateCfiStrOperand(const std::string &str) { + return *memPool->New(str, *memPool); + } + + bool IsSpecialPseudoRegister(PregIdx spr) const { + return spr < 0; + } + + regno_t NewVReg(RegType regType, uint32 size) { + /* when vRegCount reach to maxRegCount, maxRegCount limit adds 80 every time */ + /* and vRegTable increases 80 elements. */ + if (vRegCount >= maxRegCount) { + maxRegCount += kRegIncrStepLen; + vRegTable.resize(maxRegCount); + } +#if TARGAARCH64 || TARGX86_64 || TARGRISCV64 + if (size < k4ByteSize) { + size = k4ByteSize; + } + ASSERT(size == k4ByteSize || size == k8ByteSize || size == k16ByteSize, "check size"); +#endif + new (&vRegTable[vRegCount]) VirtualRegNode(regType, size); + return vRegCount++; + } + + virtual regno_t NewVRflag() { + return 0; + } + + RegType GetRegTyFromPrimTy(PrimType primType) { + switch (primType) { + case PTY_u1: + case PTY_i8: + case PTY_u8: + case PTY_i16: + case PTY_u16: + case PTY_i32: + case PTY_u32: + case PTY_i64: + case PTY_u64: + case PTY_a32: + case PTY_a64: + case PTY_ptr: + case PTY_agg: + return kRegTyInt; + case PTY_f32: + case PTY_f64: + case PTY_v2i32: + case PTY_v2u32: + case PTY_v2i64: + case PTY_v2u64: + case PTY_v2f32: + case PTY_v2f64: + case PTY_v4i16: + case PTY_v4u16: + case PTY_v4i32: + case PTY_v4u32: + case PTY_v4f32: + case PTY_v8i8: + case PTY_v8u8: + case PTY_v8i16: + case PTY_v8u16: + case PTY_v16i8: + case PTY_v16u8: + return kRegTyFloat; + default: + ASSERT(false, "Unexpected pty"); + return kRegTyUndef; + } + } + + /* return Register Type */ + virtual RegType GetRegisterType(regno_t rNum) const { + CHECK(rNum < vRegTable.size(), "index out of range in GetVRegSize"); + return vRegTable[rNum].GetType(); + } + + uint32 GetMaxVReg() const { + return vRegCount; + } + + uint32 GetVRegSize(regno_t vregNum) { + CHECK(vregNum < vRegTable.size(), "index out of range in GetVRegSize"); + return vRegTable[vregNum].GetSize(); + } + + MIRSymbol *GetRetRefSymbol(BaseNode &expr); + void GenerateCfiPrologEpilog(); + + void PatchLongBranch(); + + virtual uint32 MaxCondBranchDistance() { + return INT_MAX; + } + + virtual void InsertJumpPad(Insn *) { + return; + } + + Operand *CreateDbgImmOperand(int64 val) { + return memPool->New(val); + } + + uint32 NumBBs() const { + return bbCnt; + } + +#if DEBUG + StIdx GetLocalVarReplacedByPreg(PregIdx reg) { + auto it = pregsToVarsMap->find(reg); + return it != pregsToVarsMap->end() ? 
it->second : StIdx(); + } +#endif + + void IncTotalNumberOfInstructions() { + totalInsns++; + } + + void DecTotalNumberOfInstructions() { + totalInsns--; + } + + int32 GetTotalNumberOfInstructions() const { + return totalInsns; + } + + int32 GetStructCopySize() const { + return structCopySize; + } + + int32 GetMaxParamStackSize() const { + return maxParamStackSize; + } + + virtual void ProcessLazyBinding() = 0; + + /* Debugging support */ + void SetDebugInfo(DebugInfo *dbgInfo) { + debugInfo = dbgInfo; + } + + void AddDIESymbolLocation(const MIRSymbol *sym, SymbolAlloc *loc); + + virtual void DBGFixCallFrameLocationOffsets() {}; + + /* Get And Set private members */ + CG *GetCG() { + return cg; + } + + const CG *GetCG() const { + return cg; + } + + const MIRModule &GetMirModule() const { + return mirModule; + } + + template + MIRConst *NewMirConst(T &mirConst) { + MIRConst *newConst = mirModule.GetMemPool()->New(mirConst.GetValue(), mirConst.GetType()); + return newConst; + } + + uint32 GetMIRSrcFileEndLineNum() const { + auto &srcFileInfo = mirModule.GetSrcFileInfo(); + if (!srcFileInfo.empty()) { + return srcFileInfo.back().second; + } else { + return 0; + } + } + + MIRFunction &GetFunction() { + return func; + } + + const MIRFunction &GetFunction() const { + return func; + } + + EHFunc *GetEHFunc() { + return ehFunc; + } + + const EHFunc *GetEHFunc() const { + return ehFunc; + } + + void SetEHFunc(EHFunc &ehFunction) { + ehFunc = &ehFunction; + } + + uint32 GetLabelIdx() { + return labelIdx; + } + + uint32 GetLabelIdx() const { + return labelIdx; + } + + void SetLabelIdx(uint32 idx) { + labelIdx = idx; + } + + LabelNode *GetStartLabel() { + return startLabel; + } + + const LabelNode *GetStartLabel() const { + return startLabel; + } + + void SetStartLabel(LabelNode &label) { + startLabel = &label; + } + + LabelNode *GetEndLabel() { + return endLabel; + } + + const LabelNode *GetEndLabel() const { + return endLabel; + } + + void SetEndLabel(LabelNode &label) { + endLabel = &label; + } + + LabelNode *GetCleanupLabel() { + return cleanupLabel; + } + + const LabelNode *GetCleanupLabel() const { + return cleanupLabel; + } + + void SetCleanupLabel(LabelNode &node) { + cleanupLabel = &node; + } + + BB *GetFirstBB() { + return firstBB; + } + + const BB *GetFirstBB() const { + return firstBB; + } + + void SetFirstBB(BB &bb) { + firstBB = &bb; + } + + BB *GetCleanupBB() { + return cleanupBB; + } + + const BB *GetCleanupBB() const { + return cleanupBB; + } + + void SetCleanupBB(BB &bb) { + cleanupBB = &bb; + } + + const BB *GetCleanupEntryBB() const { + return cleanupEntryBB; + } + + void SetCleanupEntryBB(BB &bb) { + cleanupEntryBB = &bb; + } + + BB *GetLastBB() { + return lastBB; + } + + const BB *GetLastBB() const { + return lastBB; + } + + void SetLastBB(BB &bb) { + lastBB = &bb; + } + + BB *GetCurBB() { + return curBB; + } + + const BB *GetCurBB() const { + return curBB; + } + + void SetCurBB(BB &bb) { + curBB = &bb; + } + + BB *GetDummyBB() { + return dummyBB; + } + + const BB *GetDummyBB() const { + return dummyBB; + } + + LabelIdx GetFirstCGGenLabelIdx() const { + return firstCGGenLabelIdx; + } + + MapleVector &GetExitBBsVec() { + return exitBBVec; + } + + const MapleVector GetExitBBsVec() const { + return exitBBVec; + } + + size_t ExitBBsVecSize() { + return exitBBVec.size(); + } + + bool IsExitBBsVecEmpty() const { + return exitBBVec.empty(); + } + + void EraseExitBBsVec(MapleVector::iterator it) { + exitBBVec.erase(it); + } + + void PushBackExitBBsVec(BB &bb) { + 
exitBBVec.emplace_back(&bb); + } + + void ClearExitBBsVec() { + exitBBVec.clear(); + } + + bool IsExitBB(const BB ¤tBB) { + for (BB *exitBB : exitBBVec) { + if (exitBB == ¤tBB) { + return true; + } + } + return false; + } + + BB *GetExitBB(int32 index) { + return exitBBVec.at(index); + } + + const BB *GetExitBB(int32 index) const { + return exitBBVec.at(index); + } + + void SetLab2BBMap(int32 index, BB &bb) { + lab2BBMap[index] = &bb; + } + + BB *GetBBFromLab2BBMap(int32 index) { + return lab2BBMap[index]; + } + + BECommon &GetBecommon() { + return beCommon; + } + + const BECommon GetBecommon() const { + return beCommon; + } + + MemLayout *GetMemlayout() { + return memLayout; + } + + const MemLayout *GetMemlayout() const { + return memLayout; + } + + void SetMemlayout(MemLayout &layout) { + memLayout = &layout; + } + + MemPool *GetMemoryPool() { + return memPool; + } + + const MemPool *GetMemoryPool() const { + return memPool; + } + + StackMemPool &GetStackMemPool() { + return stackMp; + } + + MapleAllocator *GetFuncScopeAllocator() { + return funcScopeAllocator; + } + + const MapleAllocator *GetFuncScopeAllocator() const { + return funcScopeAllocator; + } + + const MapleMap GetEmitStVec() const { + return emitStVec; + } + + MIRSymbol* GetEmitSt(uint32 id) { + return emitStVec[id]; + } + + void AddEmitSt(uint32 id, MIRSymbol &symbol) { + emitStVec[id] = &symbol; + } + + MapleVector &GetLoops() { + return loops; + } + + const MapleVector GetLoops() const { + return loops; + } + + void PushBackLoops(CGFuncLoops &loop) { + loops.emplace_back(&loop); + } + + MapleVector &GetAllBBs() { + return bbVec; + } + + BB *GetBBFromID(uint32 id) { + return bbVec[id]; + } + +#if TARGARM32 + MapleVector &GetSortedBBs() { + return sortedBBs; + } + + const MapleVector &GetSortedBBs() const { + return sortedBBs; + } + + void SetSortedBBs(const MapleVector &bbVec) { + sortedBBs = bbVec; + } + + MapleVector &GetLrVec() { + return lrVec; + } + + const MapleVector &GetLrVec() const { + return lrVec; + } + + void SetLrVec(const MapleVector &newLrVec) { + lrVec = newLrVec; + } +#endif /* TARGARM32 */ + + CGCFG *GetTheCFG() { + return theCFG; + } + + const CGCFG *GetTheCFG() const { + return theCFG; + } + + regno_t GetVirtualRegNOFromPseudoRegIdx(PregIdx idx) const { + return regno_t(idx + firstMapleIrVRegNO); + } + + bool GetHasProEpilogue() const { + return hasProEpilogue; + } + + void SetHasProEpilogue(bool state) { + hasProEpilogue = state; + } + + int32 GetDbgCallFrameOffset() const { + return dbgCallFrameOffset; + } + + void SetDbgCallFrameOffset(int32 val) { + dbgCallFrameOffset = val; + } + + BB *CreateNewBB() { + BB *bb = memPool->New(bbCnt++, *funcScopeAllocator); + bbVec.emplace_back(bb); + return bb; + } + + BB *CreateNewBB(bool unreachable, BB::BBKind kind, uint32 frequency) { + BB *newBB = CreateNewBB(); + newBB->SetKind(kind); + newBB->SetUnreachable(unreachable); + newBB->SetFrequency(frequency); + return newBB; + } + + BB *CreateNewBB(LabelIdx label, bool unreachable, BB::BBKind kind, uint32 frequency) { + BB *newBB = CreateNewBB(unreachable, kind, frequency); + newBB->AddLabel(label); + SetLab2BBMap(label, *newBB); + return newBB; + } + + void UpdateFrequency(StmtNode &stmt) { + bool withFreqInfo = func.HasFreqMap() && !func.GetFreqMap().empty(); + if (withFreqInfo && (func.GetFreqMap().find(stmt.GetStmtID()) != func.GetFreqMap().end())) { + frequency = func.GetFreqMap().at(stmt.GetStmtID()); + } + } + + BB *StartNewBBImpl(bool stmtIsCurBBLastStmt, StmtNode &stmt) { + BB *newBB = CreateNewBB(); + 
ASSERT(newBB != nullptr, "newBB should not be nullptr"); + if (stmtIsCurBBLastStmt) { + ASSERT(curBB != nullptr, "curBB should not be nullptr"); + curBB->SetLastStmt(stmt); + curBB->AppendBB(*newBB); + newBB->SetFirstStmt(*stmt.GetNext()); + } else { + newBB->SetFirstStmt(stmt); + if (curBB != nullptr) { + if (stmt.GetPrev() != nullptr) { + ASSERT(stmt.GetPrev()->GetNext() == &stmt, " the next of stmt's prev should be stmt self"); + } + curBB->SetLastStmt(*stmt.GetPrev()); + curBB->AppendBB(*newBB); + } + } + /* used for handle function, frequency is the laststmt->frequency. */ + if (curBB != nullptr) { + curBB->SetFrequency(frequency); + } else { + newBB->SetFrequency(frequency); + } + ASSERT(newBB->GetLastStmt() == nullptr, "newBB's lastStmt must be nullptr"); + return newBB; + } + + BB *StartNewBB(StmtNode &stmt) { + BB *bb = curBB; + if (stmt.GetNext() != nullptr && stmt.GetNext()->GetOpCode() != OP_label) { + bb = StartNewBBImpl(true, stmt); + } + return bb; + } + + void SetCurBBKind(BB::BBKind bbKind) { + curBB->SetKind(bbKind); + } + + void SetVolStore(bool val) { + isVolStore = val; + } + + void SetVolReleaseInsn(Insn *insn) { + volReleaseInsn = insn; + } + + bool IsAfterRegAlloc() const { + return isAfterRegAlloc; + } + + void SetIsAfterRegAlloc() { + isAfterRegAlloc = true; + } + + const MapleString &GetShortFuncName() const { + return shortFuncName; + } + + size_t GetLSymSize() const { + return lSymSize; + } + + bool HasTakenLabel() const{ + return hasTakenLabel; + } + + void SetHasTakenLabel() { + hasTakenLabel = true; + } + + virtual InsnVisitor *NewInsnModifier() = 0; + + bool GenCfi() const { + return (mirModule.GetSrcLang() != kSrcLangC); + } + + MapleVector &GetDbgCallFrameLocations() { + return dbgCallFrameLocations; + } + + bool HasAsm() { + return hasAsm; + } + + protected: + uint32 firstMapleIrVRegNO = 200; /* positioned after physical regs */ + uint32 firstNonPregVRegNO; + uint32 vRegCount; /* for assigning a number for each CG virtual register */ + uint32 maxRegCount; /* for the current virtual register number limit */ + size_t lSymSize; /* size of local symbol table imported */ + MapleVector vRegTable; /* table of CG's virtual registers indexed by v_reg no */ + MapleVector bbVec; + MapleUnorderedMap vRegOperandTable; + MapleUnorderedMap pRegSpillMemOperands; + MapleUnorderedMap spillRegMemOperands; + MapleUnorderedMap reuseSpillLocMem; + LabelIdx firstCGGenLabelIdx; + MapleMap labelMap; +#if DEBUG + MapleMap *pregsToVarsMap = nullptr; +#endif + int32 totalInsns = 0; + int32 structCopySize; + int32 maxParamStackSize; + bool hasVLAOrAlloca; + bool hasProEpilogue = false; + bool isVolLoad = false; + bool isVolStore = false; + bool isAfterRegAlloc = false; + bool isAggParamInReg = false; + bool hasTakenLabel = false; + uint32 frequency = 0; + DebugInfo *debugInfo = nullptr; /* debugging info */ + MapleVector dbgCallFrameLocations; + RegOperand *aggParamReg = nullptr; + ReachingDefinition *reachingDef = nullptr; + + int32 dbgCallFrameOffset = 0; + CG *cg; + MIRModule &mirModule; + MemPool *memPool; + StackMemPool &stackMp; + + PregIdx GetPseudoRegIdxFromVirtualRegNO(const regno_t vRegNO) const { + ASSERT(IsVRegNOForPseudoRegister(vRegNO), ""); + return PregIdx(vRegNO - firstMapleIrVRegNO); + } + + bool IsVRegNOForPseudoRegister(regno_t vRegNum) const { + /* 0 is not allowed for preg index */ + uint32 n = static_cast(vRegNum); + return (firstMapleIrVRegNO < n && n < firstNonPregVRegNO); + } + + VirtualRegNode &GetVirtualRegNodeFromPseudoRegIdx(PregIdx idx) { + return 
vRegTable.at(GetVirtualRegNOFromPseudoRegIdx(idx)); + } + + PrimType GetTypeFromPseudoRegIdx(PregIdx idx) { + VirtualRegNode &vRegNode = GetVirtualRegNodeFromPseudoRegIdx(idx); + RegType regType = vRegNode.GetType(); + ASSERT(regType == kRegTyInt || regType == kRegTyFloat, ""); + uint32 size = vRegNode.GetSize(); /* in bytes */ + ASSERT(size == sizeof(int32) || size == sizeof(int64), ""); + return (regType == kRegTyInt ? (size == sizeof(int32) ? PTY_i32 : PTY_i64) + : (size == sizeof(float) ? PTY_f32 : PTY_f64)); + } + + int64 GetPseudoRegisterSpillLocation(PregIdx idx) { + const SymbolAlloc *symLoc = memLayout->GetSpillLocOfPseduoRegister(idx); + return static_cast(GetBaseOffset(*symLoc)); + } + + virtual MemOperand *GetPseudoRegisterSpillMemoryOperand(PregIdx idx) = 0; + + uint32 GetSpillLocation(uint32 size) { + uint32 offset = RoundUp(nextSpillLocation, static_cast(size)); + nextSpillLocation = offset + size; + return offset; + } + + /* See if the symbol is a structure parameter that requires a copy. */ + bool IsParamStructCopy(const MIRSymbol &symbol) { + if (symbol.GetStorageClass() == kScFormal && + GetBecommon().GetTypeSize(symbol.GetTyIdx().GetIdx()) > k16ByteSize) { + return true; + } + return false; + } + + void SetHasAsm() { + hasAsm = true; + } + + private: + CGFunc &operator=(const CGFunc &cgFunc); + CGFunc(const CGFunc&); + StmtNode *HandleFirstStmt(); + bool CheckSkipMembarOp(StmtNode &stmt); + MIRFunction &func; + EHFunc *ehFunc = nullptr; + uint32 bbCnt = 0; + uint32 labelIdx = 0; /* local label index number */ + LabelNode *startLabel = nullptr; /* start label of the function */ + LabelNode *endLabel = nullptr; /* end label of the function */ + LabelNode *cleanupLabel = nullptr; /* label to indicate the entry of cleanup code. */ + BB *firstBB = nullptr; + BB *cleanupBB = nullptr; + BB *cleanupEntryBB = nullptr; + BB *lastBB = nullptr; + BB *curBB = nullptr; + BB *dummyBB; /* use this bb for add some instructions to bb that is no curBB. */ + Insn *volReleaseInsn = nullptr; /* use to record the release insn for volatile strore */ + MapleVector exitBBVec; + MapleUnorderedMap lab2BBMap; + BECommon &beCommon; + MemLayout *memLayout = nullptr; + MapleAllocator *funcScopeAllocator; + MapleMap emitStVec; /* symbol that needs to be emit as a local symbol. 
i.e, switch table */ +#if TARGARM32 + MapleVector sortedBBs; + MapleVector lrVec; +#endif /* TARGARM32 */ + MapleVector loops; + CGCFG *theCFG = nullptr; + uint32 nextSpillLocation = 0; + static constexpr int kRegIncrStepLen = 80; /* reg number increate step length */ + const MapleString shortFuncName; + bool hasAsm = false; +}; /* class CGFunc */ + +CGFUNCPHASE(CgDoLayoutSF, "layoutstackframe") +CGFUNCPHASE(CgDoHandleFunc, "handlefunction") +CGFUNCPHASE(CgDoFixCFLocOsft, "dbgfixcallframeoffsets") +CGFUNCPHASE(CgDoGenCfi, "gencfi") +CGFUNCPHASE(CgDoEmission, "emit") + +MAPLE_FUNC_PHASE_DECLARE_BEGIN(CgLayoutFrame, maplebe::CGFunc) +MAPLE_FUNC_PHASE_DECLARE_END +MAPLE_FUNC_PHASE_DECLARE_BEGIN(CgHandleFunction, maplebe::CGFunc) +MAPLE_FUNC_PHASE_DECLARE_END +MAPLE_FUNC_PHASE_DECLARE_BEGIN(CgFixCFLocOsft, maplebe::CGFunc) +MAPLE_FUNC_PHASE_DECLARE_END +MAPLE_FUNC_PHASE_DECLARE_BEGIN(CgGenCfi, maplebe::CGFunc) +MAPLE_FUNC_PHASE_DECLARE_END +MAPLE_FUNC_PHASE_DECLARE_BEGIN(CgEmission, maplebe::CGFunc) +MAPLE_FUNC_PHASE_DECLARE_END +} /* namespace maplebe */ +#endif /* MAPLEBE_INCLUDE_CG_CGFUNC_H */ diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp index 1363204d9805ccc6ca61daecb0ee3e476d28bc5f..fb3b776c5604cfc547075d954ed261f52c1e0c63 100644 --- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp +++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_cgfunc.cpp @@ -160,6 +160,29 @@ bool IsBlkassignForPush(const BlkassignoffNode &bNode) { return spBased; } +MIRStructType *AArch64CGFunc::GetLmbcStructArgType(BaseNode &stmt, int32 argNo) { + MIRType *ty = nullptr; + if (stmt.GetOpCode() == OP_call) { + CallNode &callNode = static_cast(stmt); + MIRFunction *callFunc = GlobalTables::GetFunctionTable().GetFunctionFromPuidx(callNode.GetPUIdx()); + if (callFunc->GetFormalCount() < (argNo + 1)) { + return nullptr; /* formals less than actuals */ + } + ty = GlobalTables::GetTypeTable().GetTypeFromTyIdx(callFunc->GetFormalDefVec()[argNo].formalTyIdx); + } else if (stmt.GetOpCode() == OP_icallproto) { + argNo--; /* 1st opnd of icallproto is funcname, skip it relative to param list */ + IcallNode &icallproto = static_cast(stmt); + MIRType *type = GlobalTables::GetTypeTable().GetTypeFromTyIdx(icallproto.GetRetTyIdx()); + MIRFuncType *fType = static_cast(type); + if (fType->GetParamTypeList().size() < (argNo + 1)) { + return nullptr; + } + ty = GlobalTables::GetTypeTable().GetTypeFromTyIdx(fType->GetNthParamType(argNo)); + } + CHECK_FATAL(ty && ty->IsStructType(), "lmbc agg arg error"); + return static_cast(ty); +} + RegOperand &AArch64CGFunc::GetOrCreateResOperand(const BaseNode &parent, PrimType primType) { RegOperand *resOpnd = nullptr; if (parent.GetOpCode() == OP_regassign) { @@ -2140,7 +2163,7 @@ MIRType *AArch64CGFunc::LmbcGetAggTyFromCallSite(StmtNode *stmt, std::vector(static_cast(src)->GetRegisterNumber()); @@ -2150,9 +2173,9 @@ bool AArch64CGFunc::LmbcSmallAggForRet(const BlkassignoffNode &bNode, const Oper /* This blkassignoff is for struct return? 
*/ uint32 loadSize; uint32 numRegs = 0; - if (bNode.GetNext()->GetOpCode() == OP_return) { - MIRStructType *ty = static_cast( - GlobalTables::GetTypeTable().GetTypeFromTyIdx(func->GetFuncRetStructTyIdx())); + if (static_cast(bNode).GetNext()->GetOpCode() == OP_return) { + MIRStructType *ty = static_cast(func->GetReturnType()); + uint32 tySize = GetBecommon().GetTypeSize(ty->GetTypeIndex()); uint32 fpregs = FloatParamRegRequired(ty, size); if (fpregs > 0) { /* pure floating point in agg */ @@ -2171,7 +2194,7 @@ bool AArch64CGFunc::LmbcSmallAggForRet(const BlkassignoffNode &bNode, const Oper numRegs = 2; pTy = PTY_i64; size = k4ByteSize; - switch (bNode.blockSize) { + switch (tySize) { case 1: pTy = PTY_i8; break; @@ -2189,7 +2212,7 @@ bool AArch64CGFunc::LmbcSmallAggForRet(const BlkassignoffNode &bNode, const Oper MemOperand &mem = CreateMemOpnd(regno, 0, size * kBitsPerByte); RegOperand *res = &GetOrCreatePhysicalRegisterOperand(R0, loadSize, kRegTyInt); SelectCopy(*res, pTy, mem, pTy); - if (bNode.blockSize > static_cast(k8ByteSize)) { + if (tySize > static_cast(k8ByteSize)) { MemOperand &newMem = CreateMemOpnd(regno, k8ByteSize, size * kBitsPerByte); res = &GetOrCreatePhysicalRegisterOperand(R1, loadSize, kRegTyInt); SelectCopy(*res, pTy, newMem, pTy); @@ -3165,9 +3188,20 @@ Operand *AArch64CGFunc::SelectIreadoff(const BaseNode &parent, IreadoffNode &ire auto *baseAddr = ireadoff.Opnd(0); auto *result = &CreateRegisterOperandOfType(primType); auto *addrOpnd = HandleExpr(ireadoff, *baseAddr); - auto &memOpnd = CreateMemOpnd(LoadIntoRegister(*addrOpnd, PTY_a64), offset, bitSize); - auto mop = PickLdInsn(bitSize, primType); - GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mop, *result, memOpnd)); + if (primType == PTY_agg && parent.GetOpCode() == OP_regassign) { + auto &memOpnd = CreateMemOpnd(LoadIntoRegister(*addrOpnd, PTY_a64), offset, bitSize); + auto mop = PickLdInsn(64, PTY_a64); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mop, *result, memOpnd)); + auto ®AssignNode = static_cast(parent); + PregIdx pIdx = regAssignNode.GetRegIdx(); + CHECK_FATAL(IsSpecialPseudoRegister(pIdx), "SelectIreadfpoff of agg"); + LmbcSmallAggForRet(const_cast(parent), addrOpnd); + // result not used + } else { + auto &memOpnd = CreateMemOpnd(LoadIntoRegister(*addrOpnd, PTY_a64), offset, bitSize); + auto mop = PickLdInsn(bitSize, primType); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(mop, *result, memOpnd)); + } return result; } @@ -3219,36 +3253,13 @@ Operand *AArch64CGFunc::SelectIreadfpoff(const BaseNode &parent, IreadFPoffNode int32 offset = ireadoff.GetOffset(); PrimType primType = ireadoff.GetPrimType(); uint32 bytelen = GetPrimTypeSize(primType); - uint32 bitlen = bytelen * kBitsPerByte; RegType regty = GetRegTyFromPrimTy(primType); RegOperand *result = nullptr; - if (offset >= 0) { - LmbcFormalParamInfo *info = GetLmbcFormalParamInfo(static_cast(offset)); - ASSERT(info != nullptr, "info should not be nullptr"); - if (info->GetPrimType() == PTY_agg) { - if (info->IsOnStack()) { - result = GenLmbcParamLoad(static_cast(info->GetOnStackOffset()), - GetPrimTypeSize(PTY_a64), kRegTyInt, PTY_a64); - regno_t baseRegno = result->GetRegisterNumber(); - result = GenLmbcParamLoad(offset - static_cast(info->GetOffset()), - bytelen, regty, primType, static_cast(baseRegno)); - } else if (primType == PTY_agg) { - CHECK_FATAL(parent.GetOpCode() == OP_regassign, "SelectIreadfpoff of agg"); - result = LmbcStructReturnLoad(offset); - } else { - result = GenLmbcParamLoad(offset, bytelen, regty, primType); - 
} - } else { - CHECK_FATAL(primType == info->GetPrimType(), "Incorrect primtype"); - CHECK_FATAL(offset == static_cast(info->GetOffset()), "Incorrect offset"); - if (info->GetRegNO() == 0 || !info->HasRegassign()) { - result = GenLmbcParamLoad(offset, bytelen, regty, primType); - } else { - result = &GetOrCreatePhysicalRegisterOperand(static_cast(info->GetRegNO()), bitlen, regty); - } - } + if (offset > 0) { + CHECK_FATAL(0, "Invalid ireadfpoff offset"); } else { if (primType == PTY_agg) { + /* agg return */ CHECK_FATAL(parent.GetOpCode() == OP_regassign, "SelectIreadfpoff of agg"); result = LmbcStructReturnLoad(offset); } else { @@ -7480,12 +7491,26 @@ void AArch64CGFunc::SelectParmListDreadSmallAggregate(const MIRSymbol &sym, MIRT } } -void AArch64CGFunc::SelectParmListIreadSmallAggregate(const IreadNode &iread, MIRType &structType, +void AArch64CGFunc::SelectParmListIreadSmallAggregate(BaseNode &iread, MIRType &structType, ListOperand &srcOpnds, int32 offset, AArch64CallConvImpl &parmLocator) { - int32 symSize = static_cast(static_cast(GetBecommon().GetTypeSize(structType.GetTypeIndex().GetIdx()))); - RegOperand *addrOpnd0 = static_cast(HandleExpr(iread, *(iread.Opnd(0)))); - RegOperand *addrOpnd1 = &LoadIntoRegister(*addrOpnd0, iread.Opnd(0)->GetPrimType()); + int32 symSize = GetBecommon().GetTypeSize(structType.GetTypeIndex().GetIdx()); + RegOperand *addrOpnd1; + if (iread.GetOpCode() == OP_iread) { + RegOperand *addrOpnd0 = static_cast(HandleExpr(iread, *(iread.Opnd(0)))); + addrOpnd1 = &LoadIntoRegister(*addrOpnd0, iread.Opnd(0)->GetPrimType()); + } else if (iread.GetOpCode() == OP_ireadfpoff) { + IreadFPoffNode &ireadoff = static_cast(iread); + RegOperand *rfp = &GetOrCreatePhysicalRegisterOperand(RFP, k64BitSize, kRegTyInt); + RegOperand *addrOpnd0 = &CreateRegisterOperandOfType(PTY_a64); + ImmOperand &immOpnd = CreateImmOperand(ireadoff.GetOffset(), k32BitSize, true); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_xaddrri12, *addrOpnd0, *rfp, immOpnd)); + addrOpnd1 = &LoadIntoRegister(*addrOpnd0, PTY_i64); + } else if (iread.GetOpCode() == OP_ireadoff) { + IreadoffNode &ireadoff = static_cast(iread); + RegOperand *addrOpnd0 = static_cast(HandleExpr(ireadoff, *(ireadoff.Opnd(0)))); + addrOpnd1 = &LoadIntoRegister(*addrOpnd0, PTY_i64); + } CCLocInfo ploc; parmLocator.LocateNextParm(structType, ploc); if (ploc.reg0 == 0) { @@ -7861,8 +7886,8 @@ void AArch64CGFunc::CreateCallStructMemcpyToParamReg(MIRType &structType, int32 } } -void AArch64CGFunc::SelectParmListForAggregate(BaseNode &argExpr, ListOperand &srcOpnds, - AArch64CallConvImpl &parmLocator, int32 &structCopyOffset) { +void AArch64CGFunc::SelectParmListForAggregate(BaseNode &parent, BaseNode &argExpr, ListOperand &srcOpnds, + AArch64CallConvImpl &parmLocator, int32 &structCopyOffset, int32 argNo) { uint64 symSize; int32 rhsOffset = 0; if (argExpr.GetOpCode() == OP_dread) { @@ -7910,6 +7935,32 @@ void AArch64CGFunc::SelectParmListForAggregate(BaseNode &argExpr, ListOperand &s } else { SelectParmListIreadLargeAggregate(iread, *ty, srcOpnds, parmLocator, structCopyOffset, rhsOffset); } + } else if (argExpr.GetOpCode() == OP_ireadfpoff) { + IreadFPoffNode &iread = static_cast(argExpr); + MIRStructType *ty = GetLmbcStructArgType(parent, argNo); + if (ty == nullptr) { /* param < arg */ + return; + } + symSize = GetBecommon().GetTypeSize(ty->GetTypeIndex().GetIdx()); + if (symSize <= k16ByteSize) { + SelectParmListIreadSmallAggregate(iread, *ty, srcOpnds, rhsOffset, parmLocator); + } else { + 
CreateCallStructMemcpyToParamReg(*ty, structCopyOffset, parmLocator, srcOpnds); + structCopyOffset += static_cast(RoundUp(symSize, kSizeOfPtr)); + } + } else if (argExpr.GetOpCode() == OP_ireadoff) { + IreadoffNode &iread = static_cast(argExpr); + MIRStructType *ty = GetLmbcStructArgType(parent, argNo); + if (ty == nullptr) { + return; + } + symSize = GetBecommon().GetTypeSize(ty->GetTypeIndex().GetIdx()); + if (symSize <= k16ByteSize) { + SelectParmListIreadSmallAggregate(iread, *ty, srcOpnds, rhsOffset, parmLocator); + } else { + CreateCallStructMemcpyToParamReg(*ty, structCopyOffset, parmLocator, srcOpnds); + structCopyOffset += static_cast(RoundUp(symSize, kSizeOfPtr)); + } } else { CHECK_FATAL(false, "NYI"); } @@ -7953,7 +8004,7 @@ size_t AArch64CGFunc::SelectParmListGetStructReturnSize(StmtNode &naryNode) { return 0; } -void AArch64CGFunc::SelectParmListPreprocessLargeStruct(BaseNode &argExpr, int32 &structCopyOffset) { +void AArch64CGFunc::SelectParmListPreprocessLargeStruct(BaseNode &parent, BaseNode &argExpr, int32 &structCopyOffset, int32 argNo) { uint64 symSize; int32 rhsOffset = 0; if (argExpr.GetOpCode() == OP_dread) { @@ -8000,6 +8051,29 @@ void AArch64CGFunc::SelectParmListPreprocessLargeStruct(BaseNode &argExpr, int32 uint32 numMemOp = static_cast(RoundUp(symSize, kSizeOfPtr) / kSizeOfPtr); structCopyOffset += static_cast(numMemOp * kSizeOfPtr); } + } else if (argExpr.GetOpCode() == OP_ireadfpoff) { + IreadFPoffNode &ireadoff = static_cast(argExpr); + MIRStructType *ty = GetLmbcStructArgType(parent, argNo); + symSize = GetBecommon().GetTypeSize(ty->GetTypeIndex()); + if (symSize > 16 /*kParmMemcpySize*/) { + RegOperand *rfp = &GetOrCreatePhysicalRegisterOperand(RFP, k64BitSize, kRegTyInt); + RegOperand *addrOpnd = &CreateRegisterOperandOfType(PTY_a64); + ImmOperand &immOpnd = CreateImmOperand(ireadoff.GetOffset(), k32BitSize, true); + GetCurBB()->AppendInsn(GetCG()->BuildInstruction(MOP_xaddrri12, *addrOpnd, *rfp, immOpnd)); + CreateCallStructParamMemcpy(nullptr, addrOpnd, static_cast(symSize), structCopyOffset, 0); + structCopyOffset += static_cast(RoundUp(symSize, kSizeOfPtr)); + } + } else if (argExpr.GetOpCode() == OP_ireadoff) { + IreadoffNode &ireadoff = static_cast(argExpr); + MIRStructType *ty = GetLmbcStructArgType(parent, argNo); + symSize = GetBecommon().GetTypeSize(ty->GetTypeIndex()); + if (symSize > 16 /*kParmMemcpySize*/) { + RegOperand *addrOpnd = static_cast( + HandleExpr(ireadoff, *(ireadoff.Opnd(0)))); + CreateCallStructParamMemcpy(nullptr, addrOpnd, + static_cast(symSize), structCopyOffset, 0); + structCopyOffset += static_cast(RoundUp(symSize, kSizeOfPtr)); + } } } @@ -8031,7 +8105,7 @@ bool AArch64CGFunc::MarkParmListCall(BaseNode &expr) { return false; } -void AArch64CGFunc::SelectParmListPreprocess(const StmtNode &naryNode, size_t start, std::set &specialArgs) { +void AArch64CGFunc::SelectParmListPreprocess(StmtNode &naryNode, size_t start, std::set &specialArgs) { size_t i = start; int32 structCopyOffset = GetMaxParamStackSize() - GetStructCopySize(); for (; i < naryNode.NumOpnds(); ++i) { @@ -8044,7 +8118,7 @@ void AArch64CGFunc::SelectParmListPreprocess(const StmtNode &naryNode, size_t st if (primType != PTY_agg) { continue; } - SelectParmListPreprocessLargeStruct(*argExpr, structCopyOffset); + SelectParmListPreprocessLargeStruct(naryNode, *argExpr, structCopyOffset, i); } } @@ -8145,7 +8219,7 @@ void AArch64CGFunc::SelectParmList(StmtNode &naryNode, ListOperand &srcOpnds, bo } /* use alloca */ if (primType == PTY_agg) { - 
SelectParmListForAggregate(*argExpr, srcOpnds, parmLocator, structCopyOffset); + SelectParmListForAggregate(naryNode, *argExpr, srcOpnds, parmLocator, structCopyOffset, i); continue; } ty = GlobalTables::GetTypeTable().GetTypeTable()[static_cast(primType)]; @@ -9509,7 +9583,11 @@ Operand *AArch64CGFunc::GetBaseReg(const AArch64SymbolAlloc &symAlloc) { } if (fsp == nullptr) { - fsp = &GetOrCreatePhysicalRegisterOperand(RFP, kSizeOfPtr * kBitsPerByte, kRegTyInt); + if (GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc) { + fsp = &GetOrCreatePhysicalRegisterOperand(RSP, kSizeOfPtr * kBitsPerByte, kRegTyInt); + } else { + fsp = &GetOrCreatePhysicalRegisterOperand(RFP, kSizeOfPtr * kBitsPerByte, kRegTyInt); + } } return fsp; } @@ -9527,21 +9605,34 @@ int32 AArch64CGFunc::GetBaseOffset(const SymbolAlloc &symbolAlloc) { int32 offset = static_cast(symAlloc->GetOffset()); return offset; } else if (sgKind == kMsArgsRegPassed) { - int32 baseOffset = symAlloc->GetOffset() + - static_cast(memLayout->GetSizeOfLocals() + memLayout->GetSizeOfRefLocals()); + int32 baseOffset; + if (GetCG()->IsLmbc()) { + baseOffset = symAlloc->GetOffset() + memLayout->GetSizeOfRefLocals() + + memLayout->SizeOfArgsToStackPass(); /* SP relative */ + } else { + baseOffset = memLayout->GetSizeOfLocals() + symAlloc->GetOffset() + + memLayout->GetSizeOfRefLocals(); + } return baseOffset + sizeofFplr; } else if (sgKind == kMsRefLocals) { int32 baseOffset = symAlloc->GetOffset() + static_cast(memLayout->GetSizeOfLocals()); return baseOffset + sizeofFplr; } else if (sgKind == kMsLocals) { + if (GetCG()->IsLmbc()) { + CHECK_FATAL(false, "invalid lmbc's locals"); + } int32 baseOffset = symAlloc->GetOffset(); return baseOffset + sizeofFplr; } else if (sgKind == kMsSpillReg) { + int32 baseOffset; if (GetCG()->IsLmbc()) { - return symAlloc->GetOffset() + static_cast(memLayout->SizeOfArgsToStackPass()); + baseOffset = symAlloc->GetOffset() + memLayout->SizeOfArgsRegisterPassed() + + memLayout->GetSizeOfRefLocals() + + memLayout->SizeOfArgsToStackPass(); + } else { + baseOffset = symAlloc->GetOffset() + memLayout->SizeOfArgsRegisterPassed() + + memLayout->GetSizeOfLocals() + memLayout->GetSizeOfRefLocals(); } - int32 baseOffset = symAlloc->GetOffset() + memLayout->SizeOfArgsRegisterPassed() + memLayout->GetSizeOfLocals() + - memLayout->GetSizeOfRefLocals(); return baseOffset + sizeofFplr; } else if (sgKind == kMsArgsToStkPass) { /* this is for callers */ return static_cast(symAlloc->GetOffset()); @@ -10215,9 +10306,6 @@ void AArch64CGFunc::SelectCVaStart(const IntrinsiccallNode &intrnNode) { inReg++; } } - if (GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc) { - stkSize += (inReg * k8ByteSize); - } if (CGOptions::IsArm64ilp32()) { stkSize = static_cast(RoundUp(stkSize, k8ByteSize)); } else { diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_memlayout.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_memlayout.cpp index bf2e20cbe480f14d58ecfbd05b42ab408144aee6..640eb7780ddbf172976970ca5fca792776174b51 100644 --- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_memlayout.cpp +++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_memlayout.cpp @@ -58,7 +58,9 @@ uint32 AArch64MemLayout::ComputeStackSpaceRequirementForCall(StmtNode &stmt, in ty = GlobalTables::GetTypeTable().GetTypeTable()[static_cast(opnd->GetPrimType())]; } else { Opcode opndOpcode = opnd->GetOpCode(); - ASSERT(opndOpcode == OP_dread || opndOpcode == OP_iread, "opndOpcode should be OP_dread or OP_iread"); + if (be.GetMIRModule().GetFlavor() != kFlavorLmbc) { + 
ASSERT(opndOpcode == OP_dread || opndOpcode == OP_iread, "opndOpcode should be OP_dread or OP_iread"); + } if (opndOpcode == OP_dread) { DreadNode *dread = static_cast(opnd); MIRSymbol *sym = be.GetMIRModule().CurFunction()->GetLocalOrGlobalSymbol(dread->GetStIdx()); @@ -72,8 +74,7 @@ uint32 AArch64MemLayout::ComputeStackSpaceRequirementForCall(StmtNode &stmt, in ty = static_cast(ty)->GetFieldType(dread->GetFieldID()); } } - } else { - /* OP_iread */ + } else if (opndOpcode == OP_iread) { IreadNode *iread = static_cast(opnd); ty = GlobalTables::GetTypeTable().GetTypeFromTyIdx(iread->GetTyIdx()); ASSERT(ty->GetKind() == kTypePointer, "expect pointer"); @@ -87,6 +88,11 @@ uint32 AArch64MemLayout::ComputeStackSpaceRequirementForCall(StmtNode &stmt, in ty = static_cast(ty)->GetFieldType(iread->GetFieldID()); } } + } else if ((opndOpcode == OP_ireadfpoff || opndOpcode == OP_ireadoff || opndOpcode == OP_dreadoff) && opnd->GetPrimType() == PTY_agg) { + ty = static_cast(cgFunc)->GetLmbcStructArgType(stmt, i); + } + if (ty == nullptr) { /* type mismatch */ + continue; } } CCLocInfo ploc; @@ -190,18 +196,6 @@ void AArch64MemLayout::LayoutVarargParams() { } void AArch64MemLayout::LayoutFormalParams() { - bool isLmbc = (be.GetMIRModule().GetFlavor() == kFlavorLmbc); - if (isLmbc && mirFunction->GetFormalCount() == 0) { - /* - * lmbc : upformalsize - size of formals passed from caller's frame into current function - * framesize - total frame size of current function used by Maple IR - * outparmsize - portion of frame size of current function used by call parameters - */ - segArgsStkPassed.SetSize(mirFunction->GetOutParmSize()); - segArgsRegPassed.SetSize(mirFunction->GetOutParmSize()); - return; - } - AArch64CallConvImpl parmLocator(be); CCLocInfo ploc; for (size_t i = 0; i < mirFunction->GetFormalCount(); ++i) { @@ -255,8 +249,6 @@ void AArch64MemLayout::LayoutFormalParams() { segArgsRegPassed.SetSize(static_cast(RoundUp(segArgsRegPassed.GetSize(), align))); symLoc->SetOffset(segArgsRegPassed.GetSize()); segArgsRegPassed.SetSize(segArgsRegPassed.GetSize() + size); - } else if (isLmbc) { - segArgsRegPassed.SetSize(segArgsRegPassed.GetSize() + k8ByteSize); } } else { /* stack */ uint32 size; @@ -371,11 +363,7 @@ void AArch64MemLayout::LayoutReturnRef(std::vector &returnDelays, symLoc->SetOffset(segRefLocals.GetSize()); segRefLocals.SetSize(segRefLocals.GetSize() + be.GetTypeSize(tyIdx)); } - if (be.GetMIRModule().GetFlavor() == kFlavorLmbc) { - segArgsToStkPass.SetSize(mirFunction->GetOutParmSize() + kDivide2 * k8ByteSize); - } else { - segArgsToStkPass.SetSize(FindLargestActualArea(structCopySize)); - } + segArgsToStkPass.SetSize(FindLargestActualArea(structCopySize)); maxParmStackSize = static_cast(segArgsToStkPass.GetSize()); if (Globals::GetInstance()->GetOptimLevel() == 0) { AssignSpillLocationsToPseudoRegisters(); diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_peep.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_peep.cpp index 49d372627319941ff61d056e420d03c168d36d74..cec85816ddb412609c2caaad8a02a8eb626fe8c9 100644 --- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_peep.cpp +++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_peep.cpp @@ -2515,6 +2515,7 @@ void EnhanceStrLdrAArch64::Run(BB &bb, Insn &insn) { auto &ofstOpnd = static_cast(prevInsn->GetOperand(kInsnThirdOpnd)); OfstOperand &offOpnd = static_cast(cgFunc).GetOrCreateOfstOpnd( static_cast(ofstOpnd.GetValue()), k32BitSize); + offOpnd.SetVary(ofstOpnd.GetVary()); auto *origOffOpnd = concreteMemOpnd.GetOffsetImmediate(); 
concreteMemOpnd.SetOffsetOperand(offOpnd); if (!static_cast(cgFunc).IsOperandImmValid(insn.GetMachineOpcode(), &memOpnd, kInsnSecondOpnd)) { diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_peep.cpp.ldrh b/src/mapleall/maple_be/src/cg/aarch64/aarch64_peep.cpp.ldrh new file mode 100644 index 0000000000000000000000000000000000000000..48ca51f86e4dfb6e112d3437dbdf8f12823d4509 --- /dev/null +++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_peep.cpp.ldrh @@ -0,0 +1,3356 @@ +/* + * Copyright (c) [2020] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + */ +#include "aarch64_peep.h" +#include "cg.h" +#include "mpl_logging.h" +#include "common_utils.h" + +namespace maplebe { +#define JAVALANG (cgFunc.GetMirModule().IsJavaModule()) +namespace { +const std::string kMccLoadRef = "MCC_LoadRefField"; +const std::string kMccLoadRefV = "MCC_LoadVolatileField"; +const std::string kMccLoadRefS = "MCC_LoadRefStatic"; +const std::string kMccLoadRefVS = "MCC_LoadVolatileStaticField"; +const std::string kMccDummy = "MCC_Dummy"; + +const uint32 kSizeOfSextMopTable = 5; +const uint32 kSizeOfUextMopTable = 3; + +MOperator sextMopTable[kSizeOfSextMopTable] = { + MOP_xsxtb32, MOP_xsxtb64, MOP_xsxth32, MOP_xsxth64, MOP_xsxtw64 +}; + +MOperator uextMopTable[kSizeOfUextMopTable] = { + MOP_xuxtb32, MOP_xuxth32, MOP_xuxtw64 +}; + +const std::string GetReadBarrierName(const Insn &insn) { + constexpr int32 totalBarrierNamesNum = 5; + std::array barrierNames = { + kMccLoadRef, kMccLoadRefV, kMccLoadRefS, kMccLoadRefVS, kMccDummy + }; + if (insn.GetMachineOpcode() == MOP_xbl || + insn.GetMachineOpcode() == MOP_tail_call_opt_xbl) { + auto &op = static_cast(insn.GetOperand(kInsnFirstOpnd)); + const std::string &funcName = op.GetName(); + for (const std::string &singleBarrierName : barrierNames) { + if (funcName == singleBarrierName) { + return singleBarrierName; + } + } + } + return ""; +} + +MOperator GetLoadOperator(uint32 refSize, bool isVolatile) { + if (refSize == k32BitSize) { + return isVolatile ? MOP_wldar : MOP_wldr; + } + return isVolatile ? 
MOP_xldar : MOP_xldr; +} +} + +void AArch64PeepHole::InitOpts() { + optimizations.resize(kPeepholeOptsNum); + optimizations[kRemoveIdenticalLoadAndStoreOpt] = optOwnMemPool->New(cgFunc); + optimizations[kRemoveMovingtoSameRegOpt] = optOwnMemPool->New(cgFunc); + optimizations[kCombineContiLoadAndStoreOpt] = optOwnMemPool->New(cgFunc); + optimizations[kEliminateSpecifcSXTOpt] = optOwnMemPool->New(cgFunc); + optimizations[kEliminateSpecifcUXTOpt] = optOwnMemPool->New(cgFunc); + optimizations[kFmovRegOpt] = optOwnMemPool->New(cgFunc); + optimizations[kCbnzToCbzOpt] = optOwnMemPool->New(cgFunc); + optimizations[kCsetCbzToBeqOpt] = optOwnMemPool->New(cgFunc); + optimizations[kContiLDRorSTRToSameMEMOpt] = optOwnMemPool->New(cgFunc); + optimizations[kRemoveIncDecRefOpt] = optOwnMemPool->New(cgFunc); + optimizations[kInlineReadBarriersOpt] = optOwnMemPool->New(cgFunc); + optimizations[kReplaceDivToMultiOpt] = optOwnMemPool->New(cgFunc); + optimizations[kAndCmpBranchesToCsetOpt] = optOwnMemPool->New(cgFunc); + optimizations[kAndCmpBranchesToTstOpt] = optOwnMemPool->New(cgFunc); + optimizations[kAndCbzBranchesToTstOpt] = optOwnMemPool->New(cgFunc); + optimizations[kZeroCmpBranchesOpt] = optOwnMemPool->New(cgFunc); +} + +void AArch64PeepHole::Run(BB &bb, Insn &insn) { + MOperator thisMop = insn.GetMachineOpcode(); + switch (thisMop) { + case MOP_wmovrr: + case MOP_xmovrr: + case MOP_xvmovs: + case MOP_xvmovd: + case MOP_vmovuu: + case MOP_vmovvv: { + (static_cast(optimizations[kRemoveMovingtoSameRegOpt]))->Run(bb, insn); + break; + } + case MOP_wstrb: + case MOP_wldrb: + case MOP_wstrh: + case MOP_wldrh: + case MOP_xldr: + case MOP_xstr: + case MOP_wldr: + case MOP_wstr: + case MOP_dldr: + case MOP_dstr: + case MOP_sldr: + case MOP_sstr: + case MOP_qldr: + case MOP_qstr: { + (static_cast(optimizations[kCombineContiLoadAndStoreOpt]))->Run(bb, insn); + (static_cast(optimizations[kContiLDRorSTRToSameMEMOpt]))->Run(bb, insn); + (static_cast(optimizations[kRemoveIdenticalLoadAndStoreOpt]))->Run(bb, insn); + break; + } + case MOP_xsxtb32: + case MOP_xsxth32: + case MOP_xsxtb64: + case MOP_xsxth64: + case MOP_xsxtw64: { + (static_cast(optimizations[kEliminateSpecifcSXTOpt]))->Run(bb, insn); + break; + } + case MOP_xuxtb32: + case MOP_xuxth32: + case MOP_xuxtw64: { + (static_cast(optimizations[kEliminateSpecifcUXTOpt]))->Run(bb, insn); + break; + } + case MOP_xvmovrv: + case MOP_xvmovrd: { + (static_cast(optimizations[kFmovRegOpt]))->Run(bb, insn); + break; + } + case MOP_wcbnz: + case MOP_xcbnz: { + (static_cast(optimizations[kCbnzToCbzOpt]))->Run(bb, insn); + (static_cast(optimizations[kCsetCbzToBeqOpt]))->Run(bb, insn); + break; + } + case MOP_wcbz: + case MOP_xcbz: { + (static_cast(optimizations[kCsetCbzToBeqOpt]))->Run(bb, insn); + break; + } + case MOP_xbl: { + (static_cast(optimizations[kRemoveIncDecRefOpt]))->Run(bb, insn); + break; + } + case MOP_wsdivrrr: { + (static_cast(optimizations[kReplaceDivToMultiOpt]))->Run(bb, insn); + break; + } + case MOP_wcsetrc: + case MOP_xcsetrc: { + (static_cast(optimizations[kAndCmpBranchesToCsetOpt]))->Run(bb, insn); + break; + } + case MOP_xandrrr: + case MOP_wandrrr: + case MOP_wandrri12: + case MOP_xandrri13: { + (static_cast(optimizations[kAndCmpBranchesToTstOpt]))->Run(bb, insn); + (static_cast(optimizations[kAndCbzBranchesToTstOpt]))->Run(bb, insn); + break; + } + default: + break; + } + if (GetReadBarrierName(insn) != "") { /* skip if it is not a read barrier call. 
*/ + (static_cast(optimizations[kInlineReadBarriersOpt]))->Run(bb, insn); + } + if (&insn == bb.GetLastInsn()) { + (static_cast(optimizations[kZeroCmpBranchesOpt]))->Run(bb, insn); + } +} + +void AArch64PeepHole0::InitOpts() { + optimizations.resize(kPeepholeOptsNum); + optimizations[kRemoveIdenticalLoadAndStoreOpt] = optOwnMemPool->New(cgFunc); + optimizations[kCmpCsetOpt] = optOwnMemPool->New(cgFunc); + optimizations[kComplexMemOperandOptAdd] = optOwnMemPool->New(cgFunc); + optimizations[kDeleteMovAfterCbzOrCbnzOpt] = optOwnMemPool->New(cgFunc); + optimizations[kRemoveSxtBeforeStrOpt] = optOwnMemPool->New(cgFunc); + optimizations[kRemoveMovingtoSameRegOpt] = optOwnMemPool->New(cgFunc); +} + +void AArch64PeepHole0::Run(BB &bb, Insn &insn) { + MOperator thisMop = insn.GetMachineOpcode(); + switch (thisMop) { + case MOP_xstr: + case MOP_wstr: { + (static_cast(optimizations[kRemoveIdenticalLoadAndStoreOpt]))->Run(bb, insn); + break; + } + case MOP_wcmpri: + case MOP_xcmpri: { + (static_cast(optimizations[kCmpCsetOpt]))->Run(bb, insn); + break; + } + case MOP_xaddrrr: { + (static_cast(optimizations[kComplexMemOperandOptAdd]))->Run(bb, insn); + break; + } + case MOP_wcbz: + case MOP_xcbz: + case MOP_wcbnz: + case MOP_xcbnz: { + (static_cast(optimizations[kDeleteMovAfterCbzOrCbnzOpt]))->Run(bb, insn); + break; + } + case MOP_wstrh: + case MOP_wstrb: { + (static_cast(optimizations[kRemoveSxtBeforeStrOpt]))->Run(bb, insn); + break; + } + case MOP_wmovrr: + case MOP_xmovrr: + case MOP_xvmovs: + case MOP_xvmovd: + case MOP_vmovuu: + case MOP_vmovvv: { + (static_cast(optimizations[kRemoveMovingtoSameRegOpt]))->Run(bb, insn); + break; + } + default: + break; + } +} + +void AArch64PrePeepHole::InitOpts() { + optimizations.resize(kPeepholeOptsNum); + optimizations[kOneHoleBranchesPreOpt] = optOwnMemPool->New(cgFunc); + optimizations[kLoadFloatPointOpt] = optOwnMemPool->New(cgFunc); + optimizations[kReplaceOrrToMovOpt] = optOwnMemPool->New(cgFunc); + optimizations[kReplaceCmpToCmnOpt] = optOwnMemPool->New(cgFunc); + optimizations[kRemoveIncRefOpt] = optOwnMemPool->New(cgFunc); + optimizations[kLongIntCompareWithZOpt] = optOwnMemPool->New(cgFunc); + optimizations[kComplexMemOperandOpt] = optOwnMemPool->New(cgFunc); + optimizations[kComplexMemOperandPreOptAdd] = optOwnMemPool->New(cgFunc); + optimizations[kComplexMemOperandOptLSL] = optOwnMemPool->New(cgFunc); + optimizations[kComplexMemOperandOptLabel] = optOwnMemPool->New(cgFunc); + optimizations[kWriteFieldCallOpt] = optOwnMemPool->New(cgFunc); + optimizations[kDuplicateExtensionOpt] = optOwnMemPool->New(cgFunc); + optimizations[kEnhanceStrLdrAArch64Opt] = optOwnMemPool->New(cgFunc); +} + +void AArch64PrePeepHole::Run(BB &bb, Insn &insn) { + MOperator thisMop = insn.GetMachineOpcode(); + switch (thisMop) { + case MOP_xmovzri16: { + (static_cast(optimizations[kLoadFloatPointOpt]))->Run(bb, insn); + break; + } + case MOP_wiorri12r: + case MOP_wiorrri12: + case MOP_xiorri13r: + case MOP_xiorrri13: { + (static_cast(optimizations[kReplaceOrrToMovOpt]))->Run(bb, insn); + break; + } + case MOP_xmovri32: + case MOP_xmovri64: { + (static_cast(optimizations[kReplaceCmpToCmnOpt]))->Run(bb, insn); + break; + } + case MOP_xbl: { + (static_cast(optimizations[kRemoveIncRefOpt]))->Run(bb, insn); + if (CGOptions::IsGCOnly() && CGOptions::DoWriteRefFieldOpt()) { + (static_cast(optimizations[kWriteFieldCallOpt]))->Run(bb, insn); + } + break; + } + case MOP_xcmpri: { + (static_cast(optimizations[kLongIntCompareWithZOpt]))->Run(bb, insn); + break; + } + case MOP_xadrpl12: 
{ + (static_cast(optimizations[kComplexMemOperandOpt]))->Run(bb, insn); + break; + } + case MOP_xaddrrr: { + (static_cast(optimizations[kComplexMemOperandPreOptAdd]))->Run(bb, insn); + break; + } + case MOP_xaddrrrs: { + (static_cast(optimizations[kComplexMemOperandOptLSL]))->Run(bb, insn); + break; + } + case MOP_xsxtb32: + case MOP_xsxth32: + case MOP_xsxtb64: + case MOP_xsxth64: + case MOP_xsxtw64: + case MOP_xuxtb32: + case MOP_xuxth32: + case MOP_xuxtw64: { + (static_cast(optimizations[kDuplicateExtensionOpt]))->Run(bb, insn); + break; + } + case MOP_xldli: { + (static_cast(optimizations[kComplexMemOperandOptLabel]))->Run(bb, insn); + break; + } + case MOP_xldr: + case MOP_xstr: + case MOP_wldr: + case MOP_wstr: + case MOP_dldr: + case MOP_dstr: + case MOP_sldr: + case MOP_sstr: { + (static_cast(optimizations[kEnhanceStrLdrAArch64Opt]))->Run(bb, insn); + break; + } + default: + break; + } + if (&insn == bb.GetLastInsn()) { + (static_cast(optimizations[kOneHoleBranchesPreOpt]))->Run(bb, insn); + if (CGOptions::IsGCOnly() && CGOptions::DoWriteRefFieldOpt()) { + (static_cast(optimizations[kWriteFieldCallOpt]))->Reset(); + } + } +} + +void AArch64PrePeepHole1::InitOpts() { + optimizations.resize(kPeepholeOptsNum); + optimizations[kRemoveDecRefOpt] = optOwnMemPool->New(cgFunc); + optimizations[kComputationTreeOpt] = optOwnMemPool->New(cgFunc); + optimizations[kOneHoleBranchesOpt] = optOwnMemPool->New(cgFunc); + optimizations[kReplaceIncDecWithIncOpt] = optOwnMemPool->New(cgFunc); + optimizations[kAndCmpBranchesToTbzOpt] = optOwnMemPool->New(cgFunc); +} + +void AArch64PrePeepHole1::Run(BB &bb, Insn &insn) { + MOperator thisMop = insn.GetMachineOpcode(); + switch (thisMop) { + case MOP_xbl: { + if (JAVALANG) { + (static_cast(optimizations[kRemoveDecRefOpt]))->Run(bb, insn); + (static_cast(optimizations[kReplaceIncDecWithIncOpt]))->Run(bb, insn); + } + break; + } + case MOP_xaddrri12: { + (static_cast(optimizations[kComputationTreeOpt]))->Run(bb, insn); + break; + } + default: + break; + } + if (&insn == bb.GetLastInsn()) { + switch (thisMop) { + case MOP_wcbz: + case MOP_wcbnz: + case MOP_xcbz: + case MOP_xcbnz: { + (static_cast(optimizations[kOneHoleBranchesOpt]))->Run(bb, insn); + break; + } + case MOP_beq: + case MOP_bne: { + (static_cast(optimizations[kAndCmpBranchesToTbzOpt]))->Run(bb, insn); + break; + } + default: + break; + } + } +} + +void RemoveIdenticalLoadAndStoreAArch64::Run(BB &bb, Insn &insn) { + Insn *nextInsn = insn.GetNext(); + if (nextInsn == nullptr) { + return; + } + MOperator mop1 = insn.GetMachineOpcode(); + MOperator mop2 = nextInsn->GetMachineOpcode(); + if ((mop1 == MOP_wstr && mop2 == MOP_wstr) || (mop1 == MOP_xstr && mop2 == MOP_xstr)) { + if (IsMemOperandsIdentical(insn, *nextInsn)) { + bb.RemoveInsn(insn); + } + } else if ((mop1 == MOP_wstr && mop2 == MOP_wldr) || (mop1 == MOP_xstr && mop2 == MOP_xldr)) { + if (IsMemOperandsIdentical(insn, *nextInsn)) { + bb.RemoveInsn(*nextInsn); + } + } +} + +bool RemoveIdenticalLoadAndStoreAArch64::IsMemOperandsIdentical(const Insn &insn1, const Insn &insn2) const { + regno_t regNO1 = static_cast(insn1.GetOperand(kInsnFirstOpnd)).GetRegisterNumber(); + regno_t regNO2 = static_cast(insn2.GetOperand(kInsnFirstOpnd)).GetRegisterNumber(); + if (regNO1 != regNO2) { + return false; + } + /* Match only [base + offset] */ + auto &memOpnd1 = static_cast(insn1.GetOperand(kInsnSecondOpnd)); + if (static_cast(memOpnd1).GetAddrMode() != AArch64MemOperand::kAddrModeBOi) { + return false; + } + auto &memOpnd2 = 
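+  /*
+   * RemoveIdenticalLoadAndStoreAArch64::Run (above), e.g.
+   *   str w1, [x2, #4]; str w1, [x2, #4]  ->  the first (dead) store is removed
+   *   str w1, [x2, #4]; ldr w1, [x2, #4]  ->  the redundant reload is removed
+   */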
static_cast(insn2.GetOperand(kInsnSecondOpnd)); + if (static_cast(memOpnd2).GetAddrMode() != AArch64MemOperand::kAddrModeBOi) { + return false; + } + Operand *base1 = memOpnd1.GetBaseRegister(); + Operand *base2 = memOpnd2.GetBaseRegister(); + if (!((base1 != nullptr) && base1->IsRegister()) || !((base2 != nullptr) && base2->IsRegister())) { + return false; + } + + regno_t baseRegNO1 = static_cast(base1)->GetRegisterNumber(); + /* First insn re-write base addr reg1 <- [ reg1 + offset ] */ + if (baseRegNO1 == regNO1) { + return false; + } + + regno_t baseRegNO2 = static_cast(base2)->GetRegisterNumber(); + if (baseRegNO1 != baseRegNO2) { + return false; + } + + if (static_cast(memOpnd1).GetOffsetImmediate()->GetOffsetValue() != + static_cast(memOpnd2).GetOffsetImmediate()->GetOffsetValue()) { + return false; + } + return true; +} + +void RemoveMovingtoSameRegAArch64::Run(BB &bb, Insn &insn) { + ASSERT(insn.GetOperand(kInsnFirstOpnd).IsRegister(), "expects registers"); + ASSERT(insn.GetOperand(kInsnSecondOpnd).IsRegister(), "expects registers"); + auto ®1 = static_cast(insn.GetOperand(kInsnFirstOpnd)); + auto ®2 = static_cast(insn.GetOperand(kInsnSecondOpnd)); + /* remove mov x0,x0 when it cast i32 to i64 */ + if ((reg1.GetRegisterNumber() == reg2.GetRegisterNumber()) && (reg1.GetSize() >= reg2.GetSize())) { + bb.RemoveInsn(insn); + } +} + +void EnhanceStrLdrAArch64::Run(BB &bb, Insn &insn) { + Insn *prevInsn = insn.GetPrev(); + if (!cgFunc.GetMirModule().IsCModule()) { + return; + } + + if (prevInsn == nullptr) { + return; + } + Operand &memOpnd = insn.GetOperand(kInsnSecondOpnd); + CHECK_FATAL(memOpnd.GetKind() == Operand::kOpdMem, "Unexpected operand in EnhanceStrLdrAArch64"); + auto &a64MemOpnd = static_cast(memOpnd); + RegOperand *baseOpnd = a64MemOpnd.GetBaseRegister(); + MOperator prevMop = prevInsn->GetMachineOpcode(); + if (IsEnhanceAddImm(prevMop) && a64MemOpnd.GetAddrMode() == AArch64MemOperand::kAddrModeBOi && + a64MemOpnd.GetOffsetImmediate()->GetValue() == 0) { + auto &addDestOpnd = static_cast(prevInsn->GetOperand(kInsnFirstOpnd)); + if (baseOpnd == &addDestOpnd && !IfOperandIsLiveAfterInsn(addDestOpnd, insn)) { + auto &concreteMemOpnd = static_cast(memOpnd); + auto *origBaseReg = concreteMemOpnd.GetBaseRegister(); + concreteMemOpnd.SetBaseRegister( + static_cast(prevInsn->GetOperand(kInsnSecondOpnd))); + auto &ofstOpnd = static_cast(prevInsn->GetOperand(kInsnThirdOpnd)); + AArch64OfstOperand &offOpnd = static_cast(cgFunc).GetOrCreateOfstOpnd( + ofstOpnd.GetValue(), k32BitSize); + auto *origOffOpnd = concreteMemOpnd.GetOffsetImmediate(); + concreteMemOpnd.SetOffsetImmediate(offOpnd); + if (!static_cast(cgFunc).IsOperandImmValid(insn.GetMachineOpcode(), &memOpnd, kInsnSecondOpnd)) { + // If new offset is invalid, undo it + concreteMemOpnd.SetBaseRegister(*static_cast(origBaseReg)); + concreteMemOpnd.SetOffsetImmediate(*origOffOpnd); + return; + } + bb.RemoveInsn(*prevInsn); + } + } +} + +bool EnhanceStrLdrAArch64::IsEnhanceAddImm(MOperator prevMop) { + return prevMop == MOP_xaddrri12 || prevMop == MOP_waddrri12; +} + +bool IsSameRegisterOperation(const RegOperand &desMovOpnd, + const RegOperand &uxtDestOpnd, + const RegOperand &uxtFromOpnd) { + return ((desMovOpnd.GetRegisterNumber() == uxtDestOpnd.GetRegisterNumber()) && + (uxtDestOpnd.GetRegisterNumber() == uxtFromOpnd.GetRegisterNumber())); +} + +bool CombineContiLoadAndStoreAArch64::IsRegDefUseInInsn(Insn &insn, regno_t regNO) { + uint32 opndNum = insn.GetOperandSize(); + for(uint32 i = 0; i < opndNum; ++i) { + Operand &opnd 
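+  /*
+   * EnhanceStrLdrAArch64::Run (above) folds a preceding add-immediate into the access, e.g.
+   *   add x1, x2, #32; ldr x0, [x1]  ->  ldr x0, [x2, #32]
+   * provided x1 is not live after the load and the new offset is still encodable.
+   */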
= insn.GetOperand(i); + if (opnd.IsList()) { + auto &listOpnd = static_cast(opnd); + for (auto listElem : listOpnd.GetOperands()) { + RegOperand *regOpnd = static_cast(listElem); + ASSERT(regOpnd != nullptr, "parameter operand must be RegOperand"); + if (regNO == regOpnd->GetRegisterNumber()) { + return true; + } + } + } else if (opnd.IsMemoryAccessOperand()) { + auto &memOpnd = static_cast(opnd); + RegOperand *base = memOpnd.GetBaseRegister(); + RegOperand *index = memOpnd.GetIndexRegister(); + if ((base != nullptr && base->GetRegisterNumber() == regNO) || + (index != nullptr && index->GetRegisterNumber() == regNO)) { + return true; + } + } else if (opnd.IsConditionCode()) { + Operand &rflagOpnd = cgFunc.GetOrCreateRflag(); + RegOperand &rflagReg = static_cast(rflagOpnd); + if (rflagReg.GetRegisterNumber() == regNO) { + return true; + } + } else if (opnd.IsRegister()) { + if (static_cast(opnd).GetRegisterNumber() == regNO) { + return true; + } + } + } + return false; +} + +bool CombineContiLoadAndStoreAArch64::IsRegNotSameMemUseInInsn(Insn &insn, regno_t regNO, bool isStore, + int32 baseOfst, regno_t destRegNO) { + uint32 opndNum = insn.GetOperandSize(); + bool sameMemAccess = false; /* both store or load */ + if (insn.IsStore() == isStore) { + sameMemAccess = true; + } + for(uint32 i = 0; i < opndNum; ++i) { + Operand &opnd = insn.GetOperand(i); + if (opnd.IsList()) { + auto &listOpnd = static_cast(opnd); + for (auto listElem : listOpnd.GetOperands()) { + RegOperand *regOpnd = static_cast(listElem); + ASSERT(regOpnd != nullptr, "parameter operand must be RegOperand"); + if (regNO == regOpnd->GetRegisterNumber()) { + return true; + } + } + } else if (opnd.IsMemoryAccessOperand()) { + auto &memOpnd = static_cast(opnd); + RegOperand *base = memOpnd.GetBaseRegister(); + /* need check offset as well */ + regno_t stackBaseRegNO = cgFunc.UseFP() ? R29 : RSP; + if (!sameMemAccess && base != nullptr) { + regno_t curBaseRegNO = base->GetRegisterNumber(); + uint32 memBarrierRange = insn.IsLoadStorePair() ? k16BitSize : k8BitSize; + if (!(curBaseRegNO == regNO && memOpnd.GetAddrMode() == AArch64MemOperand::kAddrModeBOi && + memOpnd.GetOffsetImmediate() != nullptr && + (memOpnd.GetOffsetImmediate()->GetOffsetValue() <= (baseOfst - memBarrierRange) || + memOpnd.GetOffsetImmediate()->GetOffsetValue() >= (baseOfst + memBarrierRange)))) { + return true; + } + } + /* do not trust the following situation : + * str x1, [x9] + * str x6, [x2] + * str x3, [x9, #8] + */ + if (isStore && regNO != stackBaseRegNO && base != nullptr && + base->GetRegisterNumber() != stackBaseRegNO && base->GetRegisterNumber() != regNO) { + return true; + } + if (isStore && base != nullptr && base->GetRegisterNumber() == regNO && destRegNO != RZR && + static_cast(insn.GetOperand(kInsnFirstOpnd)).GetRegisterNumber() != RZR) { + if (memOpnd.GetAddrMode() == AArch64MemOperand::kAddrModeBOi && memOpnd.GetOffsetImmediate() != nullptr) { + int32 curOffset = memOpnd.GetOffsetImmediate()->GetOffsetValue(); + if (memOpnd.GetSize() == k64BitSize) { + uint32 memBarrierRange = insn.IsLoadStorePair() ? k16BitSize : k8BitSize; + if (curOffset < baseOfst + memBarrierRange && curOffset > baseOfst - memBarrierRange) { + return true; + } + } else if (memOpnd.GetSize() == k32BitSize) { + uint32 memBarrierRange = insn.IsLoadStorePair() ? 
k8BitSize : k4BitSize; + if (curOffset < baseOfst + memBarrierRange && curOffset > baseOfst - memBarrierRange) { + return true; + } + } + } + } + } else if (opnd.IsConditionCode()) { + Operand &rflagOpnd = cgFunc.GetOrCreateRflag(); + RegOperand &rflagReg = static_cast(rflagOpnd); + if (rflagReg.GetRegisterNumber() == regNO) { + return true; + } + } else if (opnd.IsRegister()) { + if (static_cast(opnd).GetRegisterNumber() == regNO) { + return true; + } + } + } + return false; +} + +std::vector CombineContiLoadAndStoreAArch64::FindPrevStrLdr(Insn &insn, regno_t destRegNO, + regno_t memBaseRegNO, int32 baseOfst) { + std::vector prevContiInsns; + bool isStr = insn.IsStore(); + for (Insn *curInsn = insn.GetPrev(); curInsn != nullptr; curInsn = curInsn->GetPrev()) { + if (!curInsn->IsMachineInstruction()) { + continue; + } + if (curInsn->IsRegDefined(memBaseRegNO)) { + return prevContiInsns; + } + if (IsRegNotSameMemUseInInsn(*curInsn, memBaseRegNO, insn.IsStore(), baseOfst, destRegNO)) { + return prevContiInsns; + } + /* return continuous STD/LDR insn */ + if (((isStr && curInsn->IsStore()) || (!isStr && curInsn->IsLoad())) && !curInsn->IsLoadStorePair()) { + auto *memOpnd = static_cast(curInsn->GetMemOpnd()); + /* do not combine ldr r0, label */ + if (memOpnd != nullptr) { + auto *BaseRegOpnd = static_cast(memOpnd->GetBaseRegister()); + ASSERT(BaseRegOpnd == nullptr || !BaseRegOpnd->IsVirtualRegister(), + "physical register has not been allocated?"); + if (memOpnd->GetAddrMode() == AArch64MemOperand::kAddrModeBOi && + BaseRegOpnd->GetRegisterNumber() == memBaseRegNO) { + prevContiInsns.emplace_back(curInsn); + } + } + } + /* check insn that changes the data flow */ + regno_t stackBaseRegNO = cgFunc.UseFP() ? R29 : RSP; + /* ldr x8, [x21, #8] + * call foo() + * ldr x9, [x21, #16] + * although x21 is a calleeSave register, there is no guarantee data in memory [x21] is not changed + */ + if (curInsn->IsCall() && (!AArch64Abi::IsCalleeSavedReg(static_cast(destRegNO)) || + memBaseRegNO != stackBaseRegNO)) { + return prevContiInsns; + } + if (curInsn->GetMachineOpcode() == MOP_asm) { + return prevContiInsns; + } + if (IsRegDefUseInInsn(*curInsn, destRegNO)) { + return prevContiInsns; + } + } + return prevContiInsns; +} + +/* Combining 2 STRs into 1 stp or 2 LDRs into 1 ldp */ +void CombineContiLoadAndStoreAArch64::Run(BB &bb, Insn &insn) { + MOperator thisMop = insn.GetMachineOpcode(); + ASSERT(insn.GetOperand(kInsnFirstOpnd).IsRegister(), "unexpect operand"); + auto &destOpnd = static_cast(insn.GetOperand(kInsnFirstOpnd)); + auto *memOpnd = static_cast(insn.GetMemOpnd()); + ASSERT(memOpnd != nullptr, "get mem operand failed"); + if (memOpnd->GetAddrMode() != AArch64MemOperand::kAddrModeBOi) { + return; + } + if (!doAggressiveCombine) { + return; + } + auto *baseRegOpnd = static_cast(memOpnd->GetBaseRegister()); + AArch64OfstOperand *offsetOpnd = memOpnd->GetOffsetImmediate(); + CHECK_FATAL(offsetOpnd != nullptr, "offset opnd lost"); + ASSERT(baseRegOpnd == nullptr || !baseRegOpnd->IsVirtualRegister(), "physical register has not been allocated?"); + std::vector prevContiInsnVec = FindPrevStrLdr( + insn, destOpnd.GetRegisterNumber(), baseRegOpnd->GetRegisterNumber(), offsetOpnd->GetOffsetValue()); + for (auto prevContiInsn : prevContiInsnVec) { + ASSERT(prevContiInsn != nullptr, "get previous consecutive instructions failed"); + auto *prevMemOpnd = static_cast(prevContiInsn->GetMemOpnd()); + if (memOpnd->GetIndexOpt() != prevMemOpnd->GetIndexOpt()) { + continue; + } + AArch64OfstOperand 
*prevOffsetOpnd = prevMemOpnd->GetOffsetImmediate(); + CHECK_FATAL(offsetOpnd != nullptr && prevOffsetOpnd != nullptr, "both conti str/ldr have no offset"); + + auto &prevDestOpnd = static_cast(prevContiInsn->GetOperand(kInsnFirstOpnd)); + uint32 memSize = static_cast(insn).GetLoadStoreSize(); + uint32 prevMemSize = static_cast(*prevContiInsn).GetLoadStoreSize(); + if (memSize != prevMemSize || prevDestOpnd.GetRegisterType() != destOpnd.GetRegisterType() || + thisMop != prevContiInsn->GetMachineOpcode()) { + continue; + } + int offsetVal = offsetOpnd->GetOffsetValue(); + int prevOffsetVal = prevOffsetOpnd->GetOffsetValue(); + int diffVal = std::abs(offsetVal - prevOffsetVal); + /* do combination str/ldr -> stp/ldp */ + if ((insn.IsStore() || destOpnd.GetRegisterNumber() != prevDestOpnd.GetRegisterNumber()) || + (destOpnd.GetRegisterNumber() == RZR && prevDestOpnd.GetRegisterNumber() == RZR)) { + if ((memSize == k8ByteSize && diffVal == k8BitSize) || + (memSize == k4ByteSize && diffVal == k4BitSize) || + (memSize == k16ByteSize && diffVal == k16BitSize)) { + CG *cg = cgFunc.GetCG(); + MOperator mopPair = GetMopPair(thisMop); + if (offsetVal < prevOffsetVal) { + if (static_cast(cgFunc).IsOperandImmValid(mopPair, memOpnd, kInsnThirdOpnd)) { + bb.InsertInsnAfter(*prevContiInsn, + cg->BuildInstruction(mopPair, destOpnd, prevDestOpnd, *memOpnd)); + RemoveInsnAndKeepComment(bb, insn, *prevContiInsn); + return; + } + } else { + if (static_cast(cgFunc).IsOperandImmValid(mopPair, prevMemOpnd, kInsnThirdOpnd)) { + bb.InsertInsnAfter(*prevContiInsn, + cg->BuildInstruction(mopPair, prevDestOpnd, destOpnd, *prevMemOpnd)); + RemoveInsnAndKeepComment(bb, insn, *prevContiInsn); + return; + } + } + } + } + /* do combination strb/ldrb -> strh/ldrh -> str/ldr */ + if (destOpnd.GetRegisterNumber() == prevDestOpnd.GetRegisterNumber() && + destOpnd.GetRegisterNumber() == RZR && prevDestOpnd.GetRegisterNumber() == RZR) { + if ((memSize == k1ByteSize && diffVal == k1BitSize) || (memSize == k2ByteSize && diffVal == k2ByteSize)) { + CG *cg = cgFunc.GetCG(); + MOperator mopPair = GetMopHigherByte(thisMop); + if (offsetVal < prevOffsetVal) { + if (static_cast(cgFunc).IsOperandImmValid(mopPair, memOpnd, kInsnSecondOpnd)) { + bb.InsertInsnAfter(*prevContiInsn, cg->BuildInstruction(mopPair, destOpnd, *memOpnd)); + RemoveInsnAndKeepComment(bb, insn, *prevContiInsn); + return; + } + } else { + if (static_cast(cgFunc).IsOperandImmValid(mopPair, prevMemOpnd, kInsnSecondOpnd)) { + bb.InsertInsnAfter(*prevContiInsn, cg->BuildInstruction(mopPair, prevDestOpnd, *prevMemOpnd)); + RemoveInsnAndKeepComment(bb, insn, *prevContiInsn); + return; + } + } + } + } + } +} + +MOperator CombineContiLoadAndStoreAArch64::GetMopHigherByte(MOperator mop) { + switch (mop) { + case MOP_wldrb: + return MOP_wldrh; + case MOP_wstrb: + return MOP_wstrh; + case MOP_wldrh: + return MOP_wldr; + case MOP_wstrh: + return MOP_wstr; + default: + ASSERT(false, "should not run here"); + return MOP_undef; + } +} + +void CombineContiLoadAndStoreAArch64::RemoveInsnAndKeepComment(BB &bb, Insn &insn, Insn &prevInsn) { + /* keep the comment */ + Insn *nn = prevInsn.GetNext(); + std::string newComment = ""; + MapleString comment = insn.GetComment(); + if (comment.c_str() != nullptr && strlen(comment.c_str()) > 0) { + newComment += comment.c_str(); + } + comment = prevInsn.GetComment(); + if (comment.c_str() != nullptr && strlen(comment.c_str()) > 0) { + newComment = newComment + " " + comment.c_str(); + } + if (newComment.c_str() != nullptr && 
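+  /*
+   * CombineContiLoadAndStoreAArch64::Run (above), e.g.
+   *   str x0, [sp, #8]; str x1, [sp, #16]   ->  stp x0, x1, [sp, #8]
+   *   strb wzr, [x0];   strb wzr, [x0, #1]  ->  strh wzr, [x0]
+   * The two accesses need not be textually adjacent; FindPrevStrLdr scans backwards for a safe partner.
+   */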
strlen(newComment.c_str()) > 0) { + nn->SetComment(newComment); + } + bb.RemoveInsn(insn); + bb.RemoveInsn(prevInsn); +} + +void EliminateSpecifcSXTAArch64::Run(BB &bb, Insn &insn) { + MOperator thisMop = insn.GetMachineOpcode(); + Insn *prevInsn = insn.GetPrev(); + while (prevInsn != nullptr && !prevInsn->GetMachineOpcode()) { + prevInsn = prevInsn->GetPrev(); + } + if (prevInsn == nullptr) { + return; + } + auto ®Opnd0 = static_cast(insn.GetOperand(kInsnFirstOpnd)); + auto ®Opnd1 = static_cast(insn.GetOperand(kInsnSecondOpnd)); + if (&insn != bb.GetFirstInsn() && regOpnd0.GetRegisterNumber() == regOpnd1.GetRegisterNumber() && + prevInsn->IsMachineInstruction()) { + if (prevInsn->GetMachineOpcode() == MOP_xmovri32 || prevInsn->GetMachineOpcode() == MOP_xmovri64) { + auto &dstMovOpnd = static_cast(prevInsn->GetOperand(kInsnFirstOpnd)); + if (dstMovOpnd.GetRegisterNumber() != regOpnd1.GetRegisterNumber()) { + return; + } + Operand &opnd = prevInsn->GetOperand(kInsnSecondOpnd); + if (opnd.IsIntImmediate()) { + auto &immOpnd = static_cast(opnd); + int64 value = immOpnd.GetValue(); + if (thisMop == MOP_xsxtb32) { + /* value should in range between -127 and 127 */ + if (value >= static_cast(0xFFFFFFFFFFFFFF80) && value <= 0x7F && + immOpnd.IsSingleInstructionMovable(regOpnd0.GetSize())) { + bb.RemoveInsn(insn); + } + } else if (thisMop == MOP_xsxth32) { + /* value should in range between -32678 and 32678 */ + if (value >= static_cast(0xFFFFFFFFFFFF8000) && value <= 0x7FFF && + immOpnd.IsSingleInstructionMovable(regOpnd0.GetSize())) { + bb.RemoveInsn(insn); + } + } else { + uint64 flag = 0xFFFFFFFFFFFFFF80; /* initialize the flag with fifty-nine 1s at top */ + if (thisMop == MOP_xsxth64) { + flag = 0xFFFFFFFFFFFF8000; /* specify the flag with forty-nine 1s at top in this case */ + } else if (thisMop == MOP_xsxtw64) { + flag = 0xFFFFFFFF80000000; /* specify the flag with thirty-three 1s at top in this case */ + } + if (!(static_cast(value) & flag) && immOpnd.IsSingleInstructionMovable(regOpnd0.GetSize())) { + auto *aarch64CGFunc = static_cast(&cgFunc); + RegOperand &dstOpnd = aarch64CGFunc->GetOrCreatePhysicalRegisterOperand( + static_cast(dstMovOpnd.GetRegisterNumber()), k64BitSize, dstMovOpnd.GetRegisterType()); + prevInsn->SetOperand(kInsnFirstOpnd, dstOpnd); + prevInsn->SetMOperator(MOP_xmovri64); + bb.RemoveInsn(insn); + } + } + } + } else if (prevInsn->GetMachineOpcode() == MOP_wldrsb) { + auto &dstMovOpnd = static_cast(prevInsn->GetOperand(kInsnFirstOpnd)); + if (dstMovOpnd.GetRegisterNumber() != regOpnd1.GetRegisterNumber()) { + return; + } + if (thisMop == MOP_xsxtb32) { + bb.RemoveInsn(insn); + } + } else if (prevInsn->GetMachineOpcode() == MOP_wldrsh) { + auto &dstMovOpnd = static_cast(prevInsn->GetOperand(kInsnFirstOpnd)); + if (dstMovOpnd.GetRegisterNumber() != regOpnd1.GetRegisterNumber()) { + return; + } + if (thisMop == MOP_xsxth32) { + bb.RemoveInsn(insn); + } + } + } +} + +void EliminateSpecifcUXTAArch64::Run(BB &bb, Insn &insn) { + MOperator thisMop = insn.GetMachineOpcode(); + Insn *prevInsn = insn.GetPreviousMachineInsn(); + if (prevInsn == nullptr) { + return; + } + auto ®Opnd0 = static_cast(insn.GetOperand(kInsnFirstOpnd)); + auto ®Opnd1 = static_cast(insn.GetOperand(kInsnSecondOpnd)); + if (prevInsn->IsCall() && + regOpnd0.GetRegisterNumber() == regOpnd1.GetRegisterNumber() && + (regOpnd1.GetRegisterNumber() == R0 || regOpnd1.GetRegisterNumber() == V0)) { + uint32 retSize = prevInsn->GetRetSize(); + if (retSize > 0 && + ((thisMop == MOP_xuxtb32 && retSize <= k1ByteSize) || 
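+  /*
+   * EliminateSpecifcSXTAArch64::Run (above) drops a sign-extension whose source already
+   * fits the extended range, e.g.  mov w0, #5; sxtb w0, w0  ->  mov w0, #5
+   */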
+ (thisMop == MOP_xuxth32 && retSize <= k2ByteSize) || + (thisMop == MOP_xuxtw64 && retSize <= k4ByteSize))) { + bb.RemoveInsn(insn); + } + return; + } +#if 1 + /* Merge ldr+uxt[bh] into ldrb/ldrh */ + if (prevInsn->GetMachineOpcode() == MOP_wldr) { + auto &dstOpnd = static_cast(prevInsn->GetOperand(kInsnFirstOpnd)); + if (thisMop == MOP_xuxtb32 || thisMop == MOP_xuxth32) { + if (dstOpnd.GetRegisterNumber() == regOpnd1.GetRegisterNumber() && + !IfOperandIsLiveAfterInsn(dstOpnd, insn) && + !IfOperandIsLiveAfterInsn(regOpnd0, insn)) { + prevInsn->SetMOP(thisMop == MOP_xuxtb32 ? MOP_wldrb : MOP_wldrh); + dstOpnd.SetRegisterNumber(regOpnd0.GetRegisterNumber()); + bb.RemoveInsn(insn); + return; + } + } + } +#endif + if (&insn == bb.GetFirstInsn() || regOpnd0.GetRegisterNumber() != regOpnd1.GetRegisterNumber() || + !prevInsn->IsMachineInstruction()) { + return; + } + if (cgFunc.GetMirModule().GetSrcLang() == kSrcLangC && prevInsn->IsCall() && prevInsn->GetIsCallReturnSigned()) { + return; + } + if (thisMop == MOP_xuxtb32) { + if (prevInsn->GetMachineOpcode() == MOP_xmovri32 || prevInsn->GetMachineOpcode() == MOP_xmovri64) { + auto &dstMovOpnd = static_cast(prevInsn->GetOperand(kInsnFirstOpnd)); + if (!IsSameRegisterOperation(dstMovOpnd, regOpnd1, regOpnd0)) { + return; + } + Operand &opnd = prevInsn->GetOperand(kInsnSecondOpnd); + if (opnd.IsIntImmediate()) { + auto &immOpnd = static_cast(opnd); + int64 value = immOpnd.GetValue(); + /* check the top 56 bits of value */ + if (!(static_cast(value) & 0xFFFFFFFFFFFFFF00)) { + bb.RemoveInsn(insn); + } + } + } else if (prevInsn->GetMachineOpcode() == MOP_wldrb) { + auto &dstOpnd = static_cast(prevInsn->GetOperand(kInsnFirstOpnd)); + if (dstOpnd.GetRegisterNumber() != regOpnd1.GetRegisterNumber()) { + return; + } + bb.RemoveInsn(insn); +#if 0 + } else if (prevInsn->GetMachineOpcode() == MOP_wldr) { + auto &dstOpnd = static_cast(prevInsn->GetOperand(kInsnFirstOpnd)); + if (dstOpnd.GetRegisterNumber() == regOpnd1.GetRegisterNumber() && + !IfOperandIsLiveAfterInsn(dstOpnd, insn)) { + prevInsn->SetMOP(MOP_wldrb); + dstOpnd.SetRegisterNumber(regOpnd0.GetRegisterNumber()); + bb.RemoveInsn(insn); + } +#endif + } + } else if (thisMop == MOP_xuxth32) { + if (prevInsn->GetMachineOpcode() == MOP_xmovri32 || prevInsn->GetMachineOpcode() == MOP_xmovri64) { + auto &dstMovOpnd = static_cast(prevInsn->GetOperand(kInsnFirstOpnd)); + if (!IsSameRegisterOperation(dstMovOpnd, regOpnd1, regOpnd0)) { + return; + } + Operand &opnd = prevInsn->GetOperand(kInsnSecondOpnd); + if (opnd.IsIntImmediate()) { + auto &immOpnd = static_cast(opnd); + int64 value = immOpnd.GetValue(); + if (!(static_cast(value) & 0xFFFFFFFFFFFF0000)) { + bb.RemoveInsn(insn); + } + } + } else if (prevInsn->GetMachineOpcode() == MOP_wldrh) { + auto &dstOpnd = static_cast(prevInsn->GetOperand(kInsnFirstOpnd)); + if (dstOpnd.GetRegisterNumber() != regOpnd1.GetRegisterNumber()) { + return; + } + bb.RemoveInsn(insn); +#if 0 + } else if (prevInsn->GetMachineOpcode() == MOP_wldr) { + auto &dstOpnd = static_cast(prevInsn->GetOperand(kInsnFirstOpnd)); + if (dstOpnd.GetRegisterNumber() == regOpnd1.GetRegisterNumber() && + !IfOperandIsLiveAfterInsn(dstOpnd, insn)) { + prevInsn->SetMOP(MOP_wldrh); + dstOpnd.SetRegisterNumber(regOpnd0.GetRegisterNumber()); + bb.RemoveInsn(insn); + } +#endif + } + } else { + /* this_mop == MOP_xuxtw64 */ + if (prevInsn->GetMachineOpcode() == MOP_xmovri32 || prevInsn->GetMachineOpcode() == MOP_wldrsb || + prevInsn->GetMachineOpcode() == MOP_wldrb || prevInsn->GetMachineOpcode() == 
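+      /*
+       * e.g.  ldrb w1, [x0]; uxtb w1, w1  ->  ldrb w1, [x0]
+       * and the ldr+uxt merge above narrows  ldr w1, [x0]; uxtb w2, w1  ->  ldrb w2, [x0]
+       * when the wider value is no longer live.
+       */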
MOP_wldrsh || + prevInsn->GetMachineOpcode() == MOP_wldrh || prevInsn->GetMachineOpcode() == MOP_wldr) { + auto &dstOpnd = static_cast(prevInsn->GetOperand(kInsnFirstOpnd)); + if (!IsSameRegisterOperation(dstOpnd, regOpnd1, regOpnd0)) { + return; + } + /* 32-bit ldr does zero-extension by default, so this conversion can be skipped */ + bb.RemoveInsn(insn); + } + } +} + +void FmovRegAArch64::Run(BB &bb, Insn &insn) { + MOperator thisMop = insn.GetMachineOpcode(); + Insn *nextInsn = insn.GetNext(); + if (&insn == bb.GetFirstInsn()) { + return; + } + Insn *prevInsn = insn.GetPrev(); + MOperator prevMop = prevInsn->GetMachineOpcode(); + MOperator newMop; + uint32 doOpt = 0; + if (prevMop == MOP_xvmovrv && thisMop == MOP_xvmovrv) { + doOpt = k32BitSize; + newMop = MOP_wmovrr; + } else if (prevMop == MOP_xvmovrd && thisMop == MOP_xvmovrd) { + doOpt = k64BitSize; + newMop = MOP_xmovrr; + } + if (doOpt == 0) { + return; + } + auto &curSrcRegOpnd = static_cast(insn.GetOperand(kInsnSecondOpnd)); + auto &prevSrcRegOpnd = static_cast(prevInsn->GetOperand(kInsnSecondOpnd)); + /* same src freg */ + if (curSrcRegOpnd.GetRegisterNumber() != prevSrcRegOpnd.GetRegisterNumber()) { + return; + } + auto &curDstRegOpnd = static_cast(insn.GetOperand(kInsnFirstOpnd)); + regno_t curDstReg = curDstRegOpnd.GetRegisterNumber(); + CG *cg = cgFunc.GetCG(); + /* optimize case 1 */ + auto &prevDstRegOpnd = static_cast(prevInsn->GetOperand(kInsnFirstOpnd)); + regno_t prevDstReg = prevDstRegOpnd.GetRegisterNumber(); + auto *aarch64CGFunc = static_cast(&cgFunc); + RegOperand &dst = + aarch64CGFunc->GetOrCreatePhysicalRegisterOperand(static_cast(curDstReg), doOpt, kRegTyInt); + RegOperand &src = + aarch64CGFunc->GetOrCreatePhysicalRegisterOperand(static_cast(prevDstReg), doOpt, kRegTyInt); + Insn &newInsn = cg->BuildInstruction(newMop, dst, src); + bb.InsertInsnBefore(insn, newInsn); + bb.RemoveInsn(insn); + if (nextInsn == nullptr) { + return; + } + RegOperand &newOpnd = + aarch64CGFunc->GetOrCreatePhysicalRegisterOperand(static_cast(prevDstReg), doOpt, kRegTyInt); + uint32 opndNum = nextInsn->GetOperandSize(); + for (uint32 opndIdx = 0; opndIdx < opndNum; ++opndIdx) { + Operand &opnd = nextInsn->GetOperand(opndIdx); + if (opnd.IsMemoryAccessOperand()) { + auto &memOpnd = static_cast(opnd); + Operand *base = memOpnd.GetBaseRegister(); + if (base != nullptr) { + if (base->IsRegister()) { + auto *reg = static_cast(base); + if (reg->GetRegisterNumber() == curDstReg) { + memOpnd.SetBaseRegister(newOpnd); + } + } + } + Operand *offset = memOpnd.GetIndexRegister(); + if (offset != nullptr) { + if (offset->IsRegister()) { + auto *reg = static_cast(offset); + if (reg->GetRegisterNumber() == curDstReg) { + memOpnd.SetIndexRegister(newOpnd); + } + } + } + } else if (opnd.IsRegister()) { + /* Check if it is a source operand. 
*/ + const AArch64MD *md = &AArch64CG::kMd[static_cast(nextInsn)->GetMachineOpcode()]; + auto *regProp = static_cast(md->operand[opndIdx]); + if (regProp->IsUse()) { + auto ® = static_cast(opnd); + if (reg.GetRegisterNumber() == curDstReg) { + nextInsn->SetOperand(opndIdx, newOpnd); + } + } + } + } +} + +void CbnzToCbzAArch64::Run(BB &bb, Insn &insn) { + MOperator thisMop = insn.GetMachineOpcode(); + /* reg has to be R0, since return value is in R0 */ + auto ®Opnd0 = static_cast(insn.GetOperand(kInsnFirstOpnd)); + if (regOpnd0.GetRegisterNumber() != R0) { + return; + } + BB *nextBB = bb.GetNext(); + /* Make sure nextBB can only be reached by bb */ + if (nextBB->GetPreds().size() > 1 || nextBB->GetEhPreds().empty()) { + return; + } + BB *targetBB = nullptr; + auto it = bb.GetSuccsBegin(); + if (*it == nextBB) { + ++it; + } + targetBB = *it; + /* Make sure when nextBB is empty, targetBB is fallthru of bb. */ + if (targetBB != nextBB->GetNext()) { + return; + } + /* Is nextBB branch to the return-bb? */ + if (nextBB->GetSuccs().size() != 1) { + return; + } + BB *nextBBTarget = *(nextBB->GetSuccsBegin()); + if (nextBBTarget->GetKind() != BB::kBBReturn) { + return; + } + /* Next insn should be a mov R0 = 0 */ + Insn *movInsn = nextBB->GetFirstMachineInsn(); + if (movInsn == nullptr) { + return; + } + MOperator movInsnMop = movInsn->GetMachineOpcode(); + if (movInsnMop != MOP_xmovri32 && movInsnMop != MOP_xmovri64) { + return; + } + auto &movDest = static_cast(movInsn->GetOperand(kInsnFirstOpnd)); + if (movDest.GetRegisterNumber() != R0) { + return; + } + auto &movImm = static_cast(movInsn->GetOperand(kInsnSecondOpnd)); + if (movImm.GetValue() != 0) { + return; + } + Insn *brInsn = movInsn->GetNextMachineInsn(); + if (brInsn == nullptr) { + return; + } + if (brInsn->GetMachineOpcode() != MOP_xuncond) { + return; + } + /* Control flow looks nice, instruction looks nice */ + Operand &brTarget = brInsn->GetOperand(kInsnFirstOpnd); + insn.SetOperand(kInsnSecondOpnd, brTarget); + if (thisMop == MOP_wcbnz) { + insn.SetMOP(MOP_wcbz); + } else { + insn.SetMOP(MOP_xcbz); + } + nextBB->RemoveInsn(*movInsn); + nextBB->RemoveInsn(*brInsn); + /* nextBB is now a fallthru bb, not a goto bb */ + nextBB->SetKind(BB::kBBFallthru); + /* + * fix control flow, we have bb, nextBB, targetBB, nextBB_target + * connect bb -> nextBB_target erase targetBB + */ + it = bb.GetSuccsBegin(); + CHECK_FATAL(it != bb.GetSuccsEnd(), "succs is empty."); + if (*it == targetBB) { + bb.EraseSuccs(it); + bb.PushFrontSuccs(*nextBBTarget); + } else { + ++it; + bb.EraseSuccs(it); + bb.PushBackSuccs(*nextBBTarget); + } + for (auto targetBBIt = targetBB->GetPredsBegin(); targetBBIt != targetBB->GetPredsEnd(); ++targetBBIt) { + if (*targetBBIt == &bb) { + targetBB->ErasePreds(targetBBIt); + break; + } + } + for (auto nextIt = nextBBTarget->GetPredsBegin(); nextIt != nextBBTarget->GetPredsEnd(); ++nextIt) { + if (*nextIt == nextBB) { + nextBBTarget->ErasePreds(nextIt); + break; + } + } + nextBBTarget->PushBackPreds(bb); + + /* nextBB has no target, originally just branch target */ + nextBB->EraseSuccs(nextBB->GetSuccsBegin()); + ASSERT(nextBB->GetSuccs().empty(), "peep: branch target incorrect"); + /* Now make nextBB fallthru to targetBB */ + nextBB->PushFrontSuccs(*targetBB); + targetBB->PushBackPreds(*nextBB); +} + +void CsetCbzToBeqOptAArch64::Run(BB &bb, Insn &insn) { + Insn *insn1 = insn.GetPreviousMachineInsn(); + if (insn1 == nullptr) { + return; + } + /* prevInsn must be "cset" insn */ + MOperator opCode1 = insn1->GetMachineOpcode(); 
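+  /*
+   * A cset followed by a cbz/cbnz on its result collapses into one conditional branch, e.g.
+   *   cset w0, NE; cbz w0, .L   ->   beq .L
+   * provided w0 is not live after the branch (checked below).
+   */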
+ if (opCode1 != MOP_xcsetrc && opCode1 != MOP_wcsetrc) { + return; + } + + auto &tmpRegOp1 = static_cast(insn1->GetOperand(kInsnFirstOpnd)); + regno_t baseRegNO1 = tmpRegOp1.GetRegisterNumber(); + auto &tmpRegOp2 = static_cast(insn.GetOperand(kInsnFirstOpnd)); + regno_t baseRegNO2 = tmpRegOp2.GetRegisterNumber(); + if (baseRegNO1 != baseRegNO2) { + return; + } + /* If the reg will be used later, we shouldn't optimize the cset insn here */ + if (IfOperandIsLiveAfterInsn(tmpRegOp2, insn)) { + return; + } + MOperator opCode = insn.GetMachineOpcode(); + bool reverse = (opCode == MOP_xcbz || opCode == MOP_wcbz); + Operand &rflag = static_cast(&cgFunc)->GetOrCreateRflag(); + auto &label = static_cast(insn.GetOperand(kInsnSecondOpnd)); + auto &cond = static_cast(insn1->GetOperand(kInsnSecondOpnd)); + MOperator jmpOperator = SelectMOperator(cond.GetCode(), reverse); + Insn &newInsn = cgFunc.GetCG()->BuildInstruction(jmpOperator, rflag, label); + bb.RemoveInsn(*insn1); + bb.ReplaceInsn(insn, newInsn); +} + +MOperator CsetCbzToBeqOptAArch64::SelectMOperator(AArch64CC_t condCode, bool inverse) const { + switch (condCode) { + case CC_NE: + return inverse ? MOP_beq : MOP_bne; + case CC_EQ: + return inverse ? MOP_bne : MOP_beq; + case CC_MI: + return inverse ? MOP_bpl : MOP_bmi; + case CC_PL: + return inverse ? MOP_bmi : MOP_bpl; + case CC_VS: + return inverse ? MOP_bvc : MOP_bvs; + case CC_VC: + return inverse ? MOP_bvs : MOP_bvc; + case CC_HI: + return inverse ? MOP_bls : MOP_bhi; + case CC_LS: + return inverse ? MOP_bhi : MOP_bls; + case CC_GE: + return inverse ? MOP_blt : MOP_bge; + case CC_LT: + return inverse ? MOP_bge : MOP_blt; + case CC_HS: + return inverse ? MOP_blo : MOP_bhs; + case CC_LO: + return inverse ? MOP_bhs : MOP_blo; + case CC_LE: + return inverse ? MOP_bgt : MOP_ble; + case CC_GT: + return inverse ? 
MOP_ble : MOP_bgt; + default: + return MOP_undef; + } +} + +void ContiLDRorSTRToSameMEMAArch64::Run(BB &bb, Insn &insn) { + Insn *prevInsn = insn.GetPrev(); + while (prevInsn != nullptr && !prevInsn->GetMachineOpcode() && prevInsn != bb.GetFirstInsn()) { + prevInsn = prevInsn->GetPrev(); + } + if (!insn.IsMachineInstruction() || prevInsn == nullptr) { + return; + } + bool loadAfterStore = false; + bool loadAfterLoad = false; + MOperator thisMop = insn.GetMachineOpcode(); + MOperator prevMop = prevInsn->GetMachineOpcode(); + /* + * store regB, RegC, offset + * load regA, RegC, offset + */ + if ((thisMop == MOP_xldr && prevMop == MOP_xstr) || (thisMop == MOP_wldr && prevMop == MOP_wstr) || + (thisMop == MOP_dldr && prevMop == MOP_dstr) || (thisMop == MOP_sldr && prevMop == MOP_sstr)) { + loadAfterStore = true; + } + /* + * load regA, RegC, offset + * load regB, RegC, offset + */ + if ((thisMop == MOP_xldr || thisMop == MOP_wldr || thisMop == MOP_dldr || thisMop == MOP_sldr) && + prevMop == thisMop) { + loadAfterLoad = true; + } + if (!loadAfterStore && !loadAfterLoad) { + return; + } + ASSERT(insn.GetOperand(kInsnSecondOpnd).IsMemoryAccessOperand(), "expects mem operands"); + ASSERT(prevInsn->GetOperand(kInsnSecondOpnd).IsMemoryAccessOperand(), "expects mem operands"); + + auto &memOpnd1 = static_cast(insn.GetOperand(kInsnSecondOpnd)); + AArch64MemOperand::AArch64AddressingMode addrMode1 = memOpnd1.GetAddrMode(); + if (addrMode1 != AArch64MemOperand::kAddrModeBOi || (!memOpnd1.IsIntactIndexed())) { + return; + } + + auto *base1 = static_cast(memOpnd1.GetBaseRegister()); + ASSERT(base1 == nullptr || !base1->IsVirtualRegister(), "physical register has not been allocated?"); + AArch64OfstOperand *offset1 = memOpnd1.GetOffsetImmediate(); + + auto &memOpnd2 = static_cast(prevInsn->GetOperand(kInsnSecondOpnd)); + AArch64MemOperand::AArch64AddressingMode addrMode2 = memOpnd2.GetAddrMode(); + if (addrMode2 != AArch64MemOperand::kAddrModeBOi || (!memOpnd2.IsIntactIndexed())) { + return; + } + + auto *base2 = static_cast(memOpnd2.GetBaseRegister()); + ASSERT(base2 == nullptr || !base2->IsVirtualRegister(), "physical register has not been allocated?"); + AArch64OfstOperand *offset2 = memOpnd2.GetOffsetImmediate(); + + if (base1 == nullptr || base2 == nullptr || offset1 == nullptr || offset2 == nullptr) { + return; + } + + auto ®1 = static_cast(insn.GetOperand(kInsnFirstOpnd)); + auto ®2 = static_cast(prevInsn->GetOperand(kInsnFirstOpnd)); + int offsetVal1 = offset1->GetOffsetValue(); + int offsetVal2 = offset2->GetOffsetValue(); + if (base1->GetRegisterNumber() != base2->GetRegisterNumber() || + reg1.GetRegisterType() != reg2.GetRegisterType() || reg1.GetSize() != reg2.GetSize() || + offsetVal1 != offsetVal2) { + return; + } + if (loadAfterStore && reg1.GetRegisterNumber() != reg2.GetRegisterNumber()) { + /* replace it with mov */ + MOperator newOp = MOP_wmovrr; + if (reg1.GetRegisterType() == kRegTyInt) { + newOp = (reg1.GetSize() <= k32BitSize) ? MOP_wmovrr : MOP_xmovrr; + } else if (reg1.GetRegisterType() == kRegTyFloat) { + newOp = (reg1.GetSize() <= k32BitSize) ? 
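+    /*
+     * Store-to-load forwarding, e.g.
+     *   str x1, [sp, #8]; ldr x2, [sp, #8]  ->  str x1, [sp, #8]; mov x2, x1
+     * A reload of the same register from the same address is deleted outright.
+     */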
MOP_xvmovs : MOP_xvmovd; + } + Insn *nextInsn = insn.GetNext(); + while (nextInsn != nullptr && !nextInsn->GetMachineOpcode() && nextInsn != bb.GetLastInsn()) { + nextInsn = nextInsn->GetNext(); + } + bool moveSameReg = false; + if (nextInsn && nextInsn->GetIsSpill() && !IfOperandIsLiveAfterInsn(reg1, *nextInsn)) { + MOperator nextMop = nextInsn->GetMachineOpcode(); + if ((thisMop == MOP_xldr && nextMop == MOP_xstr) || (thisMop == MOP_wldr && nextMop == MOP_wstr) || + (thisMop == MOP_dldr && nextMop == MOP_dstr) || (thisMop == MOP_sldr && nextMop == MOP_sstr)) { + nextInsn->Insn::SetOperand(kInsnFirstOpnd, reg2); + moveSameReg = true; + } + } + if (moveSameReg == false) { + CG *cg = cgFunc.GetCG(); + bb.InsertInsnAfter(*prevInsn, cg->BuildInstruction(newOp, reg1, reg2)); + } + bb.RemoveInsn(insn); + } else if (reg1.GetRegisterNumber() == reg2.GetRegisterNumber() && + base1->GetRegisterNumber() != reg2.GetRegisterNumber()) { + bb.RemoveInsn(insn); + } +} + +void RemoveIncDecRefAArch64::Run(BB &bb, Insn &insn) { + ASSERT(insn.GetMachineOpcode() == MOP_xbl, "expect a xbl MOP at RemoveIncDecRef optimization"); + auto &target = static_cast(insn.GetOperand(kInsnFirstOpnd)); + Insn *insnMov = insn.GetPreviousMachineInsn(); + if (insnMov == nullptr) { + return; + } + MOperator mopMov = insnMov->GetMachineOpcode(); + if (target.GetName() == "MCC_IncDecRef_NaiveRCFast" && mopMov == MOP_xmovrr && + static_cast(insnMov->GetOperand(kInsnFirstOpnd)).GetRegisterNumber() == R1 && + static_cast(insnMov->GetOperand(kInsnSecondOpnd)).GetRegisterNumber() == R0) { + bb.RemoveInsn(*insnMov); + bb.RemoveInsn(insn); + bb.SetKind(BB::kBBFallthru); + } +} + +#ifdef USE_32BIT_REF +constexpr uint32 kRefSize = 32; +#else +constexpr uint32 kRefSize = 64; +#endif + +void InlineReadBarriersAArch64::Run(BB &bb, Insn &insn) { + if (!CGOptions::IsGCOnly()) { /* Inline read barriers only enabled for GCONLY. */ + return; + } + const std::string &barrierName = GetReadBarrierName(insn); + CG *cg = cgFunc.GetCG(); + if (barrierName == kMccDummy) { + /* remove dummy call. */ + bb.RemoveInsn(insn); + } else { + /* replace barrier function call with load instruction. */ + bool isVolatile = (barrierName == kMccLoadRefV || barrierName == kMccLoadRefVS); + bool isStatic = (barrierName == kMccLoadRefS || barrierName == kMccLoadRefVS); + /* refSize is 32 if USE_32BIT_REF defined, otherwise 64. */ + const uint32 refSize = kRefSize; + auto *aarch64CGFunc = static_cast(&cgFunc); + MOperator loadOp = GetLoadOperator(refSize, isVolatile); + RegOperand ®Op = aarch64CGFunc->GetOrCreatePhysicalRegisterOperand(R0, refSize, kRegTyInt); + AArch64reg addrReg = isStatic ? R0 : R1; + MemOperand &addr = aarch64CGFunc->CreateMemOpnd(addrReg, 0, refSize); + Insn &loadInsn = cg->BuildInstruction(loadOp, regOp, addr); + bb.ReplaceInsn(insn, loadInsn); + } + bb.SetKind(BB::kBBFallthru); + bool isTailCall = (insn.GetMachineOpcode() == MOP_tail_call_opt_xbl); + if (isTailCall) { + /* add 'ret' instruction for tail call optimized load barrier. 
*/ + Insn &retInsn = cg->BuildInstruction(MOP_xret); + bb.AppendInsn(retInsn); + bb.SetKind(BB::kBBReturn); + } +} + +void ReplaceDivToMultiAArch64::Run(BB &bb, Insn &insn) { + Insn *prevInsn = insn.GetPreviousMachineInsn(); + if (prevInsn == nullptr) { + return; + } + Insn *prePrevInsn = prevInsn->GetPreviousMachineInsn(); + auto &sdivOpnd1 = static_cast(insn.GetOperand(kInsnSecondOpnd)); + auto &sdivOpnd2 = static_cast(insn.GetOperand(kInsnThirdOpnd)); + if (sdivOpnd1.GetRegisterNumber() == sdivOpnd2.GetRegisterNumber() || sdivOpnd1.GetRegisterNumber() == R16 || + sdivOpnd2.GetRegisterNumber() == R16 || prePrevInsn == nullptr) { + return; + } + MOperator prevMop = prevInsn->GetMachineOpcode(); + MOperator prePrevMop = prePrevInsn->GetMachineOpcode(); + if (prevMop && (prevMop == MOP_wmovkri16) && prePrevMop && (prePrevMop == MOP_xmovri32)) { + /* Check if dest operand of insn is idential with register of prevInsn and prePrevInsn. */ + if ((&(prevInsn->GetOperand(kInsnFirstOpnd)) != &sdivOpnd2) || + (&(prePrevInsn->GetOperand(kInsnFirstOpnd)) != &sdivOpnd2)) { + return; + } + auto &prevLsl = static_cast(prevInsn->GetOperand(kInsnThirdOpnd)); + if (prevLsl.GetShiftAmount() != k16BitSize) { + return; + } + auto &prevImmOpnd = static_cast(prevInsn->GetOperand(kInsnSecondOpnd)); + auto &prePrevImmOpnd = static_cast(prePrevInsn->GetOperand(kInsnSecondOpnd)); + /* + * expect the immediate value of first mov is 0x086A0 which matches 0x186A0 + * because 0x10000 is ignored in 32 bits register + */ + if ((prevImmOpnd.GetValue() != 1) || (prePrevImmOpnd.GetValue() != 34464)) { + return; + } + auto *aarch64CGFunc = static_cast(&cgFunc); + CG *cg = cgFunc.GetCG(); + /* mov w16, #0x588f */ + RegOperand &tempOpnd = aarch64CGFunc->GetOrCreatePhysicalRegisterOperand(static_cast(R16), + k64BitSize, kRegTyInt); + /* create a immedate operand with this specific value */ + ImmOperand &multiplierLow = aarch64CGFunc->CreateImmOperand(0x588f, k32BitSize, false); + Insn &multiplierLowInsn = cg->BuildInstruction(MOP_xmovri32, tempOpnd, multiplierLow); + bb.InsertInsnBefore(*prePrevInsn, multiplierLowInsn); + + /* + * movk w16, #0x4f8b, LSL #16 + * create a immedate operand with this specific value + */ + ImmOperand &multiplierHigh = aarch64CGFunc->CreateImmOperand(0x4f8b, k32BitSize, false); + LogicalShiftLeftOperand *multiplierHighLsl = aarch64CGFunc->GetLogicalShiftLeftOperand(k16BitSize, true); + Insn &multiplierHighInsn = + cg->BuildInstruction(MOP_wmovkri16, tempOpnd, multiplierHigh, *multiplierHighLsl); + bb.InsertInsnBefore(*prePrevInsn, multiplierHighInsn); + + /* smull x16, w0, w16 */ + Insn &newSmullInsn = + cg->BuildInstruction(MOP_xsmullrrr, tempOpnd, sdivOpnd1, tempOpnd); + bb.InsertInsnBefore(*prePrevInsn, newSmullInsn); + + /* asr x16, x16, #32 */ + ImmOperand &dstLsrImmHigh = aarch64CGFunc->CreateImmOperand(k32BitSize, k32BitSize, false); + Insn &dstLsrInsnHigh = + cg->BuildInstruction(MOP_xasrrri6, tempOpnd, tempOpnd, dstLsrImmHigh); + bb.InsertInsnBefore(*prePrevInsn, dstLsrInsnHigh); + + /* add x16, x16, w0, SXTW */ + Operand &sxtw = aarch64CGFunc->CreateExtendShiftOperand(ExtendShiftOperand::kSXTW, 0, 3); + Insn &addInsn = + cg->BuildInstruction(MOP_xxwaddrrre, tempOpnd, tempOpnd, sdivOpnd1, sxtw); + bb.InsertInsnBefore(*prePrevInsn, addInsn); + + /* asr x16, x16, #17 */ + ImmOperand &dstLsrImmChange = aarch64CGFunc->CreateImmOperand(17, k32BitSize, false); + Insn &dstLsrInsnChange = + cg->BuildInstruction(MOP_xasrrri6, tempOpnd, tempOpnd, dstLsrImmChange); + bb.InsertInsnBefore(*prePrevInsn, 
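+    /*
+     * The guarded constant is  mov w, #0x86A0; movk w, #1, LSL #16,  i.e. 0x186A0 = 100000.
+     * The smull/asr/add sequence built here computes q = (n * 0x14F8B588F) >> 49, where
+     * 0x14F8B588F = ceil(2^49 / 100000); the final add of the sign bit (LSR #31) rounds
+     * the result toward zero, matching sdiv semantics.
+     */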
dstLsrInsnChange); + + /* add x2, x16, x0, LSR #31 */ + auto &sdivOpnd0 = static_cast(insn.GetOperand(kInsnFirstOpnd)); + regno_t sdivOpnd0RegNO = sdivOpnd0.GetRegisterNumber(); + RegOperand &extendSdivOpnd0 = + aarch64CGFunc->GetOrCreatePhysicalRegisterOperand(static_cast(sdivOpnd0RegNO), + k64BitSize, kRegTyInt); + + regno_t sdivOpnd1RegNum = sdivOpnd1.GetRegisterNumber(); + RegOperand &extendSdivOpnd1 = + aarch64CGFunc->GetOrCreatePhysicalRegisterOperand(static_cast(sdivOpnd1RegNum), + k64BitSize, kRegTyInt); + /* shift bit amount is thirty-one at this insn */ + BitShiftOperand &addLsrOpnd = aarch64CGFunc->CreateBitShiftOperand(BitShiftOperand::kLSR, 31, 6); + Insn &addLsrInsn = cg->BuildInstruction(MOP_xaddrrrs, extendSdivOpnd0, tempOpnd, + extendSdivOpnd1, addLsrOpnd); + bb.InsertInsnBefore(*prePrevInsn, addLsrInsn); + + /* + * remove insns + * Check if x1 is used after sdiv insn, and if it is in live-out. + */ + if (sdivOpnd2.GetRegisterNumber() != sdivOpnd0.GetRegisterNumber()) { + if (IfOperandIsLiveAfterInsn(sdivOpnd2, insn)) { + /* Only remove div instruction. */ + bb.RemoveInsn(insn); + return; + } + } + + bb.RemoveInsn(*prePrevInsn); + bb.RemoveInsn(*prevInsn); + bb.RemoveInsn(insn); + } +} + +void AndCmpBranchesToCsetAArch64::Run(BB &bb, Insn &insn) { + /* prevInsn must be "cmp" insn */ + Insn *prevInsn = insn.GetPreviousMachineInsn(); + if (prevInsn == nullptr || + (prevInsn->GetMachineOpcode() != MOP_wcmpri && prevInsn->GetMachineOpcode() != MOP_xcmpri)) { + return; + } + /* prevPrevInsn must be "and" insn */ + Insn *prevPrevInsn = prevInsn->GetPreviousMachineInsn(); + if (prevPrevInsn == nullptr || + (prevPrevInsn->GetMachineOpcode() != MOP_wandrri12 && prevPrevInsn->GetMachineOpcode() != MOP_xandrri13)) { + return; + } + + auto &csetCond = static_cast(insn.GetOperand(kInsnSecondOpnd)); + auto &cmpImm = static_cast(prevInsn->GetOperand(kInsnThirdOpnd)); + int64 cmpImmVal = cmpImm.GetValue(); + auto &andImm = static_cast(prevPrevInsn->GetOperand(kInsnThirdOpnd)); + int64 andImmVal = andImm.GetValue(); + if ((csetCond.GetCode() == CC_EQ && cmpImmVal == andImmVal) || + (csetCond.GetCode() == CC_NE && cmpImmVal == 0)) { + /* if flag_reg of "cmp" is live later, we can't remove cmp insn. */ + auto &flagReg = static_cast(prevInsn->GetOperand(kInsnFirstOpnd)); + if (IfOperandIsLiveAfterInsn(flagReg, insn)) { + return; + } + + auto &csetReg = static_cast(insn.GetOperand(kInsnFirstOpnd)); + auto &prevInsnSecondReg = prevInsn->GetOperand(kInsnSecondOpnd); + bool isRegDiff = !RegOperand::IsSameRegNO(csetReg, prevInsnSecondReg); + if (isRegDiff && IfOperandIsLiveAfterInsn(static_cast(prevInsnSecondReg), insn)) { + return; + } + if (andImmVal == 1) { + if (!RegOperand::IsSameRegNO(prevInsnSecondReg, prevPrevInsn->GetOperand(kInsnFirstOpnd))) { + return; + } + /* save the "and" insn only. */ + bb.RemoveInsn(insn); + bb.RemoveInsn(*prevInsn); + if (isRegDiff) { + prevPrevInsn->Insn::SetOperand(kInsnFirstOpnd, csetReg); + } + } else { + if (!RegOperand::IsSameReg(prevInsnSecondReg, prevPrevInsn->GetOperand(kInsnFirstOpnd)) || + !RegOperand::IsSameReg(prevInsnSecondReg, prevPrevInsn->GetOperand(kInsnSecondOpnd))) { + return; + } + + /* andImmVal is n power of 2 */ + int n = logValueAtBase2(andImmVal); + if (n < 0) { + return; + } + + /* create ubfx insn */ + MOperator ubfxOp = (csetReg.GetSize() <= k32BitSize) ? 
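+      /*
+       * With a single-bit mask 2^n, the and/cmp/cset triple becomes one bit extract, e.g.
+       *   and w0, w0, #4; cmp w0, #4; cset w2, EQ   ->   ubfx w2, w0, #2, #1
+       */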
MOP_wubfxrri5i5 : MOP_xubfxrri6i6; + if (ubfxOp == MOP_wubfxrri5i5 && n >= k32BitSize) { + return; + } + auto &dstReg = static_cast(csetReg); + auto &srcReg = static_cast(prevInsnSecondReg); + CG *cg = cgFunc.GetCG(); + auto *aarch64CGFunc = static_cast(&cgFunc); + ImmOperand &bitPos = aarch64CGFunc->CreateImmOperand(n, k8BitSize, false); + ImmOperand &bitSize = aarch64CGFunc->CreateImmOperand(1, k8BitSize, false); + Insn &ubfxInsn = cg->BuildInstruction(ubfxOp, dstReg, srcReg, bitPos, bitSize); + bb.InsertInsnBefore(*prevPrevInsn, ubfxInsn); + bb.RemoveInsn(insn); + bb.RemoveInsn(*prevInsn); + bb.RemoveInsn(*prevPrevInsn); + } + } +} + +void AndCmpBranchesToTstAArch64::Run(BB &bb, Insn &insn) { + /* nextInsn must be "cmp" insn */ + Insn *nextInsn = insn.GetNext(); + if (nextInsn == nullptr || + (nextInsn->GetMachineOpcode() != MOP_wcmpri && nextInsn->GetMachineOpcode() != MOP_xcmpri)) { + return; + } + /* nextNextInsn must be "beq" or "bne" insn */ + Insn *nextNextInsn = nextInsn->GetNext(); + if (nextNextInsn == nullptr || + (nextNextInsn->GetMachineOpcode() != MOP_beq && nextNextInsn->GetMachineOpcode() != MOP_bne)) { + return; + } + auto &andRegOp = static_cast(insn.GetOperand(kInsnFirstOpnd)); + regno_t andRegNO1 = andRegOp.GetRegisterNumber(); + auto &cmpRegOp2 = static_cast(nextInsn->GetOperand(kInsnSecondOpnd)); + regno_t cmpRegNO2 = cmpRegOp2.GetRegisterNumber(); + if (andRegNO1 != cmpRegNO2) { + return; + } + /* If the reg will be used later, we shouldn't optimize the and insn here */ + if (IfOperandIsLiveAfterInsn(andRegOp, *nextInsn)) { + return; + } + Operand &immOpnd = nextInsn->GetOperand(kInsnThirdOpnd); + ASSERT(immOpnd.IsIntImmediate(), "expects ImmOperand"); + auto &defConst = static_cast(immOpnd); + int64 defConstValue = defConst.GetValue(); + if (defConstValue != 0) { + return; + } + /* build tst insn */ + Operand &andOpnd3 = insn.GetOperand(kInsnThirdOpnd); + auto &andRegOp2 = static_cast(insn.GetOperand(kInsnSecondOpnd)); + MOperator newOp = MOP_undef; + if (andOpnd3.IsRegister()) { + newOp = (andRegOp2.GetSize() <= k32BitSize) ? MOP_wtstrr : MOP_xtstrr; + } else { + newOp = (andRegOp2.GetSize() <= k32BitSize) ? MOP_wtstri32 : MOP_xtstri64; + } + Operand &rflag = static_cast(&cgFunc)->GetOrCreateRflag(); + Insn &newInsn = cgFunc.GetCG()->BuildInstruction(newOp, rflag, andRegOp2, andOpnd3); + bb.InsertInsnAfter(*nextInsn, newInsn); + bb.RemoveInsn(insn); + bb.RemoveInsn(*nextInsn); +} + +void AndCbzBranchesToTstAArch64::Run(BB &bb, Insn &insn) { + /* nextInsn must be "cbz" or "cbnz" insn */ + Insn *nextInsn = insn.GetNext(); + if (nextInsn == nullptr || + (nextInsn->GetMachineOpcode() != MOP_wcbz && nextInsn->GetMachineOpcode() != MOP_xcbz)) { + return; + } + auto &andRegOp = static_cast(insn.GetOperand(kInsnFirstOpnd)); + regno_t andRegNO1 = andRegOp.GetRegisterNumber(); + auto &cbzRegOp2 = static_cast(nextInsn->GetOperand(kInsnFirstOpnd)); + regno_t cbzRegNO2 = cbzRegOp2.GetRegisterNumber(); + if (andRegNO1 != cbzRegNO2) { + return; + } + /* If the reg will be used later, we shouldn't optimize the and insn here */ + if (IfOperandIsLiveAfterInsn(andRegOp, *nextInsn)) { + return; + } + /* build tst insn */ + Operand &andOpnd3 = insn.GetOperand(kInsnThirdOpnd); + auto &andRegOp2 = static_cast(insn.GetOperand(kInsnSecondOpnd)); + MOperator newTstOp = MOP_undef; + if (andOpnd3.IsRegister()) { + newTstOp = (andRegOp2.GetSize() <= k32BitSize) ? MOP_wtstrr : MOP_xtstrr; + } else { + newTstOp = (andRegOp2.GetSize() <= k32BitSize) ? 
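+    /*
+     * An and whose result is only tested against zero becomes tst, e.g.
+     *   and w1, w0, #0xff; cmp w1, #0; bne .L   ->   tst w0, #0xff; bne .L
+     *   and w1, w0, w2;    cbz w1, .L           ->   tst w0, w2;    beq .L
+     */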
MOP_wtstri32 : MOP_xtstri64; + } + Operand &rflag = static_cast(&cgFunc)->GetOrCreateRflag(); + Insn &newInsnTst = cgFunc.GetCG()->BuildInstruction(newTstOp, rflag, andRegOp2, andOpnd3); + /* build beq insn */ + MOperator opCode = nextInsn->GetMachineOpcode(); + bool reverse = (opCode == MOP_xcbz || opCode == MOP_wcbz); + auto &label = static_cast(nextInsn->GetOperand(kInsnSecondOpnd)); + MOperator jmpOperator = reverse ? MOP_beq : MOP_bne; + Insn &newInsnJmp = cgFunc.GetCG()->BuildInstruction(jmpOperator, rflag, label); + bb.ReplaceInsn(insn, newInsnTst); + bb.ReplaceInsn(*nextInsn, newInsnJmp); +} + +void ZeroCmpBranchesAArch64::Run(BB &bb, Insn &insn) { + Insn *prevInsn = insn.GetPreviousMachineInsn(); + if (!insn.IsBranch() || insn.GetOperandSize() <= kInsnSecondOpnd || prevInsn == nullptr) { + return; + } + if (!insn.GetOperand(kInsnSecondOpnd).IsLabel()) { + return; + } + LabelOperand *label = &static_cast(insn.GetOperand(kInsnSecondOpnd)); + RegOperand *regOpnd = nullptr; + RegOperand *reg0 = nullptr; + RegOperand *reg1 = nullptr; + MOperator newOp = MOP_undef; + ImmOperand *imm = nullptr; + switch (prevInsn->GetMachineOpcode()) { + case MOP_wcmpri: + case MOP_xcmpri: { + regOpnd = &static_cast(prevInsn->GetOperand(kInsnSecondOpnd)); + imm = &static_cast(prevInsn->GetOperand(kInsnThirdOpnd)); + if (imm->GetValue() != 0) { + return; + } + if (insn.GetMachineOpcode() == MOP_bge) { + newOp = (regOpnd->GetSize() <= k32BitSize) ? MOP_wtbz : MOP_xtbz; + } else if (insn.GetMachineOpcode() == MOP_blt) { + newOp = (regOpnd->GetSize() <= k32BitSize) ? MOP_wtbnz : MOP_xtbnz; + } else { + return; + } + break; + } + case MOP_wcmprr: + case MOP_xcmprr: { + reg0 = &static_cast(prevInsn->GetOperand(kInsnSecondOpnd)); + reg1 = &static_cast(prevInsn->GetOperand(kInsnThirdOpnd)); + if (!reg0->IsZeroRegister() && !reg1->IsZeroRegister()) { + return; + } + switch (insn.GetMachineOpcode()) { + case MOP_bge: + if (reg1->IsZeroRegister()) { + regOpnd = &static_cast(prevInsn->GetOperand(kInsnSecondOpnd)); + newOp = (regOpnd->GetSize() <= k32BitSize) ? MOP_wtbz : MOP_xtbz; + } else { + return; + } + break; + case MOP_ble: + if (reg0->IsZeroRegister()) { + regOpnd = &static_cast(prevInsn->GetOperand(kInsnThirdOpnd)); + newOp = (regOpnd->GetSize() <= k32BitSize) ? MOP_wtbz : MOP_xtbz; + } else { + return; + } + break; + case MOP_blt: + if (reg1->IsZeroRegister()) { + regOpnd = &static_cast(prevInsn->GetOperand(kInsnSecondOpnd)); + newOp = (regOpnd->GetSize() <= k32BitSize) ? MOP_wtbnz : MOP_xtbnz; + } else { + return; + } + break; + case MOP_bgt: + if (reg0->IsZeroRegister()) { + regOpnd = &static_cast(prevInsn->GetOperand(kInsnThirdOpnd)); + newOp = (regOpnd->GetSize() <= k32BitSize) ? MOP_wtbnz : MOP_xtbnz; + } else { + return; + } + break; + default: + return; + } + break; + } + default: + return; + } + CG *cg = cgFunc.GetCG(); + auto aarch64CGFunc = static_cast(&cgFunc); + ImmOperand &bitp = aarch64CGFunc->CreateImmOperand( + (regOpnd->GetSize() <= k32BitSize) ? 
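+      /*
+       * A signed compare against zero followed by bge/blt only needs the sign bit, e.g.
+       *   cmp w0, #0; blt .L   ->   tbnz w0, #31, .L
+       *   cmp w0, #0; bge .L   ->   tbz  w0, #31, .L
+       */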
(k32BitSize - 1) : (k64BitSize - 1), k8BitSize, false); + bb.InsertInsnAfter( + insn, cg->BuildInstruction(newOp, *static_cast(regOpnd), bitp, *label)); + bb.RemoveInsn(insn); + bb.RemoveInsn(*prevInsn); +} + +void ElimDuplicateExtensionAArch64::Run(BB &bb, Insn &insn) { + (void)bb; + Insn *prevInsn = insn.GetPreviousMachineInsn(); + if (prevInsn == nullptr) { + return; + } + uint32 index; + uint32 upper; + bool is32bits = false; + MOperator *table = nullptr; + MOperator thisMop = insn.GetMachineOpcode(); + switch (thisMop) { + case MOP_xsxtb32: + is32bits = true; + [[clang::fallthrough]]; + case MOP_xsxtb64: + table = sextMopTable; + index = 0; + upper = kSizeOfSextMopTable; + break; + case MOP_xsxth32: + is32bits = true; + [[clang::fallthrough]]; + case MOP_xsxth64: + table = sextMopTable; + index = 2; + upper = kSizeOfSextMopTable; + break; + case MOP_xsxtw64: + table = sextMopTable; + index = 4; + upper = kSizeOfSextMopTable; + break; + case MOP_xuxtb32: + is32bits = true; + table = uextMopTable; + index = 0; + upper = kSizeOfUextMopTable; + break; + case MOP_xuxth32: + is32bits = true; + table = uextMopTable; + index = 1; + upper = kSizeOfUextMopTable; + break; + case MOP_xuxtw64: + table = uextMopTable; + index = 2; + upper = kSizeOfUextMopTable; + break; + default: + CHECK_FATAL(false, "Unexpected mop"); + } + MOperator prevMop = prevInsn->GetMachineOpcode(); + for (uint32 i = index; i < upper; ++i) { + if (prevMop == table[i]) { + Operand &prevDestOpnd = prevInsn->GetOperand(kInsnFirstOpnd); + regno_t dest = static_cast(prevDestOpnd).GetRegisterNumber(); + regno_t src = static_cast(insn.GetOperand(kInsnSecondOpnd)).GetRegisterNumber(); + if (dest == src) { + insn.SetMOP(is32bits ? MOP_wmovrr : MOP_xmovrr); + if (upper == kSizeOfSextMopTable && prevDestOpnd.GetSize() != insn.GetOperand(kInsnFirstOpnd).GetSize()) { + if (is32bits) { + insn.GetOperand(kInsnFirstOpnd).SetSize(k64BitSize); + insn.SetMOP(MOP_xmovrr); + } else { + prevDestOpnd.SetSize(k64BitSize); + prevInsn->SetMOP(prevMop == MOP_xsxtb32 ? MOP_xsxtb64 : MOP_xsxth64); + } + } + } + break; + } + } +} + +/* + * if there is define point of checkInsn->GetOperand(opndIdx) between startInsn and firstInsn + * return define insn. 
else return nullptr + */ +const Insn *CmpCsetAArch64::DefInsnOfOperandInBB(const Insn &startInsn, const Insn &checkInsn, int opndIdx) { + Insn *prevInsn = nullptr; + for (const Insn *insn = &startInsn; insn != nullptr; insn = prevInsn) { + prevInsn = insn->GetPreviousMachineInsn(); + if (!insn->IsMachineInstruction()) { + continue; + } + /* checkInsn.GetOperand(opndIdx) is thought modified conservatively */ + if (insn->IsCall()) { + return insn; + } + const AArch64MD *md = &AArch64CG::kMd[static_cast(insn)->GetMachineOpcode()]; + uint32 opndNum = insn->GetOperandSize(); + for (uint32 i = 0; i < opndNum; ++i) { + Operand &opnd = insn->GetOperand(i); + AArch64OpndProp *regProp = static_cast(md->operand[i]); + if (!regProp->IsDef()) { + continue; + } + /* Operand is base reg of Memory, defined by str */ + if (opnd.IsMemoryAccessOperand()) { + auto &memOpnd = static_cast(opnd); + RegOperand *base = memOpnd.GetBaseRegister(); + ASSERT(base != nullptr, "nullptr check"); + ASSERT(base->IsRegister(), "expects RegOperand"); + if (RegOperand::IsSameRegNO(*base, checkInsn.GetOperand(opndIdx)) && + memOpnd.GetAddrMode() == AArch64MemOperand::kAddrModeBOi && + (memOpnd.IsPostIndexed() || memOpnd.IsPreIndexed())) { + return insn; + } + } else { + ASSERT(opnd.IsRegister(), "expects RegOperand"); + if (RegOperand::IsSameRegNO(checkInsn.GetOperand(opndIdx), opnd)) { + return insn; + } + } + } + } + return nullptr; +} + +bool CmpCsetAArch64::OpndDefByOneValidBit(const Insn &defInsn) { + MOperator defMop = defInsn.GetMachineOpcode(); + switch (defMop) { + case MOP_wcsetrc: + case MOP_xcsetrc: + return true; + case MOP_xmovri32: + case MOP_xmovri64: { + Operand &defOpnd = defInsn.GetOperand(kInsnSecondOpnd); + ASSERT(defOpnd.IsIntImmediate(), "expects ImmOperand"); + auto &defConst = static_cast(defOpnd); + int64 defConstValue = defConst.GetValue(); + return (defConstValue == 0 || defConstValue == 1); + } + case MOP_xmovrr: + case MOP_wmovrr: + return defInsn.GetOperand(kInsnSecondOpnd).IsZeroRegister(); + case MOP_wlsrrri5: + case MOP_xlsrrri6: { + Operand &opnd2 = defInsn.GetOperand(kInsnThirdOpnd); + ASSERT(opnd2.IsIntImmediate(), "expects ImmOperand"); + auto &opndImm = static_cast(opnd2); + int64 shiftBits = opndImm.GetValue(); + return ((defMop == MOP_wlsrrri5 && shiftBits == (k32BitSize - 1)) || + (defMop == MOP_xlsrrri6 && shiftBits == (k64BitSize - 1))); + } + default: + return false; + } +} + +/* + * help function for cmpcset optimize + * if all define points of used opnd in insn has only one valid bit,return true. 
+ * for cmp reg,#0(#1), that is checking for reg + */ +bool CmpCsetAArch64::CheckOpndDefPoints(Insn &checkInsn, int opndIdx) { + /* check current BB */ + const Insn *defInsn = DefInsnOfOperandInBB(checkInsn, checkInsn, opndIdx); + if (defInsn != nullptr) { + return OpndDefByOneValidBit(*defInsn); + } + /* check pred */ + for (auto predBB : checkInsn.GetBB()->GetPreds()) { + const Insn *tempInsn = nullptr; + if (predBB->GetLastInsn() != nullptr) { + tempInsn = DefInsnOfOperandInBB(*predBB->GetLastInsn(), checkInsn, opndIdx); + } + if (tempInsn == nullptr || !OpndDefByOneValidBit(*tempInsn)) { + return false; + } + } + return true; +} + +/* Check there is use point of rflag start from startInsn to current bb bottom */ +bool CmpCsetAArch64::FlagUsedLaterInCurBB(const BB &bb, Insn &startInsn) const { + if (&bb != startInsn.GetBB()) { + return false; + } + Insn *nextInsn = nullptr; + for (Insn *insn = &startInsn; insn != nullptr; insn = nextInsn) { + nextInsn = insn->GetNextMachineInsn(); + const AArch64MD *md = &AArch64CG::kMd[static_cast(insn)->GetMachineOpcode()]; + uint32 opndNum = insn->GetOperandSize(); + for (uint32 i = 0; i < opndNum; ++i) { + Operand &opnd = insn->GetOperand(i); + /* + * For condition operand, such as NE, EQ and so on, the register number should be + * same with RFLAG, we only need check the property of use/def. + */ + if (!opnd.IsConditionCode()) { + continue; + } + AArch64OpndProp *regProp = static_cast(md->operand[i]); + bool isUse = regProp->IsUse(); + if (isUse) { + return true; + } else { + ASSERT(regProp->IsDef(), "register should be redefined."); + return false; + } + } + } + return false; +} + +void CmpCsetAArch64::Run(BB &bb, Insn &insn) { + Insn *nextInsn = insn.GetNextMachineInsn(); + if (nextInsn == nullptr) { + return; + } + MOperator firstMop = insn.GetMachineOpcode(); + MOperator secondMop = nextInsn->GetMachineOpcode(); + if ((firstMop == MOP_wcmpri || firstMop == MOP_xcmpri) && + (secondMop == MOP_wcsetrc || secondMop == MOP_xcsetrc)) { + Operand &cmpFirstOpnd = insn.GetOperand(kInsnSecondOpnd); + /* get ImmOperand, must be 0 or 1 */ + Operand &cmpSecondOpnd = insn.GetOperand(kInsnThirdOpnd); + auto &cmpFlagReg = static_cast(insn.GetOperand(kInsnFirstOpnd)); + ASSERT(cmpSecondOpnd.IsIntImmediate(), "expects ImmOperand"); + auto &cmpConst = static_cast(cmpSecondOpnd); + int64 cmpConstVal = cmpConst.GetValue(); + Operand &csetFirstOpnd = nextInsn->GetOperand(kInsnFirstOpnd); + if ((cmpConstVal != 0 && cmpConstVal != 1) || !CheckOpndDefPoints(insn, 1) || + (nextInsn->GetNextMachineInsn() != nullptr && + FlagUsedLaterInCurBB(bb, *nextInsn->GetNextMachineInsn())) || + FindRegLiveOut(cmpFlagReg, *insn.GetBB())) { + return; + } + + Insn *csetInsn = nextInsn; + nextInsn = nextInsn->GetNextMachineInsn(); + auto &cond = static_cast(csetInsn->GetOperand(kInsnSecondOpnd)); + if ((cmpConstVal == 0 && cond.GetCode() == CC_NE) || (cmpConstVal == 1 && cond.GetCode() == CC_EQ)) { + if (RegOperand::IsSameRegNO(cmpFirstOpnd, csetFirstOpnd)) { + bb.RemoveInsn(insn); + bb.RemoveInsn(*csetInsn); + } else { + if (cmpFirstOpnd.GetSize() != csetFirstOpnd.GetSize()) { + return; + } + MOperator mopCode = (cmpFirstOpnd.GetSize() == k64BitSize) ? 
MOP_xmovrr : MOP_wmovrr; + Insn &newInsn = cgFunc.GetCG()->BuildInstruction(mopCode, csetFirstOpnd, cmpFirstOpnd); + bb.ReplaceInsn(insn, newInsn); + bb.RemoveInsn(*csetInsn); + } + } else if ((cmpConstVal == 1 && cond.GetCode() == CC_NE) || (cmpConstVal == 0 && cond.GetCode() == CC_EQ)) { + if (cmpFirstOpnd.GetSize() != csetFirstOpnd.GetSize()) { + return; + } + MOperator mopCode = (cmpFirstOpnd.GetSize() == k64BitSize) ? MOP_xeorrri13 : MOP_weorrri12; + ImmOperand &one = static_cast(&cgFunc)->CreateImmOperand(1, k8BitSize, false); + Insn &newInsn = cgFunc.GetCG()->BuildInstruction(mopCode, csetFirstOpnd, cmpFirstOpnd, one); + bb.ReplaceInsn(insn, newInsn); + bb.RemoveInsn(*csetInsn); + } + } +} + +/* + * help function for DeleteMovAfterCbzOrCbnz + * input: + * bb: the bb to be checked out + * checkCbz: to check out BB end with cbz or cbnz, if cbz, input true + * opnd: for MOV reg, #0, opnd indicate reg + * return: + * according to cbz, return true if insn is cbz or cbnz and the first operand of cbz(cbnz) is same as input + * operand + */ +bool DeleteMovAfterCbzOrCbnzAArch64::PredBBCheck(BB &bb, bool checkCbz, const Operand &opnd) const { + if (bb.GetKind() != BB::kBBIf) { + return false; + } + + Insn *condBr = cgcfg->FindLastCondBrInsn(bb); + ASSERT(condBr != nullptr, "condBr must be found"); + if (!cgcfg->IsCompareAndBranchInsn(*condBr)) { + return false; + } + MOperator mOp = condBr->GetMachineOpcode(); + if (checkCbz && mOp != MOP_wcbz && mOp != MOP_xcbz) { + return false; + } + if (!checkCbz && mOp != MOP_xcbnz && mOp != MOP_wcbnz) { + return false; + } + return RegOperand::IsSameRegNO(condBr->GetOperand(kInsnFirstOpnd), opnd); +} + +bool DeleteMovAfterCbzOrCbnzAArch64::OpndDefByMovZero(const Insn &insn) const { + MOperator defMop = insn.GetMachineOpcode(); + switch (defMop) { + case MOP_xmovri32: + case MOP_xmovri64: { + Operand &defOpnd = insn.GetOperand(kInsnSecondOpnd); + ASSERT(defOpnd.IsIntImmediate(), "expects ImmOperand"); + auto &defConst = static_cast(defOpnd); + int64 defConstValue = defConst.GetValue(); + if (defConstValue == 0) { + return true; + } + return false; + } + case MOP_xmovrr: + case MOP_wmovrr: { + Operand &secondOpnd = insn.GetOperand(kInsnSecondOpnd); + ASSERT(secondOpnd.IsRegister(), "expects RegOperand here"); + auto ®Opnd = static_cast(secondOpnd); + return regOpnd.IsZeroRegister(); + } + default: + return false; + } +} + +/* check whether predefine insn of first operand of test_insn is exist in current BB */ +bool DeleteMovAfterCbzOrCbnzAArch64::NoPreDefine(Insn &testInsn) const { + Insn *nextInsn = nullptr; + for (Insn *insn = testInsn.GetBB()->GetFirstInsn(); insn != nullptr && insn != &testInsn; insn = nextInsn) { + nextInsn = insn->GetNextMachineInsn(); + if (!insn->IsMachineInstruction()) { + continue; + } + ASSERT(!insn->IsCall(), "CG internal error, call insn should not be at the middle of the BB."); + const AArch64MD *md = &AArch64CG::kMd[static_cast(insn)->GetMachineOpcode()]; + uint32 opndNum = insn->GetOperandSize(); + for (uint32 i = 0; i < opndNum; ++i) { + Operand &opnd = insn->GetOperand(i); + AArch64OpndProp *regProp = static_cast(md->operand[i]); + if (!regProp->IsDef()) { + continue; + } + if (opnd.IsMemoryAccessOperand()) { + auto &memOpnd = static_cast(opnd); + RegOperand *base = memOpnd.GetBaseRegister(); + ASSERT(base != nullptr, "nullptr check"); + ASSERT(base->IsRegister(), "expects RegOperand"); + if (RegOperand::IsSameRegNO(*base, testInsn.GetOperand(kInsnFirstOpnd)) && + memOpnd.GetAddrMode() == AArch64MemOperand::kAddrModeBOi 
&& + (memOpnd.IsPostIndexed() || memOpnd.IsPreIndexed())) { + return false; + } + } else { + ASSERT(opnd.IsRegister(), "expects RegOperand"); + if (RegOperand::IsSameRegNO(testInsn.GetOperand(kInsnFirstOpnd), opnd)) { + return false; + } + } + } + } + return true; +} +void DeleteMovAfterCbzOrCbnzAArch64::ProcessBBHandle(BB *processBB, const BB &bb, const Insn &insn) { + FOR_BB_INSNS_SAFE(processInsn, processBB, nextProcessInsn) { + nextProcessInsn = processInsn->GetNextMachineInsn(); + if (!processInsn->IsMachineInstruction()) { + continue; + } + /* register may be a caller save register */ + if (processInsn->IsCall()) { + break; + } + if (!OpndDefByMovZero(*processInsn) || !NoPreDefine(*processInsn) || + !RegOperand::IsSameRegNO(processInsn->GetOperand(kInsnFirstOpnd), insn.GetOperand(kInsnFirstOpnd))) { + continue; + } + bool toDoOpt = true; + MOperator condBrMop = insn.GetMachineOpcode(); + /* process elseBB, other preds must be cbz */ + if (condBrMop == MOP_wcbnz || condBrMop == MOP_xcbnz) { + /* check out all preds of process_bb */ + for (auto *processBBPred : processBB->GetPreds()) { + if (processBBPred == &bb) { + continue; + } + if (!PredBBCheck(*processBBPred, true, processInsn->GetOperand(kInsnFirstOpnd))) { + toDoOpt = false; + break; + } + } + } else { + /* process ifBB, other preds can be cbz or cbnz(one at most) */ + for (auto processBBPred : processBB->GetPreds()) { + if (processBBPred == &bb) { + continue; + } + /* for cbnz pred, there is one at most */ + if (!PredBBCheck(*processBBPred, processBBPred != processBB->GetPrev(), + processInsn->GetOperand(kInsnFirstOpnd))) { + toDoOpt = false; + break; + } + } + } + if (!toDoOpt) { + continue; + } + processBB->RemoveInsn(*processInsn); + } +} + +/* ldr wn, [x1, wn, SXTW] + * add x2, wn, x2 + */ +bool ComplexMemOperandAddAArch64::IsExpandBaseOpnd(const Insn &insn, Insn &prevInsn) { + MOperator prevMop = prevInsn.GetMachineOpcode(); + if (prevMop >= MOP_wldrsb && prevMop <= MOP_xldr && + prevInsn.GetOperand(kInsnFirstOpnd).Equals(insn.GetOperand(kInsnSecondOpnd))) { + return true; + } + return false; +} + +void ComplexMemOperandAddAArch64::Run(BB &bb, Insn &insn) { + AArch64CGFunc *aarch64CGFunc = static_cast(&cgFunc); + Insn *nextInsn = insn.GetNextMachineInsn(); + if (nextInsn == nullptr) { + return; + } + Insn *prevInsn = insn.GetPreviousMachineInsn(); + MOperator thisMop = insn.GetMachineOpcode(); + if (thisMop != MOP_xaddrrr && thisMop != MOP_waddrrr) { + return; + } + MOperator nextMop = nextInsn->GetMachineOpcode(); + if (nextMop && + ((nextMop >= MOP_wldrsb && nextMop <= MOP_dldr) || (nextMop >= MOP_wstrb && nextMop <= MOP_dstr))) { + if (!IsMemOperandOptPattern(insn, *nextInsn)) { + return; + } + AArch64MemOperand *memOpnd = static_cast(nextInsn->GetMemOpnd()); + auto newBaseOpnd = static_cast(&insn.GetOperand(kInsnSecondOpnd)); + auto newIndexOpnd = static_cast(&insn.GetOperand(kInsnThirdOpnd)); + regno_t memBaseOpndRegNO = newBaseOpnd->GetRegisterNumber(); + if (newBaseOpnd->GetSize() <= k32BitSize && prevInsn != nullptr && IsExpandBaseOpnd(insn, *prevInsn)) { + newBaseOpnd = &aarch64CGFunc->GetOrCreatePhysicalRegisterOperand(static_cast(memBaseOpndRegNO), + k64BitSize, kRegTyInt); + } + if (newBaseOpnd->GetSize() != k64BitSize) { + return; + } + if (newIndexOpnd->GetSize() <= k32BitSize) { + AArch64MemOperand &newMemOpnd = + aarch64CGFunc->GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeBOrX, memOpnd->GetSize(), newBaseOpnd, + newIndexOpnd, 0, false); + nextInsn->SetOperand(kInsnSecondOpnd, newMemOpnd); + } else { + 
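+          /* Descriptive note (not asserting the exact overload semantics): the index
+           * operand is already a 64-bit register here, so the [base, index] memory
+           * operand is built without the 32-bit extend/shift arguments (0, false)
+           * passed in the branch above. */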
AArch64MemOperand &newMemOpnd = + aarch64CGFunc->GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeBOrX, memOpnd->GetSize(), newBaseOpnd, + newIndexOpnd, nullptr, nullptr); + nextInsn->SetOperand(kInsnSecondOpnd, newMemOpnd); + } + bb.RemoveInsn(insn); + } +} + +void DeleteMovAfterCbzOrCbnzAArch64::Run(BB &bb, Insn &insn) { + if (bb.GetKind() != BB::kBBIf) { + return; + } + if (&insn != cgcfg->FindLastCondBrInsn(bb)) { + return; + } + if (!cgcfg->IsCompareAndBranchInsn(insn)) { + return; + } + BB *processBB = nullptr; + if (bb.GetNext() == maplebe::CGCFG::GetTargetSuc(bb)) { + return; + } + + MOperator condBrMop = insn.GetMachineOpcode(); + if (condBrMop == MOP_wcbnz || condBrMop == MOP_xcbnz) { + processBB = bb.GetNext(); + } else { + processBB = maplebe::CGCFG::GetTargetSuc(bb); + } + + ASSERT(processBB != nullptr, "process_bb is null in DeleteMovAfterCbzOrCbnzAArch64::Run"); + ProcessBBHandle(processBB, bb, insn); +} + +MOperator OneHoleBranchesPreAArch64::FindNewMop(const BB &bb, const Insn &insn) const { + MOperator newOp = MOP_undef; + if (&insn != bb.GetLastInsn()) { + return newOp; + } + MOperator thisMop = insn.GetMachineOpcode(); + if (thisMop != MOP_wcbz && thisMop != MOP_wcbnz && thisMop != MOP_xcbz && thisMop != MOP_xcbnz) { + return newOp; + } + switch (thisMop) { + case MOP_wcbz: + newOp = MOP_wtbnz; + break; + case MOP_wcbnz: + newOp = MOP_wtbz; + break; + case MOP_xcbz: + newOp = MOP_xtbnz; + break; + case MOP_xcbnz: + newOp = MOP_xtbz; + break; + default: + CHECK_FATAL(false, "can not touch here"); + break; + } + return newOp; +} + +void OneHoleBranchesPreAArch64::Run(BB &bb, Insn &insn) { + AArch64CGFunc *aarch64CGFunc = static_cast(&cgFunc); + MOperator newOp = FindNewMop(bb, insn); + if (newOp == MOP_undef) { + return; + } + Insn *prevInsn = insn.GetPreviousMachineInsn(); + LabelOperand &label = static_cast(insn.GetOperand(kInsnSecondOpnd)); + if (prevInsn != nullptr && prevInsn->GetMachineOpcode() == MOP_xuxtb32 && + (static_cast(prevInsn->GetOperand(kInsnSecondOpnd)).GetValidBitsNum() <= k8BitSize || + static_cast(prevInsn->GetOperand(kInsnFirstOpnd)).GetValidBitsNum() <= k8BitSize)) { + if (&(prevInsn->GetOperand(kInsnFirstOpnd)) != &(insn.GetOperand(kInsnFirstOpnd))) { + return; + } + if (IfOperandIsLiveAfterInsn(static_cast(insn.GetOperand(kInsnFirstOpnd)), insn)) { + return; + } + insn.SetOperand(kInsnFirstOpnd, prevInsn->GetOperand(kInsnSecondOpnd)); + bb.RemoveInsn(*prevInsn); + } + if (prevInsn != nullptr && + (prevInsn->GetMachineOpcode() == MOP_xeorrri13 || prevInsn->GetMachineOpcode() == MOP_weorrri12) && + static_cast(prevInsn->GetOperand(kInsnThirdOpnd)).GetValue() == 1) { + if (&(prevInsn->GetOperand(kInsnFirstOpnd)) != &(insn.GetOperand(kInsnFirstOpnd))) { + return; + } + Insn *prevPrevInsn = prevInsn->GetPreviousMachineInsn(); + if (prevPrevInsn == nullptr) { + return; + } + if (prevPrevInsn->GetMachineOpcode() != MOP_xuxtb32 || + static_cast(prevPrevInsn->GetOperand(kInsnSecondOpnd)).GetValidBitsNum() != 1) { + return; + } + if (&(prevPrevInsn->GetOperand(kInsnFirstOpnd)) != &(prevInsn->GetOperand(kInsnSecondOpnd))) { + return; + } + ImmOperand &oneHoleOpnd = aarch64CGFunc->CreateImmOperand(0, k8BitSize, false); + auto ®Operand = static_cast(prevPrevInsn->GetOperand(kInsnSecondOpnd)); + bb.InsertInsnAfter(insn, cgFunc.GetCG()->BuildInstruction(newOp, regOperand, oneHoleOpnd, label)); + bb.RemoveInsn(insn); + bb.RemoveInsn(*prevInsn); + bb.RemoveInsn(*prevPrevInsn); + } +} + +bool LoadFloatPointAArch64::FindLoadFloatPoint(std::vector &optInsn, Insn &insn) 
{ + MOperator mOp = insn.GetMachineOpcode(); + optInsn.clear(); + if (mOp != MOP_xmovzri16) { + return false; + } + optInsn.emplace_back(&insn); + + Insn *insnMov2 = insn.GetNextMachineInsn(); + if (insnMov2 == nullptr) { + return false; + } + if (insnMov2->GetMachineOpcode() != MOP_xmovkri16) { + return false; + } + optInsn.emplace_back(insnMov2); + + Insn *insnMov3 = insnMov2->GetNextMachineInsn(); + if (insnMov3 == nullptr) { + return false; + } + if (insnMov3->GetMachineOpcode() != MOP_xmovkri16) { + return false; + } + optInsn.emplace_back(insnMov3); + + Insn *insnMov4 = insnMov3->GetNextMachineInsn(); + if (insnMov4 == nullptr) { + return false; + } + if (insnMov4->GetMachineOpcode() != MOP_xmovkri16) { + return false; + } + optInsn.emplace_back(insnMov4); + return true; +} + +bool LoadFloatPointAArch64::IsPatternMatch(const std::vector &optInsn) { + int insnNum = 0; + Insn *insn1 = optInsn[insnNum]; + Insn *insn2 = optInsn[++insnNum]; + Insn *insn3 = optInsn[++insnNum]; + Insn *insn4 = optInsn[++insnNum]; + if ((static_cast(insn1->GetOperand(kInsnFirstOpnd)).GetRegisterNumber() != + static_cast(insn2->GetOperand(kInsnFirstOpnd)).GetRegisterNumber()) || + (static_cast(insn2->GetOperand(kInsnFirstOpnd)).GetRegisterNumber() != + static_cast(insn3->GetOperand(kInsnFirstOpnd)).GetRegisterNumber()) || + (static_cast(insn3->GetOperand(kInsnFirstOpnd)).GetRegisterNumber() != + static_cast(insn4->GetOperand(kInsnFirstOpnd)).GetRegisterNumber())) { + return false; + } + if ((static_cast(insn1->GetOperand(kInsnThirdOpnd)).GetShiftAmount() != 0) || + (static_cast(insn2->GetOperand(kInsnThirdOpnd)).GetShiftAmount() != + k16BitSize) || + (static_cast(insn3->GetOperand(kInsnThirdOpnd)).GetShiftAmount() != + k32BitSize) || + (static_cast(insn4->GetOperand(kInsnThirdOpnd)).GetShiftAmount() != + (k16BitSize + k32BitSize))) { + return false; + } + return true; +} + +void LoadFloatPointAArch64::Run(BB &bb, Insn &insn) { + AArch64CGFunc *aarch64CGFunc = static_cast(&cgFunc); + /* logical shift left values in three optimized pattern */ + std::vector optInsn; + if (FindLoadFloatPoint(optInsn, insn) && IsPatternMatch(optInsn)) { + int insnNum = 0; + Insn *insn1 = optInsn[insnNum]; + Insn *insn2 = optInsn[++insnNum]; + Insn *insn3 = optInsn[++insnNum]; + Insn *insn4 = optInsn[++insnNum]; + auto &movConst1 = static_cast(insn1->GetOperand(kInsnSecondOpnd)); + auto &movConst2 = static_cast(insn2->GetOperand(kInsnSecondOpnd)); + auto &movConst3 = static_cast(insn3->GetOperand(kInsnSecondOpnd)); + auto &movConst4 = static_cast(insn4->GetOperand(kInsnSecondOpnd)); + /* movk/movz's immOpnd is 16-bit unsigned immediate */ + uint64 value = static_cast(movConst1.GetValue()) + + (static_cast(movConst2.GetValue()) << k16BitSize) + + (static_cast(movConst3.GetValue()) << k32BitSize) + + (static_cast(movConst4.GetValue()) << (k16BitSize + k32BitSize)); + + LabelIdx lableIdx = cgFunc.CreateLabel(); + LabelOperand &target = aarch64CGFunc->GetOrCreateLabelOperand(lableIdx); + cgFunc.InsertLabelMap(lableIdx, value); + Insn &newInsn = cgFunc.GetCG()->BuildInstruction(MOP_xldli, insn4->GetOperand(kInsnFirstOpnd), + target); + bb.InsertInsnAfter(*insn4, newInsn); + bb.RemoveInsn(*insn1); + bb.RemoveInsn(*insn2); + bb.RemoveInsn(*insn3); + bb.RemoveInsn(*insn4); + } +} + +void ReplaceOrrToMovAArch64::Run(BB &bb, Insn &insn){ + Operand *opndOfOrr = nullptr; + ImmOperand *immOpnd = nullptr; + AArch64RegOperand *reg1 = nullptr; + AArch64RegOperand *reg2 = nullptr; + MOperator thisMop = insn.GetMachineOpcode(); + MOperator newMop = 
MOP_undef; + switch (thisMop) { + case MOP_wiorri12r: { /* opnd1 is Reg32 and opnd2 is immediate. */ + opndOfOrr = &(insn.GetOperand(kInsnSecondOpnd)); + reg2 = &static_cast(insn.GetOperand(kInsnThirdOpnd)); + newMop = MOP_wmovrr; + break; + } + case MOP_wiorrri12: { /* opnd1 is reg32 and opnd3 is immediate. */ + opndOfOrr = &(insn.GetOperand(kInsnThirdOpnd)); + reg2 = &static_cast(insn.GetOperand(kInsnSecondOpnd)); + newMop = MOP_wmovrr; + break; + } + case MOP_xiorri13r: { /* opnd1 is Reg64 and opnd2 is immediate. */ + opndOfOrr = &(insn.GetOperand(kInsnSecondOpnd)); + reg2 = &static_cast(insn.GetOperand(kInsnThirdOpnd)); + newMop = MOP_xmovrr; + break; + } + case MOP_xiorrri13: { /* opnd1 is reg64 and opnd3 is immediate. */ + opndOfOrr = &(insn.GetOperand(kInsnThirdOpnd)); + reg2 = &static_cast(insn.GetOperand(kInsnSecondOpnd)); + newMop = MOP_xmovrr; + break; + } + default: + break; + } + ASSERT(opndOfOrr->IsIntImmediate(), "expects immediate operand"); + immOpnd = static_cast(opndOfOrr); + if (immOpnd->GetValue() == 0) { + reg1 = &static_cast(insn.GetOperand(kInsnFirstOpnd)); + bb.ReplaceInsn(insn, cgFunc.GetCG()->BuildInstruction(newMop, *reg1, *reg2)); + } +} + +void ReplaceCmpToCmnAArch64::Run(BB &bb, Insn &insn) { + AArch64CGFunc *aarch64CGFunc = static_cast(&cgFunc); + MOperator thisMop = insn.GetMachineOpcode(); + MOperator nextMop = MOP_undef; + MOperator newMop = MOP_undef; + switch (thisMop) { + case MOP_xmovri32: { + nextMop = MOP_wcmprr; + newMop = MOP_wcmnri; + break; + } + case MOP_xmovri64: { + nextMop = MOP_xcmprr; + newMop = MOP_xcmnri; + break; + } + default: + break; + } + Operand *opnd1OfMov = &(insn.GetOperand(kInsnFirstOpnd)); + Operand *opnd2OfMov = &(insn.GetOperand(kInsnSecondOpnd)); + if (opnd2OfMov->IsIntImmediate()) { + ImmOperand *immOpnd = static_cast(opnd2OfMov); + int64 iVal = immOpnd->GetValue(); + if (kNegativeImmLowerLimit <= iVal && iVal < 0) { + Insn *nextInsn = insn.GetNextMachineInsn(); /* get the next insn to judge if it is a cmp instruction. */ + if (nextInsn != nullptr) { + if (nextInsn->GetMachineOpcode() == nextMop) { + Operand *opndCmp2 = &(nextInsn->GetOperand(kInsnSecondOpnd)); + Operand *opndCmp3 = &(nextInsn->GetOperand(kInsnThirdOpnd)); /* get the third operand of cmp */ + /* if the first operand of mov equals the third operand of cmp, match the pattern. 
*/ + if (opnd1OfMov == opndCmp3) { + ImmOperand &newOpnd = aarch64CGFunc->CreateImmOperand(iVal * (-1), immOpnd->GetSize(), false); + Operand ®Flag = nextInsn->GetOperand(kInsnFirstOpnd); + bb.ReplaceInsn(*nextInsn, cgFunc.GetCG()->BuildInstruction(MOperator(newMop), regFlag, + *opndCmp2, newOpnd)); + } + } + } + } + } +} + +void RemoveIncRefAArch64::Run(BB &bb, Insn &insn) { + MOperator mOp = insn.GetMachineOpcode(); + if (mOp != MOP_xbl) { + return; + } + auto &target = static_cast(insn.GetOperand(kInsnFirstOpnd)); + if (target.GetName() != "MCC_IncDecRef_NaiveRCFast") { + return; + } + Insn *insnMov2 = insn.GetPreviousMachineInsn(); + if (insnMov2 == nullptr) { + return; + } + MOperator mopMov2 = insnMov2->GetMachineOpcode(); + if (mopMov2 != MOP_xmovrr) { + return; + } + Insn *insnMov1 = insnMov2->GetPreviousMachineInsn(); + if (insnMov1 == nullptr) { + return; + } + MOperator mopMov1 = insnMov1->GetMachineOpcode(); + if (mopMov1 != MOP_xmovrr) { + return; + } + if (static_cast(insnMov1->GetOperand(kInsnSecondOpnd)).GetRegisterNumber() != + static_cast(insnMov2->GetOperand(kInsnSecondOpnd)).GetRegisterNumber()) { + return; + } + auto &mov2Dest = static_cast(insnMov2->GetOperand(kInsnFirstOpnd)); + auto &mov1Dest = static_cast(insnMov1->GetOperand(kInsnFirstOpnd)); + if (mov1Dest.IsVirtualRegister() || mov2Dest.IsVirtualRegister() || mov1Dest.GetRegisterNumber() != R0 || + mov2Dest.GetRegisterNumber() != R1) { + return; + } + bb.RemoveInsn(insn); + bb.RemoveInsn(*insnMov2); + bb.RemoveInsn(*insnMov1); +} + +bool LongIntCompareWithZAArch64::FindLondIntCmpWithZ(std::vector &optInsn, Insn &insn) { + MOperator thisMop = insn.GetMachineOpcode(); + optInsn.clear(); + /* first */ + if (thisMop != MOP_xcmpri) { + return false; + } + optInsn.emplace_back(&insn); + + /* second */ + Insn *nextInsn1 = insn.GetNextMachineInsn(); + if (nextInsn1 == nullptr) { + return false; + } + MOperator nextMop1 = nextInsn1->GetMachineOpcode(); + if (nextMop1 != MOP_wcsinvrrrc) { + return false; + } + optInsn.emplace_back(nextInsn1); + + /* third */ + Insn *nextInsn2 = nextInsn1->GetNextMachineInsn(); + if (nextInsn2 == nullptr) { + return false; + } + MOperator nextMop2 = nextInsn2->GetMachineOpcode(); + if (nextMop2 != MOP_wcsincrrrc) { + return false; + } + optInsn.emplace_back(nextInsn2); + + /* forth */ + Insn *nextInsn3 = nextInsn2->GetNextMachineInsn(); + if (nextInsn3 == nullptr) { + return false; + } + MOperator nextMop3 = nextInsn3->GetMachineOpcode(); + if (nextMop3 != MOP_wcmpri) { + return false; + } + optInsn.emplace_back(nextInsn3); + return true; +} + +bool LongIntCompareWithZAArch64::IsPatternMatch(const std::vector &optInsn) { + constexpr int insnLen = 4; + if (optInsn.size() != insnLen) { + return false; + } + int insnNum = 0; + Insn *insn1 = optInsn[insnNum]; + Insn *insn2 = optInsn[++insnNum]; + Insn *insn3 = optInsn[++insnNum]; + Insn *insn4 = optInsn[++insnNum]; + ASSERT(insnNum == 3, " this specific case has three insns"); + if (insn2->GetOperand(kInsnSecondOpnd).IsZeroRegister() && insn2->GetOperand(kInsnThirdOpnd).IsZeroRegister() && + insn3->GetOperand(kInsnThirdOpnd).IsZeroRegister() && + &(insn3->GetOperand(kInsnFirstOpnd)) == &(insn3->GetOperand(kInsnSecondOpnd)) && + static_cast(insn2->GetOperand(kInsnFourthOpnd)).GetCode() == CC_GE && + static_cast(insn3->GetOperand(kInsnFourthOpnd)).GetCode() == CC_LE && + static_cast(insn1->GetOperand(kInsnThirdOpnd)).GetValue() == 0 && + static_cast(insn4->GetOperand(kInsnThirdOpnd)).GetValue() == 0) { + return true; + } + return false; +} + 
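+/*
+ * Illustrative example of the matched sequence (register names are assumed;
+ * only the opcodes and the operand constraints checked above are required):
+ *   cmp   x0, #0
+ *   csinv w1, wzr, wzr, GE    // w1 = (x0 >= 0) ? 0 : -1
+ *   csinc w1, w1, wzr, LE     // w1 = (x0 <= 0) ? w1 : 1
+ *   cmp   w1, #0
+ * w1 ends up holding the signum of x0, so comparing it with zero reproduces
+ * the outcome of comparing x0 with zero directly; Run() below therefore
+ * replaces the trailing cmp with a copy of the leading one.
+ */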
+void LongIntCompareWithZAArch64::Run(BB &bb, Insn &insn) { + std::vector optInsn; + /* found pattern */ + if (FindLondIntCmpWithZ(optInsn, insn) && IsPatternMatch(optInsn)) { + Insn &newInsn = cgFunc.GetCG()->BuildInstruction(optInsn[0]->GetMachineOpcode(), + optInsn[0]->GetOperand(kInsnFirstOpnd), + optInsn[0]->GetOperand(kInsnSecondOpnd), + optInsn[0]->GetOperand(kInsnThirdOpnd)); + /* use newInsn to replace the third optInsn */ + bb.ReplaceInsn(*optInsn[3], newInsn); + optInsn.clear(); + } +} + +void ComplexMemOperandAArch64::Run(BB &bb, Insn &insn) { + AArch64CGFunc *aarch64CGFunc = static_cast(&cgFunc); + Insn *nextInsn = insn.GetNextMachineInsn(); + if (nextInsn == nullptr) { + return; + } + MOperator thisMop = insn.GetMachineOpcode(); + if (thisMop != MOP_xadrpl12) { + return; + } + + MOperator nextMop = nextInsn->GetMachineOpcode(); + if (nextMop && + ((nextMop >= MOP_wldrsb && nextMop <= MOP_dldp) || (nextMop >= MOP_wstrb && nextMop <= MOP_dstp))) { + /* Check if base register of nextInsn and the dest operand of insn are identical. */ + AArch64MemOperand *memOpnd = static_cast(nextInsn->GetMemOpnd()); + ASSERT(memOpnd != nullptr, "memOpnd is null in AArch64Peep::ComplexMemOperandAArch64"); + + /* Only for AddrMode_B_OI addressing mode. */ + if (memOpnd->GetAddrMode() != AArch64MemOperand::kAddrModeBOi) { + return; + } + + /* Only for intact memory addressing. */ + if (!memOpnd->IsIntactIndexed()) { + return; + } + + auto ®Opnd = static_cast(insn.GetOperand(kInsnFirstOpnd)); + + /* Check if dest operand of insn is idential with base register of nextInsn. */ + if (memOpnd->GetBaseRegister() != ®Opnd) { + return; + } + + /* Check if x0 is used after ldr insn, and if it is in live-out. */ + if (IfOperandIsLiveAfterInsn(regOpnd, *nextInsn)) { + return; + } + + /* load store pairs cannot have relocation */ + if (nextInsn->IsLoadStorePair() && insn.GetOperand(kInsnThirdOpnd).IsStImmediate()) { + return; + } + + auto &stImmOpnd = static_cast(insn.GetOperand(kInsnThirdOpnd)); + AArch64OfstOperand &offOpnd = aarch64CGFunc->GetOrCreateOfstOpnd( + stImmOpnd.GetOffset() + memOpnd->GetOffsetImmediate()->GetOffsetValue(), k32BitSize); + if (cgFunc.GetMirModule().IsCModule()) { + Insn *prevInsn = insn.GetPrev(); + MOperator prevMop = prevInsn->GetMachineOpcode(); + if (prevMop != MOP_xadrp) { + return; + } else { + auto &prevStImmOpnd = static_cast(prevInsn->GetOperand(kInsnSecondOpnd)); + prevStImmOpnd.SetOffset(offOpnd.GetValue()); + } + } + auto &newBaseOpnd = static_cast(insn.GetOperand(kInsnSecondOpnd)); + AArch64MemOperand &newMemOpnd = + aarch64CGFunc->GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeLo12Li, memOpnd->GetSize(), + &newBaseOpnd, nullptr, &offOpnd, stImmOpnd.GetSymbol()); + + nextInsn->SetMemOpnd(static_cast(&newMemOpnd)); + bb.RemoveInsn(insn); + CHECK_FATAL(!CGOptions::IsLazyBinding() || cgFunc.GetCG()->IsLibcore(), + "this pattern can't be found in this phase"); + } +} + +void ComplexMemOperandPreAddAArch64::Run(BB &bb, Insn &insn) { + AArch64CGFunc *aarch64CGFunc = static_cast(&cgFunc); + Insn *nextInsn = insn.GetNextMachineInsn(); + if (nextInsn == nullptr) { + return; + } + MOperator thisMop = insn.GetMachineOpcode(); + if (thisMop != MOP_xaddrrr && thisMop != MOP_waddrrr) { + return; + } + MOperator nextMop = nextInsn->GetMachineOpcode(); + if (nextMop && + ((nextMop >= MOP_wldrsb && nextMop <= MOP_dldr) || (nextMop >= MOP_wstrb && nextMop <= MOP_dstr))) { + if (!IsMemOperandOptPattern(insn, *nextInsn)) { + return; + } + AArch64MemOperand *memOpnd = 
static_cast(nextInsn->GetMemOpnd()); + auto &newBaseOpnd = static_cast(insn.GetOperand(kInsnSecondOpnd)); + auto &newIndexOpnd = static_cast(insn.GetOperand(kInsnThirdOpnd)); + if (newBaseOpnd.GetSize() != k64BitSize) { + return; + } + if (newIndexOpnd.GetSize() <= k32BitSize) { + AArch64MemOperand &newMemOpnd = + aarch64CGFunc->GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeBOrX, memOpnd->GetSize(), &newBaseOpnd, + &newIndexOpnd, 0, false); + nextInsn->SetOperand(kInsnSecondOpnd, newMemOpnd); + } else { + AArch64MemOperand &newMemOpnd = + aarch64CGFunc->GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeBOrX, memOpnd->GetSize(), &newBaseOpnd, + &newIndexOpnd, nullptr, nullptr); + nextInsn->SetOperand(kInsnSecondOpnd, newMemOpnd); + } + bb.RemoveInsn(insn); + } +} + +bool ComplexMemOperandLSLAArch64::CheckShiftValid(const Insn &insn, BitShiftOperand &lsl) const { + /* check if shift amount is valid */ + uint32 lslAmount = lsl.GetShiftAmount(); + constexpr uint8 twoShiftBits = 2; + constexpr uint8 threeShiftBits = 3; + uint8 memSize = static_cast(insn).GetLoadStoreSize(); + if ((memSize == k32BitSize && (lsl.GetShiftAmount() != 0 && lslAmount != twoShiftBits)) || + (memSize == k64BitSize && (lsl.GetShiftAmount() != 0 && lslAmount != threeShiftBits))) { + return false; + } + if (memSize != (k8BitSize << lslAmount)) { + return false; + } + return true; +} + +void ComplexMemOperandLSLAArch64::Run(BB &bb, Insn &insn) { + AArch64CGFunc *aarch64CGFunc = static_cast(&cgFunc); + Insn *nextInsn = insn.GetNextMachineInsn(); + if (nextInsn == nullptr) { + return; + } + MOperator thisMop = insn.GetMachineOpcode(); + if (thisMop != MOP_xaddrrrs) { + return; + } + MOperator nextMop = nextInsn->GetMachineOpcode(); + if (nextMop && + ((nextMop >= MOP_wldrsb && nextMop <= MOP_dldr) || (nextMop >= MOP_wstrb && nextMop <= MOP_dstr))) { + /* Check if base register of nextInsn and the dest operand of insn are identical. */ + AArch64MemOperand *memOpnd = static_cast(nextInsn->GetMemOpnd()); + ASSERT(memOpnd != nullptr, "null ptr check"); + + /* Only for AddrMode_B_OI addressing mode. */ + if (memOpnd->GetAddrMode() != AArch64MemOperand::kAddrModeBOi) { + return; + } + + /* Only for immediate is 0. */ + if (memOpnd->GetOffsetImmediate()->GetOffsetValue() != 0) { + return; + } + + /* Only for intact memory addressing. */ + if (!memOpnd->IsIntactIndexed()) { + return; + } + + auto ®Opnd = static_cast(insn.GetOperand(kInsnFirstOpnd)); + + /* Check if dest operand of insn is idential with base register of nextInsn. */ + if (memOpnd->GetBaseRegister() != ®Opnd) { + return; + } + +#ifdef USE_32BIT_REF + if (nextInsn->IsAccessRefField() && nextInsn->GetOperand(kInsnFirstOpnd).GetSize() > k32BitSize) { + return; + } +#endif + + /* Check if x0 is used after ldr insn, and if it is in live-out. 
*/ + if (IfOperandIsLiveAfterInsn(regOpnd, *nextInsn)) { + return; + } + auto &lsl = static_cast(insn.GetOperand(kInsnFourthOpnd)); + if (!CheckShiftValid(*nextInsn, lsl)) { + return; + } + auto &newBaseOpnd = static_cast(insn.GetOperand(kInsnSecondOpnd)); + auto &newIndexOpnd = static_cast(insn.GetOperand(kInsnThirdOpnd)); + AArch64MemOperand &newMemOpnd = + aarch64CGFunc->GetOrCreateMemOpnd(AArch64MemOperand::kAddrModeBOrX, memOpnd->GetSize(), &newBaseOpnd, + &newIndexOpnd, lsl.GetShiftAmount(), false); + nextInsn->SetOperand(kInsnSecondOpnd, newMemOpnd); + bb.RemoveInsn(insn); + } +} + + +void ComplexMemOperandLabelAArch64::Run(BB &bb, Insn &insn) { + Insn *nextInsn = insn.GetNextMachineInsn(); + if (nextInsn == nullptr) { + return; + } + MOperator thisMop = insn.GetMachineOpcode(); + if (thisMop != MOP_xldli) { + return; + } + MOperator nextMop = nextInsn->GetMachineOpcode(); + if (nextMop != MOP_xvmovdr) { + return; + } + auto ®Opnd = static_cast(insn.GetOperand(kInsnFirstOpnd)); + if (regOpnd.GetRegisterNumber() != + static_cast(nextInsn->GetOperand(kInsnSecondOpnd)).GetRegisterNumber()) { + return; + } + + /* Check if x0 is used after ldr insn, and if it is in live-out. */ + if (IfOperandIsLiveAfterInsn(regOpnd, *nextInsn)) { + return; + } + + Insn &newInsn = cgFunc.GetCG()->BuildInstruction(MOP_dldli, nextInsn->GetOperand(kInsnFirstOpnd), + insn.GetOperand(kInsnSecondOpnd)); + bb.InsertInsnAfter(*nextInsn, newInsn); + bb.RemoveInsn(insn); + bb.RemoveInsn(*nextInsn); +} + +/* + * mov R0, vreg1 / R0 -> objDesignateInsn + * add vreg2, vreg1, #imm -> fieldDesignateInsn + * mov R1, vreg2 -> fieldParamDefInsn + * mov R2, vreg3 -> fieldValueDefInsn + */ +bool WriteFieldCallAArch64::WriteFieldCallOptPatternMatch(const Insn &writeFieldCallInsn, WriteRefFieldParam ¶m, + std::vector ¶mDefInsns) { + Insn *fieldValueDefInsn = writeFieldCallInsn.GetPreviousMachineInsn(); + if (fieldValueDefInsn == nullptr || fieldValueDefInsn->GetMachineOpcode() != MOP_xmovrr) { + return false; + } + Operand &fieldValueDefInsnDestOpnd = fieldValueDefInsn->GetOperand(kInsnFirstOpnd); + auto &fieldValueDefInsnDestReg = static_cast(fieldValueDefInsnDestOpnd); + if (fieldValueDefInsnDestReg.GetRegisterNumber() != R2) { + return false; + } + paramDefInsns.emplace_back(fieldValueDefInsn); + param.fieldValue = &(fieldValueDefInsn->GetOperand(kInsnSecondOpnd)); + Insn *fieldParamDefInsn = fieldValueDefInsn->GetPreviousMachineInsn(); + if (fieldParamDefInsn == nullptr || fieldParamDefInsn->GetMachineOpcode() != MOP_xmovrr) { + return false; + } + Operand &fieldParamDestOpnd = fieldParamDefInsn->GetOperand(kInsnFirstOpnd); + auto &fieldParamDestReg = static_cast(fieldParamDestOpnd); + if (fieldParamDestReg.GetRegisterNumber() != R1) { + return false; + } + paramDefInsns.emplace_back(fieldParamDefInsn); + Insn *fieldDesignateInsn = fieldParamDefInsn->GetPreviousMachineInsn(); + if (fieldDesignateInsn == nullptr || fieldDesignateInsn->GetMachineOpcode() != MOP_xaddrri12) { + return false; + } + Operand &fieldParamDefSrcOpnd = fieldParamDefInsn->GetOperand(kInsnSecondOpnd); + Operand &fieldDesignateDestOpnd = fieldDesignateInsn->GetOperand(kInsnFirstOpnd); + if (!RegOperand::IsSameReg(fieldParamDefSrcOpnd, fieldDesignateDestOpnd)) { + return false; + } + Operand &fieldDesignateBaseOpnd = fieldDesignateInsn->GetOperand(kInsnSecondOpnd); + param.fieldBaseOpnd = &(static_cast(fieldDesignateBaseOpnd)); + auto &immOpnd = static_cast(fieldDesignateInsn->GetOperand(kInsnThirdOpnd)); + param.fieldOffset = immOpnd.GetValue(); + 
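+  /* Per the pattern comment above, fieldDesignateInsn is the
+   * "add vreg2, vreg1, #imm" insn, so fieldOffset now holds the offset of the
+   * written field relative to the object base register. */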
paramDefInsns.emplace_back(fieldDesignateInsn); + Insn *objDesignateInsn = fieldDesignateInsn->GetPreviousMachineInsn(); + if (objDesignateInsn == nullptr || objDesignateInsn->GetMachineOpcode() != MOP_xmovrr) { + return false; + } + Operand &objDesignateDestOpnd = objDesignateInsn->GetOperand(kInsnFirstOpnd); + auto &objDesignateDestReg = static_cast(objDesignateDestOpnd); + if (objDesignateDestReg.GetRegisterNumber() != R0) { + return false; + } + Operand &objDesignateSrcOpnd = objDesignateInsn->GetOperand(kInsnSecondOpnd); + if (RegOperand::IsSameReg(objDesignateDestOpnd, objDesignateSrcOpnd) || + !RegOperand::IsSameReg(objDesignateSrcOpnd, fieldDesignateBaseOpnd)) { + return false; + } + param.objOpnd = &(objDesignateInsn->GetOperand(kInsnSecondOpnd)); + paramDefInsns.emplace_back(objDesignateInsn); + return true; +} + +bool WriteFieldCallAArch64::IsWriteRefFieldCallInsn(const Insn &insn) { + if (!insn.IsCall() || insn.IsIndirectCall()) { + return false; + } + Operand *targetOpnd = insn.GetCallTargetOperand(); + ASSERT(targetOpnd != nullptr, "targetOpnd must not be nullptr"); + if (!targetOpnd->IsFuncNameOpnd()) { + return false; + } + FuncNameOperand *target = static_cast(targetOpnd); + const MIRSymbol *funcSt = target->GetFunctionSymbol(); + ASSERT(funcSt->GetSKind() == kStFunc, "the kind of funcSt is unreasonable"); + const std::string &funcName = funcSt->GetName(); + return funcName == "MCC_WriteRefField" || funcName == "MCC_WriteVolatileField"; +} + +static bool MayThrowBetweenInsn(const Insn &prevCallInsn, const Insn &currCallInsn) { + for (Insn *insn = prevCallInsn.GetNext(); insn != nullptr && insn != &currCallInsn; insn = insn->GetNext()) { + if (insn->MayThrow()) { + return true; + } + } + return false; +} + +void WriteFieldCallAArch64::Run(BB &bb, Insn &insn) { + AArch64CGFunc *aarch64CGFunc = static_cast(&cgFunc); + std::vector paramDefInsns; + Insn *nextInsn = insn.GetNextMachineInsn(); + if (!IsWriteRefFieldCallInsn(insn)) { + return; + } + if (!hasWriteFieldCall) { + if (!WriteFieldCallOptPatternMatch(insn, firstCallParam, paramDefInsns)) { + return; + } + prevCallInsn = &insn; + hasWriteFieldCall = true; + return; + } + WriteRefFieldParam currentCallParam; + if (!WriteFieldCallOptPatternMatch(insn, currentCallParam, paramDefInsns)) { + return; + } + if (prevCallInsn == nullptr || MayThrowBetweenInsn(*prevCallInsn, insn)) { + return; + } + if (firstCallParam.objOpnd == nullptr || currentCallParam.objOpnd == nullptr || + currentCallParam.fieldBaseOpnd == nullptr) { + return; + } + if (!RegOperand::IsSameReg(*firstCallParam.objOpnd, *currentCallParam.objOpnd)) { + return; + } + MemOperand &addr = + aarch64CGFunc->CreateMemOpnd(*currentCallParam.fieldBaseOpnd, currentCallParam.fieldOffset, k64BitSize); + Insn &strInsn = cgFunc.GetCG()->BuildInstruction(MOP_xstr, *currentCallParam.fieldValue, addr); + strInsn.AppendComment("store reference field"); + strInsn.MarkAsAccessRefField(true); + bb.InsertInsnAfter(insn, strInsn); + for (Insn *paramDefInsn : paramDefInsns) { + bb.RemoveInsn(*paramDefInsn); + } + bb.RemoveInsn(insn); + prevCallInsn = &strInsn; + nextInsn = strInsn.GetNextMachineInsn(); +} + +void RemoveDecRefAArch64::Run(BB &bb, Insn &insn) { + if (insn.GetMachineOpcode() != MOP_xbl) { + return; + } + auto &target = static_cast(insn.GetOperand(kInsnFirstOpnd)); + if (target.GetName() != "MCC_DecRef_NaiveRCFast") { + return; + } + Insn *insnMov = insn.GetPreviousMachineInsn(); + if (insnMov == nullptr) { + return; + } + MOperator mopMov = insnMov->GetMachineOpcode(); + 
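+  /* The call is removed only when its single argument in R0 is statically zero
+   * (mov x0, xzr or mov x0, #0); MCC_DecRef_NaiveRCFast is presumably a no-op
+   * for a null reference, so both the mov and the bl can be dropped. */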
if ((mopMov != MOP_xmovrr && mopMov != MOP_xmovri64) || + static_cast(insnMov->GetOperand(kInsnFirstOpnd)).GetRegisterNumber() != R0) { + return; + } + Operand &srcOpndOfMov = insnMov->GetOperand(kInsnSecondOpnd); + if (!srcOpndOfMov.IsZeroRegister() && + !(srcOpndOfMov.IsImmediate() && static_cast(srcOpndOfMov).GetValue() == 0)) { + return; + } + bb.RemoveInsn(*insnMov); + bb.RemoveInsn(insn); + bb.SetKind(BB::kBBFallthru); +} + +/* + * Find 5 insn with certain OP code + * 1 : MOP_xaddrri12 + * 2 : MOP_waddrrr + * 3 : MOP_waddrri12 + * 4 : MOP_xsxtw64 + * 5 : MOP_xaddrrrs + */ +bool ComputationTreeAArch64::FindComputationTree(std::vector &optInsn, Insn &insn) { + MOperator thisMop = insn.GetMachineOpcode(); + optInsn.clear(); + /* first */ + if (thisMop != MOP_xaddrri12) { + return false; + } + optInsn.emplace_back(&insn); + /* second */ + Insn *nextInsn1 = insn.GetNextMachineInsn(); + if (nextInsn1 == nullptr) { + return false; + } + MOperator nextMop1 = nextInsn1->GetMachineOpcode(); + if (nextMop1 != MOP_waddrrr) { + return false; + } + optInsn.emplace_back(nextInsn1); + /* third */ + Insn *nextInsn2 = nextInsn1->GetNextMachineInsn(); + if (nextInsn2 == nullptr) { + return false; + } + MOperator nextMop2 = nextInsn2->GetMachineOpcode(); + if (nextMop2 != MOP_waddrri12) { + return false; + } + optInsn.emplace_back(nextInsn2); + /* forth */ + Insn *nextInsn3 = nextInsn2->GetNextMachineInsn(); + if (nextInsn3 == nullptr) { + return false; + } + MOperator nextMop3 = nextInsn3->GetMachineOpcode(); + if (nextMop3 != MOP_xsxtw64) { + return false; + } + optInsn.emplace_back(nextInsn3); + /* fifth */ + Insn *nextInsn4 = nextInsn3->GetNextMachineInsn(); + if (nextInsn4 == nullptr) { + return false; + } + MOperator nextMop4 = nextInsn4->GetMachineOpcode(); + if (nextMop4 != MOP_xaddrrrs) { + return false; + } + optInsn.emplace_back(nextInsn4); + return true; +} + +/* + * Make sure the insn in opt_insn match the pattern as following: + * add x1, x1, #16 + * add w2, w10, w10 + * add w2, w2, #1 + * sxtw x2, w2 + * add x1, x1, x2, LSL #3 + * bl MCC_LoadRefField_NaiveRCFast + */ +bool ComputationTreeAArch64::IsPatternMatch(const std::vector &optInsn) const { + /* this speific pattern has exactly four insns */ + if (optInsn.size() <= 4) { + ERR(kLncErr, "access opt_insn failed"); + return false; + } + int insnNum = 0; + Insn *insn1 = optInsn[insnNum]; + Insn *insn2 = optInsn[++insnNum]; + Insn *insn3 = optInsn[++insnNum]; + Insn *insn4 = optInsn[++insnNum]; + Insn *insn5 = optInsn[++insnNum]; + ASSERT(insnNum == 4, "match pattern failed in AArch64Peep::PatternIsMatch"); + Insn *insn6 = insn5->GetNext(); + if (insn6 != nullptr && insn6->GetMachineOpcode() != MOP_xbl && insn6->GetMachineOpcode() != MOP_tail_call_opt_xbl) { + return false; + } + CHECK_FATAL(insn6 != nullptr, "Insn null ptr check"); + auto &funcNameOpnd = static_cast(insn6->GetOperand(kInsnFirstOpnd)); + if (&(insn1->GetOperand(kInsnFirstOpnd)) == &(insn5->GetOperand(kInsnSecondOpnd)) && + &(insn2->GetOperand(kInsnSecondOpnd)) == &(insn2->GetOperand(kInsnThirdOpnd)) && + &(insn2->GetOperand(kInsnFirstOpnd)) == &(insn3->GetOperand(kInsnSecondOpnd)) && + &(insn3->GetOperand(kInsnFirstOpnd)) == &(insn4->GetOperand(kInsnSecondOpnd)) && + &(insn4->GetOperand(kInsnFirstOpnd)) == &(insn5->GetOperand(kInsnThirdOpnd)) && + funcNameOpnd.GetName() == "MCC_LoadRefField_NaiveRCFast" && + static_cast(insn1->GetOperand(kInsnThirdOpnd)).GetValue() == k16BitSize && + static_cast(insn3->GetOperand(kInsnThirdOpnd)).GetValue() == 1) { + return true; + } + 
return false; +} + +void ComputationTreeAArch64::Run(BB &bb, Insn &insn) { + std::vector optInsn; + AArch64CGFunc *aarch64CGFunc = static_cast(&cgFunc); + if (!insn.IsMachineInstruction()) { + return; + } + /* found pattern */ + if (FindComputationTree(optInsn, insn) && IsPatternMatch(optInsn)) { + Insn *sxtwInsn = optInsn[4]; // The pattern must has four insns. + CHECK_FATAL(sxtwInsn->GetOperand(kInsnFourthOpnd).GetKind() == Operand::kOpdShift, "should not happened"); + auto &lsl = static_cast(sxtwInsn->GetOperand(kInsnFourthOpnd)); + Operand *sxtw = nullptr; + Operand *imm = nullptr; + int32 lslBitLenth = 3; + uint32 lslShiftAmountCaseA = 3; + uint32 lslShiftAmountCaseB = 2; + int32 oriAddEnd = 16; + if (lsl.GetShiftAmount() == lslShiftAmountCaseA) { + sxtw = &aarch64CGFunc->CreateExtendShiftOperand(ExtendShiftOperand::kSXTW, + lslShiftAmountCaseA + 1, lslBitLenth); + imm = &aarch64CGFunc->CreateImmOperand(oriAddEnd + static_cast(1ULL << lslShiftAmountCaseA), + kMaxImmVal12Bits, true); + } else if (lsl.GetShiftAmount() == lslShiftAmountCaseB) { + sxtw = &aarch64CGFunc->CreateExtendShiftOperand(ExtendShiftOperand::kSXTW, + lslShiftAmountCaseB + 1, lslBitLenth); + imm = &aarch64CGFunc->CreateImmOperand(oriAddEnd + static_cast(1ULL << lslShiftAmountCaseB), + kMaxImmVal12Bits, true); + } + Insn &newInsn = cgFunc.GetCG()->BuildInstruction(MOP_xxwaddrrre, + sxtwInsn->GetOperand(kInsnFirstOpnd), + optInsn[0]->GetOperand(kInsnSecondOpnd), + optInsn[1]->GetOperand(kInsnSecondOpnd), *sxtw); + bb.ReplaceInsn(*sxtwInsn, newInsn); + Insn &newAdd = + cgFunc.GetCG()->BuildInstruction(MOP_xaddrri12, sxtwInsn->GetOperand(kInsnFirstOpnd), + sxtwInsn->GetOperand(kInsnFirstOpnd), *imm); + (void)bb.InsertInsnAfter(newInsn, newAdd); + optInsn.clear(); + } +} + +/* + * We optimize the following pattern in this function: + * and x1, x1, #imm (is n power of 2) + * cbz/cbnz x1, .label + * => + * and x1, x1, #imm (is n power of 2) + * tbnz/tbz x1, #n, .label + */ +void OneHoleBranchesAArch64::Run(BB &bb, Insn &insn) { + AArch64CGFunc *aarch64CGFunc = static_cast(&cgFunc); + if (&insn != bb.GetLastInsn()) { + return; + } + /* check cbz/cbnz insn */ + MOperator thisMop = insn.GetMachineOpcode(); + if (thisMop != MOP_wcbz && thisMop != MOP_wcbnz && thisMop != MOP_xcbz && thisMop != MOP_xcbnz) { + return; + } + /* check and insn */ + Insn *prevInsn = insn.GetPreviousMachineInsn(); + if (prevInsn == nullptr) { + return; + } + MOperator prevMop = prevInsn->GetMachineOpcode(); + if (prevMop != MOP_wandrri12 && prevMop != MOP_xandrri13) { + return; + } + /* check opearnd of two insns */ + if (&(prevInsn->GetOperand(kInsnFirstOpnd)) != &(insn.GetOperand(kInsnFirstOpnd))) { + return; + } + auto &imm = static_cast(prevInsn->GetOperand(kInsnThirdOpnd)); + int n = logValueAtBase2(imm.GetValue()); + if (n < 0) { + return; + } + + /* replace insn */ + auto &label = static_cast(insn.GetOperand(kInsnSecondOpnd)); + MOperator newOp = MOP_undef; + switch (thisMop) { + case MOP_wcbz: + newOp = MOP_wtbz; + break; + case MOP_wcbnz: + newOp = MOP_wtbnz; + break; + case MOP_xcbz: + newOp = MOP_xtbz; + break; + case MOP_xcbnz: + newOp = MOP_xtbnz; + break; + default: + CHECK_FATAL(false, "can not touch here"); + break; + } + ImmOperand &oneHoleOpnd = aarch64CGFunc->CreateImmOperand(n, k8BitSize, false); + (void)bb.InsertInsnAfter(insn, cgFunc.GetCG()->BuildInstruction( + newOp, prevInsn->GetOperand(kInsnSecondOpnd), oneHoleOpnd, label)); + bb.RemoveInsn(insn); +} + +void ReplaceIncDecWithIncAArch64::Run(BB &bb, Insn &insn) { + if 
(insn.GetMachineOpcode() != MOP_xbl) { + return; + } + auto &target = static_cast(insn.GetOperand(kInsnFirstOpnd)); + if (target.GetName() != "MCC_IncDecRef_NaiveRCFast") { + return; + } + Insn *insnMov = insn.GetPreviousMachineInsn(); + if (insnMov == nullptr) { + return; + } + MOperator mopMov = insnMov->GetMachineOpcode(); + if (mopMov != MOP_xmovrr) { + return; + } + if (static_cast(insnMov->GetOperand(kInsnFirstOpnd)).GetRegisterNumber() != R1 || + !insnMov->GetOperand(kInsnSecondOpnd).IsZeroRegister()) { + return; + } + std::string funcName = "MCC_IncRef_NaiveRCFast"; + GStrIdx strIdx = GlobalTables::GetStrTable().GetStrIdxFromName(funcName); + MIRSymbol *st = GlobalTables::GetGsymTable().GetSymbolFromStrIdx(strIdx, true); + if (st == nullptr) { + LogInfo::MapleLogger() << "WARNING: Replace IncDec With Inc fail due to no MCC_IncRef_NaiveRCFast func\n"; + return; + } + bb.RemoveInsn(*insnMov); + target.SetFunctionSymbol(*st); +} + + +void AndCmpBranchesToTbzAArch64::Run(BB &bb, Insn &insn) { + AArch64CGFunc *aarch64CGFunc = static_cast(&cgFunc); + if (&insn != bb.GetLastInsn()) { + return; + } + MOperator mopB = insn.GetMachineOpcode(); + if (mopB != MOP_beq && mopB != MOP_bne) { + return; + } + auto &label = static_cast(insn.GetOperand(kInsnSecondOpnd)); + /* get the instruction before bne/beq, expects its type is cmp. */ + Insn *prevInsn = insn.GetPreviousMachineInsn(); + if (prevInsn == nullptr) { + return; + } + MOperator prevMop = prevInsn->GetMachineOpcode(); + if (prevMop != MOP_wcmpri && prevMop != MOP_xcmpri) { + return; + } + + /* get the instruction before "cmp", expect its type is "and". */ + Insn *prevPrevInsn = prevInsn->GetPreviousMachineInsn(); + if (prevPrevInsn == nullptr) { + return; + } + MOperator mopAnd = prevPrevInsn->GetMachineOpcode(); + if (mopAnd != MOP_wandrri12 && mopAnd != MOP_xandrri13) { + return; + } + + /* + * check operand + * + * the real register of "cmp" and "and" must be the same. + */ + if (&(prevInsn->GetOperand(kInsnSecondOpnd)) != &(prevPrevInsn->GetOperand(kInsnFirstOpnd))) { + return; + } + + int opndIdx = 2; + if (!prevPrevInsn->GetOperand(opndIdx).IsIntImmediate() || !prevInsn->GetOperand(opndIdx).IsIntImmediate()) { + return; + } + auto &immAnd = static_cast(prevPrevInsn->GetOperand(opndIdx)); + auto &immCmp = static_cast(prevInsn->GetOperand(opndIdx)); + if (immCmp.GetValue() == 0) { + int n = logValueAtBase2(immAnd.GetValue()); + if (n < 0) { + return; + } + /* judge whether the flag_reg and "w0" is live later. 
*/ + auto &flagReg = static_cast(prevInsn->GetOperand(kInsnFirstOpnd)); + auto &cmpReg = static_cast(prevInsn->GetOperand(kInsnSecondOpnd)); + if (FindRegLiveOut(flagReg, *prevInsn->GetBB()) || FindRegLiveOut(cmpReg, *prevInsn->GetBB())) { + return; + } + MOperator mopNew = MOP_undef; + switch (mopB) { + case MOP_beq: + if (mopAnd == MOP_wandrri12) { + mopNew = MOP_wtbz; + } else if (mopAnd == MOP_xandrri13) { + mopNew = MOP_xtbz; + } + break; + case MOP_bne: + if (mopAnd == MOP_wandrri12) { + mopNew = MOP_wtbnz; + } else if (mopAnd == MOP_xandrri13) { + mopNew = MOP_xtbnz; + } + break; + default: + CHECK_FATAL(false, "expects beq or bne insn"); + break; + } + ImmOperand &newImm = aarch64CGFunc->CreateImmOperand(n, k8BitSize, false); + (void)bb.InsertInsnAfter(insn, cgFunc.GetCG()->BuildInstruction(mopNew, + prevPrevInsn->GetOperand(kInsnSecondOpnd), newImm, label)); + bb.RemoveInsn(insn); + bb.RemoveInsn(*prevInsn); + bb.RemoveInsn(*prevPrevInsn); + } else { + int n = logValueAtBase2(immAnd.GetValue()); + int m = logValueAtBase2(immCmp.GetValue()); + if (n < 0 || m < 0 || n != m) { + return; + } + /* judge whether the flag_reg and "w0" is live later. */ + auto &flagReg = static_cast(prevInsn->GetOperand(kInsnFirstOpnd)); + auto &cmpReg = static_cast(prevInsn->GetOperand(kInsnSecondOpnd)); + if (FindRegLiveOut(flagReg, *prevInsn->GetBB()) || FindRegLiveOut(cmpReg, *prevInsn->GetBB())) { + return; + } + MOperator mopNew = MOP_undef; + switch (mopB) { + case MOP_beq: + if (mopAnd == MOP_wandrri12) { + mopNew = MOP_wtbnz; + } else if (mopAnd == MOP_xandrri13) { + mopNew = MOP_xtbnz; + } + break; + case MOP_bne: + if (mopAnd == MOP_wandrri12) { + mopNew = MOP_wtbz; + } else if (mopAnd == MOP_xandrri13) { + mopNew = MOP_xtbz; + } + break; + default: + CHECK_FATAL(false, "expects beq or bne insn"); + break; + } + ImmOperand &newImm = aarch64CGFunc->CreateImmOperand(n, k8BitSize, false); + (void)bb.InsertInsnAfter(insn, cgFunc.GetCG()->BuildInstruction(mopNew, + prevPrevInsn->GetOperand(kInsnSecondOpnd), newImm, label)); + bb.RemoveInsn(insn); + bb.RemoveInsn(*prevInsn); + bb.RemoveInsn(*prevPrevInsn); + } +} + +void RemoveSxtBeforeStrAArch64::Run(BB &bb , Insn &insn) { + MOperator mop = insn.GetMachineOpcode(); + Insn *prevInsn = insn.GetPreviousMachineInsn(); + if (prevInsn == nullptr) { + return; + } + MOperator prevMop = prevInsn->GetMachineOpcode(); + if (!(mop == MOP_wstrh && prevMop == MOP_xsxth32) && !(mop == MOP_wstrb && prevMop == MOP_xsxtb32)) { + return; + } + auto &prevOpnd0 = static_cast(prevInsn->GetOperand(kInsnFirstOpnd)); + if (IfOperandIsLiveAfterInsn(prevOpnd0, insn)) { + return; + } + auto &prevOpnd1 = static_cast(prevInsn->GetOperand(kInsnSecondOpnd)); + regno_t prevRegNO0 = prevOpnd0.GetRegisterNumber(); + regno_t prevRegNO1 = prevOpnd1.GetRegisterNumber(); + regno_t regNO0 = static_cast(insn.GetOperand(kInsnFirstOpnd)).GetRegisterNumber(); + if (prevRegNO0 != prevRegNO1) { + return; + } + if (prevRegNO0 == regNO0) { + bb.RemoveInsn(*prevInsn); + return; + } + insn.SetOperand(0, prevOpnd1); + bb.RemoveInsn(*prevInsn); +} +} /* namespace maplebe */ diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_proepilog.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_proepilog.cpp index eb645dd9c25aeacc0173dcd1ded1cf6ebb6b8c8a..86cf27ad818fbd8fe6064920d9944f55dd58afe4 100644 --- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_proepilog.cpp +++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_proepilog.cpp @@ -344,7 +344,9 @@ void AArch64GenProEpilog::GenStackGuard(BB &bb) { 
cgFunc.GetCurBB()->AppendInsn(insn); uint64 vArea = 0; - if (cgFunc.GetMirModule().IsCModule() && cgFunc.GetFunction().GetAttr(FUNCATTR_varargs)) { + if (cgFunc.GetMirModule().IsCModule() && + cgFunc.GetFunction().GetAttr(FUNCATTR_varargs) && + cgFunc.GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc) { AArch64MemLayout *ml = static_cast(cgFunc.GetMemlayout()); if (ml->GetSizeOfGRSaveArea() > 0) { vArea += RoundUp(ml->GetSizeOfGRSaveArea(), kAarch64StackPtrAlignment); @@ -401,7 +403,9 @@ BB &AArch64GenProEpilog::GenStackGuardCheckInsn(BB &bb) { cgFunc.GetCurBB()->AppendInsn(insn); uint64 vArea = 0; - if (cgFunc.GetMirModule().IsCModule() && cgFunc.GetFunction().GetAttr(FUNCATTR_varargs)) { + if (cgFunc.GetMirModule().IsCModule() && + cgFunc.GetFunction().GetAttr(FUNCATTR_varargs) && + cgFunc.GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc) { AArch64MemLayout *ml = static_cast(cgFunc.GetMemlayout()); if (ml->GetSizeOfGRSaveArea() > 0) { vArea += RoundUp(ml->GetSizeOfGRSaveArea(), kAarch64StackPtrAlignment); @@ -1115,9 +1119,6 @@ void AArch64GenProEpilog::AppendInstructionAllocateCallFrameDebug(AArch64reg reg ipoint = cgFunc.GetCurBB()->GetLastInsn(); cfiOffset = stackFrameSize; (void)InsertCFIDefCfaOffset(cfiOffset, *ipoint); - if (cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc) { - argsToStkPassSize -= (kDivide2 * k8ByteSize); - } ipoint = &CreateAndAppendInstructionForAllocateCallFrame(argsToStkPassSize, reg0, reg1, rty); CHECK_FATAL(ipoint != nullptr, "ipoint should not be nullptr at this point"); cfiOffset = GetOffsetFromCFA(); @@ -1256,8 +1257,10 @@ void AArch64GenProEpilog::GeneratePushRegs() { AArch64MemLayout *memLayout = static_cast(cgFunc.GetMemlayout()); int32 offset; if (cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc) { - offset = static_cast((memLayout->RealStackFrameSize() - - aarchCGFunc.SizeOfCalleeSaved()) - memLayout->GetSizeOfLocals()); + offset = static_cast(memLayout->RealStackFrameSize() - + (aarchCGFunc.SizeOfCalleeSaved() - (kDivide2 * kIntregBytelen)/* FP/LR */) - + memLayout->GetSizeOfLocals()); /* SizeOfArgsToStackPass not deducted since + AdjustmentStackPointer() is not called for lmbc */ } else { offset = static_cast((memLayout->RealStackFrameSize() - (aarchCGFunc.SizeOfCalleeSaved() - (kDivide2 * kIntregBytelen))) - memLayout->SizeOfArgsToStackPass()); /* for FP/LR */ @@ -1267,7 +1270,9 @@ void AArch64GenProEpilog::GeneratePushRegs() { offset -= kAarch64StackPtrAlignmentInt; } - if (cgFunc.GetMirModule().IsCModule() && cgFunc.GetFunction().GetAttr(FUNCATTR_varargs)) { + if (cgFunc.GetMirModule().IsCModule() && + cgFunc.GetFunction().GetAttr(FUNCATTR_varargs) && + cgFunc.GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc) { /* GR/VR save areas are above the callee save area */ AArch64MemLayout *ml = static_cast(cgFunc.GetMemlayout()); auto saveareasize = static_cast(RoundUp(ml->GetSizeOfGRSaveArea(), kSizeOfPtr * k2BitSize) + @@ -1312,6 +1317,7 @@ void AArch64GenProEpilog::GeneratePushRegs() { void AArch64GenProEpilog::GeneratePushUnnamedVarargRegs() { auto &aarchCGFunc = static_cast(cgFunc); CG *currCG = cgFunc.GetCG(); + uint32 offset; if (cgFunc.GetMirModule().IsCModule() && cgFunc.GetFunction().GetAttr(FUNCATTR_varargs)) { AArch64MemLayout *memlayout = static_cast(cgFunc.GetMemlayout()); uint8 size; @@ -1321,34 +1327,25 @@ void AArch64GenProEpilog::GeneratePushUnnamedVarargRegs() { size = kSizeOfPtr; } uint32 dataSizeBits = size * kBitsPerByte; - uint32 offset; if (cgFunc.GetMirModule().GetFlavor() != 
MIRFlavor::kFlavorLmbc) {
       offset = static_cast<uint32>(memlayout->GetGRSaveAreaBaseLoc()); /* SP reference */
-      if ((memlayout->GetSizeOfGRSaveArea() % kAarch64StackPtrAlignment) > 0) {
-        offset += size;  /* End of area should be aligned. Hole between VR and GR area */
-      }
     } else {
-      offset = (UINT32_MAX - memlayout->GetSizeOfGRSaveArea()) + 1;  /* FP reference */
-      if ((memlayout->GetSizeOfGRSaveArea() % kAarch64StackPtrAlignment) > 0) {
-        offset -= size;
-      }
+      offset = static_cast<uint32>(memlayout->GetGRSaveAreaBaseLoc()) +
+               memlayout->SizeOfArgsToStackPass();
+    }
+    if (memlayout->GetSizeOfGRSaveArea() % kAarch64StackPtrAlignment) {
+      offset += size;  /* End of area should be aligned. Hole between VR and GR area */
     }
-    uint32 grSize = (UINT32_MAX - offset) + 1;
-    uint32 startRegno = k8BitSize - (memlayout->GetSizeOfGRSaveArea() / size);
-    ASSERT(startRegno <= k8BitSize, "Incorrect starting GR regno for GR Save Area");
-    for (uint32 i = startRegno + static_cast<uint32>(R0); i < static_cast<uint32>(R8); i++) {
+    uint32 start_regno = k8BitSize - (memlayout->GetSizeOfGRSaveArea() / size);
+    ASSERT(start_regno <= k8BitSize, "Incorrect starting GR regno for GR Save Area");
+    for (uint32 i = start_regno + static_cast<uint32>(R0); i < static_cast<uint32>(R8); i++) {
       uint32 tmpOffset = 0;
       if (CGOptions::IsBigEndian()) {
         if ((dataSizeBits >> 3) < 8) {
           tmpOffset += 8U - (dataSizeBits >> 3);
         }
       }
-      Operand *stackLoc;
-      if (cgFunc.GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc) {
-        stackLoc = &aarchCGFunc.CreateStkTopOpnd(offset + tmpOffset, dataSizeBits);
-      } else {
-        stackLoc = aarchCGFunc.GenLmbcFpMemOperand(offset, size);
-      }
+      Operand *stackLoc = &aarchCGFunc.CreateStkTopOpnd(offset + tmpOffset, dataSizeBits);
       RegOperand &reg =
           aarchCGFunc.GetOrCreatePhysicalRegisterOperand(static_cast<AArch64reg>(i), k64BitSize, kRegTyInt);
       Insn &inst =
@@ -1358,25 +1355,21 @@ void AArch64GenProEpilog::GeneratePushUnnamedVarargRegs() {
     }
     if (!CGOptions::UseGeneralRegOnly()) {
       if (cgFunc.GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc) {
-        offset = static_cast<uint32>(memlayout->GetVRSaveAreaBaseLoc());
+        offset = static_cast<uint32>(memlayout->GetVRSaveAreaBaseLoc());  /* SP reference */
       } else {
-        offset = (UINT32_MAX - (memlayout->GetSizeOfVRSaveArea() + grSize)) + 1;
+        offset = static_cast<uint32>(memlayout->GetVRSaveAreaBaseLoc()) +
+                 memlayout->SizeOfArgsToStackPass();
       }
-      startRegno = k8BitSize - (memlayout->GetSizeOfVRSaveArea() / (size * k2BitSize));
-      ASSERT(startRegno <= k8BitSize, "Incorrect starting GR regno for VR Save Area");
-      for (uint32 i = startRegno + static_cast<uint32>(V0); i < static_cast<uint32>(V8); i++) {
+      start_regno = k8BitSize - (memlayout->GetSizeOfVRSaveArea() / (size * k2BitSize));
+      ASSERT(start_regno <= k8BitSize, "Incorrect starting GR regno for VR Save Area");
+      for (uint32 i = start_regno + static_cast<uint32>(V0); i < static_cast<uint32>(V8); i++) {
         uint32 tmpOffset = 0;
         if (CGOptions::IsBigEndian()) {
           if ((dataSizeBits >> 3) < 16) {
             tmpOffset += 16U - (dataSizeBits >> 3);
           }
         }
-        Operand *stackLoc;
-        if (cgFunc.GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc) {
-          stackLoc = &aarchCGFunc.CreateStkTopOpnd(offset + tmpOffset, dataSizeBits);
-        } else {
-          stackLoc = aarchCGFunc.GenLmbcFpMemOperand(static_cast<int32>(offset), size);
-        }
+        Operand *stackLoc = &aarchCGFunc.CreateStkTopOpnd(offset + tmpOffset, dataSizeBits);
         RegOperand &reg =
             aarchCGFunc.GetOrCreatePhysicalRegisterOperand(static_cast<AArch64reg>(i), k64BitSize, kRegTyFloat);
         Insn &inst =
@@ -1684,13 +1677,13 @@ void AArch64GenProEpilog::AppendInstructionDeallocateCallFrameDebug(AArch64reg r
   * ldp/stp's imm should be within -512 and 504;
   * if ldp's imm > 504, we fall back to the ldp-add version
   */
-  bool isLmbc = (cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc);
+  bool isLmbc = cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc;
   if (cgFunc.HasVLAOrAlloca() || argsToStkPassSize == 0 || isLmbc) {
     int32 lmbcOffset = 0;
     if (!isLmbc) {
       stackFrameSize -= argsToStkPassSize;
     } else {
-      lmbcOffset = argsToStkPassSize - (kDivide2 * k8ByteSizeInt);
+      lmbcOffset = argsToStkPassSize;
     }
     if (stackFrameSize > kStpLdpImm64UpperBound || isLmbc) {
       Operand *o2;
@@ -1771,8 +1764,10 @@ void AArch64GenProEpilog::GeneratePopRegs() {
   AArch64MemLayout *memLayout = static_cast<AArch64MemLayout*>(cgFunc.GetMemlayout());
   int32 offset;
   if (cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc) {
-    offset = static_cast<int32>((memLayout->RealStackFrameSize() -
-        aarchCGFunc.SizeOfCalleeSaved()) - memLayout->GetSizeOfLocals());
+    offset = static_cast<int32>(memLayout->RealStackFrameSize() -
+        (aarchCGFunc.SizeOfCalleeSaved() - (kDivide2 * kIntregBytelen)/* FP/LR */) -
+        memLayout->GetSizeOfLocals());  /* SizeOfArgsToStackPass not deducted since
+                                           AdjustmentStackPointer() is not called for lmbc */
   } else {
     offset = static_cast<int32>((static_cast<AArch64MemLayout*>(cgFunc.GetMemlayout())->RealStackFrameSize() -
         (aarchCGFunc.SizeOfCalleeSaved() - (kDivide2 * kIntregBytelen))) -  /* for FP/LR */
@@ -1783,7 +1778,8 @@ void AArch64GenProEpilog::GeneratePopRegs() {
     offset -= kAarch64StackPtrAlignmentInt;
   }
 
-  if (cgFunc.GetMirModule().IsCModule() && cgFunc.GetFunction().GetAttr(FUNCATTR_varargs)) {
+  if (cgFunc.GetMirModule().IsCModule() && cgFunc.GetFunction().GetAttr(FUNCATTR_varargs) &&
+      cgFunc.GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc) {
     /* GR/VR save areas are above the callee save area */
     AArch64MemLayout *ml = static_cast<AArch64MemLayout*>(cgFunc.GetMemlayout());
     auto saveareasize = static_cast<int32>(RoundUp(ml->GetSizeOfGRSaveArea(), kSizeOfPtr * k2BitSize) +
diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_proepilog.cpp+ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_proepilog.cpp+
new file mode 100644
index 0000000000000000000000000000000000000000..cb6ff651c68f33048edc40daf398f597c5ce59f7
--- /dev/null
+++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_proepilog.cpp+
@@ -0,0 +1,2062 @@
+/*
+ * Copyright (c) [2020-2021] Huawei Technologies Co.,Ltd.All rights reserved.
+ *
+ * OpenArkCompiler is licensed under Mulan PSL v2.
+ * You can use this software according to the terms and conditions of the Mulan PSL v2.
+ * You may obtain a copy of Mulan PSL v2 at:
+ *
+ *     http://license.coscl.org.cn/MulanPSL2
+ *
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR
+ * FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PSL v2 for more details.
+ */ +#include "aarch64_proepilog.h" +#include "cg_option.h" +#include "cgfunc.h" + +namespace maplebe { +using namespace maple; + +namespace { +const std::set kFrameWhiteListFunc { +#include "framewhitelist.def" +}; + +bool IsFuncNeedFrame(const std::string &funcName) { + return kFrameWhiteListFunc.find(funcName) != kFrameWhiteListFunc.end(); +} +constexpr int32 kSoeChckOffset = 8192; + +enum RegsPushPop : uint8 { + kRegsPushOp, + kRegsPopOp +}; + +enum PushPopType : uint8 { + kPushPopSingle = 0, + kPushPopPair = 1 +}; + +MOperator pushPopOps[kRegsPopOp + 1][kRegTyFloat + 1][kPushPopPair + 1] = { + { /* push */ + { 0 /* undef */ }, + { /* kRegTyInt */ + MOP_xstr, /* single */ + MOP_xstp, /* pair */ + }, + { /* kRegTyFloat */ + MOP_dstr, /* single */ + MOP_dstp, /* pair */ + }, + }, + { /* pop */ + { 0 /* undef */ }, + { /* kRegTyInt */ + MOP_xldr, /* single */ + MOP_xldp, /* pair */ + }, + { /* kRegTyFloat */ + MOP_dldr, /* single */ + MOP_dldp, /* pair */ + }, + } +}; + +inline void AppendInstructionTo(Insn &insn, CGFunc &func) { + func.GetCurBB()->AppendInsn(insn); +} +} + +bool AArch64GenProEpilog::HasLoop() { + FOR_ALL_BB(bb, &cgFunc) { + if (bb->IsBackEdgeDest()) { + return true; + } + FOR_BB_INSNS_REV(insn, bb) { + if (!insn->IsMachineInstruction()) { + continue; + } + if (insn->HasLoop()) { + return true; + } + } + } + return false; +} + +/* + * Remove redundant mov and mark optimizable bl/blr insn in the BB. + * Return value: true to call this modified block again. + */ +bool AArch64GenProEpilog::OptimizeTailBB(BB &bb, std::set &callInsns, const BB &exitBB) { + if (bb.NumInsn() == 1 && + (bb.GetLastInsn()->GetMachineOpcode() != MOP_xbr && + bb.GetLastInsn()->GetMachineOpcode() != MOP_xblr && + bb.GetLastInsn()->GetMachineOpcode() != MOP_xbl && + bb.GetLastInsn()->GetMachineOpcode() != MOP_xuncond)) { + return false; + } + FOR_BB_INSNS_REV_SAFE(insn, &bb, prev_insn) { + if (!insn->IsMachineInstruction() || insn->IsPseudoInstruction()) { + continue; + } + MOperator insnMop = insn->GetMachineOpcode(); + switch (insnMop) { + case MOP_xldr: + case MOP_xldp: + case MOP_dldr: + case MOP_dldp: { + if (bb.GetKind() == BB::kBBReturn) { + RegOperand ® = static_cast(insn->GetOperand(0)); + if (AArch64Abi::IsCalleeSavedReg(static_cast(reg.GetRegisterNumber()))) { + break; /* inserted restore from calleeregs-placement, ignore */ + } + } + return false; + } + case MOP_wmovrr: + case MOP_xmovrr: { + CHECK_FATAL(insn->GetOperand(0).IsRegister(), "operand0 is not register"); + CHECK_FATAL(insn->GetOperand(1).IsRegister(), "operand1 is not register"); + auto ®1 = static_cast(insn->GetOperand(0)); + auto ®2 = static_cast(insn->GetOperand(1)); + + if (reg1.GetRegisterNumber() != R0 || reg2.GetRegisterNumber() != R0) { + return false; + } + + bb.RemoveInsn(*insn); + break; + } + case MOP_xblr: { + if (insn->GetOperand(0).IsRegister()) { + RegOperand ® = static_cast(insn->GetOperand(0)); + if (AArch64Abi::IsCalleeSavedReg(static_cast(reg.GetRegisterNumber()))) { + return false; /* can't tailcall, register will be overwritten by restore */ + } + } + /* flow through */ + } + [[clang::fallthrough]]; + case MOP_xbl: { + callInsns.insert(insn); + return false; + } + case MOP_xuncond: { + LabelOperand &bLab = static_cast(insn->GetOperand(0)); + if (exitBB.GetLabIdx() == bLab.GetLabelIndex()) { + break; + } + return false; + } + default: + return false; + } + } + + return true; +} + +/* Recursively invoke this function for all predecessors of exitBB */ +void AArch64GenProEpilog::TailCallBBOpt(BB &bb, std::set 
&callInsns, BB &exitBB) { + /* callsite also in the return block as in "if () return; else foo();" + call in the exit block */ + if (!bb.IsEmpty() && !OptimizeTailBB(bb, callInsns, exitBB)) { + return; + } + + for (auto tmpBB : bb.GetPreds()) { + if (tmpBB->GetSuccs().size() != 1 || !tmpBB->GetEhSuccs().empty() || + (tmpBB->GetKind() != BB::kBBFallthru && tmpBB->GetKind() != BB::kBBGoto)) { + continue; + } + + if (OptimizeTailBB(*tmpBB, callInsns, exitBB)) { + TailCallBBOpt(*tmpBB, callInsns, exitBB); + } + } +} + +/* + * If a function without callee-saved register, and end with a function call, + * then transfer bl/blr to b/br. + * Return value: true if function do not need Prologue/Epilogue. false otherwise. + */ +bool AArch64GenProEpilog::TailCallOpt() { + if (cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc) { + return false; + } + /* Count how many call insns in the whole function. */ + uint32 nCount = 0; + bool hasGetStackClass = false; + + FOR_ALL_BB(bb, &cgFunc) { + FOR_BB_INSNS(insn, bb) { + if (insn->IsCall()) { + if (insn->GetMachineOpcode() == MOP_xbl) { + auto &target = static_cast(insn->GetOperand(0)); + if (IsFuncNeedFrame(target.GetName())) { + hasGetStackClass = true; + } + } + ++nCount; + } + } + } + if ((nCount > 0 && cgFunc.GetFunction().GetAttr(FUNCATTR_interface)) || hasGetStackClass) { + return false; + } + + if (nCount == 0) { + // no bl instr in any bb + return true; + } + + size_t exitBBSize = cgFunc.GetExitBBsVec().size(); + /* For now to reduce complexity */ + + BB *exitBB = nullptr; + if (exitBBSize == 0) { + if (cgFunc.GetLastBB()->GetPrev()->GetFirstStmt() == cgFunc.GetCleanupLabel() && + cgFunc.GetLastBB()->GetPrev()->GetPrev() != nullptr) { + exitBB = cgFunc.GetLastBB()->GetPrev()->GetPrev(); + } else { + exitBB = cgFunc.GetLastBB()->GetPrev(); + } + } else { + exitBB = cgFunc.GetExitBBsVec().front(); + } + uint32 i = 1; + size_t optCount = 0; + do { + std::set callInsns; + TailCallBBOpt(*exitBB, callInsns, *exitBB); + if (callInsns.size() != 0) { + optCount += callInsns.size(); + exitBB2CallSitesMap[exitBB] = callInsns; + } + if (i < exitBBSize) { + exitBB = cgFunc.GetExitBBsVec()[i]; + ++i; + } else { + break; + } + } while(1); + + /* regular calls exist in function */ + if (nCount != optCount) { + return false; + } + return true; +} + +static bool IsAddOrSubOp(MOperator mOp) { + switch (mOp) { + case MOP_xaddrrr: + case MOP_xaddrrrs: + case MOP_xxwaddrrre: + case MOP_xaddrri24: + case MOP_xaddrri12: + case MOP_xsubrrr: + case MOP_xsubrrrs: + case MOP_xxwsubrrre: + case MOP_xsubrri12: + return true; + default: + return false; + } +} + +/* tailcallopt cannot be used if stack address of this function is taken and passed, + not checking the passing for now, just taken */ +static bool IsStackAddrTaken(CGFunc &cgFunc) { + FOR_ALL_BB(bb, &cgFunc) { + FOR_BB_INSNS_REV(insn, bb) { + if (IsAddOrSubOp(insn->GetMachineOpcode())) { + for (uint32 i = 0; i < insn->GetOperandSize(); i++) { + if (insn->GetOperand(i).IsRegister()) { + RegOperand ® = static_cast(insn->GetOperand(i)); + if (reg.GetRegisterNumber() == R29 || reg.GetRegisterNumber() == R31 || reg.GetRegisterNumber() == RSP) { + return true; + } + } + } + } + } + } + return false; +} + +bool AArch64GenProEpilog::NeedProEpilog() { + if (cgFunc.GetMirModule().GetSrcLang() != kSrcLangC) { + return true; + } else if (cgFunc.GetFunction().GetAttr(FUNCATTR_varargs) || cgFunc.HasVLAOrAlloca()) { + return true; + } + bool funcHasCalls = false; + if (cgFunc.GetCG()->DoTailCall() && !IsStackAddrTaken(cgFunc)) 
{ + funcHasCalls = !TailCallOpt(); // return value == "no call instr/only or 1 tailcall" + } else { + FOR_ALL_BB(bb, &cgFunc) { + FOR_BB_INSNS_REV(insn, bb) { + if (insn->IsCall()) { + funcHasCalls = true; + } + } + } + } + auto &aarchCGFunc = static_cast(cgFunc); + const MapleVector ®sToRestore = (!CGOptions::DoRegSavesOpt()) ? + aarchCGFunc.GetCalleeSavedRegs() : aarchCGFunc.GetProEpilogSavedRegs(); + size_t calleeSavedRegSize = kTwoRegister; + CHECK_FATAL(regsToRestore.size() >= calleeSavedRegSize, "Forgot FP and LR ?"); + if (funcHasCalls || regsToRestore.size() > calleeSavedRegSize || aarchCGFunc.HasStackLoadStore() || + static_cast(cgFunc.GetMemlayout())->GetSizeOfLocals() > 0 || + cgFunc.GetFunction().GetAttr(FUNCATTR_callersensitive)) { + return true; + } + return false; +} + +void AArch64GenProEpilog::GenStackGuard(BB &bb) { + auto &aarchCGFunc = static_cast(cgFunc); + CG *currCG = cgFunc.GetCG(); + if (currCG->AddStackGuard()) { + BB *formerCurBB = cgFunc.GetCurBB(); + aarchCGFunc.GetDummyBB()->ClearInsns(); + aarchCGFunc.GetDummyBB()->SetIsProEpilog(true); + cgFunc.SetCurBB(*aarchCGFunc.GetDummyBB()); + + MIRSymbol *stkGuardSym = GlobalTables::GetGsymTable().GetSymbolFromStrIdx( + GlobalTables::GetStrTable().GetStrIdxFromName(std::string("__stack_chk_guard"))); + StImmOperand &stOpnd = aarchCGFunc.CreateStImmOperand(*stkGuardSym, 0, 0); + RegOperand &stAddrOpnd = + aarchCGFunc.GetOrCreatePhysicalRegisterOperand(R9, kSizeOfPtr * kBitsPerByte, kRegTyInt); + aarchCGFunc.SelectAddrof(stAddrOpnd, stOpnd); + + MemOperand *guardMemOp = + aarchCGFunc.CreateMemOperand(MemOperand::kAddrModeBOi, kSizeOfPtr * kBitsPerByte, + stAddrOpnd, nullptr, &aarchCGFunc.GetOrCreateOfstOpnd(0, k32BitSize), stkGuardSym); + MOperator mOp = aarchCGFunc.PickLdInsn(k64BitSize, PTY_u64); + Insn &insn = currCG->BuildInstruction(mOp, stAddrOpnd, *guardMemOp); + insn.SetDoNotRemove(true); + cgFunc.GetCurBB()->AppendInsn(insn); + + uint64 vArea = 0; + if (cgFunc.GetMirModule().IsCModule() && cgFunc.GetFunction().GetAttr(FUNCATTR_varargs)) { + AArch64MemLayout *ml = static_cast(cgFunc.GetMemlayout()); + if (ml->GetSizeOfGRSaveArea() > 0) { + vArea += RoundUp(ml->GetSizeOfGRSaveArea(), kAarch64StackPtrAlignment); + } + if (ml->GetSizeOfVRSaveArea() > 0) { + vArea += RoundUp(ml->GetSizeOfVRSaveArea(), kAarch64StackPtrAlignment); + } + } + + int32 stkSize = static_cast(static_cast(cgFunc.GetMemlayout())->RealStackFrameSize()); + if (useFP) { + stkSize -= static_cast(static_cast(cgFunc.GetMemlayout())->SizeOfArgsToStackPass()); + } + int32 memSize = stkSize - kOffset8MemPos - static_cast(vArea); + MemOperand *downStk = aarchCGFunc.CreateStackMemOpnd(stackBaseReg, memSize, kSizeOfPtr * kBitsPerByte); + if (downStk->GetMemVaryType() == kNotVary && + aarchCGFunc.IsImmediateOffsetOutOfRange(*downStk, k64BitSize)) { + downStk = &aarchCGFunc.SplitOffsetWithAddInstruction(*downStk, k64BitSize, R10); + } + mOp = aarchCGFunc.PickStInsn(kSizeOfPtr * kBitsPerByte, PTY_u64); + Insn &tmpInsn = currCG->BuildInstruction(mOp, stAddrOpnd, *downStk); + tmpInsn.SetDoNotRemove(true); + cgFunc.GetCurBB()->AppendInsn(tmpInsn); + + bb.InsertAtBeginning(*aarchCGFunc.GetDummyBB()); + aarchCGFunc.GetDummyBB()->SetIsProEpilog(false); + cgFunc.SetCurBB(*formerCurBB); + } +} + +BB &AArch64GenProEpilog::GenStackGuardCheckInsn(BB &bb) { + CG *currCG = cgFunc.GetCG(); + if (!currCG->AddStackGuard()) { + return bb; + } + + BB *formerCurBB = cgFunc.GetCurBB(); + cgFunc.GetDummyBB()->ClearInsns(); + cgFunc.SetCurBB(*(cgFunc.GetDummyBB())); + auto 
&aarchCGFunc = static_cast(cgFunc); + + const MIRSymbol *stkGuardSym = GlobalTables::GetGsymTable().GetSymbolFromStrIdx( + GlobalTables::GetStrTable().GetStrIdxFromName(std::string("__stack_chk_guard"))); + StImmOperand &stOpnd = aarchCGFunc.CreateStImmOperand(*stkGuardSym, 0, 0); + RegOperand &stAddrOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(R9, kSizeOfPtr * kBitsPerByte, + kRegTyInt); + aarchCGFunc.SelectAddrof(stAddrOpnd, stOpnd); + + MemOperand *guardMemOp = aarchCGFunc.CreateMemOperand(MemOperand::kAddrModeBOi, + kSizeOfPtr * kBitsPerByte, stAddrOpnd, nullptr, + &aarchCGFunc.GetOrCreateOfstOpnd(0, k32BitSize), + stkGuardSym); + MOperator mOp = aarchCGFunc.PickLdInsn(k64BitSize, PTY_u64); + Insn &insn = currCG->BuildInstruction(mOp, stAddrOpnd, *guardMemOp); + insn.SetDoNotRemove(true); + cgFunc.GetCurBB()->AppendInsn(insn); + + uint64 vArea = 0; + if (cgFunc.GetMirModule().IsCModule() && cgFunc.GetFunction().GetAttr(FUNCATTR_varargs)) { + AArch64MemLayout *ml = static_cast(cgFunc.GetMemlayout()); + if (ml->GetSizeOfGRSaveArea() > 0) { + vArea += RoundUp(ml->GetSizeOfGRSaveArea(), kAarch64StackPtrAlignment); + } + if (ml->GetSizeOfVRSaveArea() > 0) { + vArea += RoundUp(ml->GetSizeOfVRSaveArea(), kAarch64StackPtrAlignment); + } + } + + RegOperand &checkOp = + aarchCGFunc.GetOrCreatePhysicalRegisterOperand(R10, kSizeOfPtr * kBitsPerByte, kRegTyInt); + int32 stkSize = static_cast(static_cast(cgFunc.GetMemlayout())->RealStackFrameSize()); + if (useFP) { + stkSize -= static_cast(static_cast(cgFunc.GetMemlayout())->SizeOfArgsToStackPass()); + } + uint32 memSize = stkSize - kOffset8MemPos - static_cast(vArea); + MemOperand *downStk = aarchCGFunc.CreateStackMemOpnd(stackBaseReg, memSize, kSizeOfPtr * kBitsPerByte); + if (downStk->GetMemVaryType() == kNotVary && aarchCGFunc.IsImmediateOffsetOutOfRange(*downStk, k64BitSize)) { + downStk = &aarchCGFunc.SplitOffsetWithAddInstruction(*downStk, k64BitSize, R10); + } + mOp = aarchCGFunc.PickLdInsn(kSizeOfPtr * kBitsPerByte, PTY_u64); + Insn &newInsn = currCG->BuildInstruction(mOp, checkOp, *downStk); + newInsn.SetDoNotRemove(true); + cgFunc.GetCurBB()->AppendInsn(newInsn); + + cgFunc.SelectBxor(stAddrOpnd, stAddrOpnd, checkOp, PTY_u64); + LabelIdx failLable = aarchCGFunc.CreateLabel(); + aarchCGFunc.SelectCondGoto(aarchCGFunc.GetOrCreateLabelOperand(failLable), OP_brtrue, OP_eq, + stAddrOpnd, aarchCGFunc.CreateImmOperand(0, k64BitSize, false), PTY_u64, false); + + MIRSymbol *failFunc = GlobalTables::GetGsymTable().GetSymbolFromStrIdx( + GlobalTables::GetStrTable().GetStrIdxFromName(std::string("__stack_chk_fail"))); + ListOperand *srcOpnds = aarchCGFunc.CreateListOpnd(*cgFunc.GetFuncScopeAllocator()); + Insn &callInsn = aarchCGFunc.AppendCall(*failFunc, *srcOpnds); + callInsn.SetDoNotRemove(true); + + bb.AppendBBInsns(*(cgFunc.GetCurBB())); + + BB *newBB = cgFunc.CreateNewBB(failLable, bb.IsUnreachable(), bb.GetKind(), bb.GetFrequency()); + bb.AppendBB(*newBB); + if (cgFunc.GetLastBB() == &bb) { + cgFunc.SetLastBB(*newBB); + } + bb.SetKind(BB::kBBFallthru); + bb.PushBackSuccs(*newBB); + newBB->PushBackPreds(bb); + + cgFunc.SetCurBB(*formerCurBB); + return *newBB; +} + +bool AArch64GenProEpilog::InsertOpndRegs(Operand &op, std::set &vecRegs) { + Operand *opnd = &op; + CHECK_FATAL(opnd != nullptr, "opnd is nullptr in InsertRegs"); + if (opnd->IsList()) { + MapleList pregList = static_cast(opnd)->GetOperands(); + for (auto *preg : pregList) { + if (preg != nullptr) { + vecRegs.insert(preg->GetRegisterNumber()); + } + } + } + if 
(opnd->IsMemoryAccessOperand()) { /* the registers of kOpdMem are complex to be detected */ + RegOperand *baseOpnd = static_cast(opnd)->GetBaseRegister(); + if (baseOpnd != nullptr) { + vecRegs.insert(baseOpnd->GetRegisterNumber()); + } + RegOperand *indexOpnd = static_cast(opnd)->GetIndexRegister(); + if (indexOpnd != nullptr) { + vecRegs.insert(indexOpnd->GetRegisterNumber()); + } + } + if (opnd->IsRegister()) { + RegOperand *preg = static_cast(opnd); + if (preg != nullptr) { + vecRegs.insert(preg->GetRegisterNumber()); + } + } + return true; +} + +bool AArch64GenProEpilog::InsertInsnRegs(Insn &insn, bool insertSource, std::set &vecSourceRegs, + bool insertTarget, std::set &vecTargetRegs){ + Insn *curInsn = &insn; + for (uint32 o = 0; o < curInsn->GetOperandSize(); ++o) { + Operand &opnd = curInsn->GetOperand(o); + if (insertSource == true && curInsn->OpndIsUse(o)) { + InsertOpndRegs(opnd, vecSourceRegs); + } + if (insertTarget == true && curInsn->OpndIsDef(o)) { + InsertOpndRegs(opnd, vecTargetRegs); + } + } + return true; +} + +bool AArch64GenProEpilog::FindRegs(Operand &op, std::set &vecRegs) { + Operand *opnd = &op; + if (opnd == nullptr || vecRegs.empty()) { + return false; + } + if (opnd->IsList()) { + MapleList pregList = static_cast(opnd)->GetOperands(); + for (auto *preg : pregList) { + if (preg->GetRegisterNumber() == R29 || + vecRegs.find(preg->GetRegisterNumber()) != vecRegs.end()) { + return true; /* the opReg will overwrite or reread the vecRegs */ + } + } + } + if (opnd->IsMemoryAccessOperand()) { /* the registers of kOpdMem are complex to be detected */ + RegOperand *baseOpnd = static_cast(opnd)->GetBaseRegister(); + RegOperand *indexOpnd = static_cast(opnd)->GetIndexRegister(); + if ((baseOpnd != nullptr && baseOpnd->GetRegisterNumber() == R29) || + (indexOpnd != nullptr && indexOpnd->GetRegisterNumber() == R29)) { + return true; /* Avoid modifying data on the stack */ + } + if ((baseOpnd != nullptr && vecRegs.find(baseOpnd->GetRegisterNumber()) != vecRegs.end()) || + (indexOpnd != nullptr && vecRegs.find(indexOpnd->GetRegisterNumber()) != vecRegs.end())) { + return true; + } + } + if (opnd->IsRegister()) { + RegOperand *regOpnd = static_cast(opnd); + if (regOpnd->GetRegisterNumber() == R29 || + vecRegs.find(regOpnd->GetRegisterNumber()) != vecRegs.end()) { + return true; /* dst is a target register, result_dst is a target register */ + } + } + return false; +} + +bool AArch64GenProEpilog::BackwardFindDependency(BB &ifbb, std::set &vecReturnSourceRegs, + std::list &existingInsns, + std::list &moveInsns) { + /* + * Pattern match,(*) instruction are moved down below branch. + * ******************** + * curInsn: + * in predBB + * in ifBB + * in returnBB + * ********************* + * list: the insns can be moved into the coldBB + * (1) the instruction is neither a branch nor a call, except for the ifbb.GetLastInsn() + * As long as a branch insn exists, + * the fast path finding fails and the return value is false, + * but the code sinking can be continued. + * (2) the predBB is not a ifBB, + * As long as a ifBB in preds exists, + * the code sinking fails, + * but fast path finding can be continued. 
+ * (3) the targetRegs of insns in existingInsns can neither be reread or overwrite + * (4) the sourceRegs of insns in existingInsns can not be overwrite + * (5) the sourceRegs of insns in returnBB can neither be reread or overwrite + * (6) the targetRegs and sourceRegs cannot be R29 R30, to protect the stack + * (7) modified the reg when: + * -------------- + * curInsn: move R2,R1 + * : s s s + * s s s + * -> s s s + * ------------ + * (a) all targets cannot be R1, all sources cannot be R1 + * all targets cannot be R2, all return sources cannot be R2 + * (b) the targetRegs and sourceRegs cannot be list or MemoryAccess + * (c) no ifBB in preds, no branch insns + * (d) the bits of source-R2 must be equal to the R2 + * (e) replace the R2 with R1 + */ + BB *pred = &ifbb; + std::set vecTargetRegs; /* the targrtRegs of existingInsns */ + std::set vecSourceRegs; /* the soureRegs of existingInsns */ + bool ifPred = false; /* Indicates whether a ifBB in pred exists */ + bool bl = false; /* Indicates whether a branch insn exists */ + do { + FOR_BB_INSNS_REV(insn, pred) { + /* code sinking fails, the insns must be retained in the ifBB */ + if (ifPred || insn == ifbb.GetLastInsn() || insn->IsBranch() || insn->IsCall() || + insn->IsStore() || insn->IsStorePair()) { + /* fast path finding fails */ + if (insn != ifbb.GetLastInsn() && (insn->IsBranch() || insn->IsCall() || + insn->IsStore() || insn->IsStorePair())) { + bl = true; + } + InsertInsnRegs(*insn, true, vecSourceRegs, true, vecTargetRegs); + existingInsns.push_back(insn); + continue; + } + /* code sinking */ + if (insn->IsImmaterialInsn()) { + moveInsns.push_back(insn); + continue; + } + /* code sinking */ + if (!insn->IsMachineInstruction()) { + moveInsns.push_back(insn); + continue; + } + bool allow = true; /* whether allow this insn move into the codeBB */ + for (uint32 o = 0; allow && o < insn->GetOperandSize(); ++o) { + Operand &opnd = insn->GetOperand(o); + if (insn->OpndIsDef(o)) { + allow = allow & !FindRegs(opnd, vecTargetRegs); + allow = allow & !FindRegs(opnd, vecSourceRegs); + allow = allow & !FindRegs(opnd, vecReturnSourceRegs); + } + if (insn->OpndIsUse(o)) { + allow = allow & !FindRegs(opnd, vecTargetRegs); + } + } + /* if a result_dst not allowed, this insn can be allowed on the condition of mov Rx,R0/R1, + * and tje existing insns cannot be blr + * RLR 31, RFP 32, RSP 33, RZR 34 */ + if (!ifPred && !bl && !allow && (insn->GetMachineOpcode() == MOP_xmovrr || + insn->GetMachineOpcode() == MOP_wmovrr)) { + Operand *resultOpnd = &(insn->GetOperand(0)); + Operand *srcOpnd = &(insn->GetOperand(1)); + regno_t resultNO = static_cast(resultOpnd)->GetRegisterNumber(); + regno_t srcNO = static_cast(srcOpnd)->GetRegisterNumber(); + if (!FindRegs(*resultOpnd, vecTargetRegs) && !FindRegs(*srcOpnd, vecTargetRegs) && + !FindRegs(*srcOpnd, vecSourceRegs) && !FindRegs(*srcOpnd, vecReturnSourceRegs) && + (srcNO < RLR || srcNO > RZR)) { + allow = true; /* allow on the conditional mov Rx,Rxx */ + for (auto *exit : existingInsns) { + /* the registers of kOpdMem are complex to be detected */ + for (uint32 o = 0; o < exit->GetOperandSize(); ++o) { + if (!exit->OpndIsUse(o)) { + continue; + } + Operand *opd = &(exit->GetOperand(o)); + if (opd->IsList() || opd->IsMemoryAccessOperand()) { + allow = false; + break; + } + /* Distinguish between 32-bit regs and 64-bit regs */ + if (opd->IsRegister() && + static_cast(opd)->GetRegisterNumber() == resultNO && + opd != resultOpnd) { + allow = false; + break; + } + } + } + } + /* replace the R2 with R1 */ + if 
(allow) { + for (auto *exit : existingInsns) { + for (uint32 o = 0; o < exit->GetOperandSize(); ++o) { + if (!exit->OpndIsUse(o)) { + continue; + } + Operand *opd = &(exit->GetOperand(o)); + if (opd->IsRegister() && (opd == resultOpnd)) { + exit->SetOperand(o, *srcOpnd); + } + } + } + } + } + if (!allow) { /* all result_dsts are not target register */ + /* code sinking fails */ + InsertInsnRegs(*insn, true, vecSourceRegs, true, vecTargetRegs); + existingInsns.push_back(insn); + } else { + moveInsns.push_back(insn); + } + } + if (pred->GetPreds().empty()) { + break; + } + if (!ifPred) { + for (auto *tmPred : pred->GetPreds()) { + pred = tmPred; + /* try to find the BB without branch */ + if (tmPred->GetKind() == BB::kBBGoto || tmPred->GetKind() == BB::kBBFallthru) { + ifPred = false; + break; + } else { + ifPred = true; + } + } + } + } while (pred != nullptr); + for (std::set::iterator it = vecTargetRegs.begin(); it != vecTargetRegs.end(); ++it) { + if (AArch64Abi::IsCalleeSavedReg(static_cast(*it))) { /* flag register */ + return false; + } + } + return !bl; +} + +BB *AArch64GenProEpilog::IsolateFastPath(BB &bb) { + /* + * Detect "if (cond) return" fast path, and move extra instructions + * to the slow path. + * Must match the following block structure. BB1 can be a series of + * single-pred/single-succ blocks. + * BB1 ops1 cmp-br to BB3 BB1 cmp-br to BB3 + * BB2 ops2 br to retBB ==> BB2 ret + * BB3 slow path BB3 ops1 ops2 + * if the detect is successful, BB3 will be used to generate prolog stuff. + */ + if (bb.GetPrev() != nullptr) { + return nullptr; + } + BB *ifBB = nullptr; + BB *returnBB = nullptr; + BB *coldBB = nullptr; + CG *currCG = cgFunc.GetCG(); + { + BB *curBB = &bb; + /* Look for straight line code */ + while (1) { + if (!curBB->GetEhSuccs().empty()) { + return nullptr; + } + if (curBB->GetSuccs().size() == 1) { + if (curBB->HasCall()) { + return nullptr; + } + BB *succ = curBB->GetSuccs().front(); + if (succ->GetPreds().size() != 1 || !succ->GetEhPreds().empty()) { + return nullptr; + } + curBB = succ; + } else if (curBB->GetKind() == BB::kBBIf) { + ifBB = curBB; + break; + } else { + return nullptr; + } + } + } + /* targets of if bb can only be reached by if bb */ + { + CHECK_FATAL(!ifBB->GetSuccs().empty(), "null succs check!"); + BB *first = ifBB->GetSuccs().front(); + BB *second = ifBB->GetSuccs().back(); + if (first->GetPreds().size() != 1 || !first->GetEhPreds().empty()) { + return nullptr; + } + if (second->GetPreds().size() != 1 || !second->GetEhPreds().empty()) { + return nullptr; + } + /* One target of the if bb jumps to a return bb */ + if (first->GetKind() != BB::kBBGoto && first->GetKind() != BB::kBBFallthru) { + return nullptr; + } + if (first->GetSuccs().size() != 1) { + return nullptr; + } + if (first->GetSuccs().front()->GetKind() != BB::kBBReturn) { + return nullptr; + } + if (first->GetSuccs().front()->GetPreds().size() != 1) { + return nullptr; + } + if (first->GetSuccs().front()->NumInsn() > 2) { /* avoid a insn is used to debug */ + return nullptr; + } + if (second->GetSuccs().empty()) { + return nullptr; + } + returnBB = first; + coldBB = second; + } + /* Search backward looking for dependencies for the cond branch */ + std::list existingInsns; /* the insns must be retained in the ifBB (and the return BB) */ + std::list moveInsns; /* instructions to be moved to coldbb */ + /* + * The control flow matches at this point. + * Make sure the SourceRegs of the insns in returnBB (vecReturnSourceReg) cannot be overwrite. 
+ * the regs in insns have three forms: list, MemoryAccess, or Register. + */ + CHECK_FATAL(returnBB != nullptr, "null ptr check"); + std::set vecReturnSourceRegs; + FOR_BB_INSNS_REV(insn, returnBB) { + if (insn->IsBranch() || insn->IsCall() || insn->IsStore() || insn->IsStorePair()) { + return nullptr; + } + InsertInsnRegs(*insn, true, vecReturnSourceRegs, false, vecReturnSourceRegs); + existingInsns.push_back(insn); + } + FOR_BB_INSNS_REV(insn, returnBB->GetSuccs().front()) { + if (insn->IsBranch() || insn->IsCall() || insn->IsStore() || insn->IsStorePair()) { + return nullptr; + } + InsertInsnRegs(*insn, true, vecReturnSourceRegs, false, vecReturnSourceRegs); + existingInsns.push_back(insn); + } + /* + * The mv is the 1st move using the parameter register leading to the branch + * The ld is the load using the parameter register indirectly for the branch + * The depMv is the move which preserves the result of the load but might + * destroy a parameter register which will be moved below the branch. + */ + bool fast = BackwardFindDependency(*ifBB, vecReturnSourceRegs, existingInsns, moveInsns); + /* move extra instructions to the slow path */ + if (!fast) { + return nullptr; + } + for (auto in : moveInsns) { + in->GetBB()->RemoveInsn(*in); + CHECK_FATAL(coldBB != nullptr, "null ptr check"); + static_cast(coldBB->InsertInsnBegin(*in)); + } + /* All instructions are in the right place, replace branch to ret bb to just ret. */ + /* Remove the lastInsn of gotoBB */ + if (returnBB->GetKind() == BB::kBBGoto) { + returnBB->RemoveInsn(*returnBB->GetLastInsn()); + } + BB *tgtBB = returnBB->GetSuccs().front(); + CHECK_FATAL(tgtBB != nullptr, "null ptr check"); + FOR_BB_INSNS(insn, tgtBB) { + returnBB->AppendInsn(*insn); /* add the insns such as MOP_xret */ + } + returnBB->AppendInsn(currCG->BuildInstruction(MOP_xret)); + /* bb is now a retbb and has no succ. 
*/ + returnBB->SetKind(BB::kBBReturn); + auto predIt = std::find(tgtBB->GetPredsBegin(), tgtBB->GetPredsEnd(), returnBB); + tgtBB->ErasePreds(predIt); + tgtBB->ClearInsns(); + returnBB->ClearSuccs(); + if (tgtBB->GetPrev() != nullptr && tgtBB->GetNext() != nullptr) { + tgtBB->GetPrev()->SetNext(tgtBB->GetNext()); + tgtBB->GetNext()->SetPrev(tgtBB->GetPrev()); + } + return coldBB; +} + +MemOperand *AArch64GenProEpilog::SplitStpLdpOffsetForCalleeSavedWithAddInstruction(CGFunc &cgFunc, + const MemOperand &mo, uint32 bitLen, AArch64reg baseRegNum) { + auto &aarchCGFunc = static_cast(cgFunc); + CHECK_FATAL(mo.GetAddrMode() == MemOperand::kAddrModeBOi, "mode should be kAddrModeBOi"); + OfstOperand *ofstOp = mo.GetOffsetImmediate(); + int32 offsetVal = static_cast(ofstOp->GetOffsetValue()); + CHECK_FATAL(offsetVal > 0, "offsetVal should be greater than 0"); + CHECK_FATAL((static_cast(offsetVal) & 0x7) == 0, "(offsetVal & 0x7) should be equal to 0"); + /* + * Offset adjustment due to FP/SP has already been done + * in AArch64GenProEpilog::GeneratePushRegs() and AArch64GenProEpilog::GeneratePopRegs() + */ + RegOperand &br = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(baseRegNum, bitLen, kRegTyInt); + if (aarchCGFunc.GetSplitBaseOffset() == 0) { + aarchCGFunc.SetSplitBaseOffset(offsetVal); /* remember the offset; don't forget to clear it */ + ImmOperand &immAddEnd = aarchCGFunc.CreateImmOperand(offsetVal, k64BitSize, true); + RegOperand *origBaseReg = mo.GetBaseRegister(); + aarchCGFunc.SelectAdd(br, *origBaseReg, immAddEnd, PTY_i64); + } + offsetVal = offsetVal - aarchCGFunc.GetSplitBaseOffset(); + return &aarchCGFunc.CreateReplacementMemOperand(bitLen, br, offsetVal); +} + +void AArch64GenProEpilog::AppendInstructionPushPair(CGFunc &cgFunc, + AArch64reg reg0, AArch64reg reg1, RegType rty, int32 offset) { + auto &aarchCGFunc = static_cast(cgFunc); + CG *currCG = cgFunc.GetCG(); + MOperator mOp = pushPopOps[kRegsPushOp][rty][kPushPopPair]; + Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, kSizeOfPtr * kBitsPerByte, rty); + Operand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, kSizeOfPtr * kBitsPerByte, rty); + Operand *o2 = &aarchCGFunc.CreateStkTopOpnd(static_cast(offset), kSizeOfPtr * kBitsPerByte); + + uint32 dataSize = kSizeOfPtr * kBitsPerByte; + CHECK_FATAL(offset >= 0, "offset must >= 0"); + if (offset > kStpLdpImm64UpperBound) { + o2 = SplitStpLdpOffsetForCalleeSavedWithAddInstruction(cgFunc, *static_cast(o2), dataSize, R16); + } + Insn &pushInsn = currCG->BuildInstruction(mOp, o0, o1, *o2); + std::string comment = "SAVE CALLEE REGISTER PAIR"; + pushInsn.SetComment(comment); + AppendInstructionTo(pushInsn, cgFunc); + + /* Append CFi code */ + if (cgFunc.GenCfi() && !CGOptions::IsNoCalleeCFI()) { + int32 stackFrameSize = static_cast( + static_cast(cgFunc.GetMemlayout())->RealStackFrameSize()); + stackFrameSize -= static_cast(cgFunc.GetMemlayout()->SizeOfArgsToStackPass()); + int32 cfiOffset = stackFrameSize - offset; + BB *curBB = cgFunc.GetCurBB(); + Insn *newInsn = curBB->InsertInsnAfter(pushInsn, aarchCGFunc.CreateCfiOffsetInsn(reg0, -cfiOffset, k64BitSize)); + curBB->InsertInsnAfter(*newInsn, aarchCGFunc.CreateCfiOffsetInsn(reg1, -cfiOffset + kOffset8MemPos, k64BitSize)); + } +} + +void AArch64GenProEpilog::AppendInstructionPushSingle(CGFunc &cgFunc, + AArch64reg reg, RegType rty, int32 offset) { + auto &aarchCGFunc = static_cast(cgFunc); + CG *currCG = cgFunc.GetCG(); + MOperator mOp = pushPopOps[kRegsPushOp][rty][kPushPopSingle]; + Operand &o0 = 
aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg, kSizeOfPtr * kBitsPerByte, rty); + Operand *o1 = &aarchCGFunc.CreateStkTopOpnd(static_cast(offset), kSizeOfPtr * kBitsPerByte); + + MemOperand *aarchMemO1 = static_cast(o1); + uint32 dataSize = kSizeOfPtr * kBitsPerByte; + if (aarchMemO1->GetMemVaryType() == kNotVary && + aarchCGFunc.IsImmediateOffsetOutOfRange(*aarchMemO1, dataSize)) { + o1 = &aarchCGFunc.SplitOffsetWithAddInstruction(*aarchMemO1, dataSize, R9); + } + + Insn &pushInsn = currCG->BuildInstruction(mOp, o0, *o1); + std::string comment = "SAVE CALLEE REGISTER"; + pushInsn.SetComment(comment); + AppendInstructionTo(pushInsn, cgFunc); + + /* Append CFI code */ + if (cgFunc.GenCfi() && !CGOptions::IsNoCalleeCFI()) { + int32 stackFrameSize = static_cast( + static_cast(cgFunc.GetMemlayout())->RealStackFrameSize()); + stackFrameSize -= static_cast(cgFunc.GetMemlayout()->SizeOfArgsToStackPass()); + int32 cfiOffset = stackFrameSize - offset; + cgFunc.GetCurBB()->InsertInsnAfter(pushInsn, + aarchCGFunc.CreateCfiOffsetInsn(reg, -cfiOffset, k64BitSize)); + } +} + +Insn &AArch64GenProEpilog::AppendInstructionForAllocateOrDeallocateCallFrame(int64 argsToStkPassSize, + AArch64reg reg0, AArch64reg reg1, + RegType rty, bool isAllocate) { + auto &aarchCGFunc = static_cast(cgFunc); + CG *currCG = cgFunc.GetCG(); + MOperator mOp = isAllocate ? pushPopOps[kRegsPushOp][rty][kPushPopPair] : pushPopOps[kRegsPopOp][rty][kPushPopPair]; + uint8 size; + if (CGOptions::IsArm64ilp32()) { + size = k8ByteSize; + } else { + size = kSizeOfPtr; + } + if (argsToStkPassSize <= kStrLdrImm64UpperBound - kOffset8MemPos) { + mOp = isAllocate ? pushPopOps[kRegsPushOp][rty][kPushPopSingle] : pushPopOps[kRegsPopOp][rty][kPushPopSingle]; + RegOperand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, size * kBitsPerByte, rty); + MemOperand *o2 = aarchCGFunc.CreateStackMemOpnd(RSP, argsToStkPassSize, size * kBitsPerByte); + Insn &insn1 = currCG->BuildInstruction(mOp, o0, *o2); + AppendInstructionTo(insn1, cgFunc); + RegOperand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, size * kBitsPerByte, rty); + o2 = aarchCGFunc.CreateStackMemOpnd(RSP, argsToStkPassSize + size, + size * kBitsPerByte); + Insn &insn2 = currCG->BuildInstruction(mOp, o1, *o2); + AppendInstructionTo(insn2, cgFunc); + return insn2; + } else { + RegOperand &oo = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(R9, size * kBitsPerByte, kRegTyInt); + ImmOperand &io1 = aarchCGFunc.CreateImmOperand(argsToStkPassSize, k64BitSize, true); + aarchCGFunc.SelectCopyImm(oo, io1, PTY_i64); + RegOperand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, size * kBitsPerByte, rty); + RegOperand &rsp = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, size * kBitsPerByte, kRegTyInt); + MemOperand *mo = aarchCGFunc.CreateMemOperand( + MemOperand::kAddrModeBOrX, size * kBitsPerByte, rsp, oo, 0); + Insn &insn1 = currCG->BuildInstruction(isAllocate ? MOP_xstr : MOP_xldr, o0, *mo); + AppendInstructionTo(insn1, cgFunc); + ImmOperand &io2 = aarchCGFunc.CreateImmOperand(size, k64BitSize, true); + aarchCGFunc.SelectAdd(oo, oo, io2, PTY_i64); + RegOperand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, size * kBitsPerByte, rty); + mo = aarchCGFunc.CreateMemOperand(MemOperand::kAddrModeBOrX, + size * kBitsPerByte, rsp, oo, 0); + Insn &insn2 = currCG->BuildInstruction(isAllocate ? 
MOP_xstr : MOP_xldr, o1, *mo); + AppendInstructionTo(insn2, cgFunc); + return insn2; + } +} + +Insn &AArch64GenProEpilog::CreateAndAppendInstructionForAllocateCallFrame(int64 argsToStkPassSize, + AArch64reg reg0, AArch64reg reg1, + RegType rty) { + auto &aarchCGFunc = static_cast(cgFunc); + CG *currCG = cgFunc.GetCG(); + MOperator mOp = pushPopOps[kRegsPushOp][rty][kPushPopPair]; + Insn *allocInsn = nullptr; + if (argsToStkPassSize > kStpLdpImm64UpperBound) { + allocInsn = &AppendInstructionForAllocateOrDeallocateCallFrame(argsToStkPassSize, reg0, reg1, rty, true); + } else { + Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, kSizeOfPtr * kBitsPerByte, rty); + Operand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, kSizeOfPtr * kBitsPerByte, rty); + Operand *o2 = aarchCGFunc.CreateStackMemOpnd(RSP, argsToStkPassSize, + kSizeOfPtr * kBitsPerByte); + allocInsn = &currCG->BuildInstruction(mOp, o0, o1, *o2); + AppendInstructionTo(*allocInsn, cgFunc); + } + if (currCG->NeedInsertInstrumentationFunction()) { + aarchCGFunc.AppendCall(*currCG->GetInstrumentationFunction()); + } else if (currCG->InstrumentWithDebugTraceCall()) { + aarchCGFunc.AppendCall(*currCG->GetDebugTraceEnterFunction()); + } else if (currCG->InstrumentWithProfile()) { + aarchCGFunc.AppendCall(*currCG->GetProfileFunction()); + } + return *allocInsn; +} + +void AArch64GenProEpilog::AppendInstructionAllocateCallFrame(AArch64reg reg0, AArch64reg reg1, RegType rty) { + auto &aarchCGFunc = static_cast(cgFunc); + CG *currCG = cgFunc.GetCG(); + if (currCG->GenerateVerboseCG()) { + cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("allocate activation frame")); + } + + Insn *ipoint = nullptr; + /* + * stackFrameSize includes the size of args to stack-pass + * if a function has neither VLA nor alloca. + */ + int32 stackFrameSize = static_cast( + static_cast(cgFunc.GetMemlayout())->RealStackFrameSize()); + int64 argsToStkPassSize = cgFunc.GetMemlayout()->SizeOfArgsToStackPass(); + /* + * ldp/stp's imm should be within -512 and 504; + * if stp's imm > 512, we fall back to the stp-sub version + */ + bool useStpSub = false; + int64 offset = 0; + int32 cfiOffset = 0; + if (!cgFunc.HasVLAOrAlloca() && argsToStkPassSize > 0) { + /* + * stack_frame_size == size of formal parameters + callee-saved (including FP/RL) + * + size of local vars + * + size of actuals + * (when passing more than 8 args, its caller's responsibility to + * allocate space for it. size of actuals represent largest such size in the function. 
+ */ + Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt); + Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true); + aarchCGFunc.SelectSub(spOpnd, spOpnd, immOpnd, PTY_u64); + ipoint = cgFunc.GetCurBB()->GetLastInsn(); + cfiOffset = stackFrameSize; + } else { + if (stackFrameSize > kStpLdpImm64UpperBound) { + useStpSub = true; + offset = kOffset16MemPos; + stackFrameSize -= offset; + } else { + offset = stackFrameSize; + } + MOperator mOp = pushPopOps[kRegsPushOp][rty][kPushPopPair]; + RegOperand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, kSizeOfPtr * kBitsPerByte, rty); + RegOperand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, kSizeOfPtr * kBitsPerByte, rty); + MemOperand &o2 = aarchCGFunc.CreateCallFrameOperand(static_cast(-offset), kSizeOfPtr * kBitsPerByte); + ipoint = &currCG->BuildInstruction(mOp, o0, o1, o2); + AppendInstructionTo(*ipoint, cgFunc); + cfiOffset = offset; + if (currCG->NeedInsertInstrumentationFunction()) { + aarchCGFunc.AppendCall(*currCG->GetInstrumentationFunction()); + } else if (currCG->InstrumentWithDebugTraceCall()) { + aarchCGFunc.AppendCall(*currCG->GetDebugTraceEnterFunction()); + } else if (currCG->InstrumentWithProfile()) { + aarchCGFunc.AppendCall(*currCG->GetProfileFunction()); + } + } + + ipoint = InsertCFIDefCfaOffset(cfiOffset, *ipoint); + + if (!cgFunc.HasVLAOrAlloca() && argsToStkPassSize > 0) { + CHECK_FATAL(!useStpSub, "Invalid assumption"); + ipoint = &CreateAndAppendInstructionForAllocateCallFrame(argsToStkPassSize, reg0, reg1, rty); + } + + if (useStpSub) { + Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt); + Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true); + aarchCGFunc.SelectSub(spOpnd, spOpnd, immOpnd, PTY_u64); + ipoint = cgFunc.GetCurBB()->GetLastInsn(); + aarchCGFunc.SetUsedStpSubPairForCallFrameAllocation(true); + } + + CHECK_FATAL(ipoint != nullptr, "ipoint should not be nullptr at this point"); + int32 cfiOffsetSecond = 0; + if (useStpSub) { + cfiOffsetSecond = stackFrameSize; + ipoint = InsertCFIDefCfaOffset(cfiOffsetSecond, *ipoint); + } + cfiOffsetSecond = GetOffsetFromCFA(); + if (!cgFunc.HasVLAOrAlloca()) { + cfiOffsetSecond -= argsToStkPassSize; + } + if (cgFunc.GenCfi()) { + BB *curBB = cgFunc.GetCurBB(); + if (useFP) { + ipoint = curBB->InsertInsnAfter( + *ipoint, aarchCGFunc.CreateCfiOffsetInsn(stackBaseReg, -cfiOffsetSecond, k64BitSize)); + } + curBB->InsertInsnAfter(*ipoint, + aarchCGFunc.CreateCfiOffsetInsn(RLR, -cfiOffsetSecond + kOffset8MemPos, k64BitSize)); + } +} + +void AArch64GenProEpilog::AppendInstructionAllocateCallFrameDebug(AArch64reg reg0, AArch64reg reg1, RegType rty) { + auto &aarchCGFunc = static_cast(cgFunc); + CG *currCG = cgFunc.GetCG(); + if (currCG->GenerateVerboseCG()) { + cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("allocate activation frame for debugging")); + } + + int32 stackFrameSize = static_cast( + static_cast(cgFunc.GetMemlayout())->RealStackFrameSize()); + int64 argsToStkPassSize = cgFunc.GetMemlayout()->SizeOfArgsToStackPass(); + + Insn *ipoint = nullptr; + int32 cfiOffset = 0; + + if (argsToStkPassSize > 0) { + Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt); + Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true); + aarchCGFunc.SelectSub(spOpnd, spOpnd, immOpnd, PTY_u64); + ipoint = cgFunc.GetCurBB()->GetLastInsn(); + cfiOffset 
= stackFrameSize; + (void)InsertCFIDefCfaOffset(cfiOffset, *ipoint); + if (cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc) { + argsToStkPassSize -= (kDivide2 * k8ByteSize); + } + ipoint = &CreateAndAppendInstructionForAllocateCallFrame(argsToStkPassSize, reg0, reg1, rty); + CHECK_FATAL(ipoint != nullptr, "ipoint should not be nullptr at this point"); + cfiOffset = GetOffsetFromCFA(); + cfiOffset -= argsToStkPassSize; + } else { + bool useStpSub = false; + + if (stackFrameSize > kStpLdpImm64UpperBound) { + useStpSub = true; + RegOperand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt); + ImmOperand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true); + aarchCGFunc.SelectSub(spOpnd, spOpnd, immOpnd, PTY_u64); + ipoint = cgFunc.GetCurBB()->GetLastInsn(); + cfiOffset = stackFrameSize; + ipoint = InsertCFIDefCfaOffset(cfiOffset, *ipoint); + } else { + MOperator mOp = pushPopOps[kRegsPushOp][rty][kPushPopPair]; + RegOperand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, kSizeOfPtr * kBitsPerByte, rty); + RegOperand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, kSizeOfPtr * kBitsPerByte, rty); + MemOperand &o2 = aarchCGFunc.CreateCallFrameOperand(-stackFrameSize, kSizeOfPtr * kBitsPerByte); + ipoint = &currCG->BuildInstruction(mOp, o0, o1, o2); + AppendInstructionTo(*ipoint, cgFunc); + cfiOffset = stackFrameSize; + ipoint = InsertCFIDefCfaOffset(cfiOffset, *ipoint); + } + + if (useStpSub) { + MOperator mOp = pushPopOps[kRegsPushOp][rty][kPushPopPair]; + RegOperand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, kSizeOfPtr * kBitsPerByte, rty); + RegOperand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, kSizeOfPtr * kBitsPerByte, rty); + MemOperand *o2 = aarchCGFunc.CreateStackMemOpnd(RSP, 0, kSizeOfPtr * kBitsPerByte); + ipoint = &currCG->BuildInstruction(mOp, o0, o1, *o2); + AppendInstructionTo(*ipoint, cgFunc); + } + + if (currCG->NeedInsertInstrumentationFunction()) { + aarchCGFunc.AppendCall(*currCG->GetInstrumentationFunction()); + } else if (currCG->InstrumentWithDebugTraceCall()) { + aarchCGFunc.AppendCall(*currCG->GetDebugTraceEnterFunction()); + } else if (currCG->InstrumentWithProfile()) { + aarchCGFunc.AppendCall(*currCG->GetProfileFunction()); + } + + CHECK_FATAL(ipoint != nullptr, "ipoint should not be nullptr at this point"); + cfiOffset = GetOffsetFromCFA(); + } + if (cgFunc.GenCfi()) { + BB *curBB = cgFunc.GetCurBB(); + if (useFP) { + ipoint = curBB->InsertInsnAfter(*ipoint, aarchCGFunc.CreateCfiOffsetInsn(stackBaseReg, -cfiOffset, k64BitSize)); + } + curBB->InsertInsnAfter(*ipoint, aarchCGFunc.CreateCfiOffsetInsn(RLR, -cfiOffset + kOffset8MemPos, k64BitSize)); + } +} + +/* + * From AArch64 Reference Manual + * C1.3.3 Load/Store Addressing Mode + * ... + * When stack alignment checking is enabled by system software and + * the base register is the SP, the current stack pointer must be + * initially quadword aligned, that is aligned to 16 bytes. Misalignment + * generates a Stack Alignment fault. The offset does not have to + * be a multiple of 16 bytes unless the specific Load/Store instruction + * requires this. SP cannot be used as a register offset. + */ +void AArch64GenProEpilog::GeneratePushRegs() { + auto &aarchCGFunc = static_cast(cgFunc); + CG *currCG = cgFunc.GetCG(); + const MapleVector ®sToSave = (!CGOptions::DoRegSavesOpt()) ? 
+ aarchCGFunc.GetCalleeSavedRegs() : aarchCGFunc.GetProEpilogSavedRegs(); + + CHECK_FATAL(!regsToSave.empty(), "FP/LR not added to callee-saved list?"); + + AArch64reg intRegFirstHalf = kRinvalid; + AArch64reg fpRegFirstHalf = kRinvalid; + + if (currCG->GenerateVerboseCG()) { + cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("save callee-saved registers")); + } + + /* + * Even if we don't use RFP, since we push a pair of registers in one instruction + * and the stack needs be aligned on a 16-byte boundary, push RFP as well if function has a call + * Make sure this is reflected when computing callee_saved_regs.size() + */ + if (!currCG->GenerateDebugFriendlyCode()) { + AppendInstructionAllocateCallFrame(R29, RLR, kRegTyInt); + } else { + AppendInstructionAllocateCallFrameDebug(R29, RLR, kRegTyInt); + } + + if (useFP) { + if (currCG->GenerateVerboseCG()) { + cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("copy SP to FP")); + } + Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt); + Operand &fpOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(stackBaseReg, k64BitSize, kRegTyInt); + int64 argsToStkPassSize = cgFunc.GetMemlayout()->SizeOfArgsToStackPass(); + bool isLmbc = cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc; + if ((argsToStkPassSize > 0) || isLmbc) { + Operand *immOpnd; + if (isLmbc) { + int32 size = static_cast(static_cast(cgFunc.GetMemlayout())->RealStackFrameSize()); + immOpnd = &aarchCGFunc.CreateImmOperand(size, k32BitSize, true); + } else { + immOpnd = &aarchCGFunc.CreateImmOperand(argsToStkPassSize, k32BitSize, true); + } + aarchCGFunc.SelectAdd(fpOpnd, spOpnd, *immOpnd, PTY_u64); + cgFunc.GetCurBB()->GetLastInsn()->SetFrameDef(true); + if (cgFunc.GenCfi()) { + cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiDefCfaInsn(stackBaseReg, + static_cast( + cgFunc.GetMemlayout())->RealStackFrameSize() - argsToStkPassSize, k64BitSize)); + } + } else { + aarchCGFunc.SelectCopy(fpOpnd, PTY_u64, spOpnd, PTY_u64); + cgFunc.GetCurBB()->GetLastInsn()->SetFrameDef(true); + if (cgFunc.GenCfi()) { + cgFunc.GetCurBB()->AppendInsn( + currCG->BuildInstruction(cfi::OP_CFI_def_cfa_register, + aarchCGFunc.CreateCfiRegOperand(stackBaseReg, k64BitSize))); + } + } + } + + MapleVector::const_iterator it = regsToSave.begin(); + /* skip the first two registers */ + CHECK_FATAL(*it == RFP, "The first callee saved reg is expected to be RFP"); + ++it; + CHECK_FATAL(*it == RLR, "The second callee saved reg is expected to be RLR"); + ++it; + + AArch64MemLayout *memLayout = static_cast(cgFunc.GetMemlayout()); + int32 offset; + if (cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc) { + offset = static_cast(memLayout->RealStackFrameSize() - + aarchCGFunc.SizeOfCalleeSaved() - memLayout->GetSizeOfLocals()); + } else { + offset = static_cast(memLayout->RealStackFrameSize() - + (aarchCGFunc.SizeOfCalleeSaved() - (kDivide2 * kIntregBytelen) /* for FP/LR */) - + memLayout->SizeOfArgsToStackPass()); + } + + if (cgFunc.GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc && + cgFunc.GetMirModule().IsCModule() && cgFunc.GetFunction().GetAttr(FUNCATTR_varargs)) { + /* GR/VR save areas are above the callee save area */ + AArch64MemLayout *ml = static_cast(cgFunc.GetMemlayout()); + auto saveareasize = static_cast(RoundUp(ml->GetSizeOfGRSaveArea(), kSizeOfPtr * k2BitSize) + + RoundUp(ml->GetSizeOfVRSaveArea(), kSizeOfPtr * k2BitSize)); + offset -= saveareasize; + } + + for (; it != regsToSave.end(); ++it) { + AArch64reg reg = *it; 
+ CHECK_FATAL(reg != RFP, "stray RFP in callee_saved_list?"); + CHECK_FATAL(reg != RLR, "stray RLR in callee_saved_list?"); + RegType regType = AArch64isa::IsGPRegister(reg) ? kRegTyInt : kRegTyFloat; + AArch64reg &firstHalf = AArch64isa::IsGPRegister(reg) ? intRegFirstHalf : fpRegFirstHalf; + if (firstHalf == kRinvalid) { + /* remember it */ + firstHalf = reg; + } else { + AppendInstructionPushPair(cgFunc, firstHalf, reg, regType, offset); + GetNextOffsetCalleeSaved(offset); + firstHalf = kRinvalid; + } + } + + if (intRegFirstHalf != kRinvalid) { + AppendInstructionPushSingle(cgFunc, intRegFirstHalf, kRegTyInt, offset); + GetNextOffsetCalleeSaved(offset); + } + + if (fpRegFirstHalf != kRinvalid) { + AppendInstructionPushSingle(cgFunc, fpRegFirstHalf, kRegTyFloat, offset); + GetNextOffsetCalleeSaved(offset); + } + + /* + * in case we split stp/ldp instructions, + * so that we generate a load-into-base-register instruction + * for pop pairs as well. + */ + aarchCGFunc.SetSplitBaseOffset(0); +} + +void AArch64GenProEpilog::GeneratePushUnnamedVarargRegs() { + auto &aarchCGFunc = static_cast(cgFunc); + CG *currCG = cgFunc.GetCG(); + if (cgFunc.GetMirModule().IsCModule() && cgFunc.GetFunction().GetAttr(FUNCATTR_varargs)) { + AArch64MemLayout *memlayout = static_cast(cgFunc.GetMemlayout()); + uint8 size; + if (CGOptions::IsArm64ilp32()) { + size = k8ByteSize; + } else { + size = kSizeOfPtr; + } + uint32 dataSizeBits = size * kBitsPerByte; + uint32 offset; + if (cgFunc.GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc) { + offset = static_cast(memlayout->GetGRSaveAreaBaseLoc()); /* SP reference */ + if (memlayout->GetSizeOfGRSaveArea() % kAarch64StackPtrAlignment) { + offset += size; /* End of area should be aligned. Hole between VR and GR area */ + } + } else { + offset = -memlayout->GetSizeOfGRSaveArea(); /* FP reference */ + } + uint32 grSize = -offset; + uint32 start_regno = k8BitSize - (memlayout->GetSizeOfGRSaveArea() / size); + ASSERT(start_regno <= k8BitSize, "Incorrect starting GR regno for GR Save Area"); + for (uint32 i = start_regno + static_cast(R0); i < static_cast(R8); i++) { + uint32 tmpOffset = 0; + if (CGOptions::IsBigEndian()) { + if((dataSizeBits >> 3) < 8) { + tmpOffset += 8U - (dataSizeBits >> 3); + } + } + Operand *stackLoc; + if (cgFunc.GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc) { + stackLoc = &aarchCGFunc.CreateStkTopOpnd(offset + tmpOffset, dataSizeBits); + } else { + stackLoc = aarchCGFunc.GenLmbcFpMemOperand(offset, size); + } + RegOperand ® = + aarchCGFunc.GetOrCreatePhysicalRegisterOperand(static_cast(i), k64BitSize, kRegTyInt); + Insn &inst = + currCG->BuildInstruction(aarchCGFunc.PickStInsn(dataSizeBits, PTY_i64), reg, *stackLoc); + cgFunc.GetCurBB()->AppendInsn(inst); + offset += size; + } + if (!CGOptions::UseGeneralRegOnly()) { + if (cgFunc.GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc) { + offset = static_cast(memlayout->GetVRSaveAreaBaseLoc()); + } else { + offset = -(memlayout->GetSizeOfVRSaveArea() + grSize); + } + start_regno = k8BitSize - (memlayout->GetSizeOfVRSaveArea() / (size * k2BitSize)); + ASSERT(start_regno <= k8BitSize, "Incorrect starting GR regno for VR Save Area"); + for (uint32 i = start_regno + static_cast(V0); i < static_cast(V8); i++) { + uint32 tmpOffset = 0; + if (CGOptions::IsBigEndian()) { + if((dataSizeBits >> 3) < 16) { + tmpOffset += 16U - (dataSizeBits >> 3); + } + } + Operand *stackLoc; + if (cgFunc.GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc) { + stackLoc = &aarchCGFunc.CreateStkTopOpnd(offset + 
tmpOffset, dataSizeBits); + } else { + stackLoc = aarchCGFunc.GenLmbcFpMemOperand(offset, size); + } + RegOperand ® = + aarchCGFunc.GetOrCreatePhysicalRegisterOperand(static_cast(i), k64BitSize, kRegTyFloat); + Insn &inst = + currCG->BuildInstruction(aarchCGFunc.PickStInsn(dataSizeBits, PTY_f64), reg, *stackLoc); + cgFunc.GetCurBB()->AppendInsn(inst); + offset += (size * k2BitSize); + } + } + } +} + +void AArch64GenProEpilog::AppendInstructionStackCheck(AArch64reg reg, RegType rty, int32 offset) { + auto &aarchCGFunc = static_cast(cgFunc); + CG *currCG = cgFunc.GetCG(); + /* sub x16, sp, #0x2000 */ + auto &x16Opnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg, k64BitSize, rty); + auto &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, rty); + auto &imm1 = aarchCGFunc.CreateImmOperand(offset, k64BitSize, true); + aarchCGFunc.SelectSub(x16Opnd, spOpnd, imm1, PTY_u64); + + /* ldr wzr, [x16] */ + auto &wzr = cgFunc.GetZeroOpnd(k32BitSize); + auto &refX16 = aarchCGFunc.CreateMemOpnd(reg, 0, k64BitSize); + auto &soeInstr = currCG->BuildInstruction(MOP_wldr, wzr, refX16); + if (currCG->GenerateVerboseCG()) { + soeInstr.SetComment("soerror"); + } + soeInstr.SetDoNotRemove(true); + AppendInstructionTo(soeInstr, cgFunc); +} + +void AArch64GenProEpilog::GenerateProlog(BB &bb) { + auto &aarchCGFunc = static_cast(cgFunc); + CG *currCG = cgFunc.GetCG(); + BB *formerCurBB = cgFunc.GetCurBB(); + aarchCGFunc.GetDummyBB()->ClearInsns(); + aarchCGFunc.GetDummyBB()->SetIsProEpilog(true); + cgFunc.SetCurBB(*aarchCGFunc.GetDummyBB()); + if (!cgFunc.GetHasProEpilogue()) { + return; + } + + // insert .loc for function + if (currCG->GetCGOptions().WithLoc()) { + MIRFunction *func = &cgFunc.GetFunction(); + MIRSymbol *fSym = GlobalTables::GetGsymTable().GetSymbolFromStidx(func->GetStIdx().Idx()); + if (currCG->GetCGOptions().WithSrc()) { + uint32 tempmaxsize = static_cast(currCG->GetMIRModule()->GetSrcFileInfo().size()); + uint32 endfilenum = currCG->GetMIRModule()->GetSrcFileInfo()[tempmaxsize - 1].second; + if (fSym->GetSrcPosition().FileNum() != 0 && fSym->GetSrcPosition().FileNum() <= endfilenum) { + Operand *o0 = cgFunc.CreateDbgImmOperand(fSym->GetSrcPosition().FileNum()); + int64_t lineNum = fSym->GetSrcPosition().LineNum(); + if (lineNum == 0) { + if (cgFunc.GetFunction().GetAttr(FUNCATTR_native)) { + lineNum = 0xffffe; + } else { + lineNum = 0xffffd; + } + } + Operand *o1 = cgFunc.CreateDbgImmOperand(lineNum); + Insn &loc = currCG->BuildInstruction(mpldbg::OP_DBG_loc, *o0, *o1); + cgFunc.GetCurBB()->AppendInsn(loc); + } + } else { + Operand *o0 = cgFunc.CreateDbgImmOperand(1); + Operand *o1 = cgFunc.CreateDbgImmOperand(fSym->GetSrcPosition().MplLineNum()); + Insn &loc = currCG->BuildInstruction(mpldbg::OP_DBG_loc, *o0, *o1); + cgFunc.GetCurBB()->AppendInsn(loc); + } + } + + const MapleVector ®sToSave = (!CGOptions::DoRegSavesOpt()) ? + aarchCGFunc.GetCalleeSavedRegs() : aarchCGFunc.GetProEpilogSavedRegs(); + if (!regsToSave.empty()) { + /* + * Among other things, push the FP & LR pair. + * FP/LR are added to the callee-saved list in AllocateRegisters() + * We add them to the callee-saved list regardless of UseFP() being true/false. 
+ * Activation Frame is allocated as part of pushing FP/LR pair + */ + GeneratePushRegs(); + } else { + Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt); + int32 stackFrameSize = static_cast( + static_cast(cgFunc.GetMemlayout())->RealStackFrameSize()); + if (stackFrameSize > 0) { + if (currCG->GenerateVerboseCG()) { + cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("allocate activation frame")); + } + Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true); + aarchCGFunc.SelectSub(spOpnd, spOpnd, immOpnd, PTY_u64); + + int32 offset = stackFrameSize; + (void)InsertCFIDefCfaOffset(offset, *(cgFunc.GetCurBB()->GetLastInsn())); + } + if (currCG->GenerateVerboseCG()) { + cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("copy SP to FP")); + } + if (useFP) { + Operand &fpOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(stackBaseReg, k64BitSize, kRegTyInt); + bool isLmbc = cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc; + int64 argsToStkPassSize = cgFunc.GetMemlayout()->SizeOfArgsToStackPass(); + if ((argsToStkPassSize > 0) || isLmbc) { + Operand *immOpnd; + if (isLmbc) { + int32 size = static_cast(static_cast(cgFunc.GetMemlayout())->RealStackFrameSize()); + immOpnd = &aarchCGFunc.CreateImmOperand(size, k32BitSize, true); + } else { + immOpnd = &aarchCGFunc.CreateImmOperand(argsToStkPassSize, k32BitSize, true); + } + aarchCGFunc.SelectAdd(fpOpnd, spOpnd, *immOpnd, PTY_u64); + cgFunc.GetCurBB()->GetLastInsn()->SetFrameDef(true); + if (cgFunc.GenCfi()) { + cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiDefCfaInsn( + stackBaseReg, + static_cast(cgFunc.GetMemlayout())->RealStackFrameSize() - argsToStkPassSize, + k64BitSize)); + } + } else { + aarchCGFunc.SelectCopy(fpOpnd, PTY_u64, spOpnd, PTY_u64); + cgFunc.GetCurBB()->GetLastInsn()->SetFrameDef(true); + if (cgFunc.GenCfi()) { + cgFunc.GetCurBB()->AppendInsn( + currCG->BuildInstruction(cfi::OP_CFI_def_cfa_register, + aarchCGFunc.CreateCfiRegOperand(stackBaseReg, k64BitSize))); + } + } + } + } + GeneratePushUnnamedVarargRegs(); + if (currCG->DoCheckSOE()) { + AppendInstructionStackCheck(R16, kRegTyInt, kSoeChckOffset); + } + bb.InsertAtBeginning(*aarchCGFunc.GetDummyBB()); + cgFunc.SetCurBB(*formerCurBB); + aarchCGFunc.GetDummyBB()->SetIsProEpilog(false); +} + +void AArch64GenProEpilog::GenerateRet(BB &bb) { + CG *currCG = cgFunc.GetCG(); + bb.AppendInsn(currCG->BuildInstruction(MOP_xret)); +} + +/* + * If all the preds of exitBB made the TailcallOpt(replace blr/bl with br/b), return true, we don't create ret insn. + * Otherwise, return false, create the ret insn. 
+ */ +bool AArch64GenProEpilog::TestPredsOfRetBB(const BB &exitBB) { + AArch64MemLayout *ml = static_cast(cgFunc.GetMemlayout()); + if (cgFunc.GetMirModule().IsCModule() && + (cgFunc.GetFunction().GetAttr(FUNCATTR_varargs) || + ml->GetSizeOfLocals() > 0 || cgFunc.HasVLAOrAlloca())) { + return false; + } + for (auto tmpBB : exitBB.GetPreds()) { + Insn *firstInsn = tmpBB->GetFirstInsn(); + if ((firstInsn == nullptr || tmpBB->IsCommentBB()) && (!tmpBB->GetPreds().empty())) { + if (!TestPredsOfRetBB(*tmpBB)) { + return false; + } + } else { + Insn *lastInsn = tmpBB->GetLastInsn(); + if (lastInsn == nullptr) { + return false; + } + MOperator insnMop = lastInsn->GetMachineOpcode(); + if (insnMop != MOP_tail_call_opt_xbl && insnMop != MOP_tail_call_opt_xblr) { + return false; + } + } + } + return true; +} + +void AArch64GenProEpilog::AppendInstructionPopSingle(CGFunc &cgFunc, AArch64reg reg, RegType rty, int32 offset) { + auto &aarchCGFunc = static_cast(cgFunc); + CG *currCG = cgFunc.GetCG(); + MOperator mOp = pushPopOps[kRegsPopOp][rty][kPushPopSingle]; + Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg, kSizeOfPtr * kBitsPerByte, rty); + Operand *o1 = &aarchCGFunc.CreateStkTopOpnd(static_cast(offset), kSizeOfPtr * kBitsPerByte); + MemOperand *aarchMemO1 = static_cast(o1); + uint32 dataSize = kSizeOfPtr * kBitsPerByte; + if (aarchMemO1->GetMemVaryType() == kNotVary && aarchCGFunc.IsImmediateOffsetOutOfRange(*aarchMemO1, dataSize)) { + o1 = &aarchCGFunc.SplitOffsetWithAddInstruction(*aarchMemO1, dataSize, R9); + } + + Insn &popInsn = currCG->BuildInstruction(mOp, o0, *o1); + popInsn.SetComment("RESTORE"); + cgFunc.GetCurBB()->AppendInsn(popInsn); + + /* Append CFI code. */ + if (cgFunc.GenCfi() && !CGOptions::IsNoCalleeCFI()) { + cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiRestoreInsn(reg, k64BitSize)); + } +} + +void AArch64GenProEpilog::AppendInstructionPopPair(CGFunc &cgFunc, + AArch64reg reg0, AArch64reg reg1, RegType rty, int32 offset) { + auto &aarchCGFunc = static_cast(cgFunc); + CG *currCG = cgFunc.GetCG(); + MOperator mOp = pushPopOps[kRegsPopOp][rty][kPushPopPair]; + Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, kSizeOfPtr * kBitsPerByte, rty); + Operand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, kSizeOfPtr * kBitsPerByte, rty); + Operand *o2 = &aarchCGFunc.CreateStkTopOpnd(static_cast(offset), kSizeOfPtr * kBitsPerByte); + + uint32 dataSize = kSizeOfPtr * kBitsPerByte; + CHECK_FATAL(offset >= 0, "offset must >= 0"); + if (offset > kStpLdpImm64UpperBound) { + o2 = SplitStpLdpOffsetForCalleeSavedWithAddInstruction(cgFunc, + static_cast(*o2), dataSize, R16); + } + Insn &popInsn = currCG->BuildInstruction(mOp, o0, o1, *o2); + popInsn.SetComment("RESTORE RESTORE"); + cgFunc.GetCurBB()->AppendInsn(popInsn); + + /* Append CFI code */ + if (cgFunc.GenCfi() && !CGOptions::IsNoCalleeCFI()) { + cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiRestoreInsn(reg0, k64BitSize)); + cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiRestoreInsn(reg1, k64BitSize)); + } +} + + +void AArch64GenProEpilog::AppendInstructionDeallocateCallFrame(AArch64reg reg0, AArch64reg reg1, RegType rty) { + auto &aarchCGFunc = static_cast(cgFunc); + CG *currCG = cgFunc.GetCG(); + MOperator mOp = pushPopOps[kRegsPopOp][rty][kPushPopPair]; + Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, kSizeOfPtr * kBitsPerByte, rty); + Operand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, kSizeOfPtr * kBitsPerByte, rty); + int32 stackFrameSize = 
static_cast( + static_cast(cgFunc.GetMemlayout())->RealStackFrameSize()); + int64 argsToStkPassSize = cgFunc.GetMemlayout()->SizeOfArgsToStackPass(); + /* + * ldp/stp's imm should be within -512 and 504; + * if ldp's imm > 504, we fall back to the ldp-add version + */ + bool useLdpAdd = false; + int32 offset = 0; + + Operand *o2 = nullptr; + if (!cgFunc.HasVLAOrAlloca() && argsToStkPassSize > 0) { + o2 = aarchCGFunc.CreateStackMemOpnd(RSP, argsToStkPassSize, kSizeOfPtr * kBitsPerByte); + } else { + if (stackFrameSize > kStpLdpImm64UpperBound) { + useLdpAdd = true; + offset = kOffset16MemPos; + stackFrameSize -= offset; + } else { + offset = stackFrameSize; + } + o2 = &aarchCGFunc.CreateCallFrameOperand(offset, kSizeOfPtr * kBitsPerByte); + } + + if (useLdpAdd) { + Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt); + Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true); + aarchCGFunc.SelectAdd(spOpnd, spOpnd, immOpnd, PTY_u64); + if (cgFunc.GenCfi()) { + int64 cfiOffset = GetOffsetFromCFA(); + BB *curBB = cgFunc.GetCurBB(); + curBB->InsertInsnAfter(*(curBB->GetLastInsn()), + aarchCGFunc.CreateCfiDefCfaInsn(RSP, cfiOffset - stackFrameSize, k64BitSize)); + } + } + + if (!cgFunc.HasVLAOrAlloca() && argsToStkPassSize > 0) { + CHECK_FATAL(!useLdpAdd, "Invalid assumption"); + if (argsToStkPassSize > kStpLdpImm64UpperBound) { + (void)AppendInstructionForAllocateOrDeallocateCallFrame(argsToStkPassSize, reg0, reg1, rty, false); + } else { + Insn &deallocInsn = currCG->BuildInstruction(mOp, o0, o1, *o2); + cgFunc.GetCurBB()->AppendInsn(deallocInsn); + } + Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt); + Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true); + aarchCGFunc.SelectAdd(spOpnd, spOpnd, immOpnd, PTY_u64); + } else { + Insn &deallocInsn = currCG->BuildInstruction(mOp, o0, o1, *o2); + cgFunc.GetCurBB()->AppendInsn(deallocInsn); + } + + if (cgFunc.GenCfi()) { + /* Append CFI restore */ + if (useFP) { + cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiRestoreInsn(stackBaseReg, k64BitSize)); + } + cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiRestoreInsn(RLR, k64BitSize)); + } +} + +void AArch64GenProEpilog::AppendInstructionDeallocateCallFrameDebug(AArch64reg reg0, AArch64reg reg1, RegType rty) { + auto &aarchCGFunc = static_cast(cgFunc); + CG *currCG = cgFunc.GetCG(); + MOperator mOp = pushPopOps[kRegsPopOp][rty][kPushPopPair]; + Operand &o0 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg0, kSizeOfPtr * kBitsPerByte, rty); + Operand &o1 = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(reg1, kSizeOfPtr * kBitsPerByte, rty); + int32 stackFrameSize = static_cast( + static_cast(cgFunc.GetMemlayout())->RealStackFrameSize()); + int64 argsToStkPassSize = cgFunc.GetMemlayout()->SizeOfArgsToStackPass(); + /* + * ldp/stp's imm should be within -512 and 504; + * if ldp's imm > 504, we fall back to the ldp-add version + */ + bool isLmbc = (cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc); + if (cgFunc.HasVLAOrAlloca() || argsToStkPassSize == 0 || isLmbc) { + int lmbcOffset = 0; + if (isLmbc == false) { + stackFrameSize -= argsToStkPassSize; + } else { + lmbcOffset = argsToStkPassSize - (kDivide2 * k8ByteSize); + } + if (stackFrameSize > kStpLdpImm64UpperBound || isLmbc) { + Operand *o2; + o2 = aarchCGFunc.CreateStackMemOpnd(RSP, (isLmbc ? 
lmbcOffset : 0), kSizeOfPtr * kBitsPerByte); + Insn &deallocInsn = currCG->BuildInstruction(mOp, o0, o1, *o2); + cgFunc.GetCurBB()->AppendInsn(deallocInsn); + if (cgFunc.GenCfi()) { + /* Append CFI restore */ + if (useFP) { + cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiRestoreInsn(stackBaseReg, k64BitSize)); + } + cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiRestoreInsn(RLR, k64BitSize)); + } + Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt); + Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true); + aarchCGFunc.SelectAdd(spOpnd, spOpnd, immOpnd, PTY_u64); + } else { + MemOperand &o2 = aarchCGFunc.CreateCallFrameOperand(stackFrameSize, kSizeOfPtr * kBitsPerByte); + Insn &deallocInsn = currCG->BuildInstruction(mOp, o0, o1, o2); + cgFunc.GetCurBB()->AppendInsn(deallocInsn); + if (cgFunc.GenCfi()) { + if (useFP) { + cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiRestoreInsn(stackBaseReg, k64BitSize)); + } + cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiRestoreInsn(RLR, k64BitSize)); + } + } + } else { + Operand *o2; + o2 = aarchCGFunc.CreateStackMemOpnd(RSP, argsToStkPassSize, kSizeOfPtr * kBitsPerByte); + if (argsToStkPassSize > kStpLdpImm64UpperBound) { + (void)AppendInstructionForAllocateOrDeallocateCallFrame(argsToStkPassSize, reg0, reg1, rty, false); + } else { + Insn &deallocInsn = currCG->BuildInstruction(mOp, o0, o1, *o2); + cgFunc.GetCurBB()->AppendInsn(deallocInsn); + } + + if (cgFunc.GenCfi()) { + if (useFP) { + cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiRestoreInsn(stackBaseReg, k64BitSize)); + } + cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiRestoreInsn(RLR, k64BitSize)); + } + Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt); + Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true); + aarchCGFunc.SelectAdd(spOpnd, spOpnd, immOpnd, PTY_u64); + } +} + +void AArch64GenProEpilog::GeneratePopRegs() { + auto &aarchCGFunc = static_cast(cgFunc); + CG *currCG = cgFunc.GetCG(); + + const MapleVector ®sToRestore = (!CGOptions::DoRegSavesOpt()) ? + aarchCGFunc.GetCalleeSavedRegs() : aarchCGFunc.GetProEpilogSavedRegs(); + + CHECK_FATAL(!regsToRestore.empty(), "FP/LR not added to callee-saved list?"); + + AArch64reg intRegFirstHalf = kRinvalid; + AArch64reg fpRegFirstHalf = kRinvalid; + + if (currCG->GenerateVerboseCG()) { + cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("restore callee-saved registers")); + } + + MapleVector::const_iterator it = regsToRestore.begin(); + /* + * Even if we don't use FP, since we push a pair of registers + * in a single instruction (i.e., stp) and the stack needs be aligned + * on a 16-byte boundary, push FP as well if the function has a call. 
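+ * (AAPCS64 requires SP to remain 16-byte aligned, which is why saves and restores are emitted in pairs wherever possible.)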
+ * Make sure this is reflected when computing calleeSavedRegs.size() + * skip the first two registers + */ + CHECK_FATAL(*it == RFP, "The first callee saved reg is expected to be RFP"); + ++it; + CHECK_FATAL(*it == RLR, "The second callee saved reg is expected to be RLR"); + ++it; + + + AArch64MemLayout *memLayout = static_cast(cgFunc.GetMemlayout()); + int32 offset; + if (cgFunc.GetMirModule().GetFlavor() == MIRFlavor::kFlavorLmbc) { + offset = static_cast(memLayout->RealStackFrameSize() - + aarchCGFunc.SizeOfCalleeSaved() - memLayout->GetSizeOfLocals()); + } else { + offset = static_cast(cgFunc.GetMemlayout())->RealStackFrameSize() - + (aarchCGFunc.SizeOfCalleeSaved() - (kDivide2 * kIntregBytelen) /* for FP/LR */) - + memLayout->SizeOfArgsToStackPass(); + } + + if (cgFunc.GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc && + cgFunc.GetMirModule().IsCModule() && cgFunc.GetFunction().GetAttr(FUNCATTR_varargs)) { + /* GR/VR save areas are above the callee save area */ + AArch64MemLayout *ml = static_cast(cgFunc.GetMemlayout()); + auto saveareasize = static_cast(RoundUp(ml->GetSizeOfGRSaveArea(), kSizeOfPtr * k2BitSize) + + RoundUp(ml->GetSizeOfVRSaveArea(), kSizeOfPtr * k2BitSize)); + offset -= saveareasize; + } + + /* + * We are using a cleared dummy block; so insertPoint cannot be ret; + * see GenerateEpilog() + */ + for (; it != regsToRestore.end(); ++it) { + AArch64reg reg = *it; + CHECK_FATAL(reg != RFP, "stray RFP in callee_saved_list?"); + CHECK_FATAL(reg != RLR, "stray RLR in callee_saved_list?"); + + RegType regType = AArch64isa::IsGPRegister(reg) ? kRegTyInt : kRegTyFloat; + AArch64reg &firstHalf = AArch64isa::IsGPRegister(reg) ? intRegFirstHalf : fpRegFirstHalf; + if (firstHalf == kRinvalid) { + /* remember it */ + firstHalf = reg; + } else { + /* flush the pair */ + AppendInstructionPopPair(cgFunc, firstHalf, reg, regType, offset); + GetNextOffsetCalleeSaved(offset); + firstHalf = kRinvalid; + } + } + + if (intRegFirstHalf != kRinvalid) { + AppendInstructionPopSingle(cgFunc, intRegFirstHalf, kRegTyInt, offset); + GetNextOffsetCalleeSaved(offset); + } + + if (fpRegFirstHalf != kRinvalid) { + AppendInstructionPopSingle(cgFunc, fpRegFirstHalf, kRegTyFloat, offset); + GetNextOffsetCalleeSaved(offset); + } + + if (!currCG->GenerateDebugFriendlyCode()) { + AppendInstructionDeallocateCallFrame(R29, RLR, kRegTyInt); + } else { + AppendInstructionDeallocateCallFrameDebug(R29, RLR, kRegTyInt); + } + + if (cgFunc.GenCfi()) { + cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiDefCfaInsn(RSP, 0, k64BitSize)); + } + /* + * in case we split stp/ldp instructions, + * so that we generate a load-into-base-register instruction + * for the next function, maybe? (seems not necessary, but...) 
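+ * (Resetting the split base offset to 0 below is a conservative way to make sure no stale split-offset base carries over into later code.)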
+ */ + aarchCGFunc.SetSplitBaseOffset(0); +} + +void AArch64GenProEpilog::AppendJump(const MIRSymbol &funcSymbol) { + auto &aarchCGFunc = static_cast(cgFunc); + CG *currCG = cgFunc.GetCG(); + Operand &targetOpnd = aarchCGFunc.GetOrCreateFuncNameOpnd(funcSymbol); + cgFunc.GetCurBB()->AppendInsn(currCG->BuildInstruction(MOP_xuncond, targetOpnd)); +} + +void AArch64GenProEpilog::GenerateEpilog(BB &bb) { + if (!cgFunc.GetHasProEpilogue()) { + if (bb.GetPreds().empty() || !TestPredsOfRetBB(bb)) { + GenerateRet(bb); + } + return; + } + + /* generate stack protected instruction */ + BB &epilogBB = GenStackGuardCheckInsn(bb); + + auto &aarchCGFunc = static_cast(cgFunc); + CG *currCG = cgFunc.GetCG(); + BB *formerCurBB = cgFunc.GetCurBB(); + aarchCGFunc.GetDummyBB()->ClearInsns(); + aarchCGFunc.GetDummyBB()->SetIsProEpilog(true); + cgFunc.SetCurBB(*aarchCGFunc.GetDummyBB()); + + Operand &spOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(RSP, k64BitSize, kRegTyInt); + Operand &fpOpnd = aarchCGFunc.GetOrCreatePhysicalRegisterOperand(stackBaseReg, k64BitSize, kRegTyInt); + + if (cgFunc.HasVLAOrAlloca() && cgFunc.GetMirModule().GetFlavor() != MIRFlavor::kFlavorLmbc) { + aarchCGFunc.SelectCopy(spOpnd, PTY_u64, fpOpnd, PTY_u64); + } + + /* Hack: exit bb should always be reachable, since we need its existance for ".cfi_remember_state" */ + if (&epilogBB != cgFunc.GetLastBB() && epilogBB.GetNext() != nullptr) { + BB *nextBB = epilogBB.GetNext(); + do { + if (nextBB == cgFunc.GetLastBB() || !nextBB->IsEmpty()) { + break; + } + nextBB = nextBB->GetNext(); + } while (nextBB != nullptr); + if (nextBB != nullptr && !nextBB->IsEmpty() && cgFunc.GenCfi()) { + cgFunc.GetCurBB()->AppendInsn(currCG->BuildInstruction(cfi::OP_CFI_remember_state)); + cgFunc.GetCurBB()->SetHasCfi(); + nextBB->InsertInsnBefore(*nextBB->GetFirstInsn(), + currCG->BuildInstruction(cfi::OP_CFI_restore_state)); + nextBB->SetHasCfi(); + } + } + + const MapleVector ®sToSave = (!CGOptions::DoRegSavesOpt()) ? + aarchCGFunc.GetCalleeSavedRegs() : aarchCGFunc.GetProEpilogSavedRegs(); + if (!regsToSave.empty()) { + GeneratePopRegs(); + } else { + auto stackFrameSize = static_cast(cgFunc.GetMemlayout())->RealStackFrameSize(); + if (stackFrameSize > 0) { + if (currCG->GenerateVerboseCG()) { + cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCommentInsn("pop up activation frame")); + } + + if (cgFunc.HasVLAOrAlloca()) { + auto size = static_cast(cgFunc.GetMemlayout())->GetSegArgsToStkPass().GetSize(); + stackFrameSize = stackFrameSize < size ? 
0 : stackFrameSize - size; + } + + if (stackFrameSize > 0) { + Operand &immOpnd = aarchCGFunc.CreateImmOperand(stackFrameSize, k32BitSize, true); + aarchCGFunc.SelectAdd(spOpnd, spOpnd, immOpnd, PTY_u64); + if (cgFunc.GenCfi()) { + cgFunc.GetCurBB()->AppendInsn(aarchCGFunc.CreateCfiDefCfaInsn(RSP, 0, k64BitSize)); + } + } + } + } + + if (currCG->InstrumentWithDebugTraceCall()) { + AppendJump(*(currCG->GetDebugTraceExitFunction())); + } + + GenerateRet(*(cgFunc.GetCurBB())); + epilogBB.AppendBBInsns(*cgFunc.GetCurBB()); + if (cgFunc.GetCurBB()->GetHasCfi()) { + epilogBB.SetHasCfi(); + } + + cgFunc.SetCurBB(*formerCurBB); + aarchCGFunc.GetDummyBB()->SetIsProEpilog(false); +} + +void AArch64GenProEpilog::GenerateEpilogForCleanup(BB &bb) { + auto &aarchCGFunc = static_cast(cgFunc); + CG *currCG = cgFunc.GetCG(); + CHECK_FATAL(!cgFunc.GetExitBBsVec().empty(), "exit bb size is zero!"); + if (cgFunc.GetExitBB(0)->IsUnreachable()) { + /* if exitbb is unreachable then exitbb can not be generated */ + GenerateEpilog(bb); + } else if (aarchCGFunc.NeedCleanup()) { /* bl to the exit epilogue */ + LabelOperand &targetOpnd = aarchCGFunc.GetOrCreateLabelOperand(cgFunc.GetExitBB(0)->GetLabIdx()); + bb.AppendInsn(currCG->BuildInstruction(MOP_xuncond, targetOpnd)); + } +} + + +void AArch64GenProEpilog::ConvertToTailCalls(std::set &callInsnsMap) { + BB *exitBB = GetCurTailcallExitBB(); + + /* ExitBB is filled only by now. If exitBB has restore of SP indicating extra stack space has + been allocated, such as a function call with more than 8 args, argument with large aggr etc */ + FOR_BB_INSNS(insn, exitBB) { + if (insn->GetMachineOpcode() == MOP_xaddrri12 || insn->GetMachineOpcode() == MOP_xaddrri24) { + RegOperand ® = static_cast(insn->GetOperand(0)); + if (reg.GetRegisterNumber() == RSP) { + return; + } + } + } + + /* Replace all of the call insns. 
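(bl becomes MOP_tail_call_opt_xbl and blr becomes MOP_tail_call_opt_xblr; these are later emitted as plain b/br.)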
*/ + for (Insn *callInsn : callInsnsMap) { + MOperator insnMop = callInsn->GetMachineOpcode(); + switch (insnMop) { + case MOP_xbl: { + callInsn->SetMOP(MOP_tail_call_opt_xbl); + break; + } + case MOP_xblr: { + callInsn->SetMOP(MOP_tail_call_opt_xblr); + break; + } + default: + CHECK_FATAL(false, "Internal error."); + break; + } + BB *bb = callInsn->GetBB(); + if (bb->GetKind() == BB::kBBGoto) { + bb->SetKind(BB::kBBFallthru); + if (bb->GetLastInsn()->GetMachineOpcode() == MOP_xuncond) { + bb->RemoveInsn(*bb->GetLastInsn()); + } + } + for (auto sBB: bb->GetSuccs()) { + bb->RemoveSuccs(*sBB); + sBB->RemovePreds(*bb); + break; + } + } + + /* copy instrs from exit block */ + for (Insn *callInsn: callInsnsMap) { + BB *toBB = callInsn->GetBB(); + BB *fromBB = exitBB; + if (toBB == fromBB) { + /* callsite also in the return exit block, just change the return to branch */ + Insn *lastInsn = toBB->GetLastInsn(); + if (lastInsn->GetMachineOpcode() == MOP_xret) { + Insn *newInsn = cgFunc.GetTheCFG()->CloneInsn(*callInsn); + toBB->ReplaceInsn(*lastInsn, *newInsn); + for (Insn *insn = callInsn->GetNextMachineInsn(); insn != newInsn; insn = insn->GetNextMachineInsn()) { + insn->SetDoNotRemove(true); + } + toBB->RemoveInsn(*callInsn); + return; + } + CHECK_FATAL(0, "Tailcall in incorrect block"); + } + FOR_BB_INSNS_SAFE(insn, fromBB, next) { + if (insn->IsMachineInstruction() && insn->GetMachineOpcode() != MOP_xret) { + Insn *newInsn = cgFunc.GetTheCFG()->CloneInsn(*insn); + newInsn->SetDoNotRemove(true); + toBB->InsertInsnBefore(*callInsn, *newInsn); + } + } + } + + /* remove instrs in exit block */ + BB *bb = exitBB; + if (bb->GetPreds().size() > 0) { + return; /* exit block still needed by other non-tailcall blocks */ + } + Insn &junk = cgFunc.GetCG()->BuildInstruction(MOP_pseudo_none); + bb->AppendInsn(junk); + FOR_BB_INSNS_SAFE(insn, bb, next) { + if (insn->GetMachineOpcode() != MOP_pseudo_none) { + bb->RemoveInsn(*insn); + } + } +} + +void AArch64GenProEpilog::Run() { + CHECK_FATAL(cgFunc.GetFunction().GetBody()->GetFirst()->GetOpCode() == OP_label, + "The first statement should be a label"); + cgFunc.SetHasProEpilogue(NeedProEpilog()); + if (cgFunc.GetHasProEpilogue()) { + GenStackGuard(*(cgFunc.GetFirstBB())); + } + BB *proLog = nullptr; + if (cgFunc.GetCG()->DoPrologueEpilogue() && Globals::GetInstance()->GetOptimLevel() == CGOptions::kLevel2) { + /* There are some O2 dependent assumptions made */ + proLog = IsolateFastPath(*(cgFunc.GetFirstBB())); + } + + if (cgFunc.IsExitBBsVecEmpty()) { + if (cgFunc.GetLastBB()->GetPrev()->GetFirstStmt() == cgFunc.GetCleanupLabel() && + cgFunc.GetLastBB()->GetPrev()->GetPrev()) { + cgFunc.PushBackExitBBsVec(*cgFunc.GetLastBB()->GetPrev()->GetPrev()); + } else { + cgFunc.PushBackExitBBsVec(*cgFunc.GetLastBB()->GetPrev()); + } + } + + if (proLog != nullptr) { + GenerateProlog(*proLog); + proLog->SetFastPath(true); + cgFunc.GetFirstBB()->SetFastPath(true); + } else { + GenerateProlog(*(cgFunc.GetFirstBB())); + } + + for (auto *exitBB : cgFunc.GetExitBBsVec()) { + GenerateEpilog(*exitBB); + } + + if (cgFunc.GetFunction().IsJava()) { + GenerateEpilogForCleanup(*(cgFunc.GetCleanupBB())); + } + + if (cgFunc.GetMirModule().IsCModule() && !exitBB2CallSitesMap.empty()) { + cgFunc.GetTheCFG()->InitInsnVisitor(cgFunc); + for (auto pair : exitBB2CallSitesMap) { + BB *curExitBB = pair.first; + std::set& callInsnsMap = pair.second; + SetCurTailcallExitBB(curExitBB); + ConvertToTailCalls(callInsnsMap); + } + } +} +} /* namespace maplebe */ diff --git 
a/src/mapleall/maple_be/src/cg/aarch64/aarch64_regsaves.cpp- b/src/mapleall/maple_be/src/cg/aarch64/aarch64_regsaves.cpp- new file mode 100644 index 0000000000000000000000000000000000000000..7dfdba899e6f0d80e681b913c01babf65f39cc87 --- /dev/null +++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_regsaves.cpp- @@ -0,0 +1,572 @@ +/* + * Copyright (c) [2022] Futurewei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + */ +#include "aarch64_regsaves.h" +#include "aarch64_cg.h" +#include "aarch64_live.h" +#include "aarch64_cg.h" +#include "aarch64_proepilog.h" +#include "cg_dominance.h" +#include "cg_ssu_pre.h" + +namespace maplebe { + +#define RS_DUMP 1 // CG_DEBUG_FUNC(*cgFunc) + +void AArch64RegSavesOpt::InitData() { + calleeBitsDef = cgFunc->GetMemoryPool()->NewArray(cgFunc->NumBBs()); + errno_t retDef = memset_s(calleeBitsDef, cgFunc->NumBBs() * sizeof(CalleeBitsType), + 0, cgFunc->NumBBs() * sizeof(CalleeBitsType)); + calleeBitsUse = cgFunc->GetMemoryPool()->NewArray(cgFunc->NumBBs()); + errno_t retUse = memset_s(calleeBitsUse, cgFunc->NumBBs() * sizeof(CalleeBitsType), + 0, cgFunc->NumBBs() * sizeof(CalleeBitsType)); + CHECK_FATAL(retDef == EOK && retUse == EOK, "memset_s of calleesBits failed"); + + AArch64CGFunc *aarchCGFunc = static_cast(cgFunc); + const MapleVector &sp = aarchCGFunc->GetCalleeSavedRegs(); + if (!sp.empty()) { + if (std::find(sp.begin(), sp.end(), RFP) != sp.end()) { + aarchCGFunc->GetProEpilogSavedRegs().push_back(RFP); + } + if (std::find(sp.begin(), sp.end(), RLR) != sp.end()) { + aarchCGFunc->GetProEpilogSavedRegs().push_back(RLR); + } + } +} + + +void AArch64RegSavesOpt::CollectLiveInfo(BB &bb, const Operand &opnd, bool isDef, bool isUse) { + if (!opnd.IsRegister()) { + return; + } + const RegOperand ®Opnd = static_cast(opnd); + regno_t regNO = regOpnd.GetRegisterNumber(); + if (!AArch64Abi::IsCalleeSavedReg(static_cast(regNO)) || + (regNO >= R29 && regNO <= R31)) { + return; /* check only callee-save registers */ + } + RegType regType = regOpnd.GetRegisterType(); + if (regType == kRegTyVary) { + return; + } + if (isDef) { + /* First def */ + if (!IsCalleeBitSet(GetCalleeBitsDef(), bb.GetId(), regNO)) { + SetCalleeBit(GetCalleeBitsDef(), bb.GetId(), regNO); + } + } + if (isUse) { + /* Last use */ + SetCalleeBit(GetCalleeBitsUse(), bb.GetId(), regNO); + } +} + +void AArch64RegSavesOpt::GenerateReturnBBDefUse(BB &bb) { + PrimType returnType = cgFunc->GetFunction().GetReturnType()->GetPrimType(); + AArch64CGFunc *aarchCGFunc = static_cast(cgFunc); + if (IsPrimitiveFloat(returnType)) { + Operand &phyOpnd = + aarchCGFunc->GetOrCreatePhysicalRegisterOperand(static_cast(V0), k64BitSize, kRegTyFloat); + CollectLiveInfo(bb, phyOpnd, false, true); + } else if (IsPrimitiveInteger(returnType)) { + Operand &phyOpnd = + aarchCGFunc->GetOrCreatePhysicalRegisterOperand(static_cast(R0), k64BitSize, kRegTyInt); + CollectLiveInfo(bb, phyOpnd, false, true); + } +} + +void AArch64RegSavesOpt::ProcessAsmListOpnd(BB &bb, Operand &opnd, uint32 idx) { + bool isDef = false; + bool isUse = false; + switch 
(idx) { + case kAsmOutputListOpnd: + case kAsmClobberListOpnd: { + isDef = true; + break; + } + case kAsmInputListOpnd: { + isUse = true; + break; + } + default: + return; + } + ListOperand &listOpnd = static_cast(opnd); + for (auto op : listOpnd.GetOperands()) { + CollectLiveInfo(bb, *op, isDef, isUse); + } +} + +void AArch64RegSavesOpt::ProcessListOpnd(BB &bb, Operand &opnd) { + ListOperand &listOpnd = static_cast(opnd); + for (auto op : listOpnd.GetOperands()) { + CollectLiveInfo(bb, *op, false, true); + } +} + +void AArch64RegSavesOpt::ProcessMemOpnd(BB &bb, Operand &opnd) { + auto &memOpnd = static_cast(opnd); + Operand *base = memOpnd.GetBaseRegister(); + Operand *offset = memOpnd.GetIndexRegister(); + if (base != nullptr) { + CollectLiveInfo(bb, *base, !memOpnd.IsIntactIndexed(), true); + } + if (offset != nullptr) { + CollectLiveInfo(bb, *offset, false, true); + } +} + +void AArch64RegSavesOpt::ProcessCondOpnd(BB &bb) { + Operand &rflag = cgFunc->GetOrCreateRflag(); + CollectLiveInfo(bb, rflag, false, true); +} + +/* Record in each local BB the 1st def and the last use of a callee-saved + register */ +void AArch64RegSavesOpt::GetLocalDefUse() { + for (auto bbp : bfs->sortedBBs) { + BB &bb = *bbp; + if (bb.GetKind() == BB::kBBReturn) { + GenerateReturnBBDefUse(bb); + } + if (bb.IsEmpty()) { + continue; + } + + FOR_BB_INSNS(insn, &bb) { + if (!insn->IsMachineInstruction()) { + continue; + } + + bool isAsm = (insn->GetMachineOpcode() == MOP_asm); + const AArch64MD *md = &AArch64CG::kMd[static_cast(insn)->GetMachineOpcode()]; + uint32 opndNum = insn->GetOperandSize(); + for (uint32 i = 0; i < opndNum; ++i) { + Operand &opnd = insn->GetOperand(i); + AArch64OpndProp *regProp = static_cast(md->operand[i]); + bool isDef = regProp->IsRegDef(); + bool isUse = regProp->IsRegUse(); + if (opnd.IsList()) { + if (isAsm) { + ProcessAsmListOpnd(bb, opnd, i); + } else { + ProcessListOpnd(bb, opnd); + } + } else if (opnd.IsMemoryAccessOperand()) { + ProcessMemOpnd(bb, opnd); + } else if (opnd.IsConditionCode()) { + ProcessCondOpnd(bb); + } else { + CollectLiveInfo(bb, opnd, isDef, isUse); + } + } /* for all operands */ + } /* for all insns */ + } /* for all sortedBBs */ + + if (RS_DUMP) { + for (int i=0; iNumBBs(); i++) { + LogInfo::MapleLogger() << i << " : " << calleeBitsDef[i] << " " << calleeBitsUse[i] << "\n";; + } + } +} + +void AArch64RegSavesOpt::PrintBBs() const { + LogInfo::MapleLogger() << "RegSaves LiveIn/Out of BFS nodes:\n"; + for (auto *bb : bfs->sortedBBs) { + LogInfo::MapleLogger() << "\n< === > "; + LogInfo::MapleLogger() << bb->GetId(); + LogInfo::MapleLogger() << " succs:"; + for (auto *succBB : bb->GetSuccs()) { + LogInfo::MapleLogger() << " " << succBB->GetId(); + } + LogInfo::MapleLogger() << "\n LiveIn of [" << bb->GetId() << "]: "; + for (auto liveIn: bb->GetLiveInRegNO()) { + LogInfo::MapleLogger() << liveIn << " "; + } + LogInfo::MapleLogger() << "\n LiveOut of [" << bb->GetId() << "] "; + for (auto liveOut: bb->GetLiveOutRegNO()) { + LogInfo::MapleLogger() << liveOut << " "; + } + } + LogInfo::MapleLogger() << "\n"; +} + +/* 1st def MUST not have preceding save in dominator list. 
Each dominator + block must not have livein or liveout of the register */ +bool AArch64RegSavesOpt::CheckCriteria(BB *bb, regno_t reg) const { + /* Already a site to save */ + SavedRegInfo *sp = bbSavedRegs[bb->GetId()]; + if (sp != nullptr && sp->ContainSaveReg(reg)) { + return true; + } + + /* This preceding block has livein OR liveout of reg */ + MapleSet &liveIn = bb->GetLiveInRegNO(); + MapleSet &liveOut = bb->GetLiveOutRegNO(); + if (liveIn.find(reg) != liveIn.end() || + liveOut.find(reg) != liveOut.end()) { + return true; + } + + return false; +} + +/* Return true if reg is already to be saved in its dominator list */ +bool AArch64RegSavesOpt::AlreadySavedInDominatorList(BB *bb, regno_t reg) const { + BB *aBB = GetDomInfo()->GetDom(bb->GetId()); + + while (aBB->GetPreds().empty()) { /* can't go beyond prolog */ + if (CheckCriteria(aBB, reg)) { + return true; /* previously saved, inspect next reg */ + } + aBB = GetDomInfo()->GetDom(aBB->GetId()); + } + return false; /* not previously saved, to save at bb */ +} + +/* Determine callee-save regs save locations and record them in bbSavedRegs. + Save is needed for a 1st def callee-save register at its dominator block + outside any loop. */ +void AArch64RegSavesOpt::DetermineCalleeSaveLocations() { + for (auto *bb : bfs->sortedBBs) { + if (RS_DUMP) { + LogInfo::MapleLogger() << "BB: " << bb->GetId() << "\n"; + } + CalleeBitsType c = GetBBCalleeBits(GetCalleeBitsDef(), bb->GetId()); + CalleeBitsType mask = 1; + if (c == 0) { + continue; + } + for (int i=0; i<(sizeof(CalleeBitsType)<<3); i++) { + if (c & mask) { + MapleSet &liveIn = bb->GetLiveInRegNO(); + regno_t reg = ReverseRegBitMap(i); + if (liveIn.find(reg) == liveIn.end()) { /* not livein */ +#if 0 + BB* bbDom = GetDomInfo()->GetDom(bb->GetId()); +#else + BB* bbDom = bb; +#endif + bool done = false; + while (bbDom->GetLoop() != nullptr) { + bbDom = GetDomInfo()->GetDom(bbDom->GetId()); + if (CheckCriteria(bbDom, reg)) { + done = true; + break; + } + ASSERT(bbDom, "Can't find dominator for save location"); + } + if (done) { + mask <<= 1; + continue; + } + /* Check if a dominator of ddDom was already a location to save */ + if (AlreadySavedInDominatorList(bbDom, reg)) { + mask <<= 1; + continue; /* no need to save again, next reg */ + } + uint32 bid = bbDom->GetId(); + if (RS_DUMP) { + LogInfo::MapleLogger() << "R" << reg - 1; + LogInfo::MapleLogger() << " dominated by BB" << bid << "\n"; + } + SavedRegInfo *ctx = bbSavedRegs[bid]; + if (ctx == nullptr) { + ctx = memPool->New(alloc); + bbSavedRegs[bid] = ctx; + } + if (!bbSavedRegs[bid]->ContainSaveReg(reg)) { + bbSavedRegs[bid]->InsertSaveReg(reg); + } + } + } + mask <<= 1; + CalleeBitsType t = c; + if ((t >> 1) == 0) { + break; /* short cut */ + } + } + } +} + +/* Determine calleesave regs restore locations by calling ssu-pre, + previous bbSavedRegs memory is cleared and restore locs recorded in it */ +void AArch64RegSavesOpt::DetermineCalleeRestoreLocations() { + AArch64CGFunc *aarchCGFunc = static_cast(cgFunc); + MapleAllocator sprealloc(memPool); + const MapleVector &callees = static_cast(cgFunc)->GetCalleeSavedRegs(); + for (auto reg : callees) { + if (reg >= R29) { + continue; /* save/restore in prologue, epilogue */ + } + SPreWorkCand wkCand(&sprealloc); + for (int bid = 1; bid < bbSavedRegs.size(); bid++) { + /* Set the saved BB locations of this callee-saved register */ + SavedRegInfo *sp = bbSavedRegs[bid]; + if (sp != nullptr) { + if (sp->ContainSaveReg(reg)) { + wkCand.saveBBs.insert(bid); + } + } + /* Set the BB 
occurrences of this callee-saved register */ + if (IsCalleeBitSet(GetCalleeBitsDef(), bid, reg) || + IsCalleeBitSet(GetCalleeBitsUse(), bid, reg)) { + wkCand.occBBs.insert(bid); + } + } + DoRestorePlacementOpt(cgFunc, GetPostDomInfo(), &wkCand); + if (wkCand.restoreAtEpilog) { + /* Restore cannot be applied, skip this reg and place save/restore + in prolog/epilog */ + for (int bid = 1; bid < bbSavedRegs.size(); bid++) { + SavedRegInfo *sp = bbSavedRegs[bid]; + if (sp != nullptr && !sp->GetSaveSet().empty()) { + if (sp->ContainSaveReg(reg)) { + sp->RemoveSaveReg(reg); + } + } + } + aarchCGFunc->GetProEpilogSavedRegs().push_back(reg); + if (RS_DUMP) { + LogInfo::MapleLogger() << "Restore R" << reg - 1 << " n/a, do in Pro/Epilog\n"; + } + continue; + } + if (!wkCand.restoreAtEntryBBs.empty() || !wkCand.restoreAtExitBBs.empty()) { + for (uint32 entBB : wkCand.restoreAtEntryBBs) { + if (RS_DUMP) { + std::string r = reg <= R28 ? "r" : "v"; + LogInfo::MapleLogger() << "BB " << entBB << " restore: " << r << reg - 1 << "\n"; + } + if (bbSavedRegs[entBB] == nullptr) { + bbSavedRegs[entBB] = memPool->New(alloc); + } + bbSavedRegs[entBB]->InsertEntryReg(reg); + } + bool done = false; + for (uint32 exitBB : wkCand.restoreAtExitBBs) { + for (BB *bb : bfs->sortedBBs) { + if (bb->GetId() == exitBB) { + if (bb->GetKind() == BB::kBBIgoto) { + CHECK_FATAL(false, "igoto detected"); +#if 0 + } else if (bb->GetKind() == BB::kBBIf) { +#else + } else if (bb->GetSuccs().size() > 1) { +#endif + for (BB *sbb : bb->GetSuccs()) { + if (sbb->GetPreds().size() > 1) { + CHECK_FATAL(false, "critical edge detected"); + } + } + for (BB *sbb : bb->GetSuccs()) { + if (bbSavedRegs[sbb->GetId()] == nullptr) { + bbSavedRegs[sbb->GetId()] = memPool->New(alloc); + } + bbSavedRegs[sbb->GetId()]->InsertEntryReg(reg); /* insert at both succs */ + } + done = true; + break; /* break out of the sortedBBs loop */ + } + } + } + if (!done) { + if (bbSavedRegs[exitBB] == nullptr) { + bbSavedRegs[exitBB] = memPool->New(alloc); + } + bbSavedRegs[exitBB]->InsertExitReg(reg); + } + } + } + } +} + +int32 AArch64RegSavesOpt::FindNextOffsetForCalleeSave() { + int32 offset = + static_cast(cgFunc->GetMemlayout())-> + RealStackFrameSize() - + (static_cast(cgFunc)->SizeOfCalleeSaved() - + (kDivide2 * kIntregBytelen) /* FP/LR */) - + cgFunc->GetMemlayout()->SizeOfArgsToStackPass(); + + if (cgFunc->GetFunction().GetAttr(FUNCATTR_varargs)) { + /* GR/VR save areas are above the callee save area */ + AArch64MemLayout *ml = static_cast(cgFunc->GetMemlayout()); + int saveareasize = RoundUp(ml->GetSizeOfGRSaveArea(), kSizeOfPtr * k2BitSize) + + RoundUp(ml->GetSizeOfVRSaveArea(), kSizeOfPtr * k2BitSize); + offset -= saveareasize; + } + return offset; +} + +void AArch64RegSavesOpt::InsertCalleeSaveCode() { + int bid = 0; + BB *saveBB = cgFunc->GetCurBB(); + AArch64CGFunc *aarchCGFunc = static_cast(cgFunc); + + int32 offset = FindNextOffsetForCalleeSave(); +#if 1 + offset += (aarchCGFunc->GetProEpilogSavedRegs().size() - 2) << 3; // 2 for R29,RLR 3 for 8 bytes +#endif + for (BB *bb : bfs->sortedBBs) { + bid = bb->GetId(); + aarchCGFunc->SetSplitBaseOffset(0); + if (bbSavedRegs[bid] != nullptr && !bbSavedRegs[bid]->GetSaveSet().empty()) { + aarchCGFunc->GetDummyBB()->ClearInsns(); + cgFunc->SetCurBB(*aarchCGFunc->GetDummyBB()); + AArch64reg intRegFirstHalf = kRinvalid; + AArch64reg fpRegFirstHalf = kRinvalid; + for (auto areg : bbSavedRegs[bid]->GetSaveSet()) { + AArch64reg reg = static_cast(areg); + RegType regType = AArch64isa::IsGPRegister(reg) + ? 
kRegTyInt : kRegTyFloat; + AArch64reg &firstHalf = AArch64isa::IsGPRegister(reg) + ? intRegFirstHalf : fpRegFirstHalf; + /* If reg not seen before, record offset and then update */ + if (regOffset.find(areg) == regOffset.end()) { + regOffset[areg] = offset; + offset += kIntregBytelen; + } + if (firstHalf == kRinvalid) { + /* 1st half in reg pair */ + firstHalf = reg; + } else { + /* Pair reg! Use of offset of the 1st half */ + AArch64GenProEpilog::AppendInstructionPushPair(*cgFunc, firstHalf, reg, regType, regOffset[firstHalf]); + firstHalf = kRinvalid; + } + if (RS_DUMP) { + std::string r = reg <= R28 ? "R" : "V"; + LogInfo::MapleLogger() << r << reg-1 << " save in BB " << bid << "\n"; + LogInfo::MapleLogger() << "Offset = " << regOffset[areg]<< "\n"; + } + } + + if (intRegFirstHalf != kRinvalid) { + AArch64GenProEpilog::AppendInstructionPushSingle(*cgFunc, intRegFirstHalf, kRegTyInt, regOffset[intRegFirstHalf]); + } + + if(fpRegFirstHalf != kRinvalid) { + AArch64GenProEpilog::AppendInstructionPushSingle(*cgFunc, fpRegFirstHalf, kRegTyFloat, regOffset[fpRegFirstHalf]); + } + bb->InsertAtBeginning(*aarchCGFunc->GetDummyBB()); + } + } + cgFunc->SetCurBB(*saveBB); +} + +void AArch64RegSavesOpt::InsertCalleeRestoreCode() { + int bid = 0; + BB *saveBB = cgFunc->GetCurBB(); + AArch64CGFunc *aarchCGFunc = static_cast(cgFunc); + + int32 offset = FindNextOffsetForCalleeSave(); + for (BB *bb : bfs->sortedBBs) { + bid = bb->GetId(); + aarchCGFunc->SetSplitBaseOffset(0); + SavedRegInfo *sp = bbSavedRegs[bid]; + if (sp != nullptr) { + if (sp->GetEntrySet().empty() && sp->GetExitSet().empty()) { + continue; + } + + aarchCGFunc->GetDummyBB()->ClearInsns(); + cgFunc->SetCurBB(*aarchCGFunc->GetDummyBB()); + for (auto areg : sp->GetEntrySet()) { + AArch64reg reg = static_cast(areg); + offset = regOffset[areg]; + if (RS_DUMP) { + std::string r = reg <= R28 ? "R" : "V"; + LogInfo::MapleLogger() << r << reg-1 << " entry restore in BB " << bid << "\n"; + LogInfo::MapleLogger() << "Saved Offset = " << offset << "\n"; + } + + /* restore is always single from saved offset */ + RegType regType = AArch64isa::IsGPRegister(reg) + ? kRegTyInt : kRegTyFloat; + AArch64GenProEpilog::AppendInstructionPopSingle(*cgFunc, reg, regType, offset); + } + FOR_BB_INSNS(insn, aarchCGFunc->GetDummyBB()) { + insn->SetDoNotRemove(true); /* do not let ebo remove these restores */ + } + bb->InsertAtBeginning(*aarchCGFunc->GetDummyBB()); + + aarchCGFunc->GetDummyBB()->ClearInsns(); + cgFunc->SetCurBB(*aarchCGFunc->GetDummyBB()); + for (auto areg : sp->GetExitSet()) { + AArch64reg reg = static_cast(areg); + offset = regOffset[areg]; + if (RS_DUMP) { + std::string r = reg <= R28 ? "R" : "V"; + LogInfo::MapleLogger() << r << reg-1 << " exit restore in BB " << bid << "\n"; + LogInfo::MapleLogger() << "Saved Offset = " << offset << "\n"; + } + + /* restore is always single from saved offset */ + RegType regType = AArch64isa::IsGPRegister(reg) + ? 
kRegTyInt : kRegTyFloat; + AArch64GenProEpilog::AppendInstructionPopSingle(*cgFunc, reg, regType, offset); + } + FOR_BB_INSNS(insn, aarchCGFunc->GetDummyBB()) { + insn->SetDoNotRemove(true); + } +#if 0 + if (bb->GetKind() == BB::kBBIf || bb->GetKind() == BB::kBBGoto || + bb->GetKind() == BB::kBBIgoto) +#else + if ((bb->GetKind() == BB::kBBIf && bb->GetSuccs().size() <= 1) || + bb->GetKind() == BB::kBBGoto) { +#endif + bb->InsertAtEndMinus1(*aarchCGFunc->GetDummyBB()); + } else { + bb->InsertAtEnd(*aarchCGFunc->GetDummyBB()); + } + } + } + cgFunc->SetCurBB(*saveBB); +} + +/* Callee-save registers save/restore placement optimization */ +void AArch64RegSavesOpt::Run() { + DotGenerator::GenerateDot("SR", *cgFunc, cgFunc->GetMirModule(), true, cgFunc->GetName()); + if (Globals::GetInstance()->GetOptimLevel() <= 1) { + return; + } + + Bfs localBfs(*cgFunc, *memPool); + bfs = &localBfs; + bfs->ComputeBlockOrder(); + if (RS_DUMP) { + PrintBBs(); + } + + /* Determined 1st def and last use of all callee-saved registers used + for all BBs */ + InitData(); + GetLocalDefUse(); + + /* Determine save sites at dominators of 1st def with no live-in and + not within loop */ + DetermineCalleeSaveLocations(); + + /* Determine restore sites */ + DetermineCalleeRestoreLocations(); + + /* Generate callee save instrs at found sites */ + InsertCalleeSaveCode(); + + /* Generate callee restores at found sites */ + InsertCalleeRestoreCode(); +} +} /* namespace maplebe */ diff --git a/src/mapleall/maple_be/src/cg/aarch64/yy b/src/mapleall/maple_be/src/cg/aarch64/yy new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/mapleall/maple_be/src/cg/cg_option.cpp+ b/src/mapleall/maple_be/src/cg/cg_option.cpp+ new file mode 100644 index 0000000000000000000000000000000000000000..1ce690c57b4a338e0d80f1565322aa5ae13e11cc --- /dev/null +++ b/src/mapleall/maple_be/src/cg/cg_option.cpp+ @@ -0,0 +1,1595 @@ +/* + * Copyright (c) [2020-2021] Huawei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. + * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. 
+ */ +#include "cg_option.h" +#include +#include +#include +#include "mpl_logging.h" +#include "parser_opt.h" +#include "mir_parser.h" +#include "option_parser.h" +#include "string_utils.h" + +namespace maplebe { +using namespace maple; +using namespace mapleOption; + +const std::string kMplcgVersion = ""; + +bool CGOptions::dumpBefore = false; +bool CGOptions::dumpAfter = false; +bool CGOptions::timePhases = false; +std::unordered_set CGOptions::dumpPhases = {}; +std::unordered_set CGOptions::skipPhases = {}; +std::unordered_map> CGOptions::cyclePatternMap = {}; +std::string CGOptions::skipFrom = ""; +std::string CGOptions::skipAfter = ""; +std::string CGOptions::dumpFunc = "*"; +std::string CGOptions::globalVarProfile = ""; +std::string CGOptions::profileData = ""; +std::string CGOptions::profileFuncData = ""; +std::string CGOptions::profileClassData = ""; +#ifdef TARGARM32 +std::string CGOptions::duplicateAsmFile = ""; +#else +std::string CGOptions::duplicateAsmFile = "maple/mrt/codetricks/arch/arm64/duplicateFunc.s"; +#endif +Range CGOptions::range = Range(); +std::string CGOptions::fastFuncsAsmFile = ""; +Range CGOptions::spillRanges = Range(); +uint8 CGOptions::fastAllocMode = 0; /* 0: fast, 1: spill all */ +bool CGOptions::fastAlloc = false; +uint64 CGOptions::lsraBBOptSize = 150000; +uint64 CGOptions::lsraInsnOptSize = 200000; +uint64 CGOptions::overlapNum = 28; +#if TARGAARCH64 || TARGRISCV64 +bool CGOptions::useBarriersForVolatile = false; +#else +bool CGOptions::useBarriersForVolatile = true; +#endif +bool CGOptions::exclusiveEH = false; +bool CGOptions::doEBO = false; +bool CGOptions::doCFGO = false; +bool CGOptions::doICO = false; +bool CGOptions::doStoreLoadOpt = false; +bool CGOptions::doGlobalOpt = false; +bool CGOptions::doVregRename = false; +bool CGOptions::doMultiPassColorRA = true; +bool CGOptions::doPrePeephole = false; +bool CGOptions::doPeephole = false; +bool CGOptions::doSchedule = false; +bool CGOptions::doWriteRefFieldOpt = false; +bool CGOptions::dumpOptimizeCommonLog = false; +bool CGOptions::checkArrayStore = false; +bool CGOptions::doPIC = false; +bool CGOptions::noDupBB = false; +bool CGOptions::noCalleeCFI = true; +bool CGOptions::emitCyclePattern = false; +bool CGOptions::insertYieldPoint = false; +bool CGOptions::mapleLinker = false; +bool CGOptions::printFunction = false; +bool CGOptions::nativeOpt = false; +bool CGOptions::lazyBinding = false; +bool CGOptions::hotFix = false; +bool CGOptions::debugSched = false; +bool CGOptions::bruteForceSched = false; +bool CGOptions::simulateSched = false; +CGOptions::ABIType CGOptions::abiType = kABIHard; +CGOptions::EmitFileType CGOptions::emitFileType = kAsm; +bool CGOptions::genLongCalls = false; +bool CGOptions::functionSections = false; +bool CGOptions::useFramePointer = false; +bool CGOptions::gcOnly = false; +bool CGOptions::quiet = false; +bool CGOptions::doPatchLongBranch = false; +bool CGOptions::doPreSchedule = false; +bool CGOptions::emitBlockMarker = true; +bool CGOptions::inRange = false; +bool CGOptions::doPreLSRAOpt = false; +bool CGOptions::doLocalRefSpill = false; +bool CGOptions::doCalleeToSpill = false; +bool CGOptions::replaceASM = false; +bool CGOptions::generalRegOnly = false; +bool CGOptions::fastMath = false; + +enum OptionIndex : uint64 { + kCGQuiet = kCommonOptionEnd + 1, + kPie, + kPic, + kCGVerbose, + kCGVerboseCG, + kCGMapleLinker, + kCgen, + kEbo, + kCfgo, + kIco, + kSlo, + kGo, + kPreLSRAOpt, + kLocalrefSpill, + kOptCallee, + kPrepeep, + kPeep, + kPreSchedule, + kSchedule, + 
kVregRename, + kMultiPassRA, + kWriteRefFieldOpt, + kDumpOlog, + kCGNativeOpt, + kInsertCall, + kTrace, + kCGClassList, + kGenDef, + kGenGctib, + kCGBarrier, + kGenPrimorList, + kRaLinear, + kRaColor, + kPatchBranch, + kConstFoldOpt, + kSuppressFinfo, + kEhList, + kObjMap, + kCGDumpcfg, + kCGDumpBefore, + kCGDumpAfter, + kCGTimePhases, + kCGDumpFunc, + kDebuggingInfo, + kStackGuard, + kDebugGenDwarf, + kDebugUseSrc, + kDebugUseMix, + kDebugAsmMix, + kProfilingInfo, + kProfileEnable, + kLSRABB, + kLSRAInsn, + kLSRAOverlap, + kCGO0, + kCGO1, + kCGO2, + kProepilogue, + kYieldPoing, + kLocalRc, + kCGRange, + kFastAlloc, + kSpillRange, + kDuplicateBB, + kCalleeCFI, + kCyclePatternList, + kDuplicateToDelPlt, + kDuplicateToDelPlt2, + kReplaceAsm, + kUseGeneralRegOnly, + kEmitBlockMarker, + kInsertSoe, + kCheckArrayStore, + kPrintFunction, + kCGDumpPhases, + kCGSkipPhases, + kCGSkipFrom, + kCGSkipAfter, + kCGLazyBinding, + kCGHotFix, + kDebugSched, + kBruteForceSched, + kSimulateSched, + kCrossLoc, + kABIType, + kEmitFileType, + kLongCalls, + kFunctionSections, + kOmitFramePointer, + kFastMath, +}; + +const Descriptor kUsage[] = { + { kPie, + kEnable, + "", + "pie", + kBuildTypeProduct, + kArgCheckPolicyBool, + " --pie \tGenerate position-independent executable\n" + " --no-pie\n", + "mplcg", + {} }, + { kPic, + kEnable, + "", + "fpic", + kBuildTypeProduct, + kArgCheckPolicyBool, + " --fpic \tGenerate position-independent shared library\n" + " --no-fpic\n", + "mplcg", + {} }, + { kCGVerbose, + kEnable, + "", + "verbose-asm", + kBuildTypeProduct, + kArgCheckPolicyBool, + " --verbose-asm \tAdd comments to asm output\n" + " --no-verbose-asm\n", + "mplcg", + {} }, + { kCGVerboseCG, + kEnable, + "", + "verbose-cg", + kBuildTypeProduct, + kArgCheckPolicyBool, + " --verbose-cg \tAdd comments to cg output\n" + " --no-verbose-cg\n", + "mplcg", + {} }, + { kCGMapleLinker, + kEnable, + "", + "maplelinker", + kBuildTypeProduct, + kArgCheckPolicyBool, + " --maplelinker \tGenerate the MapleLinker .s format\n" + " --no-maplelinker\n", + "mplcg", + {} }, + { kCGQuiet, + kEnable, + "", + "quiet", + kBuildTypeExperimental, + kArgCheckPolicyBool, + " --quiet \tBe quiet (don't output debug messages)\n" + " --no-quiet\n", + "mplcg", + {} }, + { kCgen, + kEnable, + "", + "cg", + kBuildTypeExperimental, + kArgCheckPolicyBool, + " --cg \tGenerate the output .s file\n" + " --no-cg\n", + "mplcg", + {} }, + { kReplaceAsm, + kEnable, + "", + "replaceasm", + kBuildTypeProduct, + kArgCheckPolicyBool, + " --replaceasm \tReplace the the assembly code\n" + " --no-replaceasm\n", + "mplcg", + {} }, + { kUseGeneralRegOnly, + kEnable, + "", + "general-reg-only", + kBuildTypeProduct, + kArgCheckPolicyBool, + " --general-reg-only \tdisable floating-point or Advanced SIMD registers\n" + " --no-general-reg-only\n", + "mplcg", + {} }, + { kCGLazyBinding, + kEnable, + "", + "lazy-binding", + kBuildTypeProduct, + kArgCheckPolicyBool, + " --lazy-binding \tBind class symbols lazily[default off]\n", + "mplcg", + {} }, + { kCGHotFix, + kEnable, + "", + "hot-fix", + kBuildTypeExperimental, + kArgCheckPolicyBool, + " --hot-fix \tOpen for App hot fix[default off]\n" + " --no-hot-fix\n", + "mplcg", + {} }, + { kEbo, + kEnable, + "", + "ebo", + kBuildTypeExperimental, + kArgCheckPolicyBool, + " --ebo \tPerform Extend block optimization\n" + " --no-ebo\n", + "mplcg", + {} }, + { kCfgo, + kEnable, + "", + "cfgo", + kBuildTypeExperimental, + kArgCheckPolicyBool, + " --cfgo \tPerform control flow optimization\n" + " --no-cfgo\n", + "mplcg", + {} }, + { 
kIco, + kEnable, + "", + "ico", + kBuildTypeExperimental, + kArgCheckPolicyBool, + " --ico \tPerform if-conversion optimization\n" + " --no-ico\n", + "mplcg", + {} }, + { kSlo, + kEnable, + "", + "storeloadopt", + kBuildTypeExperimental, + kArgCheckPolicyBool, + " --storeloadopt \tPerform global store-load optimization\n" + " --no-storeloadopt\n", + "mplcg", + {} }, + { kGo, + kEnable, + "", + "globalopt", + kBuildTypeExperimental, + kArgCheckPolicyBool, + " --globalopt \tPerform global optimization\n" + " --no-globalopt\n", + "mplcg", + {} }, + { kPreLSRAOpt, + kEnable, + "", + "prelsra", + kBuildTypeExperimental, + kArgCheckPolicyBool, + " --prelsra \tPerform live interval simplification in LSRA\n" + " --no-prelsra\n", + "mplcg", + {} }, + { kLocalrefSpill, + kEnable, + "", + "lsra-lvarspill", + kBuildTypeExperimental, + kArgCheckPolicyBool, + " --lsra-lvarspill \tPerform LSRA spill using local ref var stack locations\n" + " --no-lsra-lvarspill\n", + "mplcg", + {} }, + { kOptCallee, + kEnable, + "", + "lsra-optcallee", + kBuildTypeExperimental, + kArgCheckPolicyBool, + " --lsra-optcallee \tSpill callee if only one def to use\n" + " --no-lsra-optcallee\n", + "mplcg", + {} }, + { kPrepeep, + kEnable, + "", + "prepeep", + kBuildTypeExperimental, + kArgCheckPolicyBool, + " --prepeep \tPerform peephole optimization before RA\n" + " --no-prepeep\n", + "mplcg", + {} }, + { kPeep, + kEnable, + "", + "peep", + kBuildTypeExperimental, + kArgCheckPolicyBool, + " --peep \tPerform peephole optimization after RA\n" + " --no-peep\n", + "mplcg", + {} }, + { kPreSchedule, + kEnable, + "", + "preschedule", + kBuildTypeExperimental, + kArgCheckPolicyBool, + " --preschedule \tPerform prescheduling\n" + " --no-preschedule\n", + "mplcg", + {} }, + { kSchedule, + kEnable, + "", + "schedule", + kBuildTypeExperimental, + kArgCheckPolicyBool, + " --schedule \tPerform scheduling\n" + " --no-schedule\n", + "mplcg", + {} }, + { kVregRename, + kEnable, + "", + "vreg-rename", + kBuildTypeExperimental, + kArgCheckPolicyBool, + " --vreg-rename \tPerform rename of long live range around loops in coloring RA\n" + " --no-vreg-rename\n", + "mplcg", + {} }, + { kMultiPassRA, + kEnable, + "", + "fullcolor", + kBuildTypeExperimental, + kArgCheckPolicyBool, + " --fullcolor \tPerform multi-pass coloring RA\n" + " --no-fullcolor\n", + "mplcg", + {} }, + { kWriteRefFieldOpt, + kEnable, + "", + "writefieldopt", + kBuildTypeExperimental, + kArgCheckPolicyBool, + " --writefieldopt \tPerform WriteRefFieldOpt\n" + " --no-writefieldopt\n", + "mplcg", + {} }, + { kDumpOlog, + kEnable, + "", + "dump-olog", + kBuildTypeExperimental, + kArgCheckPolicyBool, + " --dump-olog \tDump CFGO and ICO debug information\n" + " --no-dump-olog\n", + "mplcg", + {} }, + { kCGNativeOpt, + kEnable, + "", + "nativeopt", + kBuildTypeProduct, + kArgCheckPolicyBool, + " --nativeopt \tEnable native opt\n" + " --no-nativeopt\n", + "mplcg", + {} }, + { kObjMap, + kEnable, + "", + "objmap", + kBuildTypeExperimental, + kArgCheckPolicyBool, + " --objmap \tCreate object maps (GCTIBs) inside the main output (.s) file\n" + " --no-objmap\n", + "mplcg", + {} }, + { kYieldPoing, + kEnable, + "", + "yieldpoint", + kBuildTypeExperimental, + kArgCheckPolicyBool, + " --yieldpoint \tGenerate yieldpoints [default]\n" + " --no-yieldpoint\n", + "mplcg", + {} }, + { kProepilogue, + kEnable, + "", + "proepilogue", + kBuildTypeExperimental, + kArgCheckPolicyBool, + " --proepilogue \tDo tail call optimization and eliminate unnecessary prologue and epilogue.\n" + " --no-proepilogue\n", 
+ "mplcg", + {} }, + { kLocalRc, + kEnable, + "", + "local-rc", + kBuildTypeExperimental, + kArgCheckPolicyBool, + " --local-rc \tHandle Local Stack RC [default]\n" + " --no-local-rc\n", + "mplcg", + {} }, + { kInsertCall, + 0, + "", + "insert-call", + kBuildTypeExperimental, + kArgCheckPolicyRequired, + " --insert-call=name \tInsert a call to the named function\n", + "mplcg", + {} }, + { kTrace, + 0, + "", + "add-debug-trace", + kBuildTypeProduct, + kArgCheckPolicyNone, + " --add-debug-trace \tInstrument the output .s file to print call traces at runtime\n", + "mplcg", + {} }, + { kProfileEnable, + 0, + "", + "add-func-profile", + kBuildTypeExperimental, + kArgCheckPolicyNone, + " --add-func-profile \tInstrument the output .s file to record func at runtime\n", + "mplcg", + {} }, + { kCGClassList, + 0, + "", + "class-list-file", + kBuildTypeExperimental, + kArgCheckPolicyRequired, + " --class-list-file \tSet the class list file for the following generation options,\n" + " \tif not given, generate for all visible classes\n" + " \t--class-list-file=class_list_file\n", + "mplcg", + {} }, + { kGenDef, + kEnable, + "", + "gen-c-macro-def", + kBuildTypeProduct, + kArgCheckPolicyBool, + " --gen-c-macro-def \tGenerate a .def file that contains extra type metadata, including the\n" + " \tclass instance sizes and field offsets (default)\n" + " --no-gen-c-macro-def\n", + "mplcg", + {} }, + { kGenGctib, + kEnable, + "", + "gen-gctib-file", + kBuildTypeExperimental, + kArgCheckPolicyBool, + " --gen-gctib-file \tGenerate a separate .s file for GCTIBs. Usually used together with\n" + " \t--no-objmap (not implemented yet)\n" + " --no-gen-gctib-file\n", + "mplcg", + {} }, + { kStackGuard, + kEnable, + "", + "stackguard", + kBuildTypeExperimental, + kArgCheckPolicyBool, + " -stackguard \tadd stack guard\n" + " -no-stackguard\n", + "mplcg", + {} }, + { kDebuggingInfo, + 0, + "g", + "", + kBuildTypeExperimental, + kArgCheckPolicyNone, + " -g \tGenerate debug information\n", + "mplcg", + {} }, + { kDebugGenDwarf, + 0, + "", + "gdwarf", + kBuildTypeProduct, + kArgCheckPolicyNone, + " --gdwarf \tGenerate dwarf infomation\n", + "mplcg", + {} }, + { kDebugUseSrc, + 0, + "", + "gsrc", + kBuildTypeProduct, + kArgCheckPolicyNone, + " --gsrc \tUse original source file instead of mpl file for debugging\n", + "mplcg", + {} }, + { kDebugUseMix, + 0, + "", + "gmixedsrc", + kBuildTypeProduct, + kArgCheckPolicyNone, + " --gmixedsrc \tUse both original source file and mpl file for debugging\n", + "mplcg", + {} }, + { kDebugAsmMix, + 0, + "", + "gmixedasm", + kBuildTypeExperimental, + kArgCheckPolicyNone, + " --gmixedasm \tComment out both original source file and mpl file for debugging\n", + "mplcg", + {} }, + { kProfilingInfo, + 0, + "p", + "", + kBuildTypeExperimental, + kArgCheckPolicyNone, + " -p \tGenerate profiling infomation\n", + "mplcg", + {} }, + { kRaLinear, + 0, + "", + "with-ra-linear-scan", + kBuildTypeExperimental, + kArgCheckPolicyNone, + " --with-ra-linear-scan \tDo linear-scan register allocation\n", + "mplcg", + {} }, + { kRaColor, + 0, + "", + "with-ra-graph-color", + kBuildTypeExperimental, + kArgCheckPolicyNone, + " --with-ra-graph-color \tDo coloring-based register allocation\n", + "mplcg", + {} }, + { kPatchBranch, + 0, + "", + "patch-long-branch", + kBuildTypeExperimental, + kArgCheckPolicyNone, + " --patch-long-branch \tEnable patching long distance branch with jumping pad\n", + "mplcg", + {} }, + { kConstFoldOpt, + 0, + "", + "const-fold", + kBuildTypeExperimental, + kArgCheckPolicyNone, + " 
--const-fold \tEnable constant folding\n",
+ "mplcg",
+ {} },
+ { kEhList,
+ 0,
+ "",
+ "eh-exclusive-list",
+ kBuildTypeExperimental,
+ kArgCheckPolicyRequired,
+ " --eh-exclusive-list \tFor generating gold files in unit testing\n"
+ " \t--eh-exclusive-list=list_file\n",
+ "mplcg",
+ {} },
+ { kCGO0,
+ 0,
+ "",
+ "O0",
+ kBuildTypeExperimental,
+ kArgCheckPolicyNone,
+ " -O0 \tNo optimization.\n",
+ "mplcg",
+ {} },
+ { kCGO1,
+ 0,
+ "",
+ "O1",
+ kBuildTypeExperimental,
+ kArgCheckPolicyOptional,
+ " -O1 \tDo some optimization.\n",
+ "mplcg",
+ {} },
+ { kCGO2,
+ 0,
+ "",
+ "O2",
+ kBuildTypeProduct,
+ kArgCheckPolicyOptional,
+ " -O2 \tDo some optimization.\n",
+ "mplcg",
+ {} },
+ { kLSRABB,
+ 0,
+ "",
+ "lsra-bb",
+ kBuildTypeExperimental,
+ kArgCheckPolicyRequired,
+ " --lsra-bb=NUM \tSwitch to spill mode if number of bb in function exceeds NUM\n",
+ "mplcg",
+ {} },
+ { kLSRAInsn,
+ 0,
+ "",
+ "lsra-insn",
+ kBuildTypeExperimental,
+ kArgCheckPolicyRequired,
+ " --lsra-insn=NUM \tSwitch to spill mode if number of instructons in function exceeds NUM\n",
+ "mplcg",
+ {} },
+ { kLSRAOverlap,
+ 0,
+ "",
+ "lsra-overlap",
+ kBuildTypeExperimental,
+ kArgCheckPolicyRequired,
+ " --lsra-overlap=NUM \toverlap NUM to decide pre spill in lsra\n",
+ "mplcg",
+ {} },
+ { kSuppressFinfo,
+ 0,
+ "",
+ "suppress-fileinfo",
+ kBuildTypeExperimental,
+ kArgCheckPolicyNone,
+ " --suppress-fileinfo \tFor generating gold files in unit testing\n",
+ "mplcg",
+ {} },
+ { kCGDumpcfg,
+ 0,
+ "",
+ "dump-cfg",
+ kBuildTypeExperimental,
+ kArgCheckPolicyNone,
+ " --dump-cfg\n",
+ "mplcg",
+ {} },
+ { kCGDumpPhases,
+ 0,
+ "",
+ "dump-phases",
+ kBuildTypeExperimental,
+ kArgCheckPolicyRequired,
+ " --dump-phases=PHASENAME,... \tEnable debug trace for specified phases in the comma separated list\n",
+ "mplcg",
+ {} },
+ { kCGSkipPhases,
+ 0,
+ "",
+ "skip-phases",
+ kBuildTypeExperimental,
+ kArgCheckPolicyRequired,
+ " --skip-phases=PHASENAME,... \tSkip the phases specified in the comma separated list\n",
+ "mplcg",
+ {} },
+ { kCGSkipFrom,
+ 0,
+ "",
+ "skip-from",
+ kBuildTypeExperimental,
+ kArgCheckPolicyRequired,
+ " --skip-from=PHASENAME \tSkip the rest phases from PHASENAME(included)\n",
+ "mplcg",
+ {} },
+ { kCGSkipAfter,
+ 0,
+ "",
+ "skip-after",
+ kBuildTypeExperimental,
+ kArgCheckPolicyRequired,
+ " --skip-after=PHASENAME \tSkip the rest phases after PHASENAME(excluded)\n",
+ "mplcg",
+ {} },
+ { kCGDumpFunc,
+ 0,
+ "",
+ "dump-func",
+ kBuildTypeExperimental,
+ kArgCheckPolicyRequired,
+ " --dump-func=FUNCNAME \tDump/trace only for functions whose names contain FUNCNAME as substring\n"
+ " \t(can only specify once)\n",
+ "mplcg",
+ {} },
+ { kCGDumpBefore,
+ kEnable,
+ "",
+ "dump-before",
+ kBuildTypeExperimental,
+ kArgCheckPolicyBool,
+ " --dump-before \tDo extra IR dump before the specified phase\n"
+ " --no-dump-before \tDon't extra IR dump before the specified phase\n",
+ "mplcg",
+ {} },
+ { kCGDumpAfter,
+ kEnable,
+ "",
+ "dump-after",
+ kBuildTypeExperimental,
+ kArgCheckPolicyBool,
+ " --dump-after \tDo extra IR dump after the specified phase\n"
+ " --no-dump-after \tDon't extra IR dump after the specified phase\n",
+ "mplcg",
+ {} },
+ { kCGTimePhases,
+ kEnable,
+ "",
+ "time-phases",
+ kBuildTypeExperimental,
+ kArgCheckPolicyBool,
+ " --time-phases \tCollect compilation time stats for each phase\n"
+ " --no-time-phases \tDon't Collect compilation time stats for each phase\n",
+ "mplcg",
+ {} },
+ { kCGBarrier,
+ kEnable,
+ "",
+ "use-barriers-for-volatile",
+ kBuildTypeExperimental,
+ kArgCheckPolicyBool,
+ " --use-barriers-for-volatile \tOptimize volatile load/str\n"
+ " --no-use-barriers-for-volatile\n",
+ "mplcg",
+ {} },
+ { kCGRange,
+ 0,
+ "",
+ "range",
+ kBuildTypeExperimental,
+ kArgCheckPolicyRequired,
+ " --range=NUM0,NUM1 \tOptimize only functions in the range [NUM0, NUM1]\n",
+ "mplcg",
+ {} },
+ { kFastAlloc,
+ 0,
+ "",
+ "fast-alloc",
+ kBuildTypeExperimental,
+ kArgCheckPolicyRequired,
+ " --fast-alloc=[0/1] \tO2 RA fast mode, set to 1 to spill all registers\n",
+ "mplcg",
+ {} },
+ { kSpillRange,
+ 0,
+ "",
+ "spill_range",
+ kBuildTypeExperimental,
+ kArgCheckPolicyRequired,
+ " --spill_range=NUM0,NUM1 \tO2 RA spill registers in the range [NUM0, NUM1]\n",
+ "mplcg",
+ {} },
+ { kDuplicateBB,
+ kEnable,
+ "",
+ "dup-bb",
+ kBuildTypeExperimental,
+ kArgCheckPolicyBool,
+ " --dup-bb \tAllow cfg optimizer to duplicate bb\n"
+ " --no-dup-bb \tDon't allow cfg optimizer to duplicate bb\n",
+ "mplcg",
+ {} },
+ { kCalleeCFI,
+ kEnable,
+ "",
+ "callee-cfi",
+ kBuildTypeExperimental,
+ kArgCheckPolicyBool,
+ " --callee-cfi \tcallee cfi message will be generated\n"
+ " --no-callee-cfi \tcallee cfi message will not be generated\n",
+ "mplcg",
+ {} },
+ { kPrintFunction,
+ kEnable,
+ "",
+ "print-func",
+ kBuildTypeExperimental,
+ kArgCheckPolicyBool,
+ " --print-func\n"
+ " --no-print-func\n",
+ "mplcg",
+ {} },
+ { kCyclePatternList,
+ 0,
+ "",
+ "cycle-pattern-list",
+ kBuildTypeExperimental,
+ kArgCheckPolicyRequired,
+ " --cycle-pattern-list \tFor generating cycle pattern meta\n"
+ " \t--cycle-pattern-list=list_file\n",
+ "mplcg",
+ {} },
+ { kDuplicateToDelPlt,
+ 0,
+ "",
+ "duplicate_asm_list",
+ kBuildTypeProduct,
+ kArgCheckPolicyRequired,
+ " --duplicate_asm_list \tDuplicate asm functions to delete plt call\n"
+ " \t--duplicate_asm_list=list_file\n",
+ "mplcg",
+ {} },
+ { kDuplicateToDelPlt2,
+ 0,
+ "",
+ "duplicate_asm_list2",
+ kBuildTypeProduct,
+ kArgCheckPolicyRequired,
+ " --duplicate_asm_list2 \tDuplicate more asm functions to delete plt call\n"
+ " \t--duplicate_asm_list2=list_file\n",
+ "mplcg",
+ {} },
+ { kEmitBlockMarker,
+ 0,
+ "",
+ "block-marker",
+ kBuildTypeExperimental,
+ kArgCheckPolicyRequired,
+ " --block-marker \tEmit block marker symbols in emitted assembly files\n",
+ "mplcg",
+ {} },
+ { kInsertSoe,
+ 0,
+ "",
+ "soe-check",
+ kBuildTypeExperimental,
+ kArgCheckPolicyNone,
+ " --soe-check \tInsert a soe check instruction[default off]\n",
+ "mplcg",
+ {} },
+ { kCheckArrayStore,
+ kEnable,
+ "",
+ "check-arraystore",
+ kBuildTypeExperimental,
+ kArgCheckPolicyBool,
+ " --check-arraystore \tcheck arraystore exception[default off]\n"
+ " --no-check-arraystore\n",
+ "mplcg",
+ {} },
+ { kDebugSched,
+ kEnable,
+ "",
+ "debug-schedule",
+ kBuildTypeExperimental,
+ kArgCheckPolicyBool,
+ " --debug-schedule \tdump scheduling information\n"
+ " --no-debug-schedule\n",
+ "mplcg",
+ {} },
+ { kBruteForceSched,
+ kEnable,
+ "",
+ "bruteforce-schedule",
+ kBuildTypeExperimental,
+ kArgCheckPolicyBool,
+ " --bruteforce-schedule \tdo brute force schedule\n"
+ " --no-bruteforce-schedule\n",
+ "mplcg",
+ {} },
+ { kSimulateSched,
+ kEnable,
+ "",
+ "simulate-schedule",
+ kBuildTypeExperimental,
+ kArgCheckPolicyBool,
+ " --simulate-schedule \tdo simulate schedule\n"
+ " --no-simulate-schedule\n",
+ "mplcg",
+ {} },
+ { kCrossLoc,
+ kEnable,
+ "",
+ "cross-loc",
+ kBuildTypeExperimental,
+ kArgCheckPolicyBool,
+ " --cross-loc \tcross loc insn schedule\n"
+ " --no-cross-loc\n",
+ "mplcg",
+ {} },
+ { kABIType,
+ 0,
+ "",
+ "float-abi",
+ kBuildTypeExperimental,
+ kArgCheckPolicyRequired,
+ " --float-abi=name \tPrint the abi type.\n"
+ " \tname=hard: abi-hard (Default)\n"
+ " \tname=soft: abi-soft\n"
+ " \tname=softfp: abi-softfp\n",
+ "mplcg",
+ {} },
+ { kEmitFileType,
+ 0,
+ "",
+ "filetype",
+ kBuildTypeExperimental,
+ kArgCheckPolicyRequired,
+ " --filetype=name \tChoose a file type.\n"
+ " \tname=asm: Emit an assembly file (Default)\n"
+ " \tname=obj: Emit an object file\n"
+ " \tname=null: not support yet\n",
+ "mplcg",
+ {} },
+ { kLongCalls,
+ kEnable,
+ "",
+ "long-calls",
+ kBuildTypeExperimental,
+ kArgCheckPolicyBool,
+ " --long-calls \tgenerate long call\n"
+ " --no-long-calls\n",
+ "mplcg",
+ {} },
+ { kFunctionSections,
+ kEnable,
+ "",
+ "function-sections",
+ kBuildTypeProduct,
+ kArgCheckPolicyBool,
+ " --function-sections \t \n"
+ " --no-function-sections\n",
+ "mplcg",
+ {} },
+ { kOmitFramePointer,
+ kEnable,
+ "",
+ "omit-frame-pointer",
+ kBuildTypeProduct,
+ kArgCheckPolicyBool,
+ " --omit-frame-pointer \t do not use frame pointer \n"
+ " --no-omit-frame-pointer\n",
+ "mplcg",
+ {} },
+ { kFastMath,
+ kEnable,
+ "",
+ "fast-math",
+ kBuildTypeExperimental,
+ kArgCheckPolicyBool,
+ " --fast-math \tPerform fast math\n"
+ " --no-fast-math\n",
+ "mplcg",
+ {} },
+// End
+ { kUnknown,
+ 0,
+ "",
+ "",
+ kBuildTypeAll,
+ kArgCheckPolicyNone,
+ "",
+ "mplcg",
+ {} }
+};
+
+CGOptions &CGOptions::GetInstance() {
+ static CGOptions instance;
+ return instance;
+}
+
+CGOptions::CGOptions() {
+ CreateUsages(kUsage);
+}
+
+void CGOptions::DecideMplcgRealLevel(const std::vector<mapleOption::Option> &inputOptions, bool isDebug) {
+ int realLevel = -1;
+ for (const mapleOption::Option &opt : inputOptions) {
+ switch (opt.Index()) {
+ case kCGO0:
+ realLevel = CGOptions::kLevel0;
+ break;
+ case kCGO1:
+ realLevel = CGOptions::kLevel1;
+ break;
+ case kCGO2:
+ realLevel = CGOptions::kLevel2;
+ break;
+ default:
+ break;
+ }
+ }
+ if (isDebug) {
+ LogInfo::MapleLogger() << "Real Mplcg level:" << std::to_string(realLevel) << "\n";
+ }
+ if (realLevel == CGOptions::kLevel0) {
+ EnableO0();
+ } else if (realLevel == CGOptions::kLevel1) {
+ EnableO1();
+ } else if (realLevel == CGOptions::kLevel2) {
+ EnableO2();
+ }
+}
+
+bool CGOptions::SolveOptions(const std::vector