From 38c8fbd63202e7c37343c745a58e47f2e3bd5297 Mon Sep 17 00:00:00 2001 From: Pavel Kopyl Date: Tue, 29 Oct 2024 12:41:13 +0100 Subject: [PATCH] [EVM] Support commutable operations, re-enable inlining and add some NFC fixes. --- llvm/lib/Target/EVM/CMakeLists.txt | 2 +- llvm/lib/Target/EVM/EVM.h | 4 +- llvm/lib/Target/EVM/EVMAssembly.cpp | 16 +- llvm/lib/Target/EVM/EVMAssembly.h | 17 +-- ... EVMBackwardPropagationStackification.cpp} | 41 ++++-- llvm/lib/Target/EVM/EVMControlFlowGraph.h | 24 +-- .../Target/EVM/EVMControlFlowGraphBuilder.cpp | 68 +++++---- .../Target/EVM/EVMControlFlowGraphBuilder.h | 9 +- llvm/lib/Target/EVM/EVMISelDAGToDAG.cpp | 2 +- llvm/lib/Target/EVM/EVMISelLowering.cpp | 4 +- llvm/lib/Target/EVM/EVMInstrInfo.td | 24 +-- llvm/lib/Target/EVM/EVMLinkRuntime.cpp | 8 - .../Target/EVM/EVMOptimizedCodeTransform.cpp | 137 +++++++++++++----- .../Target/EVM/EVMOptimizedCodeTransform.h | 19 ++- llvm/lib/Target/EVM/EVMSplitCriticalEdges.cpp | 3 +- llvm/lib/Target/EVM/EVMStackDebug.cpp | 3 +- llvm/lib/Target/EVM/EVMStackDebug.h | 2 +- .../Target/EVM/EVMStackLayoutGenerator.cpp | 107 ++++++-------- llvm/lib/Target/EVM/EVMStackLayoutGenerator.h | 12 +- llvm/lib/Target/EVM/EVMStackShuffler.h | 18 ++- llvm/lib/Target/EVM/EVMTargetMachine.cpp | 4 +- llvm/lib/Target/EVM/EVMTargetTransformInfo.h | 5 +- llvm/test/CodeGen/EVM/stack-ops-commutable.ll | 137 ++++++++++++++++-- llvm/test/CodeGen/EVM/stack-ops.ll | 20 +-- .../CodeGen/EVM/unused_function_arguments.ll | 4 +- .../Generic/2008-08-07-PtrToInt-SmallerInt.ll | 4 +- 26 files changed, 435 insertions(+), 259 deletions(-) rename llvm/lib/Target/EVM/{EVMStackifyEF.cpp => EVMBackwardPropagationStackification.cpp} (54%) diff --git a/llvm/lib/Target/EVM/CMakeLists.txt b/llvm/lib/Target/EVM/CMakeLists.txt index 5a523008aea2..5f9d00e568c4 100644 --- a/llvm/lib/Target/EVM/CMakeLists.txt +++ b/llvm/lib/Target/EVM/CMakeLists.txt @@ -21,6 +21,7 @@ add_llvm_target(EVMCodeGen EVMArgumentMove.cpp EVMAsmPrinter.cpp EVMAssembly.cpp + EVMBackwardPropagationStackification.cpp EVMCodegenPrepare.cpp EVMControlFlowGraphBuilder.cpp EVMFrameLowering.cpp @@ -40,7 +41,6 @@ add_llvm_target(EVMCodeGen EVMStackDebug.cpp EVMStackLayoutGenerator.cpp EVMStackify.cpp - EVMStackifyEF.cpp EVMSubtarget.cpp EVMTargetMachine.cpp EVMTargetTransformInfo.cpp diff --git a/llvm/lib/Target/EVM/EVM.h b/llvm/lib/Target/EVM/EVM.h index f9846fa615e5..15bed82879b1 100644 --- a/llvm/lib/Target/EVM/EVM.h +++ b/llvm/lib/Target/EVM/EVM.h @@ -52,7 +52,7 @@ FunctionPass *createEVMRegColoring(); FunctionPass *createEVMSingleUseExpression(); FunctionPass *createEVMSplitCriticalEdges(); FunctionPass *createEVMStackify(); -FunctionPass *createEVMStackifyEF(); +FunctionPass *createEVMBPStackification(); // PassRegistry initialization declarations. 
void initializeEVMCodegenPreparePass(PassRegistry &); @@ -65,7 +65,7 @@ void initializeEVMRegColoringPass(PassRegistry &); void initializeEVMSingleUseExpressionPass(PassRegistry &); void initializeEVMSplitCriticalEdgesPass(PassRegistry &); void initializeEVMStackifyPass(PassRegistry &); -void initializeEVMStackifyEFPass(PassRegistry &); +void initializeEVMBPStackificationPass(PassRegistry &); struct EVMLinkRuntimePass : PassInfoMixin { EVMLinkRuntimePass() = default; diff --git a/llvm/lib/Target/EVM/EVMAssembly.cpp b/llvm/lib/Target/EVM/EVMAssembly.cpp index 4a63578058d1..02f809c16ba5 100644 --- a/llvm/lib/Target/EVM/EVMAssembly.cpp +++ b/llvm/lib/Target/EVM/EVMAssembly.cpp @@ -6,12 +6,10 @@ // //===----------------------------------------------------------------------===// // -// This file implements the EVMAssembly class that generates machine IR -// with all the required stack manipulation instructions. -// Resulting machine instructions still have explicit operands, but some of the -// auxiliary instructions (ARGUMENT, RET, EVM::CONST_I256, COPY_I256 -// FCALLARGUMENT) are removed after this step, beaking use-def chains. So, the -// resulting Machine IR breaks the MachineVerifier checks. +// This file creates Machine IR in stackified form. It provides different +// callbacks when the EVMOptimizedCodeTransform needs to emit an operation, +// a stack manipulation instruction, and so on. In the end, it walks over MIR +// instructions removing register operands. // //===----------------------------------------------------------------------===// @@ -51,12 +49,14 @@ void EVMAssembly::setCurrentLocation(MachineBasicBlock *MBB) { } void EVMAssembly::appendInstruction(MachineInstr *MI) { +#ifndef NDEBUG unsigned Opc = MI->getOpcode(); assert(Opc != EVM::JUMP && Opc != EVM::JUMPI && Opc != EVM::ARGUMENT && Opc != EVM::RET && Opc != EVM::CONST_I256 && Opc != EVM::COPY_I256 && Opc != EVM::FCALL); +#endif // NDEBUG - auto Ret = AssemblyInstrs.insert(MI); + [[maybe_unused]] auto Ret = AssemblyInstrs.insert(MI); assert(Ret.second); int StackAdj = (2 * static_cast(MI->getNumExplicitDefs())) - static_cast(MI->getNumExplicitOperands()); @@ -69,7 +69,7 @@ void EVMAssembly::appendSWAPInstruction(unsigned Depth) { unsigned Opc = EVM::getSWAPOpcode(Depth); CurMIIt = BuildMI(*CurMBB, CurMIIt, DebugLoc(), TII->get(Opc)); AssemblyInstrs.insert(&*CurMIIt); - dumpInst(&*CurMIIt); + LLVM_DEBUG(dumpInst(&*CurMIIt)); CurMIIt = std::next(CurMIIt); } diff --git a/llvm/lib/Target/EVM/EVMAssembly.h b/llvm/lib/Target/EVM/EVMAssembly.h index 2e74d31ed8d2..fa2d30d5a9cb 100644 --- a/llvm/lib/Target/EVM/EVMAssembly.h +++ b/llvm/lib/Target/EVM/EVMAssembly.h @@ -6,12 +6,10 @@ // //===----------------------------------------------------------------------===// // -// This file implements the EVMAssembly class that generates machine IR -// with all the required stack manipulation instructions. -// Resulting machine instructions still have explicit operands, but some of the -// auxiliary instructions (ARGUMENT, RET, EVM::CONST_I256, COPY_I256 -// FCALLARGUMENT) are removed after this step, beaking use-def chains. So, the -// resulting Machine IR breaks the MachineVerifier checks. +// This file creates Machine IR in stackified form. It provides different +// callbacks when the EVMOptimizedCodeTransform needs to emit an operation, +// a stack manipulation instruction, and so on. In the end, it walks over MIR +// instructions removing register operands.
// //===----------------------------------------------------------------------===// @@ -46,7 +44,8 @@ class EVMAssembly { : MF(MF), TII(TII) {} // Retrieve the current height of the stack. - // This does not have to be zero at the beginning. + // This does not have to be zero at the beginning of the MF because of + // possible arguments. int getStackHeight() const; void setStackHeight(int Height); @@ -84,8 +83,8 @@ class EVMAssembly { MCSymbol *createFuncRetSymbol(); - // Removes unused codegen-only instructions and - // stackifies remaining ones. + // Erases unused codegen-only instructions and removes register operands + // of the remaining ones. void finalize(); private: diff --git a/llvm/lib/Target/EVM/EVMStackifyEF.cpp b/llvm/lib/Target/EVM/EVMBackwardPropagationStackification.cpp similarity index 54% rename from llvm/lib/Target/EVM/EVMStackifyEF.cpp rename to llvm/lib/Target/EVM/EVMBackwardPropagationStackification.cpp index d891f4e3db8d..7a571204c899 100644 --- a/llvm/lib/Target/EVM/EVMStackifyEF.cpp +++ b/llvm/lib/Target/EVM/EVMBackwardPropagationStackification.cpp @@ -1,4 +1,4 @@ -//===----- EVMStackifyEF.cpp - Split Critical Edges ------*- C++ -*--===// +//===----- EVMBPStackification.cpp - BP stackification ---------*- C++ -*--===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,7 +6,20 @@ // //===----------------------------------------------------------------------===// // -// This file performs spliting of critical edges. +// This file implements backward propagation (BP) stackification. +// The original idea was taken from the Ethereum compiler (solc) stackification +// algorithm. +// The algorithm is broken into the following components: +// - CFG (Control Flow Graph) and CFG builder. The stackification CFG is similar +// in structure to the LLVM CFG, but employs a wider notion of instruction. +// - Stack layout generator. Contains information about the stack layout at +// entry and exit of each CFG::BasicBlock. It also contains input/output +// stack layout for each operation. +// - Code transformation into stackified form. This component uses both the CFG +// and the stack layout information to produce stackified LLVM MIR. +// - Stack shuffler. Finds a locally optimal transformation between two stack +// layouts using three primitives: POP, PUSHn, DUPn. The stack shuffler +// is used by the components above.
// //===----------------------------------------------------------------------===// @@ -28,11 +41,11 @@ using namespace llvm; #define DEBUG_TYPE "evm-ethereum-stackify" namespace { -class EVMStackifyEF final : public MachineFunctionPass { +class EVMBPStackification final : public MachineFunctionPass { public: static char ID; // Pass identification, replacement for typeid - EVMStackifyEF() : MachineFunctionPass(ID) {} + EVMBPStackification() : MachineFunctionPass(ID) {} private: StringRef getPassName() const override { @@ -54,19 +67,21 @@ class EVMStackifyEF final : public MachineFunctionPass { }; } // end anonymous namespace -char EVMStackifyEF::ID = 0; +char EVMBPStackification::ID = 0; -INITIALIZE_PASS_BEGIN(EVMStackifyEF, DEBUG_TYPE, "Ethereum stackification", - false, false) +INITIALIZE_PASS_BEGIN(EVMBPStackification, DEBUG_TYPE, + "Backward propagation stackification", false, false) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_END(EVMStackifyEF, DEBUG_TYPE, "Ethereum stackification", false, - false) +INITIALIZE_PASS_END(EVMBPStackification, DEBUG_TYPE, + "Backward propagation stackification", false, false) -FunctionPass *llvm::createEVMStackifyEF() { return new EVMStackifyEF(); } +FunctionPass *llvm::createEVMBPStackification() { + return new EVMBPStackification(); +} -bool EVMStackifyEF::runOnMachineFunction(MachineFunction &MF) { +bool EVMBPStackification::runOnMachineFunction(MachineFunction &MF) { LLVM_DEBUG({ - dbgs() << "********** Ethereum stackification **********\n" + dbgs() << "********** Backward propagation stackification **********\n" << "********** Function: " << MF.getName() << '\n'; }); @@ -78,7 +93,7 @@ bool EVMStackifyEF::runOnMachineFunction(MachineFunction &MF) { // We don't preserve SSA form. MRI.leaveSSA(); - assert(MRI.tracksLiveness() && "Stackify expects liveness"); + assert(MRI.tracksLiveness() && "Stackification expects liveness"); EVMAssembly Assembly(&MF, TII); EVMOptimizedCodeTransform::run(Assembly, MF, LIS, MLI); diff --git a/llvm/lib/Target/EVM/EVMControlFlowGraph.h b/llvm/lib/Target/EVM/EVMControlFlowGraph.h index 86bfb57eff5b..2f76f93455d4 100644 --- a/llvm/lib/Target/EVM/EVMControlFlowGraph.h +++ b/llvm/lib/Target/EVM/EVMControlFlowGraph.h @@ -1,4 +1,4 @@ -//===----- EVMControlFlowGraph.h - CFG for stackification -------*- C++ -*-===// +//===----- EVMControlFlowGraph.h - CFG for BP stackification ----*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,7 +6,8 @@ // //===----------------------------------------------------------------------===// // -// This file defines Control Flow Graph used for the stackification algorithm. +// This file defines Control Flow Graph used for the backward propagation +// stackification algorithm. // //===----------------------------------------------------------------------===// @@ -31,7 +32,7 @@ class MachineBasicBlock; class MachineInstr; /// The following structs describe different kinds of stack slots. /// Each stack slot is equality- and less-than-comparable and -/// specifies an attribute ``canBeFreelyGenerated`` that is true, +/// specifies an attribute 'canBeFreelyGenerated' that is true, /// if a slot of this kind always has a known value at compile time and /// therefore can safely be removed from the stack at any time and then /// regenerated later. 
@@ -53,8 +54,8 @@ struct FunctionCallReturnLabelSlot { /// The return jump target of a function while generating the code of the /// function body. I.e. the caller of a function pushes a -/// ``FunctionCallReturnLabelSlot`` (see above) before jumping to the function -/// and this very slot is viewed as ``FunctionReturnLabelSlot`` inside the +/// 'FunctionCallReturnLabelSlot' (see above) before jumping to the function +/// and this very slot is viewed as 'FunctionReturnLabelSlot' inside the /// function body and jumped to when returning from the function. struct FunctionReturnLabelSlot { const MachineFunction *MF = nullptr; @@ -99,7 +100,7 @@ struct LiteralSlot { bool operator<(LiteralSlot const &Rhs) const { return Value.ult(Rhs.Value); } }; -/// A slot containing a Symbol. +/// A slot containing an MCSymbol. struct SymbolSlot { MCSymbol *Symbol; static constexpr bool canBeFreelyGenerated = true; @@ -156,7 +157,7 @@ inline bool canBeFreelyGenerated(StackSlot const &Slot) { Slot); } -/// Control flow graph consisting of ``CFG::BasicBlock``s connected by control +/// Control flow graph consisting of 'CFG::BasicBlock's connected by control /// flow. struct CFG { explicit CFG() {} @@ -168,6 +169,9 @@ struct CFG { struct BuiltinCall { MachineInstr *Builtin = nullptr; + // True if this instruction has commutable operands. In the EVM ISA, + // commutable operands always take the top two stack slots. + bool IsCommutable = false; bool TerminatesOrReverts = false; }; @@ -179,8 +183,8 @@ struct CFG { }; struct Assignment { - /// The variables being assigned to also occur as ``output`` in the - /// ``Operation`` containing the assignment, but are also stored here for + /// The variables being assigned to also occur as 'Output' in the + /// 'Operation' containing the assignment, but are also stored here for /// convenience. std::vector Variables; }; @@ -194,7 +198,7 @@ struct CFG { }; struct FunctionInfo; - /// A basic control flow block containing ``Operation``s acting on the stack. + /// A basic control flow block containing 'Operation's acting on the stack. /// Maintains a list of entry blocks and a typed exit. struct BasicBlock { struct InvalidExit {}; diff --git a/llvm/lib/Target/EVM/EVMControlFlowGraphBuilder.cpp b/llvm/lib/Target/EVM/EVMControlFlowGraphBuilder.cpp index f84628db7af5..4aaa75841920 100644 --- a/llvm/lib/Target/EVM/EVMControlFlowGraphBuilder.cpp +++ b/llvm/lib/Target/EVM/EVMControlFlowGraphBuilder.cpp @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// // -// This file builds the Control Flow Graph used for the stackification -// algorithm. +// This file builds the Control Flow Graph used for the backward propagation +// stackification algorithm. // //===----------------------------------------------------------------------===// @@ -168,6 +168,8 @@ StackSlot ControlFlowGraphBuilder::getDefiningSlot(const MachineInstr &MI, LiveQueryResult LRQ = LI->Query(Idx); const VNInfo *VNI = LRQ.valueIn(); assert(VNI && "Use of non-existing value"); + // If the virtual register defines a constant and this is the only + // definition, emit the literal slot as MI's input.
if (LI->containsOneValue()) { assert(!VNI->isPHIDef()); const MachineInstr *DefMI = LIS.getInstructionFromIndex(VNI->def); @@ -182,26 +184,30 @@ StackSlot ControlFlowGraphBuilder::getDefiningSlot(const MachineInstr &MI, } void ControlFlowGraphBuilder::collectInstrOperands(const MachineInstr &MI, - Stack &Input, - Stack &Output) const { - for (const auto &MO : reverse(MI.explicit_uses())) { - if (!MO.isReg()) { - if (MO.isMCSymbol()) - Input.push_back(SymbolSlot{MO.getMCSymbol()}); - continue; - } + Stack *Input, + Stack *Output) const { + if (Input) { + for (const auto &MO : reverse(MI.explicit_uses())) { + if (!MO.isReg()) { + if (MO.isMCSymbol()) + Input->push_back(SymbolSlot{MO.getMCSymbol()}); + continue; + } - const Register Reg = MO.getReg(); - // SP is not used anyhow. - if (Reg == EVM::SP) - continue; + const Register Reg = MO.getReg(); + // SP is not used anyhow. + if (Reg == EVM::SP) + continue; - Input.push_back(getDefiningSlot(MI, Reg)); + Input->push_back(getDefiningSlot(MI, Reg)); + } } - unsigned ArgsNumber = 0; - for (const auto &MO : MI.defs()) - Output.push_back(TemporarySlot{&MI, MO.getReg(), ArgsNumber++}); + if (Output) { + unsigned ArgsNumber = 0; + for (const auto &MO : MI.defs()) + Output->push_back(TemporarySlot{&MI, MO.getReg(), ArgsNumber++}); + } } void ControlFlowGraphBuilder::handleMachineInstr(MachineInstr &MI) { @@ -234,8 +240,8 @@ void ControlFlowGraphBuilder::handleMachineInstr(MachineInstr &MI) { break; case EVM::CONST_I256: { const LiveInterval *LI = &LIS.getInterval(MI.getOperand(0).getReg()); - // We can ignore this instruction, as we will directly create the literal - // slot from the immediate value; + // If the virtual register has only one definition, ignore this instruction, + // as we create literal slots from the immediate value at the register uses. if (LI->containsOneValue()) return; } break; @@ -251,10 +257,10 @@ void ControlFlowGraphBuilder::handleMachineInstr(MachineInstr &MI) { [[fallthrough]]; default: { Stack Input, Output; - collectInstrOperands(MI, Input, Output); - CurrentBlock->Operations.emplace_back( - CFG::Operation{std::move(Input), std::move(Output), - CFG::BuiltinCall{&MI, TerminatesOrReverts}}); + collectInstrOperands(MI, &Input, &Output); + CurrentBlock->Operations.emplace_back(CFG::Operation{ + std::move(Input), std::move(Output), + CFG::BuiltinCall{&MI, MI.isCommutable(), TerminatesOrReverts}}); } break; } @@ -279,8 +285,8 @@ void ControlFlowGraphBuilder::handleMachineInstr(MachineInstr &MI) { case EVM::COPY_I256: { // Copy instruction corresponds to the assignment operator, so // we do not need to create intermediate TmpSlots.
- Stack In, Out; - collectInstrOperands(MI, In, Out); + Stack In; + collectInstrOperands(MI, &In, nullptr); Input = In; const Register DefReg = MI.getOperand(0).getReg(); Output.push_back(VariableSlot{DefReg}); @@ -313,7 +319,7 @@ void ControlFlowGraphBuilder::handleFunctionCall(const MachineInstr &MI) { CurrentBlock->Exit = CFG::BasicBlock::Terminated{}; else Input.push_back(FunctionCallReturnLabelSlot{&MI}); - collectInstrOperands(MI, Input, Output); + collectInstrOperands(MI, &Input, &Output); CurrentBlock->Operations.emplace_back( CFG::Operation{Input, Output, CFG::FunctionCall{&MI, !IsNoReturn, @@ -322,8 +328,8 @@ void ControlFlowGraphBuilder::handleFunctionCall(const MachineInstr &MI) { void ControlFlowGraphBuilder::handleReturn(const MachineInstr &MI) { Cfg.FuncInfo.Exits.emplace_back(CurrentBlock); - Stack Input, Output; - collectInstrOperands(MI, Input, Output); + Stack Input; + collectInstrOperands(MI, &Input, nullptr); // We need to reverse input operands to restore original ordering, // as it is in the instruction. // Calling convention: return values are passed in stack such that the @@ -363,15 +369,15 @@ void ControlFlowGraphBuilder::handleBasicBlockSuccessors( return; } +#ifndef NDEBUG // This corresponds to a noreturn functions at the end of the MBB. if (std::holds_alternative(CurrentBlock->Exit)) { -#ifndef NDEBUG CFG::FunctionCall *Call = std::get_if( &CurrentBlock->Operations.back().Operation); assert(Call && !Call->CanContinue); -#endif // NDEBUG return; } +#endif // NDEBUG // This corresponds to 'unreachable' at the BB end. if (!TBB && !FBB && MBB.succ_empty()) { diff --git a/llvm/lib/Target/EVM/EVMControlFlowGraphBuilder.h b/llvm/lib/Target/EVM/EVMControlFlowGraphBuilder.h index 151b5253af4b..efe9b97313d0 100644 --- a/llvm/lib/Target/EVM/EVMControlFlowGraphBuilder.h +++ b/llvm/lib/Target/EVM/EVMControlFlowGraphBuilder.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// // -// This file builds the Control Flow Graph used for the stackification -// algorithm. +// This file builds the Control Flow Graph used for the backward propagation +// stackification algorithm. // //===----------------------------------------------------------------------===// @@ -42,9 +42,8 @@ class ControlFlowGraphBuilder { void handleReturn(const MachineInstr &MI); void handleBasicBlockSuccessors(MachineBasicBlock &MBB); StackSlot getDefiningSlot(const MachineInstr &MI, Register Reg) const; - - void collectInstrOperands(const MachineInstr &MI, Stack &Input, - Stack &Output) const; + void collectInstrOperands(const MachineInstr &MI, Stack *Input, + Stack *Output) const; CFG &Cfg; const LiveIntervals &LIS; diff --git a/llvm/lib/Target/EVM/EVMISelDAGToDAG.cpp b/llvm/lib/Target/EVM/EVMISelDAGToDAG.cpp index 45ba50141438..82a1cdfea2a8 100644 --- a/llvm/lib/Target/EVM/EVMISelDAGToDAG.cpp +++ b/llvm/lib/Target/EVM/EVMISelDAGToDAG.cpp @@ -50,7 +50,7 @@ void EVMDAGToDAGISel::Select(SDNode *Node) { // If we have a custom node, we already have selected! 
if (Node->isMachineOpcode()) { - LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); + LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n"); Node->setNodeId(-1); return; } diff --git a/llvm/lib/Target/EVM/EVMISelLowering.cpp b/llvm/lib/Target/EVM/EVMISelLowering.cpp index 6c8738d2d52f..4cbd1a757ddc 100644 --- a/llvm/lib/Target/EVM/EVMISelLowering.cpp +++ b/llvm/lib/Target/EVM/EVMISelLowering.cpp @@ -195,7 +195,7 @@ SDValue EVMTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { assert(MemVT.isScalarInteger() && "Expected scalar load"); assert(MemVTSize < 256 && "Expected < 256-bits sized loads"); - LLVM_DEBUG(errs() << "Special handling of extended LOAD node:\n"; + LLVM_DEBUG(dbgs() << "Special handling of extended LOAD node:\n"; Op.dump(&DAG)); // As the EVM architecture has only 256-bits load, additional handling @@ -238,7 +238,7 @@ SDValue EVMTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { assert(MemVTSize < 256 && "Expected < 256-bits sized stores"); - LLVM_DEBUG(errs() << "Special handling of STORE node:\n"; Op.dump(&DAG)); + LLVM_DEBUG(dbgs() << "Special handling of STORE node:\n"; Op.dump(&DAG)); // As the EVM architecture has only 256-bits stores, additional handling // is required to store smaller types. diff --git a/llvm/lib/Target/EVM/EVMInstrInfo.td b/llvm/lib/Target/EVM/EVMInstrInfo.td index 8038cb0cd7d2..910e8099cfe9 100644 --- a/llvm/lib/Target/EVM/EVMInstrInfo.td +++ b/llvm/lib/Target/EVM/EVMInstrInfo.td @@ -270,17 +270,19 @@ defm SDIV : BinaryInst; defm MOD : BinaryInst; defm SMOD : BinaryInst; -defm ADDMOD - : I<(outs GPR:$dst), (ins GPR:$add_op1, GPR:$add_op2, GPR:$denom), - [(set GPR:$dst, - (int_evm_addmod GPR:$add_op1, GPR:$add_op2, GPR:$denom))], - "ADDMOD", " $dst, $add_op1, $add_op2, $denom", 0x08, 8>; - -defm MULMOD - : I<(outs GPR:$dst), (ins GPR:$mul_op1, GPR:$mul_op2, GPR:$denom), - [(set GPR:$dst, - (int_evm_mulmod GPR:$mul_op1, GPR:$mul_op2, GPR:$denom))], - "MULMOD", " $dst, $mul_op1, $mul_op2, $denom", 0x09, 8>; +let isCommutable = 1 in { + defm ADDMOD + : I<(outs GPR:$dst), (ins GPR:$add_op1, GPR:$add_op2, GPR:$denom), + [(set GPR:$dst, + (int_evm_addmod GPR:$add_op1, GPR:$add_op2, GPR:$denom))], + "ADDMOD", " $dst, $add_op1, $add_op2, $denom", 0x08, 8>; + + defm MULMOD + : I<(outs GPR:$dst), (ins GPR:$mul_op1, GPR:$mul_op2, GPR:$denom), + [(set GPR:$dst, + (int_evm_mulmod GPR:$mul_op1, GPR:$mul_op2, GPR:$denom))], + "MULMOD", " $dst, $mul_op1, $mul_op2, $denom", 0x09, 8>; +} defm EXP : I<(outs GPR:$dst), (ins GPR:$base, GPR:$exp), diff --git a/llvm/lib/Target/EVM/EVMLinkRuntime.cpp b/llvm/lib/Target/EVM/EVMLinkRuntime.cpp index 6cdf1314621e..69d4c4b62709 100644 --- a/llvm/lib/Target/EVM/EVMLinkRuntime.cpp +++ b/llvm/lib/Target/EVM/EVMLinkRuntime.cpp @@ -81,14 +81,6 @@ static bool EVMLinkRuntimeImpl(Module &M, const char *ModuleToLink) { exit(1); } - // TODO: remove this after ensuring the stackification - // algorithm can deal with a high register pressure. 
- for (auto &F : M.functions()) { - if (!F.isDeclaration()) { - F.addFnAttr(Attribute::NoInline); - } - } - bool LinkErr = false; LinkErr = L.linkInModule( std::move(RTM), Flags, [](Module &M, const StringSet<> &GVS) { diff --git a/llvm/lib/Target/EVM/EVMOptimizedCodeTransform.cpp b/llvm/lib/Target/EVM/EVMOptimizedCodeTransform.cpp index 0e38d53f96f0..4c803d7d1ba6 100644 --- a/llvm/lib/Target/EVM/EVMOptimizedCodeTransform.cpp +++ b/llvm/lib/Target/EVM/EVMOptimizedCodeTransform.cpp @@ -1,4 +1,4 @@ -//===--- EVMOptimizedCodeTransform.h - Stack layout generator ---*- C++ -*-===// +//===--- EVMOptimizedCodeTransform.h - Create stackified MIR ---*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// // -// This file transforms the stack layout back into the Machine IR instructions -// in 'stackified' form using the EVMAssembly class. +// This file transforms MIR to the 'stackified' MIR using CFG, StackLayout +// and EVMAssembly classes. // //===----------------------------------------------------------------------===// @@ -17,6 +17,7 @@ #include "EVMStackDebug.h" #include "EVMStackLayoutGenerator.h" #include "EVMStackShuffler.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Support/ErrorHandling.h" #include @@ -35,6 +36,7 @@ void EVMOptimizedCodeTransform::run(EVMAssembly &Assembly, MachineFunction &MF, } void EVMOptimizedCodeTransform::operator()(CFG::FunctionCall const &Call) { +#ifndef NDEBUG // Validate stack. assert(Assembly.getStackHeight() == static_cast(CurrentStack.size())); assert(CurrentStack.size() >= Call.NumArguments + (Call.CanContinue ? 1 : 0)); @@ -45,6 +47,7 @@ void EVMOptimizedCodeTransform::operator()(CFG::FunctionCall const &Call) { &CurrentStack.at(CurrentStack.size() - Call.NumArguments - 1)); assert(returnLabelSlot && returnLabelSlot->Call == Call.Call); } +#endif // NDEBUG // Emit code. const MachineOperand *CalleeOp = Call.Call->explicit_uses().begin(); @@ -57,8 +60,7 @@ void EVMOptimizedCodeTransform::operator()(CFG::FunctionCall const &Call) { if (Call.CanContinue) Assembly.appendLabel(); - // Update stack. - // Remove arguments and return label from CurrentStack. + // Update stack, remove arguments and return label from CurrentStack. for (size_t I = 0; I < Call.NumArguments + (Call.CanContinue ? 1 : 0); ++I) CurrentStack.pop_back(); @@ -82,8 +84,7 @@ void EVMOptimizedCodeTransform::operator()(CFG::BuiltinCall const &Call) { // Emit code. Assembly.appendInstruction(Call.Builtin); - // Update stack. - // Remove arguments from CurrentStack. + // Update stack and remove arguments from CurrentStack. 
for (size_t i = 0; i < NumArgs; ++i) CurrentStack.pop_back(); @@ -122,12 +123,17 @@ EVMOptimizedCodeTransform::EVMOptimizedCodeTransform(EVMAssembly &Assembly, MachineFunction &MF) : Assembly(Assembly), Layout(Layout), FuncInfo(&Cfg.FuncInfo), MF(MF) {} -void EVMOptimizedCodeTransform::assertLayoutCompatibility( - Stack const &SourceStack, Stack const &TargetStack) { - assert(SourceStack.size() == TargetStack.size()); - for (unsigned Idx = 0; Idx < SourceStack.size(); ++Idx) - assert(std::holds_alternative(TargetStack[Idx]) || - SourceStack[Idx] == TargetStack[Idx]); +bool EVMOptimizedCodeTransform::AreLayoutsCompatible(Stack const &SourceStack, + Stack const &TargetStack) { + if (SourceStack.size() != TargetStack.size()) + return false; + + for (auto [Src, Tgt] : zip_equal(SourceStack, TargetStack)) { + if (!std::holds_alternative(Tgt) && !(Src == Tgt)) + return false; + } + + return true; } void EVMOptimizedCodeTransform::createStackLayout(Stack TargetStack) { @@ -248,39 +254,91 @@ void EVMOptimizedCodeTransform::createStackLayout(Stack TargetStack) { assert(Assembly.getStackHeight() == static_cast(CurrentStack.size())); } -void EVMOptimizedCodeTransform::operator()(CFG::BasicBlock const &Block) { +void EVMOptimizedCodeTransform::createOperationEntryLayout( + const CFG::Operation &Op) { + // Create required layout for entering the Operation. + // Check if we can choose cheaper stack shuffling if the Operation is an + // instruction with commutable arguments. + if (const auto *Inst = std::get_if(&Op.Operation); + Inst && Inst->IsCommutable) { + // Get the stack layout before the instruction. + const Stack &DefaultTargetStack = Layout.operationEntryLayout.at(&Op); + size_t DefaultCost = + EvaluateStackTransform(CurrentStack, DefaultTargetStack); + + // Commutable operands always take top two stack slots. + const unsigned OpIdx1 = 0, OpIdx2 = 1; + // Swap the commutable stack items and measure the stack shuffling cost + // again. + assert(DefaultTargetStack.size() > 1); + Stack CommutedTargetStack = DefaultTargetStack; + std::swap(CommutedTargetStack[CommutedTargetStack.size() - OpIdx1 - 1], + CommutedTargetStack[CommutedTargetStack.size() - OpIdx2 - 1]); + size_t CommutedCost = + EvaluateStackTransform(CurrentStack, CommutedTargetStack); + // Choose the cheapest transformation. + createStackLayout(CommutedCost < DefaultCost ? CommutedTargetStack + : DefaultTargetStack); + +#ifndef NDEBUG + // Assert that we have the inputs of the Operation on stack top. + assert(static_cast(CurrentStack.size()) == Assembly.getStackHeight()); + assert(CurrentStack.size() >= Op.Input.size()); + Stack StackInput = + EVMUtils::to_vector(EVMUtils::take_last(CurrentStack, Op.Input.size())); + // Adjust the StackInput to match the commuted stack. + if (CommutedCost < DefaultCost) { + std::swap(StackInput[StackInput.size() - OpIdx1 - 1], + StackInput[StackInput.size() - OpIdx2 - 1]); + } + assert(AreLayoutsCompatible(StackInput, Op.Input)); +#endif // NDEBUG + } else { + createStackLayout(Layout.operationEntryLayout.at(&Op)); + +#ifndef NDEBUG + // Assert that we have the inputs of the Operation on stack top. 
+ assert(static_cast(CurrentStack.size()) == Assembly.getStackHeight()); + assert(CurrentStack.size() >= Op.Input.size()); + const Stack StackInput = + EVMUtils::to_vector(EVMUtils::take_last(CurrentStack, Op.Input.size())); + assert(AreLayoutsCompatible(StackInput, Op.Input)); +#endif // NDEBUG + } +} + +void EVMOptimizedCodeTransform::operator()(const CFG::BasicBlock &Block) { // Current location for the entry BB was set up in operator()(). if (&Block != FuncInfo->Entry) Assembly.setCurrentLocation(Block.MBB); // Assert that this is the first visit of the block and mark as generated. - auto It = GeneratedBlocks.insert(&Block); + [[maybe_unused]] auto It = GeneratedBlocks.insert(&Block); assert(It.second); auto const &BlockInfo = Layout.blockInfos.at(&Block); - // Assert that the stack is valid for entering the block. - assertLayoutCompatibility(CurrentStack, BlockInfo.entryLayout); + // Assert that the stack is valid for entering the block. The entry layout + // of the function entry block is fully determined by the first + // instruction, so we can ignore 'BlockInfo.entryLayout'. + if (&Block != FuncInfo->Entry) { + assert(AreLayoutsCompatible(CurrentStack, BlockInfo.entryLayout)); - // Might set some slots to junk, if not required by the block. - CurrentStack = BlockInfo.entryLayout; + // Might set some slots to junk, if not required by the block. + CurrentStack = BlockInfo.entryLayout; + } assert(static_cast(CurrentStack.size()) == Assembly.getStackHeight()); // Emit jumpdest, if required. if (EVMUtils::valueOrNullptr(BlockLabels, &Block)) Assembly.appendLabel(); - for (auto const &Operation : Block.Operations) { - // Create required layout for entering the Operation. - createStackLayout(Layout.operationEntryLayout.at(&Operation)); + for (const auto &Operation : Block.Operations) { + createOperationEntryLayout(Operation); - // Assert that we have the inputs of the Operation on stack top. - assert(static_cast(CurrentStack.size()) == Assembly.getStackHeight()); - assert(CurrentStack.size() >= Operation.Input.size()); +#ifndef NDEBUG size_t BaseHeight = CurrentStack.size() - Operation.Input.size(); - assertLayoutCompatibility(EVMUtils::to_vector(EVMUtils::take_last( - CurrentStack, Operation.Input.size())), - Operation.Input); +#endif // NDEBUG // Perform the Operation. std::visit(*this, Operation.Operation); @@ -289,9 +347,9 @@ void EVMOptimizedCodeTransform::operator()(CFG::BasicBlock const &Block) { assert(static_cast(CurrentStack.size()) == Assembly.getStackHeight()); assert(CurrentStack.size() == BaseHeight + Operation.Output.size()); assert(CurrentStack.size() >= Operation.Output.size()); - assertLayoutCompatibility(EVMUtils::to_vector(EVMUtils::take_last( - CurrentStack, Operation.Output.size())), - Operation.Output); + assert(AreLayoutsCompatible(EVMUtils::to_vector(EVMUtils::take_last( + CurrentStack, Operation.Output.size())), + Operation.Output)); } // Exit the block. @@ -342,11 +400,11 @@ void EVMOptimizedCodeTransform::operator()(CFG::BasicBlock const &Block) { CurrentStack.pop_back(); // Assert that we have a valid stack for both jump targets.
- assertLayoutCompatibility( + assert(AreLayoutsCompatible( CurrentStack, - Layout.blockInfos.at(CondJump.NonZero).entryLayout); - assertLayoutCompatibility( - CurrentStack, Layout.blockInfos.at(CondJump.Zero).entryLayout); + Layout.blockInfos.at(CondJump.NonZero).entryLayout)); + assert(AreLayoutsCompatible( + CurrentStack, Layout.blockInfos.at(CondJump.Zero).entryLayout)); { // Restore the stack afterwards for the non-zero case below. @@ -404,8 +462,9 @@ void EVMOptimizedCodeTransform::operator()(CFG::BasicBlock const &Block) { }}, Block.Exit); - // TODO: We could assert that the last emitted assembly item terminated or was - // an (unconditional) jump. + // TODO: + // We could assert that the last emitted assembly item terminated or was + // an (unconditional) jump. CurrentStack.clear(); Assembly.setStackHeight(0); } @@ -428,9 +487,7 @@ void EVMOptimizedCodeTransform::operator()() { Assembly.setStackHeight(static_cast(CurrentStack.size())); Assembly.appendLabel(); - // Create the entry layout of the function body block and visit. - createStackLayout(Layout.blockInfos.at(FuncInfo->Entry).entryLayout); - + // Visit the function entry block. (*this)(*FuncInfo->Entry); Assembly.finalize(); diff --git a/llvm/lib/Target/EVM/EVMOptimizedCodeTransform.h b/llvm/lib/Target/EVM/EVMOptimizedCodeTransform.h index dce8819454bc..2382c68a80b3 100644 --- a/llvm/lib/Target/EVM/EVMOptimizedCodeTransform.h +++ b/llvm/lib/Target/EVM/EVMOptimizedCodeTransform.h @@ -1,4 +1,4 @@ -//===--- EVMOptimizedCodeTransform.h - Stack layout generator ---*- C++ -*-===// +//===--- EVMOptimizedCodeTransform.h - Create stackified MIR ---*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// // -// This file transforms the stack layout back into the Machine IR instructions -// in 'stackified' form using the EVMAssembly class. +// This file transforms MIR to the 'stackified' MIR using CFG, StackLayout +// and EVMAssembly classes. // //===----------------------------------------------------------------------===// @@ -20,7 +20,6 @@ #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineLoopInfo.h" -#include #include namespace llvm { @@ -47,16 +46,20 @@ class EVMOptimizedCodeTransform { EVMOptimizedCodeTransform(EVMAssembly &Assembly, const CFG &Cfg, const StackLayout &Layout, MachineFunction &MF); - /// Assert that it is valid to transition from \p SourceStack to \p - /// TargetStack. That is \p SourceStack matches each slot in \p + /// Checks if it's valid to transition from \p SourceStack to \p + /// TargetStack, that is \p SourceStack matches each slot in \p /// TargetStack that is not a JunkSlot exactly. - static void assertLayoutCompatibility(Stack const &SourceStack, - Stack const &TargetStack); + static bool AreLayoutsCompatible(Stack const &SourceStack, + Stack const &TargetStack); /// Shuffles CurrentStack to the desired \p TargetStack while emitting the /// shuffling code to Assembly. void createStackLayout(Stack TargetStack); + /// Creates the Op.Input stack layout from the 'CurrentStack' taking into + /// account commutative property of the operation. + void createOperationEntryLayout(const CFG::Operation &Op); + /// Generate code for the given block \p Block. /// Expects the current stack layout 'CurrentStack' to be a stack layout that /// is compatible with the entry layout expected by the block. 
Recursively diff --git a/llvm/lib/Target/EVM/EVMSplitCriticalEdges.cpp b/llvm/lib/Target/EVM/EVMSplitCriticalEdges.cpp index 39049b898fd9..f96d0deddca7 100644 --- a/llvm/lib/Target/EVM/EVMSplitCriticalEdges.cpp +++ b/llvm/lib/Target/EVM/EVMSplitCriticalEdges.cpp @@ -1,5 +1,4 @@ -//===----- EVMSplitCriticalEdges.cpp - Split Critical Edges ------*- C++ -//-*--===// +//===----- EVMSplitCriticalEdges.cpp - Split Critical Edges ----*- C++ -*--===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/llvm/lib/Target/EVM/EVMStackDebug.cpp b/llvm/lib/Target/EVM/EVMStackDebug.cpp index ee78ef00a03f..773ae26af37b 100644 --- a/llvm/lib/Target/EVM/EVMStackDebug.cpp +++ b/llvm/lib/Target/EVM/EVMStackDebug.cpp @@ -20,7 +20,6 @@ using namespace llvm; -#ifndef NDEBUG static StringRef getInstName(const MachineInstr *MI) { const MachineFunction *MF = MI->getParent()->getParent(); const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); @@ -81,6 +80,7 @@ std::string llvm::stackSlotToString(const StackSlot &Slot) { ; } +#ifndef NDEBUG void ControlFlowGraphPrinter::operator()(const CFG &Cfg) { (*this)(Cfg.FuncInfo); for (const auto &Block : Cfg.Blocks) @@ -307,5 +307,4 @@ std::string StackLayoutPrinter::getBlockId(CFG::BasicBlock const &Block) { BlocksToPrint.emplace_back(&Block); return std::to_string(Id) + "(" + Name + ")"; } - #endif // NDEBUG diff --git a/llvm/lib/Target/EVM/EVMStackDebug.h b/llvm/lib/Target/EVM/EVMStackDebug.h index e18ce24e6e9b..efe39f20f084 100644 --- a/llvm/lib/Target/EVM/EVMStackDebug.h +++ b/llvm/lib/Target/EVM/EVMStackDebug.h @@ -26,11 +26,11 @@ namespace llvm { struct StackLayout; -#ifndef NDEBUG const Function *getCalledFunction(const MachineInstr &MI); std::string stackSlotToString(const StackSlot &Slot); std::string stackToString(Stack const &S); +#ifndef NDEBUG class ControlFlowGraphPrinter { public: ControlFlowGraphPrinter(raw_ostream &OS) : OS(OS) {} diff --git a/llvm/lib/Target/EVM/EVMStackLayoutGenerator.cpp b/llvm/lib/Target/EVM/EVMStackLayoutGenerator.cpp index c67b76c297e7..9b2f1dd0625e 100644 --- a/llvm/lib/Target/EVM/EVMStackLayoutGenerator.cpp +++ b/llvm/lib/Target/EVM/EVMStackLayoutGenerator.cpp @@ -260,7 +260,7 @@ Stack StackLayoutGenerator::propagateStackThroughOperation( // Determine the ideal permutation of the slots in ExitLayout that are not // operation outputs (and not to be generated on the fly), s.t. shuffling the - // `IdealStack + Operation.output` to ExitLayout is cheap. + // 'IdealStack + Operation.output' to ExitLayout is cheap. Stack IdealStack = createIdealLayout(Operation.Output, ExitStack, generateSlotOnTheFly); @@ -354,7 +354,7 @@ void StackLayoutGenerator::processEntryPoint( for (auto [JumpingBlock, Target] : BackwardsJumps) { // This block jumps backwards, but does not provide all slots required by // the jump target on exit. Therefore we need to visit the subgraph - // between ``Target`` and ``JumpingBlock`` again. + // between 'Target' and 'JumpingBlock' again. auto StartIt = std::begin(Layout.blockInfos[Target].entryLayout); auto EndIt = std::end(Layout.blockInfos[Target].entryLayout); if (std::any_of(StartIt, EndIt, @@ -362,10 +362,10 @@ void StackLayoutGenerator::processEntryPoint( StackSlot const &Slot) { return !EVMUtils::contains(exitLayout, Slot); })) { - // In particular we can visit backwards starting from ``JumpingBlock`` - // and mark all entries to-be-visited again until we hit ``Target``. 
+ // In particular we can visit backwards starting from 'JumpingBlock' + // and mark all entries to-be-visited again until we hit 'Target'. ToVisit.emplace_front(JumpingBlock); - // Since we are likely to permute the entry layout of ``Target``, we + // Since we are likely to permute the entry layout of 'Target', we // also visit its entries again. This is not required for correctness, // since the set of stack slots will match, but it may move some // required stack shuffling from the loop condition to outside the loop. @@ -381,7 +381,7 @@ void StackLayoutGenerator::processEntryPoint( for (auto const *Entry : Block->Entries) AddChild(Entry); }); - // While the shuffled layout for ``Target`` will be compatible, it can + // While the shuffled layout for 'Target' will be compatible, it can // be worthwhile propagating it further up once more. This would mean // not stopping at Block == Target above, resp. even doing // Visited.clear() here, revisiting the entire graph. This is a tradeoff @@ -538,11 +538,13 @@ void StackLayoutGenerator::stitchConditionalJumps( for (auto &Slot : NewEntryLayout) if (!EVMUtils::contains(OriginalEntryLayout, Slot)) Slot = JunkSlot{}; +#ifndef NDEBUG // Make sure everything the block being jumped to requires is // actually present or can be generated. for (auto const &Slot : OriginalEntryLayout) assert(canBeFreelyGenerated(Slot) || EVMUtils::contains(NewEntryLayout, Slot)); +#endif // NDEBUG return NewEntryLayout; }; @@ -654,7 +656,7 @@ Stack StackLayoutGenerator::combineStack(Stack const &Stack1, } ++C[I]; // Note that for a proper implementation of the Heap algorithm this would - // need to revert back to ``I = 1.`` However, the incorrect implementation + // need to revert back to 'I = 1'. However, the incorrect implementation // produces decent result and the proper version would have N! complexity // and is thereby not feasible. ++I; @@ -698,6 +700,37 @@ Stack StackLayoutGenerator::compressStack(Stack CurStack) { return CurStack; } +/// Returns the number of operations required to transform stack \p Source to +/// \p Target. +size_t llvm::EvaluateStackTransform(Stack Source, Stack const &Target) { + size_t OpGas = 0; + auto Swap = [&](unsigned SwapDepth) { + if (SwapDepth > 16) + OpGas += 1000; + else + OpGas += 3; // SWAP* gas price; + }; + + auto DupOrPush = [&](StackSlot const &Slot) { + if (canBeFreelyGenerated(Slot)) + OpGas += 3; + else { + auto Depth = EVMUtils::findOffset(EVMUtils::get_reverse(Source), Slot); + if (!Depth) + llvm_unreachable("No slot in the stack"); + + if (*Depth < 16) + OpGas += 3; // DUP* gas price + else + OpGas += 1000; + } + }; + auto Pop = [&]() { OpGas += 2; }; + + createStackLayout(Source, Target, Swap, DupOrPush, Pop); + return OpGas; +} + void StackLayoutGenerator::fillInJunk(CFG::BasicBlock const &Block, CFG::FunctionInfo const *FunctionInfo) { /// Recursively adds junk to the subgraph starting on \p Entry. @@ -739,70 +772,16 @@ void StackLayoutGenerator::fillInJunk(CFG::BasicBlock const &Block, }); }; - /// Returns the number of operations required to transform \p Source to \p - /// Target. 
- auto EvaluateTransform = [&](Stack Source, Stack const &Target) -> size_t { - size_t OpGas = 0; - auto Swap = [&](unsigned SwapDepth) { - if (SwapDepth > 16) - OpGas += 1000; - else - OpGas += 3; // SWAP* gas price; - }; - - auto DupOrPush = [&](StackSlot const &Slot) { - if (canBeFreelyGenerated(Slot)) - OpGas += 3; - else { - if (auto Depth = - EVMUtils::findOffset(EVMUtils::get_reverse(Source), Slot)) { - if (*Depth < 16) - OpGas += 3; // gas price for DUP - else - OpGas += 1000; - } else { - // This has to be a previously unassigned return variable. - // We at least sanity-check that it is among the return variables at - // all. -#ifndef NDEBUG - bool VarExists = false; - assert(std::holds_alternative(Slot)); - for (CFG::BasicBlock *Exit : FunctionInfo->Exits) { - const Stack &RetValues = - std::get(Exit->Exit).RetValues; - - for (const StackSlot &Val : RetValues) { - if (const VariableSlot *VarSlot = std::get_if(&Val)) - if (*VarSlot == std::get(Slot)) - VarExists = true; - } - } - assert(VarExists); -#endif // NDEBUG - // Strictly speaking the cost of the - // PUSH0 depends on the targeted EVM version, but the difference will - // not matter here. - OpGas += 2; - } - } - }; - - auto Pop = [&]() { OpGas += 2; }; - - createStackLayout(Source, Target, Swap, DupOrPush, Pop); - return OpGas; - }; - /// Returns the number of junk slots to be prepended to \p TargetLayout for /// an optimal transition from \p EntryLayout to \p TargetLayout. auto GetBestNumJunk = [&](Stack const &EntryLayout, Stack const &TargetLayout) -> size_t { - size_t BestCost = EvaluateTransform(EntryLayout, TargetLayout); + size_t BestCost = EvaluateStackTransform(EntryLayout, TargetLayout); size_t BestNumJunk = 0; size_t MaxJunk = EntryLayout.size(); for (size_t NumJunk = 1; NumJunk <= MaxJunk; ++NumJunk) { - size_t Cost = EvaluateTransform(EntryLayout, Stack{NumJunk, JunkSlot{}} + - TargetLayout); + size_t Cost = EvaluateStackTransform( + EntryLayout, Stack{NumJunk, JunkSlot{}} + TargetLayout); if (Cost < BestCost) { BestCost = Cost; BestNumJunk = NumJunk; diff --git a/llvm/lib/Target/EVM/EVMStackLayoutGenerator.h b/llvm/lib/Target/EVM/EVMStackLayoutGenerator.h index 75b2beb233c7..ff675b88f30d 100644 --- a/llvm/lib/Target/EVM/EVMStackLayoutGenerator.h +++ b/llvm/lib/Target/EVM/EVMStackLayoutGenerator.h @@ -24,6 +24,10 @@ namespace llvm { +/// Returns the number of operations required to transform stack \p Source to +/// \p Target. +size_t EvaluateStackTransform(Stack Source, Stack const &Target); + struct StackLayout { struct BlockInfo { /// Complete stack layout that is required for entering a block. @@ -84,7 +88,7 @@ class StackLayoutGenerator { std::set const &Visited, std::list &DependencyList) const; - /// Returns a pair of ``{jumpingBlock, targetBlock}`` for each backwards jump + /// Returns a pair of '{jumpingBlock, targetBlock}' for each backwards jump /// in the graph starting at \p Eentry. std::list> collectBackwardsJumps(CFG::BasicBlock const &Entry) const; @@ -92,12 +96,12 @@ class StackLayoutGenerator { /// After the main algorithms, layouts at conditional jumps are merely /// compatible, i.e. the exit layout of the jumping block is a superset of the /// entry layout of the target block. This function modifies the entry layouts - /// of conditional jump targets, s.t. 
the entry layout of target blocks match + /// of conditional jump targets, s.t., the entry layout of target blocks match /// the exit layout of the jumping block exactly, except that slots not - /// required after the jump are marked as `JunkSlot`s. + /// required after the jump are marked as 'JunkSlot's. void stitchConditionalJumps(CFG::BasicBlock const &Block); - /// Calculates the ideal stack layout, s.t. both \p Stack1 and \p Stack2 can + /// Calculates the ideal stack layout, s.t., both \p Stack1 and \p Stack2 can /// be achieved with minimal stack shuffling when starting from the returned /// layout. static Stack combineStack(Stack const &Stack1, Stack const &Stack2); diff --git a/llvm/lib/Target/EVM/EVMStackShuffler.h b/llvm/lib/Target/EVM/EVMStackShuffler.h index 9968972f8b13..52432682a32d 100644 --- a/llvm/lib/Target/EVM/EVMStackShuffler.h +++ b/llvm/lib/Target/EVM/EVMStackShuffler.h @@ -232,8 +232,10 @@ class Shuffler { })) { // Bring up all remaining target slots, if any, or terminate otherwise. if (Ops.sourceSize() < Ops.targetSize()) { - if (!dupDeepSlotIfRequired(Ops)) - assert(bringUpTargetSlot(Ops, Ops.sourceSize())); + if (!dupDeepSlotIfRequired(Ops)) { + [[maybe_unused]] bool Res = bringUpTargetSlot(Ops, Ops.sourceSize()); + assert(Res); + } return true; } return false; @@ -303,8 +305,10 @@ class Shuffler { Ops.targetSize() && // There is a target slot at this position. !Ops.targetIsArbitrary( Offset)) { // And that target slot is not arbitrary. - if (!dupDeepSlotIfRequired(Ops)) - assert(bringUpTargetSlot(Ops, Offset)); + if (!dupDeepSlotIfRequired(Ops)) { + [[maybe_unused]] bool Res = bringUpTargetSlot(Ops, Offset); + assert(Res); + } return true; } @@ -326,8 +330,10 @@ class Shuffler { // If we still need more slots, produce a suitable one. 
if (Ops.sourceSize() < Ops.targetSize()) { - if (!dupDeepSlotIfRequired(Ops)) - assert(bringUpTargetSlot(Ops, Ops.sourceSize())); + if (!dupDeepSlotIfRequired(Ops)) { + [[maybe_unused]] bool Res = bringUpTargetSlot(Ops, Ops.sourceSize()); + assert(Res); + } return true; } diff --git a/llvm/lib/Target/EVM/EVMTargetMachine.cpp b/llvm/lib/Target/EVM/EVMTargetMachine.cpp index ccbe26b43389..46ac8a52e607 100644 --- a/llvm/lib/Target/EVM/EVMTargetMachine.cpp +++ b/llvm/lib/Target/EVM/EVMTargetMachine.cpp @@ -56,7 +56,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeEVMTarget() { initializeEVMSingleUseExpressionPass(PR); initializeEVMSplitCriticalEdgesPass(PR); initializeEVMStackifyPass(PR); - initializeEVMStackifyEFPass(PR); + initializeEVMBPStackificationPass(PR); } static std::string computeDataLayout() { @@ -208,7 +208,7 @@ void EVMPassConfig::addPreEmitPass() { addPass(createEVMRegColoring()); addPass(createEVMStackify()); } else { - addPass(createEVMStackifyEF()); + addPass(createEVMBPStackification()); } } } diff --git a/llvm/lib/Target/EVM/EVMTargetTransformInfo.h b/llvm/lib/Target/EVM/EVMTargetTransformInfo.h index a0f0f62168b1..0f1b41d429f6 100644 --- a/llvm/lib/Target/EVM/EVMTargetTransformInfo.h +++ b/llvm/lib/Target/EVM/EVMTargetTransformInfo.h @@ -35,7 +35,7 @@ class EVMTTIImpl final : public BasicTTIImplBase { const EVMTargetLowering *getTLI() const { return TLI; } public: - enum SyncVMRegisterClass { Vector /* Unsupported */, GPR }; + enum EVMRegisterClass { Vector /* Unsupported */, GPR }; EVMTTIImpl(const EVMTargetMachine *TM, const Function &F) : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), @@ -100,9 +100,6 @@ class EVMTTIImpl final : public BasicTTIImplBase { OpsOut.push_back(Type::getIntNTy(Context, RemainingBytes * 8)); } - // TODO: The value is copied from SyncVM, needs to be checked. 
- unsigned getInliningThresholdMultiplier() const { return 11; } - void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE); diff --git a/llvm/test/CodeGen/EVM/stack-ops-commutable.ll b/llvm/test/CodeGen/EVM/stack-ops-commutable.ll index 9a14eb4c59cb..93f9e1c34938 100644 --- a/llvm/test/CodeGen/EVM/stack-ops-commutable.ll +++ b/llvm/test/CodeGen/EVM/stack-ops-commutable.ll @@ -8,7 +8,6 @@ define void @no_manipulations_needed_with_junk(i256 %a1, i256 %a2, i256 %a3) nor ; CHECK-LABEL: no_manipulations_needed_with_junk: ; CHECK: ; %bb.0: ; CHECK-NEXT: JUMPDEST -; CHECK-NEXT: SWAP1 ; CHECK-NEXT: ADD ; CHECK-NEXT: PUSH0 ; CHECK-NEXT: REVERT @@ -17,6 +16,75 @@ define void @no_manipulations_needed_with_junk(i256 %a1, i256 %a2, i256 %a3) nor unreachable } +define void @no_manipulations_needed_with_junk_eq(i256 %a1, i256 %a2, i256 %a3) noreturn { + %cmp = icmp eq i256 %a1, %a2 + %x1 = zext i1 %cmp to i256 + call void @llvm.evm.revert(ptr addrspace(1) null, i256 %x1) + unreachable + +; CHECK-LABEL: no_manipulations_needed_with_junk_eq: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: EQ +; CHECK-NEXT: PUSH0 +; CHECK-NEXT: REVERT +} + +define i256 @no_manipulations_needed_no_junk_addmod(i256 %a1, i256 %a2, i256 %a3) { +; CHECK-LABEL: no_manipulations_needed_no_junk_addmod: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: ADDMOD +; CHECK-NEXT: SWAP1 +; CHECK-NEXT: JUMP + %x1 = call i256 @llvm.evm.addmod(i256 %a2, i256 %a1, i256 %a3) + ret i256 %x1 +} + +define i256 @no_manipulations_needed_no_junk_mulmod(i256 %a1, i256 %a2, i256 %a3) { +; CHECK-LABEL: no_manipulations_needed_no_junk_mulmod: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: MULMOD +; CHECK-NEXT: SWAP1 +; CHECK-NEXT: JUMP + %x1 = call i256 @llvm.evm.mulmod(i256 %a2, i256 %a1, i256 %a3) + ret i256 %x1 +} + +define i256 @no_manipulations_needed_no_junk_and(i256 %a1, i256 %a2) { +; CHECK-LABEL: no_manipulations_needed_no_junk_and: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: AND +; CHECK-NEXT: SWAP1 +; CHECK-NEXT: JUMP + %x1 = and i256 %a2, %a1 + ret i256 %x1 +} + +define i256 @no_manipulations_needed_no_junk_or(i256 %a1, i256 %a2) { +; CHECK-LABEL: no_manipulations_needed_no_junk_or: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: OR +; CHECK-NEXT: SWAP1 +; CHECK-NEXT: JUMP + %x1 = or i256 %a2, %a1 + ret i256 %x1 +} + +define i256 @no_manipulations_needed_no_junk_xor(i256 %a1, i256 %a2) { +; CHECK-LABEL: no_manipulations_needed_no_junk_xor: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: XOR +; CHECK-NEXT: SWAP1 +; CHECK-NEXT: JUMP + %x1 = xor i256 %a2, %a1 + ret i256 %x1 +} + define i256 @no_manipulations_needed_no_junk(i256 %a1, i256 %a2, i256 %a3) nounwind { ; CHECK-LABEL: no_manipulations_needed_no_junk: ; CHECK: ; %bb.0: @@ -34,7 +102,6 @@ define void @reorder_with_junk(i256 %a1, i256 %a2, i256 %a3) noreturn { ; CHECK-LABEL: reorder_with_junk: ; CHECK: ; %bb.0: ; CHECK-NEXT: JUMPDEST -; CHECK-NEXT: SWAP1 ; CHECK-NEXT: ADD ; CHECK-NEXT: PUSH0 ; CHECK-NEXT: REVERT @@ -61,7 +128,6 @@ define void @swap_first_with_junk(i256 %a1, i256 %a2, i256 %a3) noreturn { ; CHECK: ; %bb.0: ; CHECK-NEXT: JUMPDEST ; CHECK-NEXT: POP -; CHECK-NEXT: SWAP1 ; CHECK-NEXT: ADD ; CHECK-NEXT: PUSH0 ; CHECK-NEXT: REVERT @@ -70,6 +136,20 @@ define void @swap_first_with_junk(i256 %a1, i256 %a2, i256 %a3) noreturn { unreachable } +define i256 @two_commutable(i256 %a1, i256 %a2, i256 %a3) { + %x1 = add i256 %a3, %a2 + %x2 = add i256 %a1, %x1 + 
ret i256 %x2 +; CHECK-LABEL: two_commutable: +; CHECK: ; %bb.0: +; CHECK-NEXT: JUMPDEST +; CHECK-NEXT: SWAP2 +; CHECK-NEXT: ADD +; CHECK-NEXT: ADD +; CHECK-NEXT: SWAP1 +; CHECK-NEXT: JUMP +} + define void @swap_second_with_junk(i256 %a1, i256 %a2, i256 %a3, i256 %a4) noreturn { ; CHECK-LABEL: swap_second_with_junk: ; CHECK: ; %bb.0: @@ -87,7 +167,6 @@ define i256 @swap_first_no_junk(i256 %a1, i256 %a2, i256 %a3, i256 %a4) nounwind ; CHECK-LABEL: swap_first_no_junk: ; CHECK: ; %bb.0: ; CHECK-NEXT: JUMPDEST -; CHECK-NEXT: SWAP3 ; CHECK-NEXT: SWAP2 ; CHECK-NEXT: POP ; CHECK-NEXT: POP @@ -102,7 +181,6 @@ define i256 @swap_second_no_junk(i256 %a1, i256 %a2, i256 %a3, i256 %a4) nounwin ; CHECK-LABEL: swap_second_no_junk: ; CHECK: ; %bb.0: ; CHECK-NEXT: JUMPDEST -; CHECK-NEXT: SWAP3 ; CHECK-NEXT: SWAP2 ; CHECK-NEXT: POP ; CHECK-NEXT: POP @@ -179,11 +257,10 @@ define i256 @second_arg_alive_no_junk(i256 %a1, i256 %a2, i256 %a3) nounwind { ; CHECK: ; %bb.0: ; CHECK-NEXT: JUMPDEST ; CHECK-NEXT: DUP2 +; CHECK-NEXT: PUSH1 4 ; CHECK-NEXT: SWAP3 +; CHECK-NEXT: SWAP4 ; CHECK-NEXT: POP -; CHECK-NEXT: SWAP1 -; CHECK-NEXT: PUSH1 4 -; CHECK-NEXT: SWAP2 ; CHECK-NEXT: ADD ; CHECK-NEXT: SWAP2 ; CHECK-NEXT: SUB @@ -220,10 +297,10 @@ define i256 @both_arg_alive_no_junk(i256 %a1, i256 %a2, i256 %a3) nounwind { ; CHECK-LABEL: both_arg_alive_no_junk: ; CHECK: ; %bb.0: ; CHECK-NEXT: JUMPDEST -; CHECK-NEXT: SWAP2 +; CHECK-NEXT: DUP1 +; CHECK-NEXT: SWAP3 ; CHECK-NEXT: POP ; CHECK-NEXT: DUP2 -; CHECK-NEXT: DUP2 ; CHECK-NEXT: DIV ; CHECK-NEXT: SWAP2 ; CHECK-NEXT: ADD @@ -241,9 +318,9 @@ define i256 @same_arg_dead_with_junk(i256 %a1, i256 %a2, i256 %a3) nounwind { ; CHECK: ; %bb.0: ; CHECK-NEXT: JUMPDEST ; CHECK-NEXT: POP -; CHECK-NEXT: SWAP1 -; CHECK-NEXT: POP ; CHECK-NEXT: DUP1 +; CHECK-NEXT: SWAP2 +; CHECK-NEXT: POP ; CHECK-NEXT: ADD ; CHECK-NEXT: SWAP1 ; CHECK-NEXT: DUP2 @@ -255,4 +332,40 @@ define i256 @same_arg_dead_with_junk(i256 %a1, i256 %a2, i256 %a3) nounwind { ret i256 %x1 } +define void @commutable_not_in_function_entry() noreturn { + +; CHECK-LABEL: .BB{{[0-9]+}}_3: +; CHECK: JUMPDEST +; CHECK-NEXT: PUSH4 4294967295 +; CHECK-NEXT: AND +; CHECK-NEXT: PUSH0 + +enter: + %offset = inttoptr i256 0 to ptr addrspace(2) + %load = call i256 @llvm.evm.calldataload(ptr addrspace(2) %offset) + %calldata = trunc i256 %load to i32 + br label %header + +header: + %phi = phi i32 [ %calldata, %enter ], [ %inc, %do ] + %phi2 = phi i32 [ 1, %enter ], [ %mul, %do ] + %cmp = icmp sgt i32 %phi, 0 + br i1 %cmp, label %do, label %exit + +do: + %mul = mul nsw i32 %phi2, %phi + %inc = add nsw i32 %phi, -1 + br label %header + +exit: + %res = zext i32 %phi2 to i256 + store i256 %res, ptr addrspace(1) null, align 4 + call void @llvm.evm.return(ptr addrspace(1) null, i256 32) + unreachable +} + +declare i256 @llvm.evm.addmod(i256, i256, i256) +declare i256 @llvm.evm.mulmod(i256, i256, i256) +declare i256 @llvm.evm.calldataload(ptr addrspace(2)) +declare void @llvm.evm.return(ptr addrspace(1), i256) declare void @llvm.evm.revert(ptr addrspace(1), i256) diff --git a/llvm/test/CodeGen/EVM/stack-ops.ll b/llvm/test/CodeGen/EVM/stack-ops.ll index 92dfaf24887f..40fe299cf9f8 100644 --- a/llvm/test/CodeGen/EVM/stack-ops.ll +++ b/llvm/test/CodeGen/EVM/stack-ops.ll @@ -247,10 +247,10 @@ define i256 @both_arg_alive_no_junk(i256 %a1, i256 %a2, i256 %a3) nounwind { ; CHECK-LABEL: both_arg_alive_no_junk: ; CHECK: ; %bb.0: ; CHECK-NEXT: JUMPDEST -; CHECK-NEXT: SWAP2 +; CHECK-NEXT: DUP1 +; CHECK-NEXT: SWAP3 ; CHECK-NEXT: POP ; CHECK-NEXT: DUP2 -; 
CHECK-NEXT: DUP2 ; CHECK-NEXT: DIV ; CHECK-NEXT: SWAP2 ; CHECK-NEXT: SUB @@ -268,9 +268,9 @@ define i256 @same_arg_dead_with_junk(i256 %a1, i256 %a2, i256 %a3) nounwind { ; CHECK: ; %bb.0: ; CHECK-NEXT: JUMPDEST ; CHECK-NEXT: POP -; CHECK-NEXT: SWAP1 -; CHECK-NEXT: POP ; CHECK-NEXT: DUP1 +; CHECK-NEXT: SWAP2 +; CHECK-NEXT: POP ; CHECK-NEXT: ADD ; CHECK-NEXT: SWAP1 ; CHECK-NEXT: DUP2 @@ -287,9 +287,9 @@ define i256 @same_arg_dead_no_junk(i256 %a1, i256 %a2, i256 %a3) nounwind { ; CHECK: ; %bb.0: ; CHECK-NEXT: JUMPDEST ; CHECK-NEXT: POP -; CHECK-NEXT: SWAP1 -; CHECK-NEXT: POP ; CHECK-NEXT: DUP1 +; CHECK-NEXT: SWAP2 +; CHECK-NEXT: POP ; CHECK-NEXT: ADD ; CHECK-NEXT: SWAP1 ; CHECK-NEXT: JUMP @@ -302,10 +302,10 @@ define i256 @same_arg_alive_with_junk(i256 %a1, i256 %a2, i256 %a3) nounwind { ; CHECK: ; %bb.0: ; CHECK-NEXT: JUMPDEST ; CHECK-NEXT: POP -; CHECK-NEXT: SWAP1 -; CHECK-NEXT: POP ; CHECK-NEXT: DUP1 ; CHECK-NEXT: DUP1 +; CHECK-NEXT: SWAP3 +; CHECK-NEXT: POP ; CHECK-NEXT: ADD ; CHECK-NEXT: ADD ; CHECK-NEXT: SWAP1 @@ -324,10 +324,10 @@ define i256 @same_arg_alive_no_junk(i256 %a1, i256 %a2, i256 %a3) nounwind { ; CHECK: ; %bb.0: ; CHECK-NEXT: JUMPDEST ; CHECK-NEXT: POP -; CHECK-NEXT: SWAP1 -; CHECK-NEXT: POP ; CHECK-NEXT: DUP1 ; CHECK-NEXT: DUP1 +; CHECK-NEXT: SWAP3 +; CHECK-NEXT: POP ; CHECK-NEXT: ADD ; CHECK-NEXT: ADD ; CHECK-NEXT: SWAP1 diff --git a/llvm/test/CodeGen/EVM/unused_function_arguments.ll b/llvm/test/CodeGen/EVM/unused_function_arguments.ll index 0aa142b88b10..50e68f31e91c 100644 --- a/llvm/test/CodeGen/EVM/unused_function_arguments.ll +++ b/llvm/test/CodeGen/EVM/unused_function_arguments.ll @@ -22,9 +22,9 @@ define i256 @wat(i256 %a1, i256 %a2, i256 %a3) nounwind { ; CHECK-LABEL: @wat ; CHECK: JUMPDEST ; CHECK-NEXT: POP -; CHECK-NEXT: SWAP1 -; CHECK-NEXT: POP ; CHECK-NEXT: DUP1 +; CHECK-NEXT: SWAP2 +; CHECK-NEXT: POP ; CHECK-NEXT: ADD ; CHECK-NEXT: SWAP1 ; CHECK-NEXT: JUMP diff --git a/llvm/test/CodeGen/Generic/2008-08-07-PtrToInt-SmallerInt.ll b/llvm/test/CodeGen/Generic/2008-08-07-PtrToInt-SmallerInt.ll index c3fb54e3dd53..e2760d6066ac 100644 --- a/llvm/test/CodeGen/Generic/2008-08-07-PtrToInt-SmallerInt.ll +++ b/llvm/test/CodeGen/Generic/2008-08-07-PtrToInt-SmallerInt.ll @@ -1,4 +1,6 @@ -; XFAIL: target=eravm{{.*}}, target=evm{{.*}} +; XFAIL: target=eravm{{.*}} +; UNSUPPORTED: target=evm{{.*}} + ; TODO: CPR-920 support operators ; RUN: llc < %s ; PR2603