From 1edd22030ccb9603f21d13150847ea40a4136d45 Mon Sep 17 00:00:00 2001
From: Florian Hahn
Date: Sun, 29 Sep 2024 10:37:56 +0100
Subject: [PATCH] [LV] Retrieve reduction resume values directly for epilogue vec. (NFC)

Use the reduction resume values from the phis in the scalar header, instead
of collecting them in a map. This removes some complexity from the general
executePlan code paths and pushes it to only the epilogue vectorization part.
---
 .../Vectorize/LoopVectorizationPlanner.h      |  9 +++----
 .../Transforms/Vectorize/LoopVectorize.cpp    | 26 ++++++-------------
 2 files changed, 12 insertions(+), 23 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 00eec0a6f7b14e..f24cd43a93bc7d 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -435,11 +435,10 @@ class LoopVectorizationPlanner {
   /// \p ExpandedSCEVs is passed during execution of the plan for epilogue loop
   /// to re-use expansion results generated during main plan execution.
   ///
-  /// Returns a mapping of SCEVs to their expanded IR values and a mapping for
-  /// the reduction resume values. Note that this is a temporary workaround
-  /// needed due to the current epilogue handling.
-  std::pair<DenseMap<const SCEV *, Value *>,
-            DenseMap<const RecurrenceDescriptor *, Value *>>
+  /// Returns a mapping of SCEVs to their expanded IR values.
+  /// Note that this is a temporary workaround needed due to the current
+  /// epilogue handling.
+  DenseMap<const SCEV *, Value *>
   executePlan(ElementCount VF, unsigned UF, VPlan &BestPlan,
               InnerLoopVectorizer &LB, DominatorTree *DT,
               bool IsEpilogueVectorization,
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index d2a7abf4a8d9a7..cb346be8ffe5e2 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -676,11 +676,6 @@ class InnerLoopVectorizer {
   /// Structure to hold information about generated runtime checks, responsible
   /// for cleaning the checks, if vectorization turns out unprofitable.
   GeneratedRTChecks &RTChecks;
-
-  // Holds the resume values for reductions in the loops, used to set the
-  // correct start value of reduction PHIs when vectorizing the epilogue.
-  SmallMapVector<const RecurrenceDescriptor *, PHINode *, 4>
-      ReductionResumeValues;
 };
 
 /// Encapsulate information regarding vectorization of a loop and its epilogue.
@@ -7426,10 +7421,9 @@ static void addRuntimeUnrollDisableMetaData(Loop *L) {
 }
 
 // Check if \p RedResult is a ComputeReductionResult instruction, and if it is
-// create a merge phi node for it and add it to \p ReductionResumeValues.
+// create a merge phi node for it.
 static void createAndCollectMergePhiForReduction(
     VPInstruction *RedResult,
-    DenseMap<const RecurrenceDescriptor *, Value *> &ReductionResumeValues,
     VPTransformState &State, Loop *OrigLoop, BasicBlock *LoopMiddleBlock,
     bool VectorizingEpilogue) {
   if (!RedResult ||
@@ -7487,13 +7481,9 @@ static void createAndCollectMergePhiForReduction(
   OrigPhi->setIncomingValue(SelfEdgeBlockIdx, BCBlockPhi);
   Instruction *LoopExitInst = RdxDesc.getLoopExitInstr();
   OrigPhi->setIncomingValue(IncomingEdgeBlockIdx, LoopExitInst);
-
-  ReductionResumeValues[&RdxDesc] = BCBlockPhi;
 }
 
-std::pair<DenseMap<const SCEV *, Value *>,
-          DenseMap<const RecurrenceDescriptor *, Value *>>
-LoopVectorizationPlanner::executePlan(
+DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
     ElementCount BestVF, unsigned BestUF, VPlan &BestVPlan,
     InnerLoopVectorizer &ILV, DominatorTree *DT, bool IsEpilogueVectorization,
     const DenseMap<const SCEV *, Value *> *ExpandedSCEVs) {
@@ -7579,12 +7569,11 @@ LoopVectorizationPlanner::executePlan(
   BestVPlan.execute(&State);
 
   // 2.5 Collect reduction resume values.
-  DenseMap<const RecurrenceDescriptor *, Value *> ReductionResumeValues;
   auto *ExitVPBB =
       cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion()->getSingleSuccessor());
   for (VPRecipeBase &R : *ExitVPBB) {
     createAndCollectMergePhiForReduction(
-        dyn_cast<VPInstruction>(&R), ReductionResumeValues, State, OrigLoop,
+        dyn_cast<VPInstruction>(&R), State, OrigLoop,
         State.CFG.VPBB2IRBB[ExitVPBB], ExpandedSCEVs);
   }
 
@@ -7634,7 +7623,7 @@ LoopVectorizationPlanner::executePlan(
     setBranchWeights(*MiddleTerm, Weights, /*IsExpected=*/false);
   }
 
-  return {State.ExpandedSCEVs, ReductionResumeValues};
+  return State.ExpandedSCEVs;
 }
 
 //===--------------------------------------------------------------------===//
@@ -10121,8 +10110,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
                                            EPI, &LVL, &CM, BFI, PSI, Checks);
 
         std::unique_ptr<VPlan> BestMainPlan(BestPlan.duplicate());
-        const auto &[ExpandedSCEVs, ReductionResumeValues] = LVP.executePlan(
-            EPI.MainLoopVF, EPI.MainLoopUF, *BestMainPlan, MainILV, DT, true);
+        auto ExpandedSCEVs = LVP.executePlan(EPI.MainLoopVF, EPI.MainLoopUF,
+                                             *BestMainPlan, MainILV, DT, true);
         ++LoopsVectorized;
 
         // Second pass
vectorizes the epilogue and adjusts the control flow
@@ -10167,10 +10156,11 @@ bool LoopVectorizePass::processLoop(Loop *L) {
             Value *ResumeV = nullptr;
             // TODO: Move setting of resume values to prepareToExecute.
             if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R)) {
+              ResumeV = cast<PHINode>(ReductionPhi->getUnderlyingInstr())
+                            ->getIncomingValueForBlock(L->getLoopPreheader());
               const RecurrenceDescriptor &RdxDesc =
                   ReductionPhi->getRecurrenceDescriptor();
               RecurKind RK = RdxDesc.getRecurrenceKind();
-              ResumeV = ReductionResumeValues.find(&RdxDesc)->second;
               if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) {
                 // VPReductionPHIRecipes for AnyOf reductions expect a boolean as
                 // start value; compare the final value from the main vector loop