From 34a5327e2fef63f00c743cc2634d74593ed34a4f Mon Sep 17 00:00:00 2001
From: Weiqun Zhang <WeiqunZhang@lbl.gov>
Date: Tue, 19 Mar 2024 15:29:45 -0700
Subject: [PATCH] Clang-Tidy 17: cppcoreguidelines-missing-std-forward (#3818)

---
 Src/Base/AMReX_BaseFabUtility.H               |  4 +-
 Src/Base/AMReX_CTOParallelForImpl.H           | 12 ++---
 Src/Base/AMReX_FabArrayUtility.H              | 36 ++++++-------
 Src/Base/AMReX_GpuAtomic.H                    |  4 +-
 Src/Base/AMReX_GpuContainers.H                |  2 +-
 Src/Base/AMReX_GpuLaunch.H                    |  4 +-
 Src/Base/AMReX_GpuLaunchFunctsC.H             | 28 +++++------
 Src/Base/AMReX_GpuLaunchFunctsG.H             | 50 +++++++++----------
 Src/Base/AMReX_Loop.H                         | 34 ++++++-------
 Src/Base/AMReX_MFParallelForC.H               |  4 +-
 Src/Base/AMReX_MFParallelForG.H               |  2 +-
 Src/Base/AMReX_MultiFabUtil.H                 |  8 +--
 Src/Base/AMReX_PhysBCFunct.H                  |  4 +-
 Src/Base/AMReX_Reduce.H                       | 42 ++++++++--------
 Src/Base/AMReX_RungeKutta.H                   | 12 ++---
 Src/Base/AMReX_Scan.H                         | 10 ++--
 Src/Base/AMReX_Tuple.H                        |  6 +--
 Src/Base/AMReX_TypeList.H                     |  4 +-
 Src/Base/Parser/AMReX_Parser_Y.cpp            |  2 +-
 Src/EB/AMReX_EB2_IF_Intersection.H            |  8 +--
 Src/EB/AMReX_EB2_IF_Union.H                   |  8 +--
 .../MLMG/AMReX_MLEBNodeFDLap_2D_K.H           |  4 +-
 .../MLMG/AMReX_MLEBNodeFDLap_3D_K.H           |  2 +-
 .../MLMG/AMReX_MLEBNodeFDLap_K.H              |  6 +--
 .../MLMG/AMReX_MLEBNodeFDLaplacian.H          |  4 +-
 Src/LinearSolvers/MLMG/AMReX_MLNodeLap_2D_K.H |  4 +-
 Src/LinearSolvers/MLMG/AMReX_MLNodeLap_3D_K.H |  4 +-
 Src/Particle/AMReX_DenseBins.H                | 12 ++---
 Src/Particle/AMReX_ParticleCommunication.H    |  4 +-
 Src/Particle/AMReX_ParticleContainer.H        |  2 +-
 Src/Particle/AMReX_ParticleContainerI.H       |  2 +-
 Src/Particle/AMReX_ParticleLocator.H          |  4 +-
 Src/Particle/AMReX_ParticleMesh.H             |  4 +-
 Src/Particle/AMReX_ParticleReduce.H           | 12 ++---
 Src/Particle/AMReX_ParticleTransformation.H   | 25 ++++++----
 Src/Particle/AMReX_ParticleUtil.H             |  6 +--
 Src/Particle/AMReX_SparseBins.H               |  2 +-
 Src/Particle/AMReX_WriteBinaryParticleData.H  |  6 +--
 38 files changed, 195 insertions(+), 192 deletions(-)
diff --git a/Src/Base/AMReX_BaseFabUtility.H b/Src/Base/AMReX_BaseFabUtility.H
index e7449a11bf1..c0b8cf26c45 100644
--- a/Src/Base/AMReX_BaseFabUtility.H
+++ b/Src/Base/AMReX_BaseFabUtility.H
@@ -26,7 +26,7 @@ template <typename STRUCT, typename F,
                                   AMREX_IS_TRIVIALLY_COPYABLE(STRUCT) &&
                                   std::is_trivially_destructible<STRUCT>::value,
                                   int>::type FOO = 0>
-void fill (BaseFab<STRUCT>& aos_fab, F && f)
+void fill (BaseFab<STRUCT>& aos_fab, F const& f)
 {
     Box const& box = aos_fab.box();
     auto const& aos = aos_fab.array();
@@ -88,7 +88,7 @@ void fill (BaseFab<STRUCT>& aos_fab, F && f)
     } else
 #endif
     {
-        amrex::LoopOnCpu(box, [=] (int i, int j, int k) noexcept
+        amrex::LoopOnCpu(box, [&] (int i, int j, int k) noexcept
         {
             f(aos(i,j,k), i, j, k);
         });
diff --git a/Src/Base/AMReX_CTOParallelForImpl.H b/Src/Base/AMReX_CTOParallelForImpl.H
index 73ca3a25e87..3157dad63f0 100644
--- a/Src/Base/AMReX_CTOParallelForImpl.H
+++ b/Src/Base/AMReX_CTOParallelForImpl.H
@@ -28,7 +28,7 @@ namespace detail
 {
     template <int MT, typename T, class F, typename... As>
     std::enable_if_t<std::is_integral<T>::value || std::is_same<T,Box>::value, bool>
-    ParallelFor_helper2 (T const& N, F&& f, TypeList<As...>,
+    ParallelFor_helper2 (T const& N, F const& f, TypeList<As...>,
                          std::array<int,sizeof...(As)> const& runtime_options)
     {
         if (runtime_options == std::array<int,sizeof...(As)>{As::value...}) {
@@ -51,7 +51,7 @@ namespace detail
 
     template <int MT, typename T, class F, typename... As>
     std::enable_if_t<std::is_integral<T>::value, bool>
-    ParallelFor_helper2 (Box const& box, T ncomp, F&& f, TypeList<As...>,
+    ParallelFor_helper2 (Box const& box, T ncomp, F const& f, TypeList<As...>,
                          std::array<int,sizeof...(As)> const& runtime_options)
     {
         if (runtime_options == std::array<int,sizeof...(As)>{As::value...}) {
@@ -67,11 +67,11 @@ namespace detail
 
     template <int MT, typename T, class F, typename... PPs, typename RO>
     std::enable_if_t<std::is_integral<T>::value || std::is_same<T,Box>::value>
-    ParallelFor_helper1 (T const& N, F&& f, TypeList<PPs...>,
+    ParallelFor_helper1 (T const& N, F const& f, TypeList<PPs...>,
                          RO const& runtime_options)
     {
         bool found_option = (false || ... ||
-                             ParallelFor_helper2<MT>(N, std::forward<F>(f),
+                             ParallelFor_helper2<MT>(N, f,
                                                      PPs{}, runtime_options));
         amrex::ignore_unused(found_option);
         AMREX_ASSERT(found_option);
@@ -79,11 +79,11 @@ namespace detail
 
     template <int MT, typename T, class F, typename... PPs, typename RO>
     std::enable_if_t<std::is_integral<T>::value>
-    ParallelFor_helper1 (Box const& box, T ncomp, F&& f, TypeList<PPs...>,
+    ParallelFor_helper1 (Box const& box, T ncomp, F const& f, TypeList<PPs...>,
                          RO const& runtime_options)
     {
         bool found_option = (false || ... ||
-                             ParallelFor_helper2<MT>(box, ncomp, std::forward<F>(f),
+                             ParallelFor_helper2<MT>(box, ncomp, f,
                                                      PPs{}, runtime_options));
         amrex::ignore_unused(found_option);
         AMREX_ASSERT(found_option);
diff --git a/Src/Base/AMReX_FabArrayUtility.H b/Src/Base/AMReX_FabArrayUtility.H
index 311e1fab43c..5fe87e56365 100644
--- a/Src/Base/AMReX_FabArrayUtility.H
+++ b/Src/Base/AMReX_FabArrayUtility.H
@@ -21,7 +21,7 @@ namespace fudetail {
 template <class FAB, class F,
           class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
 typename FAB::value_type
-ReduceSum_host (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
+ReduceSum_host (FabArray<FAB> const& fa, IntVect const& nghost, F const& f)
 {
     using value_type = typename FAB::value_type;
     value_type sm = 0;
@@ -47,7 +47,7 @@ std::enable_if_t<IsBaseFab<FAB>::value,
                  std::conditional_t<std::is_same<OP,ReduceOpLogicalAnd>::value ||
                                     std::is_same<OP,ReduceOpLogicalOr>::value,
                                     int, typename FAB::value_type> >
-ReduceMF (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
+ReduceMF (FabArray<FAB> const& fa, IntVect const& nghost, F const& f)
 {
     using T = std::conditional_t<std::is_same<OP,ReduceOpLogicalAnd>::value ||
                                  std::is_same<OP,ReduceOpLogicalOr>::value,
@@ -66,7 +66,7 @@ std::enable_if_t<IsBaseFab<FAB1>::value && IsBaseFab<FAB2>::value,
                  std::conditional_t<std::is_same<OP,ReduceOpLogicalAnd>::value ||
                                     std::is_same<OP,ReduceOpLogicalOr>::value,
                                     int, typename FAB1::value_type> >
-ReduceMF (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2, IntVect const& nghost, F&& f)
+ReduceMF (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2, IntVect const& nghost, F const& f)
 {
     using T = std::conditional_t<std::is_same<OP,ReduceOpLogicalAnd>::value ||
                                  std::is_same<OP,ReduceOpLogicalOr>::value,
@@ -88,7 +88,7 @@ std::enable_if_t<IsBaseFab<FAB1>::value && IsBaseFab<FAB2>::value && IsBaseFab<F
                                     std::is_same<OP,ReduceOpLogicalOr>::value,
                                     int, typename FAB1::value_type> >
 ReduceMF (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
-          FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
+          FabArray<FAB3> const& fa3, IntVect const& nghost, F const& f)
 {
     using T = std::conditional_t<std::is_same<OP,ReduceOpLogicalAnd>::value ||
                                  std::is_same<OP,ReduceOpLogicalOr>::value,
@@ -156,7 +156,7 @@ template <class FAB1, class FAB2, class F,
           class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
 typename FAB1::value_type
 ReduceSum_host (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
-                IntVect const& nghost, F&& f)
+                IntVect const& nghost, F const& f)
 {
     using value_type = typename FAB1::value_type;
     value_type sm = 0;
@@ -234,7 +234,7 @@ template <class FAB1, class FAB2, class FAB3, class F,
           class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
 typename FAB1::value_type
 ReduceSum_host (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
-                FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
+                FabArray<FAB3> const& fa3, IntVect const& nghost, F const& f)
 {
     using value_type = typename FAB1::value_type;
     value_type sm = 0;
@@ -311,7 +311,7 @@ namespace fudetail {
 template <class FAB, class F,
           class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
 typename FAB::value_type
-ReduceMin_host (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
+ReduceMin_host (FabArray<FAB> const& fa, IntVect const& nghost, F const& f)
 {
     using value_type = typename FAB::value_type;
     value_type r = std::numeric_limits<value_type>::max();
@@ -382,7 +382,7 @@ template <class FAB1, class FAB2, class F,
           class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
 typename FAB1::value_type
 ReduceMin_host (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
-                IntVect const& nghost, F&& f)
+                IntVect const& nghost, F const& f)
 {
     using value_type = typename FAB1::value_type;
     value_type r = std::numeric_limits<value_type>::max();
@@ -460,7 +460,7 @@ template <class FAB1, class FAB2, class FAB3, class F,
           class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
 typename FAB1::value_type
 ReduceMin_host (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
-                FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
+                FabArray<FAB3> const& fa3, IntVect const& nghost, F const& f)
 {
     using value_type = typename FAB1::value_type;
     value_type r = std::numeric_limits<value_type>::max();
@@ -537,7 +537,7 @@ namespace fudetail {
 template <class FAB, class F,
           class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
 typename FAB::value_type
-ReduceMax_host (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
+ReduceMax_host (FabArray<FAB> const& fa, IntVect const& nghost, F const& f)
 {
     using value_type = typename FAB::value_type;
     value_type r = std::numeric_limits<value_type>::lowest();
@@ -609,7 +609,7 @@ template <class FAB1, class FAB2, class F,
           class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
 typename FAB1::value_type
 ReduceMax_host (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
-                IntVect const& nghost, F&& f)
+                IntVect const& nghost, F const& f)
 {
     using value_type = typename FAB1::value_type;
     value_type r = std::numeric_limits<value_type>::lowest();
@@ -687,7 +687,7 @@ template <class FAB1, class FAB2, class FAB3, class F,
           class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
 typename FAB1::value_type
 ReduceMax_host (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
-                FabArray<FAB3> const& fa3, IntVect const& nghost, F&& f)
+                FabArray<FAB3> const& fa3, IntVect const& nghost, F const& f)
 {
     using value_type = typename FAB1::value_type;
     value_type r = std::numeric_limits<value_type>::lowest();
@@ -764,7 +764,7 @@ namespace fudetail {
 template <class FAB, class F,
           class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
 bool
-ReduceLogicalAnd_host (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
+ReduceLogicalAnd_host (FabArray<FAB> const& fa, IntVect const& nghost, F const& f)
 {
     int r = true;
 
@@ -836,7 +836,7 @@ template <class FAB1, class FAB2, class F,
           class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
 bool
 ReduceLogicalAnd_host (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
-                       IntVect const& nghost, F&& f)
+                       IntVect const& nghost, F const& f)
 {
     int r = true;
 
@@ -911,7 +911,7 @@ namespace fudetail {
 template <class FAB, class F,
           class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
 bool
-ReduceLogicalOr_host (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
+ReduceLogicalOr_host (FabArray<FAB> const& fa, IntVect const& nghost, F const& f)
 {
     int r = false;
 
@@ -940,9 +940,9 @@ ReduceLogicalOr_host_wrapper (FabArray<FAB> const& fa, IntVect const& nghost, F&
 
 template <class FAB, class F>
 std::enable_if_t<amrex::DefinitelyNotHostRunnable<F>::value, bool>
-ReduceLogicalOr_host (FabArray<FAB> const& fa, IntVect const& nghost, F&& f)
+ReduceLogicalOr_host (FabArray<FAB> const& fa, IntVect const& nghost, F&& /*f*/)
 {
-    amrex::ignore_unused(fa,nghost,f);
+    amrex::ignore_unused(fa,nghost);
     amrex::Abort("ReduceLogicalOr: Launch Region is off. Device lambda cannot be called by host.");
     return 0;
 }
@@ -983,7 +983,7 @@ template <class FAB1, class FAB2, class F,
           class bar = std::enable_if_t<IsBaseFab<FAB1>::value> >
 bool
 ReduceLogicalOr_host (FabArray<FAB1> const& fa1, FabArray<FAB2> const& fa2,
-                      IntVect const& nghost, F&& f)
+                      IntVect const& nghost, F const& f)
 {
     int r = false;
 
diff --git a/Src/Base/AMReX_GpuAtomic.H b/Src/Base/AMReX_GpuAtomic.H
index 9adc655298e..0a056ab5c38 100644
--- a/Src/Base/AMReX_GpuAtomic.H
+++ b/Src/Base/AMReX_GpuAtomic.H
@@ -258,8 +258,8 @@ namespace detail {
         ))
         AMREX_IF_ON_HOST((
             T old = *add;
-            T const tmp = op(old, value);
-            if (cond(tmp)) {
+            T const tmp = std::forward<Op>(op)(old, value);
+            if (std::forward<Cond>(cond)(tmp)) {
                 *add = tmp;
                 return true;
             } else {
diff --git a/Src/Base/AMReX_GpuContainers.H b/Src/Base/AMReX_GpuContainers.H
index 9ecb144b58c..012941b0055 100644
--- a/Src/Base/AMReX_GpuContainers.H
+++ b/Src/Base/AMReX_GpuContainers.H
@@ -407,7 +407,7 @@ namespace amrex::Gpu {
                                std::is_trivially_copyable_v<T> &&
                                amrex::IsCallable<F, T&, Long>::value,
                                int> FOO = 0>
-    void fillAsync (IT first, IT last, F&& f) noexcept
+    void fillAsync (IT first, IT last, F const& f) noexcept
     {
         auto N = static_cast<Long>(std::distance(first, last));
         if (N <= 0) { return; }
diff --git a/Src/Base/AMReX_GpuLaunch.H b/Src/Base/AMReX_GpuLaunch.H
index 620659ff43f..435a11f342b 100644
--- a/Src/Base/AMReX_GpuLaunch.H
+++ b/Src/Base/AMReX_GpuLaunch.H
@@ -62,11 +62,11 @@ namespace amrex {
 // CPU variation
 
     template<class L>
-    void launch_host (L&& f0) noexcept { f0(); }
+    void launch_host (L&& f0) noexcept { std::forward<L>(f0)(); }
 
     template<class L, class... Lambdas>
     void launch_host (L&& f0, Lambdas&&... fs) noexcept {
-        f0();
+        std::forward<L>(f0)();
         launch_host(std::forward<Lambdas>(fs)...);
     }
 
diff --git a/Src/Base/AMReX_GpuLaunchFunctsC.H b/Src/Base/AMReX_GpuLaunchFunctsC.H
index 6ce9cca0f3a..a309b4b6f32 100644
--- a/Src/Base/AMReX_GpuLaunchFunctsC.H
+++ b/Src/Base/AMReX_GpuLaunchFunctsC.H
@@ -57,18 +57,18 @@ namespace detail {
 template<typename T, typename L>
 void launch (T const& n, L&& f) noexcept
 {
-    f(n);
+    std::forward<L>(f)(n);
 }
 
 template<int MT, typename T, typename L>
 void launch (T const& n, L&& f) noexcept
 {
     amrex::ignore_unused(MT);
-    f(n);
+    std::forward<L>(f)(n);
 }
 
 template <typename T, typename L, typename M=std::enable_if_t<std::is_integral<T>::value> >
-void For (T n, L&& f) noexcept
+void For (T n, L const& f) noexcept
 {
     for (T i = 0; i < n; ++i) {
         detail::call_f(f,i);
@@ -96,7 +96,7 @@ void For (Gpu::KernelInfo const&, T n, L&& f) noexcept
 }
 
 template <typename T, typename L, typename M=std::enable_if_t<std::is_integral<T>::value> >
-void ParallelFor (T n, L&& f) noexcept
+void ParallelFor (T n, L const& f) noexcept
 {
     AMREX_PRAGMA_SIMD
     for (T i = 0; i < n; ++i) {
@@ -125,7 +125,7 @@ void ParallelFor (Gpu::KernelInfo const&, T n, L&& f) noexcept
 }
 
 template <typename L>
-void For (Box const& box, L&& f) noexcept
+void For (Box const& box, L const& f) noexcept
 {
     const auto lo = amrex::lbound(box);
     const auto hi = amrex::ubound(box);
@@ -157,7 +157,7 @@ void For (Gpu::KernelInfo const&, Box const& box, L&& f) noexcept
 }
 
 template <typename L>
-void ParallelFor (Box const& box, L&& f) noexcept
+void ParallelFor (Box const& box, L const& f) noexcept
 {
     const auto lo = amrex::lbound(box);
     const auto hi = amrex::ubound(box);
@@ -190,7 +190,7 @@ void ParallelFor (Gpu::KernelInfo const&, Box const& box, L&& f) noexcept
 }
 
 template <typename T, typename L, typename M=std::enable_if_t<std::is_integral<T>::value> >
-void For (Box const& box, T ncomp, L&& f) noexcept
+void For (Box const& box, T ncomp, L const& f) noexcept
 {
     const auto lo = amrex::lbound(box);
     const auto hi = amrex::ubound(box);
@@ -224,7 +224,7 @@ void For (Gpu::KernelInfo const&, Box const& box, T ncomp, L&& f) noexcept
 }
 
 template <typename T, typename L, typename M=std::enable_if_t<std::is_integral<T>::value> >
-void ParallelFor (Box const& box, T ncomp, L&& f) noexcept
+void ParallelFor (Box const& box, T ncomp, L const& f) noexcept
 {
     const auto lo = amrex::lbound(box);
     const auto hi = amrex::ubound(box);
@@ -432,14 +432,14 @@ void ParallelFor (Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept
 template <typename L1, typename L2>
 void ParallelFor (Gpu::KernelInfo const&, Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept
 {
-    ParallelFor(box1,box2,f1,f2);
+    ParallelFor(box1,box2,std::forward<L1>(f1),std::forward<L2>(f2));
 }
 
 template <int MT, typename L1, typename L2>
 void ParallelFor (Gpu::KernelInfo const&, Box const& box1, Box const& box2, L1&& f1, L2&& f2) noexcept
 {
     amrex::ignore_unused(MT);
-    ParallelFor(box1,box2,f1,f2);
+    ParallelFor(box1,box2,std::forward<L1>(f1),std::forward<L2>(f2));
 }
 
 template <typename L1, typename L2, typename L3>
@@ -1037,7 +1037,7 @@ void HostDeviceFor (Gpu::KernelInfo const&,
 }
 
 template <typename T, typename L, typename M=std::enable_if_t<std::is_integral<T>::value> >
-void ParallelForRNG (T n, L&& f) noexcept
+void ParallelForRNG (T n, L const& f) noexcept
 {
     for (T i = 0; i < n; ++i) {
         f(i,RandomEngine{});
@@ -1045,7 +1045,7 @@ void ParallelForRNG (T n, L&& f) noexcept
 }
 
 template <typename L>
-void ParallelForRNG (Box const& box, L&& f) noexcept
+void ParallelForRNG (Box const& box, L const& f) noexcept
 {
     const auto lo = amrex::lbound(box);
     const auto hi = amrex::ubound(box);
@@ -1057,7 +1057,7 @@ void ParallelForRNG (Box const& box, L&& f) noexcept
 }
 
 template <typename T, typename L, typename M=std::enable_if_t<std::is_integral<T>::value> >
-void ParallelForRNG (Box const& box, T ncomp, L&& f) noexcept
+void ParallelForRNG (Box const& box, T ncomp, L const& f) noexcept
 {
     const auto lo = amrex::lbound(box);
     const auto hi = amrex::ubound(box);
@@ -1073,7 +1073,7 @@ void ParallelForRNG (Box const& box, T ncomp, L&& f) noexcept
 template <typename L>
 void single_task (L&& f) noexcept
 {
-    f();
+    std::forward<L>(f)();
 }
 
 }
diff --git a/Src/Base/AMReX_GpuLaunchFunctsG.H b/Src/Base/AMReX_GpuLaunchFunctsG.H
index 5e28ba20884..07f28d0944a 100644
--- a/Src/Base/AMReX_GpuLaunchFunctsG.H
+++ b/Src/Base/AMReX_GpuLaunchFunctsG.H
@@ -7,7 +7,7 @@ namespace amrex {
 #ifdef AMREX_USE_SYCL
 
 template <typename L>
-void single_task (gpuStream_t stream, L&& f) noexcept
+void single_task (gpuStream_t stream, L const& f) noexcept
 {
     auto& q = *(stream.queue);
     try {
@@ -21,7 +21,7 @@ void single_task (gpuStream_t stream, L&& f) noexcept
 
 template<typename L>
 void launch (int nblocks, int nthreads_per_block, std::size_t shared_mem_bytes,
-             gpuStream_t stream, L&& f) noexcept
+             gpuStream_t stream, L const& f) noexcept
 {
     const auto nthreads_total = std::size_t(nthreads_per_block) * nblocks;
     const std::size_t shared_mem_numull = (shared_mem_bytes+sizeof(unsigned long long)-1)
@@ -45,7 +45,7 @@ void launch (int nblocks, int nthreads_per_block, std::size_t shared_mem_bytes,
 }
 
 template<typename L>
-void launch (int nblocks, int nthreads_per_block, gpuStream_t stream, L&& f) noexcept
+void launch (int nblocks, int nthreads_per_block, gpuStream_t stream, L const& f) noexcept
 {
     const auto nthreads_total = std::size_t(nthreads_per_block) * nblocks;
     auto& q = *(stream.queue);
@@ -66,7 +66,7 @@ void launch (int nblocks, int nthreads_per_block, gpuStream_t stream, L&& f) noe
 
 template <int MT, typename L>
 void launch (int nblocks, std::size_t shared_mem_bytes, gpuStream_t stream,
-             L&& f) noexcept
+             L const& f) noexcept
 {
     const auto nthreads_total = MT * std::size_t(nblocks);
     const std::size_t shared_mem_numull = (shared_mem_bytes+sizeof(unsigned long long)-1)
@@ -91,7 +91,7 @@ void launch (int nblocks, std::size_t shared_mem_bytes, gpuStream_t stream,
 }
 
 template <int MT, typename L>
-void launch (int nblocks, gpuStream_t stream, L&& f) noexcept
+void launch (int nblocks, gpuStream_t stream, L const& f) noexcept
 {
     const auto nthreads_total = MT * std::size_t(nblocks);
     auto& q = *(stream.queue);
@@ -112,7 +112,7 @@ void launch (int nblocks, gpuStream_t stream, L&& f) noexcept
 }
 
 template<int MT, typename T, typename L>
-void launch (T const& n, L&& f) noexcept
+void launch (T const& n, L const& f) noexcept
 {
     if (amrex::isEmpty(n)) { return; }
     const auto ec = Gpu::makeExecutionConfig<MT>(n);
@@ -188,7 +188,7 @@ namespace detail {
 }
 
 template <int MT, typename T, typename L, typename M=std::enable_if_t<std::is_integral<T>::value> >
-void ParallelFor (Gpu::KernelInfo const& info, T n, L&& f) noexcept
+void ParallelFor (Gpu::KernelInfo const& info, T n, L const& f) noexcept
 {
     if (amrex::isEmpty(n)) { return; }
     const auto ec = Gpu::makeExecutionConfig<MT>(n);
@@ -236,7 +236,7 @@ void ParallelFor (Gpu::KernelInfo const& info, T n, L&& f) noexcept
 }
 
 template <int MT, typename L>
-void ParallelFor (Gpu::KernelInfo const& info, Box const& box, L&& f) noexcept
+void ParallelFor (Gpu::KernelInfo const& info, Box const& box, L const& f) noexcept
 {
     if (amrex::isEmpty(box)) { return; }
     const BoxIndexer indexer(box);
@@ -287,7 +287,7 @@ void ParallelFor (Gpu::KernelInfo const& info, Box const& box, L&& f) noexcept
 }
 
 template <int MT, typename T, typename L, typename M=std::enable_if_t<std::is_integral<T>::value> >
-void ParallelFor (Gpu::KernelInfo const& info, Box const& box, T ncomp, L&& f) noexcept
+void ParallelFor (Gpu::KernelInfo const& info, Box const& box, T ncomp, L const& f) noexcept
 {
     if (amrex::isEmpty(box)) { return; }
     const BoxIndexer indexer(box);
@@ -339,7 +339,7 @@ void ParallelFor (Gpu::KernelInfo const& info, Box const& box, T ncomp, L&& f) n
 }
 
 template <typename T, typename L, typename M=std::enable_if_t<std::is_integral<T>::value> >
-void ParallelForRNG (T n, L&& f) noexcept
+void ParallelForRNG (T n, L const& f) noexcept
 {
     if (amrex::isEmpty(n)) { return; }
     const auto ec = Gpu::ExecutionConfig(n);
@@ -372,7 +372,7 @@ void ParallelForRNG (T n, L&& f) noexcept
 }
 
 template <typename L>
-void ParallelForRNG (Box const& box, L&& f) noexcept
+void ParallelForRNG (Box const& box, L const& f) noexcept
 {
     if (amrex::isEmpty(box)) { return; }
     const BoxIndexer indexer(box);
@@ -408,7 +408,7 @@ void ParallelForRNG (Box const& box, L&& f) noexcept
 }
 
 template <typename T, typename L, typename M=std::enable_if_t<std::is_integral<T>::value> >
-void ParallelForRNG (Box const& box, T ncomp, L&& f) noexcept
+void ParallelForRNG (Box const& box, T ncomp, L const& f) noexcept
 {
     if (amrex::isEmpty(box)) { return; }
     const BoxIndexer indexer(box);
@@ -629,7 +629,7 @@ void ParallelFor (Gpu::KernelInfo const& /*info*/,
 // CUDA or HIP
 
 template <typename L>
-void single_task (gpuStream_t stream, L&& f) noexcept
+void single_task (gpuStream_t stream, L const& f) noexcept
 {
     AMREX_LAUNCH_KERNEL(Gpu::Device::warp_size, 1, 1, 0, stream,
                         [=] AMREX_GPU_DEVICE () noexcept {f();});
@@ -638,7 +638,7 @@ void single_task (gpuStream_t stream, L&& f) noexcept
 
 template <int MT, typename L>
 void launch (int nblocks, std::size_t shared_mem_bytes, gpuStream_t stream,
-             L&& f) noexcept
+             L const& f) noexcept
 {
     AMREX_LAUNCH_KERNEL(MT, nblocks, MT, shared_mem_bytes, stream,
                         [=] AMREX_GPU_DEVICE () noexcept { f(); });
@@ -646,7 +646,7 @@ void launch (int nblocks, std::size_t shared_mem_bytes, gpuStream_t stream,
 }
 
 template <int MT, typename L>
-void launch (int nblocks, gpuStream_t stream, L&& f) noexcept
+void launch (int nblocks, gpuStream_t stream, L const& f) noexcept
 {
     AMREX_LAUNCH_KERNEL(MT, nblocks, MT, 0, stream,
                         [=] AMREX_GPU_DEVICE () noexcept { f(); });
@@ -655,7 +655,7 @@ void launch (int nblocks, gpuStream_t stream, L&& f) noexcept
 
 template<typename L>
 void launch (int nblocks, int nthreads_per_block, std::size_t shared_mem_bytes,
-             gpuStream_t stream, L&& f) noexcept
+             gpuStream_t stream, L const& f) noexcept
 {
     AMREX_ASSERT(nthreads_per_block <= AMREX_GPU_MAX_THREADS);
     AMREX_LAUNCH_KERNEL(AMREX_GPU_MAX_THREADS, nblocks, nthreads_per_block, shared_mem_bytes,
@@ -670,7 +670,7 @@ void launch (int nblocks, int nthreads_per_block, gpuStream_t stream, L&& f) noe
 }
 
 template<int MT, typename T, typename L>
-void launch (T const& n, L&& f) noexcept
+void launch (T const& n, L const& f) noexcept
 {
     if (amrex::isEmpty(n)) { return; }
     const auto ec = Gpu::makeExecutionConfig<MT>(n);
@@ -735,7 +735,7 @@ namespace detail {
 
 template <int MT, typename T, typename L, typename M=std::enable_if_t<std::is_integral<T>::value> >
 std::enable_if_t<MaybeDeviceRunnable<L>::value>
-ParallelFor (Gpu::KernelInfo const&, T n, L&& f) noexcept
+ParallelFor (Gpu::KernelInfo const&, T n, L const& f) noexcept
 {
     if (amrex::isEmpty(n)) { return; }
     const auto ec = Gpu::makeExecutionConfig<MT>(n);
@@ -751,7 +751,7 @@ ParallelFor (Gpu::KernelInfo const&, T n, L&& f) noexcept
 
 template <int MT, typename L>
 std::enable_if_t<MaybeDeviceRunnable<L>::value>
-ParallelFor (Gpu::KernelInfo const&, Box const& box, L&& f) noexcept
+ParallelFor (Gpu::KernelInfo const&, Box const& box, L const& f) noexcept
 {
     if (amrex::isEmpty(box)) { return; }
     const BoxIndexer indexer(box);
@@ -770,7 +770,7 @@ ParallelFor (Gpu::KernelInfo const&, Box const& box, L&& f) noexcept
 
 template <int MT, typename T, typename L, typename M=std::enable_if_t<std::is_integral<T>::value> >
 std::enable_if_t<MaybeDeviceRunnable<L>::value>
-ParallelFor (Gpu::KernelInfo const&, Box const& box, T ncomp, L&& f) noexcept
+ParallelFor (Gpu::KernelInfo const&, Box const& box, T ncomp, L const& f) noexcept
 {
     if (amrex::isEmpty(box)) { return; }
     const BoxIndexer indexer(box);
@@ -788,7 +788,7 @@ ParallelFor (Gpu::KernelInfo const&, Box const& box, T ncomp, L&& f) noexcept
 
 template <typename T, typename L, typename M=std::enable_if_t<std::is_integral<T>::value> >
 std::enable_if_t<MaybeDeviceRunnable<L>::value>
-ParallelForRNG (T n, L&& f) noexcept
+ParallelForRNG (T n, L const& f) noexcept
 {
     if (amrex::isEmpty(n)) { return; }
     randState_t* rand_state = getRandState();
@@ -809,7 +809,7 @@ ParallelForRNG (T n, L&& f) noexcept
 
 template <typename L>
 std::enable_if_t<MaybeDeviceRunnable<L>::value>
-ParallelForRNG (Box const& box, L&& f) noexcept
+ParallelForRNG (Box const& box, L const& f) noexcept
 {
     if (amrex::isEmpty(box)) { return; }
     randState_t* rand_state = getRandState();
@@ -832,7 +832,7 @@ ParallelForRNG (Box const& box, L&& f) noexcept
 
 template <typename T, typename L, typename M=std::enable_if_t<std::is_integral<T>::value> >
 std::enable_if_t<MaybeDeviceRunnable<L>::value>
-ParallelForRNG (Box const& box, T ncomp, L&& f) noexcept
+ParallelForRNG (Box const& box, T ncomp, L const& f) noexcept
 {
     if (amrex::isEmpty(box)) { return; }
     randState_t* rand_state = getRandState();
@@ -1416,7 +1416,7 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info, T n, L&& f) noexcept
         amrex::Abort("amrex:: HOST_DEVICE disabled for Intel.  It takes too long to compile");
 #else
         AMREX_PRAGMA_SIMD
-        for (T i = 0; i < n; ++i) f(i);
+        for (T i = 0; i < n; ++i) { f(i); }
 #endif
     }
 }
@@ -1432,7 +1432,7 @@ HostDeviceParallelFor (Gpu::KernelInfo const& info, T n, L&& f) noexcept
         amrex::Abort("amrex:: HOST_DEVICE disabled for Intel.  It takes too long to compile");
 #else
         AMREX_PRAGMA_SIMD
-        for (T i = 0; i < n; ++i) f(i);
+        for (T i = 0; i < n; ++i) { f(i); }
 #endif
     }
 }
diff --git a/Src/Base/AMReX_Loop.H b/Src/Base/AMReX_Loop.H
index 19e1c3e5191..f45a2198c56 100644
--- a/Src/Base/AMReX_Loop.H
+++ b/Src/Base/AMReX_Loop.H
@@ -8,7 +8,7 @@ namespace amrex {
 
 template <class F>
 AMREX_GPU_HOST_DEVICE
-void Loop (Dim3 lo, Dim3 hi, F&& f) noexcept
+void Loop (Dim3 lo, Dim3 hi, F const& f) noexcept
 {
     for (int k = lo.z; k <= hi.z; ++k) {
     for (int j = lo.y; j <= hi.y; ++j) {
@@ -19,7 +19,7 @@ void Loop (Dim3 lo, Dim3 hi, F&& f) noexcept
 
 template <class F>
 AMREX_GPU_HOST_DEVICE
-void Loop (Dim3 lo, Dim3 hi, int ncomp, F&& f) noexcept
+void Loop (Dim3 lo, Dim3 hi, int ncomp, F const& f) noexcept
 {
     for (int n = 0; n < ncomp; ++n) {
     for (int k = lo.z; k <= hi.z; ++k) {
@@ -31,7 +31,7 @@ void Loop (Dim3 lo, Dim3 hi, int ncomp, F&& f) noexcept
 
 template <class F>
 AMREX_GPU_HOST_DEVICE
-void LoopConcurrent (Dim3 lo, Dim3 hi, F&& f) noexcept
+void LoopConcurrent (Dim3 lo, Dim3 hi, F const& f) noexcept
 {
     for (int k = lo.z; k <= hi.z; ++k) {
     for (int j = lo.y; j <= hi.y; ++j) {
@@ -43,7 +43,7 @@ void LoopConcurrent (Dim3 lo, Dim3 hi, F&& f) noexcept
 
 template <class F>
 AMREX_GPU_HOST_DEVICE
-void LoopConcurrent (Dim3 lo, Dim3 hi, int ncomp, F&& f) noexcept
+void LoopConcurrent (Dim3 lo, Dim3 hi, int ncomp, F const& f) noexcept
 {
     for (int n = 0; n < ncomp; ++n) {
     for (int k = lo.z; k <= hi.z; ++k) {
@@ -56,7 +56,7 @@ void LoopConcurrent (Dim3 lo, Dim3 hi, int ncomp, F&& f) noexcept
 
 template <class F>
 AMREX_GPU_HOST_DEVICE
-void Loop (Box const& bx, F&& f) noexcept
+void Loop (Box const& bx, F const& f) noexcept
 {
     const auto lo = amrex::lbound(bx);
     const auto hi = amrex::ubound(bx);
@@ -69,7 +69,7 @@ void Loop (Box const& bx, F&& f) noexcept
 
 template <class F>
 AMREX_GPU_HOST_DEVICE
-void Loop (Box const& bx, int ncomp, F&& f) noexcept
+void Loop (Box const& bx, int ncomp, F const& f) noexcept
 {
     const auto lo = amrex::lbound(bx);
     const auto hi = amrex::ubound(bx);
@@ -83,7 +83,7 @@ void Loop (Box const& bx, int ncomp, F&& f) noexcept
 
 template <class F>
 AMREX_GPU_HOST_DEVICE
-void LoopConcurrent (Box const& bx, F&& f) noexcept
+void LoopConcurrent (Box const& bx, F const& f) noexcept
 {
     const auto lo = amrex::lbound(bx);
     const auto hi = amrex::ubound(bx);
@@ -97,7 +97,7 @@ void LoopConcurrent (Box const& bx, F&& f) noexcept
 
 template <class F>
 AMREX_GPU_HOST_DEVICE
-void LoopConcurrent (Box const& bx, int ncomp, F&& f) noexcept
+void LoopConcurrent (Box const& bx, int ncomp, F const& f) noexcept
 {
     const auto lo = amrex::lbound(bx);
     const auto hi = amrex::ubound(bx);
@@ -116,7 +116,7 @@ void LoopConcurrent (Box const& bx, int ncomp, F&& f) noexcept
 // of the warning, we have to use the functions below for those situations.
 
 template <class F>
-void LoopOnCpu (Dim3 lo, Dim3 hi, F&& f) noexcept
+void LoopOnCpu (Dim3 lo, Dim3 hi, F const& f) noexcept
 {
     for (int k = lo.z; k <= hi.z; ++k) {
     for (int j = lo.y; j <= hi.y; ++j) {
@@ -126,7 +126,7 @@ void LoopOnCpu (Dim3 lo, Dim3 hi, F&& f) noexcept
 }
 
 template <class F>
-void LoopOnCpu (Dim3 lo, Dim3 hi, int ncomp, F&& f) noexcept
+void LoopOnCpu (Dim3 lo, Dim3 hi, int ncomp, F const& f) noexcept
 {
     for (int n = 0; n < ncomp; ++n) {
     for (int k = lo.z; k <= hi.z; ++k) {
@@ -137,7 +137,7 @@ void LoopOnCpu (Dim3 lo, Dim3 hi, int ncomp, F&& f) noexcept
 }
 
 template <class F>
-void LoopConcurrentOnCpu (Dim3 lo, Dim3 hi, F&& f) noexcept
+void LoopConcurrentOnCpu (Dim3 lo, Dim3 hi, F const& f) noexcept
 {
     for (int k = lo.z; k <= hi.z; ++k) {
     for (int j = lo.y; j <= hi.y; ++j) {
@@ -148,7 +148,7 @@ void LoopConcurrentOnCpu (Dim3 lo, Dim3 hi, F&& f) noexcept
 }
 
 template <class F>
-void LoopConcurrentOnCpu (Dim3 lo, Dim3 hi, int ncomp, F&& f) noexcept
+void LoopConcurrentOnCpu (Dim3 lo, Dim3 hi, int ncomp, F const& f) noexcept
 {
     for (int n = 0; n < ncomp; ++n) {
     for (int k = lo.z; k <= hi.z; ++k) {
@@ -160,7 +160,7 @@ void LoopConcurrentOnCpu (Dim3 lo, Dim3 hi, int ncomp, F&& f) noexcept
 }
 
 template <class F>
-void LoopOnCpu (Box const& bx, F&& f) noexcept
+void LoopOnCpu (Box const& bx, F const& f) noexcept
 {
     const auto lo = amrex::lbound(bx);
     const auto hi = amrex::ubound(bx);
@@ -172,7 +172,7 @@ void LoopOnCpu (Box const& bx, F&& f) noexcept
 }
 
 template <class F>
-void LoopOnCpu (Box const& bx, int ncomp, F&& f) noexcept
+void LoopOnCpu (Box const& bx, int ncomp, F const& f) noexcept
 {
     const auto lo = amrex::lbound(bx);
     const auto hi = amrex::ubound(bx);
@@ -185,7 +185,7 @@ void LoopOnCpu (Box const& bx, int ncomp, F&& f) noexcept
 }
 
 template <class F>
-void LoopConcurrentOnCpu (Box const& bx, F&& f) noexcept
+void LoopConcurrentOnCpu (Box const& bx, F const& f) noexcept
 {
     const auto lo = amrex::lbound(bx);
     const auto hi = amrex::ubound(bx);
@@ -198,7 +198,7 @@ void LoopConcurrentOnCpu (Box const& bx, F&& f) noexcept
 }
 
 template <class F>
-void LoopConcurrentOnCpu (Box const& bx, int ncomp, F&& f) noexcept
+void LoopConcurrentOnCpu (Box const& bx, int ncomp, F const& f) noexcept
 {
     const auto lo = amrex::lbound(bx);
     const auto hi = amrex::ubound(bx);
@@ -227,7 +227,7 @@ void LoopConcurrentOnCpu (Box const& bx, int ncomp, F&& f) noexcept
 
 template<auto I, auto N, class F>
 AMREX_GPU_HOST_DEVICE AMREX_INLINE
-constexpr void constexpr_for (F&& f)
+constexpr void constexpr_for (F const& f)
 {
     if constexpr (I < N) {
         f(std::integral_constant<decltype(I), I>());
diff --git a/Src/Base/AMReX_MFParallelForC.H b/Src/Base/AMReX_MFParallelForC.H
index cd050b2e331..b2269df1ea1 100644
--- a/Src/Base/AMReX_MFParallelForC.H
+++ b/Src/Base/AMReX_MFParallelForC.H
@@ -10,7 +10,7 @@ namespace amrex::experimental::detail {
 
 template <typename MF, typename F>
 std::enable_if_t<IsFabArray<MF>::value>
-ParallelFor (MF const& mf, IntVect const& nghost, IntVect const& ts, bool dynamic, F&& f)
+ParallelFor (MF const& mf, IntVect const& nghost, IntVect const& ts, bool dynamic, F const& f)
 {
 #ifdef AMREX_USE_OMP
 #pragma omp parallel
@@ -33,7 +33,7 @@ ParallelFor (MF const& mf, IntVect const& nghost, IntVect const& ts, bool dynami
 
 template <typename MF, typename F>
 std::enable_if_t<IsFabArray<MF>::value>
-ParallelFor (MF const& mf, IntVect const& nghost, int ncomp, IntVect const& ts, bool dynamic, F&& f)
+ParallelFor (MF const& mf, IntVect const& nghost, int ncomp, IntVect const& ts, bool dynamic, F const& f)
 {
 #ifdef AMREX_USE_OMP
 #pragma omp parallel
diff --git a/Src/Base/AMReX_MFParallelForG.H b/Src/Base/AMReX_MFParallelForG.H
index 80260ca0eb5..066e46f3b89 100644
--- a/Src/Base/AMReX_MFParallelForG.H
+++ b/Src/Base/AMReX_MFParallelForG.H
@@ -72,7 +72,7 @@ namespace parfor_mf_detail {
 
 template <int MT, typename MF, typename F>
 std::enable_if_t<IsFabArray<MF>::value>
-ParallelFor (MF const& mf, IntVect const& nghost, int ncomp, IntVect const&, bool, F&& f)
+ParallelFor (MF const& mf, IntVect const& nghost, int ncomp, IntVect const&, bool, F const& f)
 {
     const auto& index_array = mf.IndexArray();
     const int nboxes = index_array.size();
diff --git a/Src/Base/AMReX_MultiFabUtil.H b/Src/Base/AMReX_MultiFabUtil.H
index ca9b1ab7fff..5c05850f346 100644
--- a/Src/Base/AMReX_MultiFabUtil.H
+++ b/Src/Base/AMReX_MultiFabUtil.H
@@ -648,7 +648,7 @@ template <typename F>
 Real
 NormHelper (const MultiFab& x, int xcomp,
             const MultiFab& y, int ycomp,
-            F && f,
+            F const& f,
             int numcomp, IntVect nghost, bool local)
 {
     BL_ASSERT(x.boxArray() == y.boxArray());
@@ -709,8 +709,8 @@ Real
 NormHelper (const MMF& mask,
                const MultiFab& x, int xcomp,
                const MultiFab& y, int ycomp,
-               Pred && pf,
-               F && f,
+               Pred const& pf,
+               F const& f,
                int numcomp, IntVect nghost, bool local)
 {
     BL_ASSERT(x.boxArray() == y.boxArray());
@@ -1036,7 +1036,7 @@ template <typename Op, typename T, typename FAB, typename F,
 #endif
                            , int> FOO>
 BaseFab<T>
-ReduceToPlane (int direction, Box const& domain, FabArray<FAB> const& mf, F&& f)
+ReduceToPlane (int direction, Box const& domain, FabArray<FAB> const& mf, F const& f)
 {
     Box domain2d = domain;
     domain2d.setRange(direction, 0);
diff --git a/Src/Base/AMReX_PhysBCFunct.H b/Src/Base/AMReX_PhysBCFunct.H
index 2d0906f27ad..ff8b73b5f0e 100644
--- a/Src/Base/AMReX_PhysBCFunct.H
+++ b/Src/Base/AMReX_PhysBCFunct.H
@@ -78,7 +78,7 @@ public:
                    int dcomp, int numcomp,
                    Geometry const& geom, Real time,
                    const Vector<BCRec>& bcr, int bcomp,
-                   int orig_comp, FF&& fillfunc);
+                   int orig_comp, FF const& fillfunc);
 
     void nddoit (Box const& bx, FArrayBox& dest,
                  int dcomp, int numcomp,
@@ -355,7 +355,7 @@ GpuBndryFuncFab<F>::ccfcdoit (Box const& bx, FArrayBox& dest,
                               int dcomp, int numcomp,
                               Geometry const& geom, Real time,
                               const Vector<BCRec>& bcr, int bcomp,
-                              int orig_comp, FF&& fillfunc)
+                              int orig_comp, FF const& fillfunc)
 {
     const IntVect& len = bx.length();
 
diff --git a/Src/Base/AMReX_Reduce.H b/Src/Base/AMReX_Reduce.H
index bf49c3df0a8..3477ce0ff60 100644
--- a/Src/Base/AMReX_Reduce.H
+++ b/Src/Base/AMReX_Reduce.H
@@ -366,7 +366,7 @@ public:
 
     // This is public for CUDA
     template <typename I, typename MF, typename D, typename F>
-    void eval_mf (I, MF const& mf, IntVect const& nghost, int ncomp, D& reduce_data, F&&f)
+    void eval_mf (I, MF const& mf, IntVect const& nghost, int ncomp, D& reduce_data, F const& f)
     {
         using ReduceTuple = typename D::Type;
         const int nboxes = mf.local_size();
@@ -491,7 +491,7 @@ public:
     }
 
     template <typename D, typename F>
-    void eval (Box const& box, D & reduce_data, F&& f)
+    void eval (Box const& box, D & reduce_data, F const& f)
     {
         using ReduceTuple = typename D::Type;
         auto const& stream = Gpu::gpuStream();
@@ -551,7 +551,7 @@ public:
 
     template <typename N, typename D, typename F,
               typename M=std::enable_if_t<std::is_integral<N>::value> >
-    void eval (Box const& box, N ncomp, D & reduce_data, F&& f)
+    void eval (Box const& box, N ncomp, D & reduce_data, F const& f)
     {
         using ReduceTuple = typename D::Type;
         auto const& stream = Gpu::gpuStream();
@@ -612,7 +612,7 @@ public:
 
     template <typename N, typename D, typename F,
               typename M=std::enable_if_t<std::is_integral<N>::value> >
-    void eval (N n, D & reduce_data, F&& f)
+    void eval (N n, D & reduce_data, F const& f)
     {
         if (n <= 0) { return; }
         using ReduceTuple = typename D::Type;
@@ -766,7 +766,7 @@ T Sum (N n, T const* v, T init_val = 0)
 
 template <typename T, typename N, typename F,
           typename M=std::enable_if_t<std::is_integral<N>::value> >
-T Sum (N n, F&& f, T init_val = 0)
+T Sum (N n, F const& f, T init_val = 0)
 {
     ReduceOps<ReduceOpSum> reduce_op;
     ReduceData<T> reduce_data(reduce_op);
@@ -789,7 +789,7 @@ T Min (N n, T const* v, T init_val = std::numeric_limits<T>::max())
 
 template <typename T, typename N, typename F,
           typename M=std::enable_if_t<std::is_integral<N>::value> >
-T Min (N n, F&& f, T init_val = std::numeric_limits<T>::max())
+T Min (N n, F const& f, T init_val = std::numeric_limits<T>::max())
 {
     ReduceOps<ReduceOpMin> reduce_op;
     ReduceData<T> reduce_data(reduce_op);
@@ -812,7 +812,7 @@ T Max (N n, T const* v, T init_val = std::numeric_limits<T>::lowest())
 
 template <typename T, typename N, typename F,
           typename M=std::enable_if_t<std::is_integral<N>::value> >
-T Max (N n, F&& f, T init_val = std::numeric_limits<T>::lowest())
+T Max (N n, F const& f, T init_val = std::numeric_limits<T>::lowest())
 {
     ReduceOps<ReduceOpMax> reduce_op;
     ReduceData<T> reduce_data(reduce_op);
@@ -837,7 +837,7 @@ std::pair<T,T> MinMax (N n, T const* v)
 
 template <typename T, typename N, typename F,
           typename M=std::enable_if_t<std::is_integral<N>::value> >
-std::pair<T,T> MinMax (N n, F&& f)
+std::pair<T,T> MinMax (N n, F const& f)
 {
     ReduceOps<ReduceOpMin,ReduceOpMax> reduce_op;
     ReduceData<T,T> reduce_data(reduce_op);
@@ -851,7 +851,7 @@ std::pair<T,T> MinMax (N n, F&& f)
 }
 
 template <typename T, typename N, typename P, typename M=std::enable_if_t<std::is_integral<N>::value> >
-bool AnyOf (N n, T const* v, P&& pred)
+bool AnyOf (N n, T const* v, P const& pred)
 {
     Gpu::LaunchSafeGuard lsg(true);
     Gpu::DeviceScalar<int> ds(0);
@@ -907,7 +907,7 @@ bool AnyOf (N n, T const* v, P&& pred)
 }
 
 template <typename P>
-bool AnyOf (Box const& box, P&& pred)
+bool AnyOf (Box const& box, P const& pred)
 {
     Gpu::LaunchSafeGuard lsg(true);
     Gpu::DeviceScalar<int> ds(0);
@@ -1064,7 +1064,7 @@ public:
 
     template <typename MF, typename D, typename F>
     std::enable_if_t<IsFabArray<MF>::value && IsCallable<F, int, int, int, int>::value>
-    eval (MF const& mf, IntVect const& nghost, D & reduce_data, F&& f)
+    eval (MF const& mf, IntVect const& nghost, D & reduce_data, F const& f)
     {
         using ReduceTuple = typename D::Type;
 #ifdef AMREX_USE_OMP
@@ -1086,7 +1086,7 @@ public:
 
     template <typename MF, typename D, typename F>
     std::enable_if_t<IsFabArray<MF>::value && IsCallable<F, int, int, int, int, int>::value>
-    eval (MF const& mf, IntVect const& nghost, int ncomp, D & reduce_data, F&& f)
+    eval (MF const& mf, IntVect const& nghost, int ncomp, D & reduce_data, F const& f)
     {
         using ReduceTuple = typename D::Type;
 #ifdef AMREX_USE_OMP
@@ -1111,12 +1111,12 @@ public:
     void eval (Box const& box, D & reduce_data, F&& f)
     {
         auto& rr = reduce_data.reference(OpenMP::get_thread_num());
-        call_f<D>(box, rr, f);
+        call_f<D>(box, rr, std::forward<F>(f));
     }
 
     template <typename N, typename D, typename F,
               typename M=std::enable_if_t<std::is_integral<N>::value> >
-    void eval (Box const& box, N ncomp, D & reduce_data, F&& f)
+    void eval (Box const& box, N ncomp, D & reduce_data, F const& f)
     {
         using ReduceTuple = typename D::Type;
         auto& rr = reduce_data.reference(OpenMP::get_thread_num());
@@ -1132,7 +1132,7 @@ public:
 
     template <typename N, typename D, typename F,
               typename M=std::enable_if_t<std::is_integral<N>::value> >
-    void eval (N n, D & reduce_data, F&& f)
+    void eval (N n, D & reduce_data, F const& f)
     {
         using ReduceTuple = typename D::Type;
         auto& rr = reduce_data.reference(OpenMP::get_thread_num());
@@ -1166,7 +1166,7 @@ namespace Reduce {
 
 template <typename T, typename N, typename F,
           typename M=std::enable_if_t<std::is_integral<N>::value> >
-T Sum (N n, F&& f, T init_val = 0)
+T Sum (N n, F const& f, T init_val = 0)
 {
     T r = init_val;
 #ifdef AMREX_USE_OMP
@@ -1186,7 +1186,7 @@ T Sum (N n, T const* v, T init_val = 0)
 
 template <typename T, typename N, typename F,
           typename M=std::enable_if_t<std::is_integral<N>::value> >
-T Min (N n, F&& f, T init_val = std::numeric_limits<T>::max())
+T Min (N n, F const& f, T init_val = std::numeric_limits<T>::max())
 {
     T r = init_val;
 #ifdef AMREX_USE_OMP
@@ -1206,7 +1206,7 @@ T Min (N n, T const* v, T init_val = std::numeric_limits<T>::max())
 
 template <typename T, typename N, typename F,
           typename M=std::enable_if_t<std::is_integral<N>::value> >
-T Max (N n, F&& f, T init_val = std::numeric_limits<T>::lowest())
+T Max (N n, F const& f, T init_val = std::numeric_limits<T>::lowest())
 {
     T r = init_val;
 #ifdef AMREX_USE_OMP
@@ -1226,7 +1226,7 @@ T Max (N n, T const* v, T init_val = std::numeric_limits<T>::lowest())
 
 template <typename T, typename N, typename F,
           typename M=std::enable_if_t<std::is_integral<N>::value> >
-std::pair<T,T> Min (N n, F&& f)
+std::pair<T,T> Min (N n, F const& f)
 {
     T r_min = std::numeric_limits<T>::max();
     T r_max = std::numeric_limits<T>::lowest();
@@ -1250,11 +1250,11 @@ std::pair<T,T> MinMax (N n, T const* v)
 template <typename T, typename N, typename P, typename M=std::enable_if_t<std::is_integral<N>::value> >
 bool AnyOf (N n, T const* v, P&& pred)
 {
-    return std::any_of(v, v+n, pred);
+    return std::any_of(v, v+n, std::forward<P>(pred));
 }
 
 template <typename P>
-bool AnyOf (Box const& box, P&&pred)
+bool AnyOf (Box const& box, P const& pred)
 {
     const auto lo = amrex::lbound(box);
     const auto hi = amrex::ubound(box);
diff --git a/Src/Base/AMReX_RungeKutta.H b/Src/Base/AMReX_RungeKutta.H
index d68bf00bfb4..2dc8514f9c6 100644
--- a/Src/Base/AMReX_RungeKutta.H
+++ b/Src/Base/AMReX_RungeKutta.H
@@ -155,8 +155,8 @@ void rk4_update_4 (MF& Unew, MF const& Uold, Array<MF,4> const& rkk, Real dt6)
  * \param post_stage post-processing stage results
  */
 template <typename MF, typename F, typename FB, typename P = PostStageNoOp>
-void RK2 (MF& Uold, MF& Unew, Real time, Real dt, F&& frhs, FB&& fillbndry,
-          P&& post_stage = PostStageNoOp())
+void RK2 (MF& Uold, MF& Unew, Real time, Real dt, F const& frhs, FB const& fillbndry,
+          P const& post_stage = PostStageNoOp())
 {
     BL_PROFILE("RungeKutta2");
 
@@ -193,8 +193,8 @@ void RK2 (MF& Uold, MF& Unew, Real time, Real dt, F&& frhs, FB&& fillbndry,
  */
 template <typename MF, typename F, typename FB, typename R,
           typename P = PostStageNoOp>
-void RK3 (MF& Uold, MF& Unew, Real time, Real dt, F&& frhs, FB&& fillbndry,
-          R&& store_crse_data, P&& post_stage = PostStageNoOp())
+void RK3 (MF& Uold, MF& Unew, Real time, Real dt, F const& frhs, FB const& fillbndry,
+          R const& store_crse_data, P const& post_stage = PostStageNoOp())
 {
     BL_PROFILE("RungeKutta3");
 
@@ -243,8 +243,8 @@ void RK3 (MF& Uold, MF& Unew, Real time, Real dt, F&& frhs, FB&& fillbndry,
  */
 template <typename MF, typename F, typename FB, typename R,
           typename P = PostStageNoOp>
-void RK4 (MF& Uold, MF& Unew, Real time, Real dt, F&& frhs, FB&& fillbndry,
-          R&& store_crse_data, P&& post_stage = PostStageNoOp())
+void RK4 (MF& Uold, MF& Unew, Real time, Real dt, F const& frhs, FB const& fillbndry,
+          R const& store_crse_data, P const& post_stage = PostStageNoOp())
 {
     BL_PROFILE("RungeKutta4");
 
diff --git a/Src/Base/AMReX_Scan.H b/Src/Base/AMReX_Scan.H
index 4ed44c8c749..0a11157d540 100644
--- a/Src/Base/AMReX_Scan.H
+++ b/Src/Base/AMReX_Scan.H
@@ -187,7 +187,7 @@ struct BlockStatus<T, false>
 
 #ifndef AMREX_SYCL_NO_MULTIPASS_SCAN
 template <typename T, typename N, typename FIN, typename FOUT, typename TYPE>
-T PrefixSum_mp (N n, FIN && fin, FOUT && fout, TYPE, RetSum a_ret_sum)
+T PrefixSum_mp (N n, FIN const& fin, FOUT const& fout, TYPE, RetSum a_ret_sum)
 {
     if (n <= 0) { return 0; }
     constexpr int nwarps_per_block = 8;
@@ -627,7 +627,7 @@ template <typename T, typename N, typename FIN, typename FOUT, typename TYPE,
           typename M=std::enable_if_t<std::is_integral<N>::value &&
                                       (std::is_same<std::decay_t<TYPE>,Type::Inclusive>::value ||
                                        std::is_same<std::decay_t<TYPE>,Type::Exclusive>::value)> >
-T PrefixSum (N n, FIN && fin, FOUT && fout, TYPE, RetSum a_ret_sum = retSum)
+T PrefixSum (N n, FIN const& fin, FOUT const& fout, TYPE, RetSum a_ret_sum = retSum)
 {
     if (n <= 0) { return 0; }
     constexpr int nwarps_per_block = 4;
@@ -770,7 +770,7 @@ template <typename T, typename N, typename FIN, typename FOUT, typename TYPE,
           typename M=std::enable_if_t<std::is_integral<N>::value &&
                                       (std::is_same<std::decay_t<TYPE>,Type::Inclusive>::value ||
                                        std::is_same<std::decay_t<TYPE>,Type::Exclusive>::value)> >
-T PrefixSum (N n, FIN && fin, FOUT && fout, TYPE, RetSum a_ret_sum = retSum)
+T PrefixSum (N n, FIN const& fin, FOUT const& fout, TYPE, RetSum a_ret_sum = retSum)
 {
     if (n <= 0) { return 0; }
     constexpr int nwarps_per_block = 8;
@@ -903,7 +903,7 @@ template <typename T, typename N, typename FIN, typename FOUT, typename TYPE,
           typename M=std::enable_if_t<std::is_integral<N>::value &&
                                       (std::is_same<std::decay_t<TYPE>,Type::Inclusive>::value ||
                                        std::is_same<std::decay_t<TYPE>,Type::Exclusive>::value)> >
-T PrefixSum (N n, FIN && fin, FOUT && fout, TYPE, RetSum a_ret_sum = retSum)
+T PrefixSum (N n, FIN const& fin, FOUT const& fout, TYPE, RetSum a_ret_sum = retSum)
 {
     if (n <= 0) { return 0; }
     constexpr int nwarps_per_block = 4;
@@ -1284,7 +1284,7 @@ template <typename T, typename N, typename FIN, typename FOUT, typename TYPE,
           typename M=std::enable_if_t<std::is_integral<N>::value &&
                                       (std::is_same<std::decay_t<TYPE>,Type::Inclusive>::value ||
                                        std::is_same<std::decay_t<TYPE>,Type::Exclusive>::value)> >
-T PrefixSum (N n, FIN && fin, FOUT && fout, TYPE, RetSum = retSum)
+T PrefixSum (N n, FIN const& fin, FOUT const& fout, TYPE, RetSum = retSum)
 {
     if (n <= 0) { return 0; }
     T totalsum = 0;
diff --git a/Src/Base/AMReX_Tuple.H b/Src/Base/AMReX_Tuple.H
index 93b3136aa6d..fd56d227e7c 100644
--- a/Src/Base/AMReX_Tuple.H
+++ b/Src/Base/AMReX_Tuple.H
@@ -270,7 +270,7 @@ namespace detail {
 
     template <typename R, typename TP1, typename TP2, std::size_t... N1, std::size_t... N2>
     AMREX_GPU_HOST_DEVICE constexpr R
-    make_tuple (TP1 && a, TP2 && b,
+    make_tuple (TP1 const& a, TP2 const& b,
                 std::index_sequence<N1...> const& /*n1*/, std::index_sequence<N2...> const& /*n2*/)
     {
         return R(amrex::get<N1>(a)..., amrex::get<N2>(b)...);
@@ -297,7 +297,7 @@ TupleCat (TP1 && a, TP2 && b) -> typename detail::tuple_cat_result<detail::tuple
     using ReturnType =  typename detail::tuple_cat_result<detail::tuple_decay_t<TP1>,
                                                           detail::tuple_decay_t<TP2> >::type;
     return detail::make_tuple<ReturnType>
-        (a, b,
+        (std::forward<TP1>(a), std::forward<TP2>(b),
          std::make_index_sequence<GpuTupleSize<typename std::decay<TP1>::type>::value>(),
          std::make_index_sequence<GpuTupleSize<typename std::decay<TP2>::type>::value>());
 }
@@ -345,7 +345,7 @@ namespace detail {
     apply_impl (F&& f, TP&& t, std::index_sequence<N...> /*is*/)
         -> typename detail::apply_result<F,detail::tuple_decay_t<TP> >::type
     {
-        return f(amrex::get<N>(std::forward<TP>(t))...);
+        return std::forward<F>(f)(amrex::get<N>(std::forward<TP>(t))...);
     }
 }
 
diff --git a/Src/Base/AMReX_TypeList.H b/Src/Base/AMReX_TypeList.H
index 8aab1184141..75201087b7d 100644
--- a/Src/Base/AMReX_TypeList.H
+++ b/Src/Base/AMReX_TypeList.H
@@ -34,13 +34,13 @@ using TypeAt = typename detail::TypeListGet<I,T>::type;
 namespace detail
 {
     template <typename TL, typename F, std::size_t...N>
-    constexpr void for_each_impl (F&&f, std::index_sequence<N...>)
+    constexpr void for_each_impl (F const&f, std::index_sequence<N...>)
     {
         (f(TypeAt<N,TL>{}), ...);
     }
 
     template <typename TL, typename F, std::size_t...N>
-    constexpr bool for_each_until_impl (F&&f, std::index_sequence<N...>)
+    constexpr bool for_each_until_impl (F const&f, std::index_sequence<N...>)
     {
         return (f(TypeAt<N,TL>{}) || ...);
     }
diff --git a/Src/Base/Parser/AMReX_Parser_Y.cpp b/Src/Base/Parser/AMReX_Parser_Y.cpp
index dc9caf9133d..5bd99d036be 100644
--- a/Src/Base/Parser/AMReX_Parser_Y.cpp
+++ b/Src/Base/Parser/AMReX_Parser_Y.cpp
@@ -527,7 +527,7 @@ namespace {
 
     template <typename F>
     bool group_combinables (struct parser_node*& a, struct parser_node*& b,
-                            F&& f, parser_node_t type)
+                            F const& f, parser_node_t type)
     {
         if (a->type == type && f(a->l, b))
         {
diff --git a/Src/EB/AMReX_EB2_IF_Intersection.H b/Src/EB/AMReX_EB2_IF_Intersection.H
index 4da055ba7ef..58b23d920f7 100644
--- a/Src/EB/AMReX_EB2_IF_Intersection.H
+++ b/Src/EB/AMReX_EB2_IF_Intersection.H
@@ -19,27 +19,27 @@ namespace IIF_detail {
     template <typename F>
     [[nodiscard]] inline Real do_min (const RealArray& p, F&& f) noexcept
     {
-        return f(p);
+        return std::forward<F>(f)(p);
     }
 
     template <typename F, typename... Fs>
     [[nodiscard]] inline Real do_min (const RealArray& p, F&& f, Fs&... fs) noexcept
     {
-        return amrex::min(f(p), do_min(p, std::forward<Fs>(fs)...));
+        return amrex::min(std::forward<F>(f)(p), do_min(p, std::forward<Fs>(fs)...));
     }
 
     template <typename F>
     [[nodiscard]] AMREX_GPU_HOST_DEVICE inline
     Real do_min (AMREX_D_DECL(Real x, Real y, Real z), F&& f) noexcept
     {
-        return f(AMREX_D_DECL(x,y,z));
+        return std::forward<F>(f)(AMREX_D_DECL(x,y,z));
     }
 
     template <typename F, typename... Fs>
     [[nodiscard]] AMREX_GPU_HOST_DEVICE inline
     Real do_min (AMREX_D_DECL(Real x, Real y, Real z), F&& f, Fs&... fs)
     {
-        return amrex::min(f(AMREX_D_DECL(x,y,z)), do_min(AMREX_D_DECL(x,y,z), std::forward<Fs>(fs)...));
+        return amrex::min(std::forward<F>(f)(AMREX_D_DECL(x,y,z)), do_min(AMREX_D_DECL(x,y,z), std::forward<Fs>(fs)...));
     }
 }
 
diff --git a/Src/EB/AMReX_EB2_IF_Union.H b/Src/EB/AMReX_EB2_IF_Union.H
index 0c8b38cb80d..5ed391760a4 100644
--- a/Src/EB/AMReX_EB2_IF_Union.H
+++ b/Src/EB/AMReX_EB2_IF_Union.H
@@ -19,27 +19,27 @@ namespace UIF_detail {
     template <typename F>
     [[nodiscard]] inline Real do_max (const RealArray& p, F&& f) noexcept
     {
-        return f(p);
+        return std::forward<F>(f)(p);
     }
 
     template <typename F, typename... Fs>
     [[nodiscard]] inline Real do_max (const RealArray& p, F&& f, Fs&... fs) noexcept
     {
-        return amrex::max(f(p), do_max(p, std::forward<Fs>(fs)...));
+        return amrex::max(std::forward<F>(f)(p), do_max(p, std::forward<Fs>(fs)...));
     }
 
     template <typename F>
     [[nodiscard]] AMREX_GPU_HOST_DEVICE inline
     Real do_max (AMREX_D_DECL(Real x, Real y, Real z), F&& f) noexcept
     {
-        return f(AMREX_D_DECL(x,y,z));
+        return std::forward<F>(f)(AMREX_D_DECL(x,y,z));
     }
 
     template <typename F, typename... Fs>
     [[nodiscard]] AMREX_GPU_HOST_DEVICE inline
     Real do_max (AMREX_D_DECL(Real x, Real y, Real z), F&& f, Fs&... fs) noexcept
     {
-        return amrex::max(f(AMREX_D_DECL(x,y,z)), do_max(AMREX_D_DECL(x,y,z), std::forward<Fs>(fs)...));
+        return amrex::max(std::forward<F>(f)(AMREX_D_DECL(x,y,z)), do_max(AMREX_D_DECL(x,y,z), std::forward<Fs>(fs)...));
     }
 }
 
diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLap_2D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLap_2D_K.H
index e9176b94864..c23797f8f7b 100644
--- a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLap_2D_K.H
+++ b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLap_2D_K.H
@@ -24,7 +24,7 @@ void mlebndfdlap_adotx_eb_doit (int i, int j, int k, Array4<Real> const& y,
                                 Array4<Real const> const& x, Array4<Real const> const& levset,
                                 Array4<int const> const& dmsk,
                                 Array4<Real const> const& ecx, Array4<Real const> const& ecy,
-                                F && xeb, Real bx, Real by) noexcept
+                                F const& xeb, Real bx, Real by) noexcept
 {
     if (dmsk(i,j,k)) {
         y(i,j,k) = Real(0.0);
@@ -199,7 +199,7 @@ void mlebndfdlap_adotx_rz_eb_doit (int i, int j, int k, Array4<Real> const& y,
                                    Array4<Real const> const& x, Array4<Real const> const& levset,
                                    Array4<int const> const& dmsk,
                                    Array4<Real const> const& ecx, Array4<Real const> const& ecy,
-                                   F && xeb, Real sigr, Real dr, Real dz, Real rlo, Real alpha) noexcept
+                                   F const& xeb, Real sigr, Real dr, Real dz, Real rlo, Real alpha) noexcept
 {
     Real const r = rlo + Real(i) * dr;
     if (dmsk(i,j,k) || (r == Real(0.0) && alpha != Real(0.0))) {
diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLap_3D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLap_3D_K.H
index 8d00c5f1cbb..bebaa33bbc2 100644
--- a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLap_3D_K.H
+++ b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLap_3D_K.H
@@ -26,7 +26,7 @@ void mlebndfdlap_adotx_eb_doit (int i, int j, int k, Array4<Real> const& y,
                                 Array4<Real const> const& x, Array4<Real const> const& levset,
                                 Array4<int const> const& dmsk,
                                 Array4<Real const> const& ecx, Array4<Real const> const& ecy,
-                                Array4<Real const> const& ecz, F && xeb,
+                                Array4<Real const> const& ecz, F const& xeb,
                                 Real bx, Real by, Real bz) noexcept
 {
     if (dmsk(i,j,k)) {
diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLap_K.H b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLap_K.H
index 1d081760389..d389af59d18 100644
--- a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLap_K.H
+++ b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLap_K.H
@@ -19,7 +19,7 @@ template <typename F>
 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
 void mlebndfdlap_grad_x_doit (int i, int j, int k, Array4<Real> const& px,
                               Array4<Real const> const& p, Array4<int const> const& dmsk,
-                              Array4<Real const> const& ecx, F&& phieb, Real dxi)
+                              Array4<Real const> const& ecx, F const& phieb, Real dxi)
 {
     if (dmsk(i,j,k) >= 0 && dmsk(i+1,j,k) >= 0) {
         px(i,j,k) = dxi * (p(i+1,j,k) - p(i,j,k));
@@ -36,7 +36,7 @@ template <typename F>
 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
 void mlebndfdlap_grad_y_doit (int i, int j, int k, Array4<Real> const& py,
                               Array4<Real const> const& p, Array4<int const> const& dmsk,
-                              Array4<Real const> const& ecy, F&& phieb, Real dyi)
+                              Array4<Real const> const& ecy, F const& phieb, Real dyi)
 {
     if (dmsk(i,j,k) >= 0 && dmsk(i,j+1,k) >= 0) {
         py(i,j,k) = dyi * (p(i,j+1,k) - p(i,j,k));
@@ -54,7 +54,7 @@ template <typename F>
 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
 void mlebndfdlap_grad_z_doit (int i, int j, int k, Array4<Real> const& pz,
                               Array4<Real const> const& p, Array4<int const> const& dmsk,
-                              Array4<Real const> const& ecz, F&& phieb, Real dzi)
+                              Array4<Real const> const& ecz, F const& phieb, Real dzi)
 {
     if (dmsk(i,j,k) >= 0 && dmsk(i,j,k+1) >= 0) {
         pz(i,j,k) = dzi * (p(i,j,k+1) - p(i,j,k));
diff --git a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.H b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.H
index d63267ea89a..6e7559d21d9 100644
--- a/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.H
+++ b/Src/LinearSolvers/MLMG/AMReX_MLEBNodeFDLaplacian.H
@@ -61,7 +61,7 @@ public:
     //
     template <typename F>
     std::enable_if_t<IsCallableR<Real,F,AMREX_D_DECL(Real,Real,Real)>::value>
-    setEBDirichlet (F&& f);
+    setEBDirichlet (F const& f);
 
     void define (const Vector<Geometry>& a_geom,
                  const Vector<BoxArray>& a_grids,
@@ -126,7 +126,7 @@ private:
 
 template <typename F>
 std::enable_if_t<IsCallableR<Real,F,AMREX_D_DECL(Real,Real,Real)>::value>
-MLEBNodeFDLaplacian::setEBDirichlet (F&& f)
+MLEBNodeFDLaplacian::setEBDirichlet (F const& f)
 {
     m_phi_eb.resize(m_num_amr_levels);
     for (int amrlev = 0; amrlev < m_num_amr_levels; ++amrlev) {
diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLap_2D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLNodeLap_2D_K.H
index 32c75224e78..b251de33b7a 100644
--- a/Src/LinearSolvers/MLMG/AMReX_MLNodeLap_2D_K.H
+++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLap_2D_K.H
@@ -1376,7 +1376,7 @@ void mlndlap_crse_resid (int i, int j, int k, Array4<Real> const& resid,
 namespace {
     template <typename P, typename S>
     AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-    Real mlndlap_sum_Ax (P && pred, S && sig, int i, int j, Real facx, Real facy,
+    Real mlndlap_sum_Ax (P const& pred, S const& sig, int i, int j, Real facx, Real facy,
                          Array4<Real const> const& phi, bool is_rz) noexcept
     {
         Real Ax = Real(0.0);
@@ -1418,7 +1418,7 @@ namespace {
 
     template <int rr, typename S>
     AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-    void mlndlap_Ax_fine_contrib_doit (S&& sig, int i, int j, Box const& ndbx, Box const& ccbx,
+    void mlndlap_Ax_fine_contrib_doit (S const& sig, int i, int j, Box const& ndbx, Box const& ccbx,
                                        Array4<Real> const& f, Array4<Real const> const& res,
                                        Array4<Real const> const& rhs,
                                        Array4<Real const> const& phi,
diff --git a/Src/LinearSolvers/MLMG/AMReX_MLNodeLap_3D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLNodeLap_3D_K.H
index da16ff58b13..2824b26f894 100644
--- a/Src/LinearSolvers/MLMG/AMReX_MLNodeLap_3D_K.H
+++ b/Src/LinearSolvers/MLMG/AMReX_MLNodeLap_3D_K.H
@@ -2482,7 +2482,7 @@ void mlndlap_crse_resid (int i, int j, int k, Array4<Real> const& resid,
 namespace {
     template <typename P, typename S>
     AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-    Real mlndlap_sum_Ax (P && pred, S && sig,
+    Real mlndlap_sum_Ax (P const& pred, S const& sig,
                          int i, int j, int k, Real facx, Real facy, Real facz,
                          Array4<Real const> const& phi) noexcept
     {
@@ -2604,7 +2604,7 @@ namespace {
 
     template <int rr, typename S>
     AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-    void mlndlap_Ax_fine_contrib_doit (S&& sig,
+    void mlndlap_Ax_fine_contrib_doit (S const& sig,
                                        int i, int j, int k, Box const& ndbx, Box const& ccbx,
                                        Array4<Real> const& f, Array4<Real const> const& res,
                                        Array4<Real const> const& rhs,
diff --git a/Src/Particle/AMReX_DenseBins.H b/Src/Particle/AMReX_DenseBins.H
index 3bdd398fee9..f55217ed2b9 100644
--- a/Src/Particle/AMReX_DenseBins.H
+++ b/Src/Particle/AMReX_DenseBins.H
@@ -182,7 +182,7 @@ public:
      * \param f a function object that maps items to bins
      */
     template <typename N, typename F>
-    void build (BinPolicy::GPUBinPolicy, N nitems, const_pointer_input_type v, const Box& bx, F&& f)
+    void build (BinPolicy::GPUBinPolicy, N nitems, const_pointer_input_type v, const Box& bx, F const& f)
     {
         const auto lo = lbound(bx);
         const auto hi = ubound(bx);
@@ -223,7 +223,7 @@ public:
      * \param f a function object that maps items to bins
      */
     template <typename N, typename F>
-    void build (BinPolicy::GPUBinPolicy, N nitems, const_pointer_input_type v, int nbins, F&& f)
+    void build (BinPolicy::GPUBinPolicy, N nitems, const_pointer_input_type v, int nbins, F const& f)
     {
         BL_PROFILE("DenseBins<T>::buildGPU");
 
@@ -286,7 +286,7 @@ public:
      * \param f a function object that maps items to bins
      */
     template <typename N, typename F>
-    void build (BinPolicy::OpenMPBinPolicy, N nitems, const_pointer_input_type v, const Box& bx, F&& f)
+    void build (BinPolicy::OpenMPBinPolicy, N nitems, const_pointer_input_type v, const Box& bx, F const& f)
     {
         const auto lo = lbound(bx);
         const auto hi = ubound(bx);
@@ -328,7 +328,7 @@ public:
      * \param f a function object that maps items to bins
      */
     template <typename N, typename F>
-    void build (BinPolicy::OpenMPBinPolicy, N nitems, const_pointer_input_type v, int nbins, F&& f)
+    void build (BinPolicy::OpenMPBinPolicy, N nitems, const_pointer_input_type v, int nbins, F const& f)
     {
         BL_PROFILE("DenseBins<T>::buildOpenMP");
 
@@ -425,7 +425,7 @@ public:
      * \param f a function object that maps items to bins
      */
     template <typename N, typename F>
-    void build (BinPolicy::SerialBinPolicy, N nitems, const_pointer_input_type v, const Box& bx, F&& f)
+    void build (BinPolicy::SerialBinPolicy, N nitems, const_pointer_input_type v, const Box& bx, F const& f)
     {
         const auto lo = lbound(bx);
         const auto hi = ubound(bx);
@@ -467,7 +467,7 @@ public:
      * \param f a function object that maps items to bins
      */
     template <typename N, typename F>
-    void build (BinPolicy::SerialBinPolicy, N nitems, const_pointer_input_type v, int nbins, F&& f)
+    void build (BinPolicy::SerialBinPolicy, N nitems, const_pointer_input_type v, int nbins, F const& f)
     {
         BL_PROFILE("DenseBins<T>::buildSerial");
 
diff --git a/Src/Particle/AMReX_ParticleCommunication.H b/Src/Particle/AMReX_ParticleCommunication.H
index ca222f64187..b627e65ca5f 100644
--- a/Src/Particle/AMReX_ParticleCommunication.H
+++ b/Src/Particle/AMReX_ParticleCommunication.H
@@ -395,7 +395,7 @@ void packBuffer (const PC& pc, const ParticleCopyOp& op, const ParticleCopyPlan&
 
 template <class PC, class Buffer, class UnpackPolicy,
           std::enable_if_t<IsParticleContainer<PC>::value, int> foo = 0>
-void unpackBuffer (PC& pc, const ParticleCopyPlan& plan, const Buffer& snd_buffer, const UnpackPolicy&& policy)
+void unpackBuffer (PC& pc, const ParticleCopyPlan& plan, const Buffer& snd_buffer, UnpackPolicy const& policy)
 {
     BL_PROFILE("amrex::unpackBuffer");
 
@@ -555,7 +555,7 @@ void communicateParticlesFinish (const ParticleCopyPlan& plan);
 
 template <class PC, class Buffer, class UnpackPolicy,
           std::enable_if_t<IsParticleContainer<PC>::value, int> foo = 0>
-void unpackRemotes (PC& pc, const ParticleCopyPlan& plan, Buffer& rcv_buffer, UnpackPolicy&& policy)
+void unpackRemotes (PC& pc, const ParticleCopyPlan& plan, Buffer& rcv_buffer, UnpackPolicy const& policy)
 {
     BL_PROFILE("amrex::unpackRemotes");
 
diff --git a/Src/Particle/AMReX_ParticleContainer.H b/Src/Particle/AMReX_ParticleContainer.H
index 966ca0a8441..b2af8deaf56 100644
--- a/Src/Particle/AMReX_ParticleContainer.H
+++ b/Src/Particle/AMReX_ParticleContainer.H
@@ -711,7 +711,7 @@ public:
     template <class F, class PCType,
               std::enable_if_t<IsParticleContainer<PCType>::value, int> foo = 0,
               std::enable_if_t<! std::is_integral<F>::value, int> bar = 0>
-    void addParticles (const PCType& other, F&& f, bool local=false);
+    void addParticles (const PCType& other, F const& f, bool local=false);
 
     /**
     * \brief Write a contiguous chunk of real particle data to an ostream.
diff --git a/Src/Particle/AMReX_ParticleContainerI.H b/Src/Particle/AMReX_ParticleContainerI.H
index fe83ce5ba1c..1de3edcef1a 100644
--- a/Src/Particle/AMReX_ParticleContainerI.H
+++ b/Src/Particle/AMReX_ParticleContainerI.H
@@ -1027,7 +1027,7 @@ template <class F, class PCType,
           std::enable_if_t<! std::is_integral<F>::value, int> bar>
 void
 ParticleContainer_impl<ParticleType, NArrayReal, NArrayInt, Allocator, CellAssignor>::
-addParticles (const PCType& other, F&& f, bool local)
+addParticles (const PCType& other, F const& f, bool local)
 {
     BL_PROFILE("ParticleContainer::addParticles");
 
diff --git a/Src/Particle/AMReX_ParticleLocator.H b/Src/Particle/AMReX_ParticleLocator.H
index 44bc4904c66..0a07c85553d 100644
--- a/Src/Particle/AMReX_ParticleLocator.H
+++ b/Src/Particle/AMReX_ParticleLocator.H
@@ -46,7 +46,7 @@ struct AssignGrid
 
     template <typename P, typename Assignor = DefaultAssignor>
     AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-    int operator() (const P& p, int nGrow=0, Assignor&& assignor = Assignor{}) const noexcept
+    int operator() (const P& p, int nGrow=0, Assignor const& assignor = Assignor{}) const noexcept
     {
         const auto iv = assignor(p, m_plo, m_dxi, m_domain);
         return this->operator()(iv, nGrow);
@@ -213,7 +213,7 @@ struct AmrAssignGrid
     template <typename P, typename Assignor = DefaultAssignor>
     AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
     GpuTuple<int, int> operator() (const P& p, int lev_min=-1, int lev_max=-1, int nGrow=0,
-                                   Assignor&& assignor = {}) const noexcept
+                                   Assignor const& assignor = {}) const noexcept
     {
         lev_min = (lev_min == -1) ? 0 : lev_min;
         lev_max = (lev_max == -1) ? m_size - 1 : lev_max;
diff --git a/Src/Particle/AMReX_ParticleMesh.H b/Src/Particle/AMReX_ParticleMesh.H
index 536804a7bcb..8c0d56da487 100644
--- a/Src/Particle/AMReX_ParticleMesh.H
+++ b/Src/Particle/AMReX_ParticleMesh.H
@@ -40,7 +40,7 @@ auto call_f (F const& f,
 
 template <class PC, class MF, class F, std::enable_if_t<IsParticleContainer<PC>::value, int> foo = 0>
 void
-ParticleToMesh (PC const& pc, MF& mf, int lev, F&& f, bool zero_out_input=true)
+ParticleToMesh (PC const& pc, MF& mf, int lev, F const& f, bool zero_out_input=true)
 {
     BL_PROFILE("amrex::ParticleToMesh");
 
@@ -125,7 +125,7 @@ ParticleToMesh (PC const& pc, MF& mf, int lev, F&& f, bool zero_out_input=true)
 
 template <class PC, class MF, class F, std::enable_if_t<IsParticleContainer<PC>::value, int> foo = 0>
 void
-MeshToParticle (PC& pc, MF const& mf, int lev, F&& f)
+MeshToParticle (PC& pc, MF const& mf, int lev, F const& f)
 {
     BL_PROFILE("amrex::MeshToParticle");
 
diff --git a/Src/Particle/AMReX_ParticleReduce.H b/Src/Particle/AMReX_ParticleReduce.H
index 9fafcd5de76..a05ebcba3ec 100644
--- a/Src/Particle/AMReX_ParticleReduce.H
+++ b/Src/Particle/AMReX_ParticleReduce.H
@@ -173,7 +173,7 @@ ReduceSum (PC const& pc, int lev, F&& f)
  */
 template <class PC, class F, std::enable_if_t<IsParticleContainer<PC>::value, int> foo = 0>
 auto
-ReduceSum (PC const& pc, int lev_min, int lev_max, F&& f)
+ReduceSum (PC const& pc, int lev_min, int lev_max, F const& f)
     -> decltype(particle_detail::call_f(f, typename PC::ParticleTileType::ConstParticleTileDataType(), int()))
 {
     using value_type = decltype(particle_detail::call_f(f, typename PC::ParticleTileType::ConstParticleTileDataType(), int()));
@@ -373,7 +373,7 @@ ReduceMax (PC const& pc, int lev, F&& f)
  */
 template <class PC, class F, std::enable_if_t<IsParticleContainer<PC>::value, int> foo = 0>
 auto
-ReduceMax (PC const& pc, int lev_min, int lev_max, F&& f)
+ReduceMax (PC const& pc, int lev_min, int lev_max, F const& f)
     -> decltype(particle_detail::call_f(f, typename PC::ParticleTileType::ConstParticleTileDataType(), int()))
 {
     using value_type = decltype(particle_detail::call_f(f, typename PC::ParticleTileType::ConstParticleTileDataType(), int()));
@@ -574,7 +574,7 @@ ReduceMin (PC const& pc, int lev, F&& f)
  */
 template <class PC, class F, std::enable_if_t<IsParticleContainer<PC>::value, int> foo = 0>
 auto
-ReduceMin (PC const& pc, int lev_min, int lev_max, F&& f)
+ReduceMin (PC const& pc, int lev_min, int lev_max, F const& f)
     -> decltype(particle_detail::call_f(f, typename PC::ParticleTileType::ConstParticleTileDataType(), int()))
 {
     using value_type = decltype(particle_detail::call_f(f, typename PC::ParticleTileType::ConstParticleTileDataType(), int()));
@@ -773,7 +773,7 @@ ReduceLogicalAnd (PC const& pc, int lev, F&& f)
  */
 template <class PC, class F, std::enable_if_t<IsParticleContainer<PC>::value, int> foo = 0>
 bool
-ReduceLogicalAnd (PC const& pc, int lev_min, int lev_max, F&& f)
+ReduceLogicalAnd (PC const& pc, int lev_min, int lev_max, F const& f)
 {
     int r = true;
 
@@ -969,7 +969,7 @@ ReduceLogicalOr (PC const& pc, int lev, F&& f)
  */
 template <class PC, class F, std::enable_if_t<IsParticleContainer<PC>::value, int> foo = 0>
 bool
-ReduceLogicalOr (PC const& pc, int lev_min, int lev_max, F&& f)
+ReduceLogicalOr (PC const& pc, int lev_min, int lev_max, F const& f)
 {
     int r = false;
 
@@ -1235,7 +1235,7 @@ ParticleReduce (PC const& pc, int lev, F&& f, ReduceOps& reduce_ops)
 template <class RD, class PC, class F, class ReduceOps,
           std::enable_if_t<IsParticleContainer<PC>::value, int> foo = 0>
 typename RD::Type
-ParticleReduce (PC const& pc, int lev_min, int lev_max, F&& f, ReduceOps& reduce_ops)
+ParticleReduce (PC const& pc, int lev_min, int lev_max, F const& f, ReduceOps& reduce_ops)
 {
     RD reduce_data(reduce_ops);
     for (int lev = lev_min; lev <= lev_max; ++lev) {
diff --git a/Src/Particle/AMReX_ParticleTransformation.H b/Src/Particle/AMReX_ParticleTransformation.H
index b55f5211eee..41d46957995 100644
--- a/Src/Particle/AMReX_ParticleTransformation.H
+++ b/Src/Particle/AMReX_ParticleTransformation.H
@@ -234,7 +234,7 @@ void transformParticles (DstTile& dst, const SrcTile& src, F&& f) noexcept
 template <typename DstTile, typename SrcTile, typename Index, typename N, typename F,
           std::enable_if_t<std::is_integral<Index>::value, int> foo = 0>
 void transformParticles (DstTile& dst, const SrcTile& src,
-                         Index src_start, Index dst_start, N n, F&& f) noexcept
+                         Index src_start, Index dst_start, N n, F const& f) noexcept
 {
     const auto src_data = src.getConstParticleTileData();
           auto dst_data = dst.getParticleTileData();
@@ -294,7 +294,7 @@ template <typename DstTile1, typename DstTile2, typename SrcTile,
           typename Index, typename N, typename F,
           std::enable_if_t<std::is_integral<Index>::value, int> foo = 0>
 void transformParticles (DstTile1& dst1, DstTile2& dst2, const SrcTile& src,
-                         Index src_start, Index dst1_start, Index dst2_start, N n, F&& f) noexcept
+                         Index src_start, Index dst1_start, Index dst2_start, N n, F const& f) noexcept
 {
     const auto src_data  = src.getConstParticleTileData();
           auto dst1_data = dst1.getParticleTileData();
@@ -382,10 +382,11 @@ Index filterParticles (DstTile& dst, const SrcTile& src, const Index* mask,
  * \param p predicate function - particles will be copied if p returns true
  *
  */
-template <typename DstTile, typename SrcTile, typename Pred>
+template <typename DstTile, typename SrcTile, typename Pred,
+          std::enable_if_t<!std::is_pointer_v<std::decay_t<Pred>>,int> foo = 0>
 int filterParticles (DstTile& dst, const SrcTile& src, Pred&& p) noexcept
 {
-    return filterParticles(dst, src, p, 0, 0, src.numParticles());
+    return filterParticles(dst, src, std::forward<Pred>(p), 0, 0, src.numParticles());
 }
 
 /**
@@ -407,7 +408,7 @@ int filterParticles (DstTile& dst, const SrcTile& src, Pred&& p) noexcept
  */
 template <typename DstTile, typename SrcTile, typename Pred, typename Index, typename N,
           std::enable_if_t<!std::is_pointer_v<std::decay_t<Pred>>,Index> nvccfoo = 0>
-Index filterParticles (DstTile& dst, const SrcTile& src, Pred&& p,
+Index filterParticles (DstTile& dst, const SrcTile& src, Pred const& p,
                        Index src_start, Index dst_start, N n) noexcept
 {
     Gpu::DeviceVector<Index> mask(n);
@@ -445,7 +446,7 @@ Index filterParticles (DstTile& dst, const SrcTile& src, Pred&& p,
  */
 template <typename DstTile, typename SrcTile, typename Index, typename F,
           std::enable_if_t<std::is_integral<Index>::value, int> foo = 0>
-Index filterAndTransformParticles (DstTile& dst, const SrcTile& src, Index* mask, F&& f,
+Index filterAndTransformParticles (DstTile& dst, const SrcTile& src, Index* mask, F const& f,
                                    Index src_start, Index dst_start) noexcept
 {
     auto np = src.numParticles();
@@ -510,7 +511,8 @@ Index filterAndTransformParticles (DstTile& dst, const SrcTile& src, Index* mask
  * \param f defines the transformation that will be applied to the particles on copy
  *
  */
-template <typename DstTile, typename SrcTile, typename Pred, typename F>
+template <typename DstTile, typename SrcTile, typename Pred, typename F,
+          std::enable_if_t<!std::is_pointer_v<std::decay_t<Pred>>,int> foo = 0>
 int filterAndTransformParticles (DstTile& dst, const SrcTile& src, Pred&& p, F&& f) noexcept
 {
     return filterAndTransformParticles(dst, src, std::forward<Pred>(p), std::forward<F>(f), 0, 0);
@@ -536,7 +538,7 @@ int filterAndTransformParticles (DstTile& dst, const SrcTile& src, Pred&& p, F&&
 template <typename DstTile1, typename DstTile2, typename SrcTile, typename Index, typename F,
           std::enable_if_t<std::is_integral<Index>::value, int> foo = 0>
 Index filterAndTransformParticles (DstTile1& dst1, DstTile2& dst2,
-                                   const SrcTile& src, Index* mask, F&& f) noexcept
+                                   const SrcTile& src, Index* mask, F const& f) noexcept
 {
     auto np = src.numParticles();
     Gpu::DeviceVector<Index> offsets(np);
@@ -578,9 +580,10 @@ Index filterAndTransformParticles (DstTile1& dst1, DstTile2& dst2,
  * \param f defines the transformation that will be applied to the particles on copy
  *
  */
-template <typename DstTile1, typename DstTile2, typename SrcTile, typename Pred, typename F>
+template <typename DstTile1, typename DstTile2, typename SrcTile, typename Pred, typename F,
+          std::enable_if_t<!std::is_pointer_v<std::decay_t<Pred>>, int> foo = 0>
 int filterAndTransformParticles (DstTile1& dst1, DstTile2& dst2, const SrcTile& src,
-                                 Pred&& p, F&& f) noexcept
+                                 Pred const& p, F&& f) noexcept
 {
     auto np = src.numParticles();
     Gpu::DeviceVector<int> mask(np);
@@ -620,7 +623,7 @@ int filterAndTransformParticles (DstTile1& dst1, DstTile2& dst2, const SrcTile&
  */
 template <typename DstTile, typename SrcTile, typename Pred, typename F, typename Index,
           std::enable_if_t<!std::is_pointer_v<std::decay_t<Pred>>,Index> nvccfoo = 0>
-Index filterAndTransformParticles (DstTile& dst, const SrcTile& src, Pred&& p, F&& f,
+Index filterAndTransformParticles (DstTile& dst, const SrcTile& src, Pred const& p, F&& f,
                                    Index src_start, Index dst_start) noexcept
 {
     auto np = src.numParticles();
diff --git a/Src/Particle/AMReX_ParticleUtil.H b/Src/Particle/AMReX_ParticleUtil.H
index 037fb48dcf6..0d6954d61f5 100644
--- a/Src/Particle/AMReX_ParticleUtil.H
+++ b/Src/Particle/AMReX_ParticleUtil.H
@@ -470,7 +470,7 @@ bool enforcePeriodic (P& p,
  */
 template <typename PTile, typename ParFunc>
 int
-partitionParticles (PTile& ptile, ParFunc&& is_left)
+partitionParticles (PTile& ptile, ParFunc const& is_left)
 {
     const int np = ptile.numParticles();
     if (np == 0) { return 0; }
@@ -585,7 +585,7 @@ removeInvalidParticles (PTile& ptile)
 
 template <typename PTile, typename PLocator, typename CellAssignor>
 int
-partitionParticlesByDest (PTile& ptile, const PLocator& ploc, CellAssignor&& assignor,
+partitionParticlesByDest (PTile& ptile, const PLocator& ploc, CellAssignor const& assignor,
                           const ParticleBufferMap& pmap,
                           const GpuArray<Real,AMREX_SPACEDIM>& plo,
                           const GpuArray<Real,AMREX_SPACEDIM>& phi,
@@ -693,7 +693,7 @@ void clearEmptyEntries (C& c)
 
 template <class index_type, typename F>
 void PermutationForDeposition (Gpu::DeviceVector<index_type>& perm, index_type nitems,
-                               index_type nbins, F&& f)
+                               index_type nbins, F const& f)
 {
     BL_PROFILE("PermutationForDeposition()");
 
diff --git a/Src/Particle/AMReX_SparseBins.H b/Src/Particle/AMReX_SparseBins.H
index 6ec80a0a831..a34f77ed332 100644
--- a/Src/Particle/AMReX_SparseBins.H
+++ b/Src/Particle/AMReX_SparseBins.H
@@ -121,7 +121,7 @@ public:
      * \param f a function object that maps items to bins
      */
     template <typename N, typename F>
-    void build (N nitems, const_pointer_input_type v, const Box& bx, F&& f)
+    void build (N nitems, const_pointer_input_type v, const Box& bx, F const& f)
     {
         BL_PROFILE("SparseBins<T>::build");
 
diff --git a/Src/Particle/AMReX_WriteBinaryParticleData.H b/Src/Particle/AMReX_WriteBinaryParticleData.H
index 7c31dff8ef3..3fc658bcff8 100644
--- a/Src/Particle/AMReX_WriteBinaryParticleData.H
+++ b/Src/Particle/AMReX_WriteBinaryParticleData.H
@@ -31,7 +31,7 @@ template <template <class, class> class Container,
           class PTile,
           class F>
 typename std::enable_if<RunOnGpu<typename Container<int, Allocator>::allocator_type>::value>::type
-fillFlags (Container<int, Allocator>& pflags, const PTile& ptile, F&& f)
+fillFlags (Container<int, Allocator>& pflags, const PTile& ptile, F const& f)
 {
     const auto ptd = ptile.getConstParticleTileData();
     const auto np = ptile.numParticles();
@@ -55,7 +55,7 @@ template <template <class, class> class Container,
           class PTile,
           class F>
 typename std::enable_if<!RunOnGpu<typename Container<int, Allocator>::allocator_type>::value>::type
-fillFlags (Container<int, Allocator>& pflags, const PTile& ptile, F&& f)
+fillFlags (Container<int, Allocator>& pflags, const PTile& ptile, F const& f)
 {
     const auto ptd = ptile.getConstParticleTileData();
     const auto np = ptile.numParticles();
@@ -394,7 +394,7 @@ void WriteBinaryParticleDataSync (PC const& pc,
                                   const Vector<int>& write_int_comp,
                                   const Vector<std::string>& real_comp_names,
                                   const Vector<std::string>& int_comp_names,
-                                  F&& f, bool is_checkpoint)
+                                  F const& f, bool is_checkpoint)
 {
     BL_PROFILE("WriteBinaryParticleData()");
     AMREX_ASSERT(pc.OK());