From 30500ea85637e9bcc235449c04bbc9ac8fdb3da1 Mon Sep 17 00:00:00 2001 From: James Osborn Date: Tue, 17 Dec 2024 23:00:47 -0600 Subject: [PATCH] fix HISQ force make argument order consistent for overloads of fat7lDeriv --- src/examples/hisq_force.nim | 180 ++++++++++++++++++++++++++++++++++++ src/gauge/fat7lderiv.nim | 12 +-- src/gauge/hisqsmear.nim | 18 ++-- 3 files changed, 195 insertions(+), 15 deletions(-) create mode 100644 src/examples/hisq_force.nim diff --git a/src/examples/hisq_force.nim b/src/examples/hisq_force.nim new file mode 100644 index 0000000..49dfc6d --- /dev/null +++ b/src/examples/hisq_force.nim @@ -0,0 +1,180 @@ +# C.T. Peterson: force test inspired from conversation with Peter Boyle +# See Grid implementation here: +# -https://github.com/paboyle/Grid/blob/develop/tests/forces/Test_bdy.cc +import qex +import gauge/[hisqsmear] +import physics/[stagD,stagSolve] + +qexInit() + +defaultSetup() +var + sg = lo.newGauge() + sgl = lo.newGauge() + f = lo.newGauge() + ff = lo.newGauge() + p = lo.newGauge() + phi = lo.ColorVector() + psi = lo.ColorVector() + r = lo.newRNGField(RngMilc6,123456789) + mass = 0.1 + eps = 0.001 + spa = initSolverParams() + spf = initSolverParams() + info: PerfInfo +let + hisq = newHisq() + stag = newStag3(sg,sgl) + arsq = 1e-20 + frsq = 1e-12 + +spa.r2req = arsq +spa.maxits = 10000 +spf.r2req = frsq +spf.maxits = 10000 +spf.verbosity = 1 + +# -- Generic + +proc smearRephase(g: auto, sg,sgl: auto): auto {.discardable.} = + tic() + let smearedForce = hisq.smearGetForce(g,sg,sgl) + threads: + sg.setBC; sgl.setBC; + threadBarrier() + sg.stagPhase; sgl.stagPhase; + smearedForce + +proc reTrMul(x,y:auto):auto = + var d: type(eval(toDouble(redot(x[0],y[0])))) + for ir in x: d += redot(x[ir].adj, y[ir]) + result = simdSum(d) + x.l.threadRankSum(result) + +# -- Action calculation + +proc action(): float = + var s: float + stag.solve(psi, phi, -mass, spa) + threads: + var st = psi.norm2 + threadMaster: s = st + result = 0.5*s + +# -- Force calculation & momentum update + +proc smearedOneAndThreeLinkForce(f: auto, smearedForce: proc, p: auto, g:auto) = + # reverse accumulation of the derivative + # 1. Dslash + var + f1 = f.newOneOf() + f3 = f.newOneOf() + ff = f.newOneOf() + t,t3: array[4,Shifter[typeof(p),typeof(p[0])]] + for mu in 0..