From 0286e96799527255ae6d3af1bee454ffcb77d66e Mon Sep 17 00:00:00 2001 From: "Todd A. Oliver" Date: Fri, 19 Apr 2024 14:58:48 -0700 Subject: [PATCH] Attempt to workaround gpu issues in axisymmetric Tomboulides The u_next_rad_comp_gf_ ParGridFunction seems to be causing problems on some gpu systems. This is intended to alias the first (i.e. radial) component of the velocity grid function. It is constructed via u_next_rad_comp_gf_ = new ParGridFunction(pfes_, *u_next_gf_); But, this leads to failures like the following: Verification failed: (IsHostMemory(h_mt)) is false in mfem::MFEM_VERIFY_TYPES, which is called (eventually) from Vector::HostRead. Initially I thought this had to do with a failure to properly call Vector::SyncMemory, since the base memory (in u_next_gf_) is being modified. But, an attempt to use this had no impact on the problem. So, in this workaround, I eliminate the alias and simply copy the first component into a new grid function. This seems to work, but it would be nice to understand the problem with the original approach, b/c clearly we'd rather not copy when it shouldn't be necessary. --- src/tomboulides.cpp | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/src/tomboulides.cpp b/src/tomboulides.cpp index b33b538a8..f6dd96298 100644 --- a/src/tomboulides.cpp +++ b/src/tomboulides.cpp @@ -253,10 +253,13 @@ void Tomboulides::initializeSelf() { mu_total_gf_ = new ParGridFunction(pfes_); - pp_div_rad_comp_gf_ = new ParGridFunction(pfes_, *pp_div_gf_); - u_next_rad_comp_gf_ = new ParGridFunction(pfes_, *u_next_gf_); + // pp_div_rad_comp_gf_ = new ParGridFunction(pfes_, *pp_div_gf_); + // u_next_rad_comp_gf_ = new ParGridFunction(pfes_, *u_next_gf_); if (axisym_) { + pp_div_rad_comp_gf_ = new ParGridFunction(pfes_); + u_next_rad_comp_gf_ = new ParGridFunction(pfes_); + utheta_gf_ = new ParGridFunction(pfes_); utheta_next_gf_ = new ParGridFunction(pfes_); } @@ -1365,7 +1368,19 @@ void Tomboulides::step() { // Add axisymmetric "forcing" term to rhs if (axisym_) { pp_div_gf_->HostRead(); + { + // Copy radial components of pp_div_gf_. We should be able to + // just make pp_div_rad_comp_gf_ wrap pp_div_gf_, but that is + // causing problems (that aren't understood) on some gpu + // systems. As a workaround, we copy instead. + auto d_pp_div_rad = pp_div_rad_comp_gf_->Write(); + auto d_pp_div = pp_div_gf_->Read(); + MFEM_FORALL(i, pp_div_rad_comp_gf_->Size(), { + d_pp_div_rad[i] = d_pp_div[i]; + }); + } pp_div_rad_comp_gf_->HostRead(); + Faxi_poisson_form_->Update(); Faxi_poisson_form_->Assemble(); Faxi_poisson_form_->ParallelAssemble(Faxi_poisson_vec_); @@ -1469,6 +1484,18 @@ void Tomboulides::step() { evaluateVelocityGradient(); if (axisym_) { + u_next_gf_->HostRead(); + { + // Copy radial components of u_next_gf_. Same comments here + // about pp_div_rad_comp_gf_ above. + auto d_u_next_rad = u_next_rad_comp_gf_->Write(); + auto d_u_next = u_next_gf_->Read(); + MFEM_FORALL(i, u_next_rad_comp_gf_->Size(), { + d_u_next_rad[i] = d_u_next[i]; + }); + } + u_next_rad_comp_gf_->HostRead(); + // Update the Helmholtz operator and inverse Hv_bdfcoeff_.constant = coeff_.bd0 / dt; Hs_form_->Update();