CompFUSE · PDoakORNL · Jan 29, 2024 · Jan 29, 2024 · Jan 29, 2024 · Jan 29, 2024
diff --git a/build-aux/frontier_rocm56_build.sh b/build-aux/frontier_rocm56_build.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+#export FFTW_PATH=/lustre/orion/world-shared/cph102/epd/spack/opt/spack/linux-sles15-zen3/gcc-12.2.0/fftw-3.3.10-tajdtzkealhold4bmpuq7wiwzurnclr4
+#export MAGMA_ROOT=/lustre/orion/world-shared/cph102/epd/spack/opt/spack/linux-sles15-zen3/gcc-12.2.0/magma-2.7.2-gbjcrprqdw7y5uplm5upmqbi65zqwubb
+#export OPENBLAS_ROOT=/lustre/orion/world-shared/cph102/epd/spack/opt/spack/linux-sles15-zen3/gcc-12.2.0/openblas-0.3.25-t62dxdtaqba6lzrwoy4uddswlprgma6n
+#export HDF5_ROOT=/lustre/orion/world-shared/cph102/epd/spack/opt/spack/linux-sles15-zen3/gcc-12.2.0/hdf5-1.14.3-3so3g5x2roywum3edvjun7jbhwisei6p
+#export CMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH}:/opt/rocm-5.6.0/hip/bin:${HDF5_ROOT}
+#export PATH=/sw/frontier/spack-envs/base/opt/linux-sles15-x86_64/gcc-7.5.0/cmake-3.23.2-4r4mpiba7cwdw2hlakh5i7tchi64s3qd/bin:${PATH}
+# Configures a HIP (gfx90a) Ninja build; run it from a build directory directly below the source root.
+export CMAKE_PREFIX_PATH=/lustre/orion/cph102/proj-shared/epd/spack/opt/spack/linux-sles15-zen3/gcc-12.2.0/fftw-3.3.10-tajdtzkealhold4bmpuq7wiwzurnclr4:/lustre/orion/cph102/proj-shared/epd/spack/opt/spack/linux-sles15-zen3/gcc-12.2.0/hdf5-1.14.3-3so3g5x2roywum3edvjun7jbhwisei6p:/lustre/orion/cph102/proj-shared/epd/spack/opt/spack/linux-sles15-zen3/gcc-12.2.0/magma-2.7.2-n5sjmunbzqcm5d4rsp2mrkxxvxd6lnfd:/lustre/orion/cph102/proj-shared/epd/spack/opt/spack/linux-sles15-zen3/gcc-12.2.0/openblas-0.3.25-t62dxdtaqba6lzrwoy4uddswlprgma6n
+# The exported value is ':'-separated; a -D list must be ';'-separated, so convert it for the cache entry.
+cmake -DDCA_WITH_CUDA=off -DDCA_WITH_HIP=ON -DCMAKE_PREFIX_PATH="${CMAKE_PREFIX_PATH//:/;}" \
+      -DROCM_ROOT="${ROCM_PATH}" \
+      -DDCA_WITH_TESTS_FAST=ON \
+      -DTEST_RUNNER="srun" \
+      -DGPU_TARGETS=gfx90a \
+      -DAMDGPU_TARGETS=gfx90a \
+      -DCMAKE_HIP_COMPILER=/opt/rocm-5.6.0/llvm/bin/clang++ \
+      -DDCA_FIX_BROKEN_MPICH=1 \
+      -DCMAKE_C_COMPILER=mpicc \
+      -DCMAKE_CXX_COMPILER=mpic++ \
+      -GNinja ..
diff --git a/build-aux/frontier_rocm56_load_modules.sh b/build-aux/frontier_rocm56_load_modules.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+#
+# Loads all modules that are required to build DCA++ on ORNL's Frontier.
+# A reset is done first to restore Frontier's default programming environment.
+# This is for development only at this point.
+#
+# Usage: source frontier_rocm56_load_modules.sh
+
+module reset
+module load gcc/12.2.0
+module load rocm/5.6.0
+module load cmake
+module load ninja
+
+# cray-libsci is unloaded on purpose: after two weeks of digging through opaque
+# linking and runtime errors, it proved much easier to compile our own OpenBLAS
+# and MAGMA (about one day of work) than to keep fussing with it.
+# frontier_rocm56_build.sh points CMAKE_PREFIX_PATH at those builds.
+module unload cray-libsci
+# Select mpicc / mpicxx as the C and C++ compilers via CC / CXX.
+export CC=mpicc
+export CXX=mpicxx
diff --git a/include/dca/linalg/matrixop.hpp b/include/dca/linalg/matrixop.hpp
@@ -208,24 +208,23 @@ auto difference(const Matrix<Scalar, CPU, ALLOC>& a, const Matrix<Scalar, CPU, A
   }
 
   if (max_diff > diff_threshold) {
-#ifndef NDEBUG
-    std::stringstream s;
-    for (int i = 0; i < a.nrRows(); ++i) {
-      for (int j = 0; j < a.nrCols(); ++j) {
-        if (std::abs(a(i, j) - b(i, j)) <= diff_threshold)
-          s << 0. << "\t";
-        else
-          s << a(i, j) - b(i, j) << "\t";
-      }
-      s << "\n";
-    }
-    s << std::endl;
-    std::cout << s.str();
-#endif  // NDEBUG
-    std::cerr << "matrix difference in excess of threshold!\n";
+    std::stringstream estr;
+    estr << "matrix difference in excess of threshold!\n";
+    // #ifndef NDEBUG
+    //     for (int i = 0; i < a.nrRows(); ++i) {
+    //       for (int j = 0; j < a.nrCols(); ++j) {
+    //         if (std::abs(a(i, j) - b(i, j)) <= diff_threshold)
+    //           s << 0. << "\t";
+    //         else
+    //           s << a(i, j) - b(i, j) << "\t";
+    //       }
+    //       s << "\n";
+    //     }
+    //     s << std::endl;
+    //     std::cout << s.str();
+    // #endif  // NDEBUG
     throw std::logic_error(__FUNCTION__);
   }
-
   return max_diff;
 }
   template <typename Scalar, DeviceType device_name, class ALLOC>
@@ -295,21 +294,29 @@ void insertRow(Matrix<Scalar, CPU, ALLOC>& mat, int i) {
 // Preconditions: mat is a square matrix.
 // Postconditions: ipiv and work are resized to the needed dimension.
 // \todo consider doing inverse at full precision reguardless of incoming Scalar precision
-  template <typename Scalar, DeviceType device_name, class ALLOC, template <typename, DeviceType, class> class MatrixType>
-  void inverse(MatrixType<Scalar, device_name, ALLOC>& mat, Vector<int, CPU>& ipiv,
+template <typename Scalar, DeviceType device_name, class ALLOC, template <typename, DeviceType, class> class MatrixType>
+void inverse(MatrixType<Scalar, device_name, ALLOC>& mat, Vector<int, CPU>& ipiv,
              Vector<Scalar, device_name>& work) {
   assert(mat.is_square());
 
   ipiv.resizeNoCopy(mat.nrRows());
 
+  // getrf fills the pivots, so the scratch vector is only sized here, not copied from ipiv.
+  // NOTE(review): MAGMA's *_gpu getrf/getri document ipiv as a host array; confirm that the
+  // routines UseDevice<device_name> forwards to really accept pivots in device_name memory.
+  Vector<int, device_name> device_ipiv;
+  device_ipiv.resizeNoCopy(mat.nrRows());
+
   lapack::UseDevice<device_name>::getrf(mat.nrRows(), mat.nrCols(), mat.ptr(),
-                                        mat.leadingDimension(), ipiv.ptr());
+                                        mat.leadingDimension(), device_ipiv.ptr());
   // Get optimal worksize.
   int lwork = util::getInverseWorkSize(mat);
   work.resizeNoCopy(lwork);
 
-  lapack::UseDevice<device_name>::getri(mat.nrRows(), mat.ptr(), mat.leadingDimension(), ipiv.ptr(),
+  lapack::UseDevice<device_name>::getri(mat.nrRows(), mat.ptr(), mat.leadingDimension(), device_ipiv.ptr(),
                                         work.ptr(), lwork);
+  // Return the pivots through the caller's host-side vector.
+  ipiv = device_ipiv;
 }
 
   template <typename Scalar, DeviceType device_name, class ALLOC, template <typename, DeviceType, class> class MatrixType>