Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Frontier Rocm 5.6 build #322

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions build-aux/frontier_rocm56_build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@

# Configures a DCA++ build with HIP (gfx90a) on ORNL's Frontier using ROCm 5.6.0.
#
# cmake is pointed at "..", so run this from a build directory located directly
# below the source root. ROCM_PATH must already be set in the environment
# (presumably by the rocm module -- confirm against the module-loading script).

# Earlier per-package settings, kept for reference; nothing below uses them.
#export FFTW_PATH=/lustre/orion/world-shared/cph102/epd/spack/opt/spack/linux-sles15-zen3/gcc-12.2.0/fftw-3.3.10-tajdtzkealhold4bmpuq7wiwzurnclr4
#export MAGMA_ROOT=/lustre/orion/world-shared/cph102/epd/spack/opt/spack/linux-sles15-zen3/gcc-12.2.0/magma-2.7.2-gbjcrprqdw7y5uplm5upmqbi65zqwubb
#export OPENBLAS_ROOT=/lustre/orion/world-shared/cph102/epd/spack/opt/spack/linux-sles15-zen3/gcc-12.2.0/openblas-0.3.25-t62dxdtaqba6lzrwoy4uddswlprgma6n
#export HDF5_ROOT=/lustre/orion/world-shared/cph102/epd/spack/opt/spack/linux-sles15-zen3/gcc-12.2.0/hdf5-1.14.3-3so3g5x2roywum3edvjun7jbhwisei6p
#export CMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH}:/opt/rocm-5.6.0/hip/bin:${HDF5_ROOT}
#export PATH=/sw/frontier/spack-envs/base/opt/linux-sles15-x86_64/gcc-7.5.0/cmake-3.23.2-4r4mpiba7cwdw2hlakh5i7tchi64s3qd/bin:${PATH}

# Install prefixes of the Spack-built FFTW, HDF5, MAGMA and OpenBLAS, ':'-separated
# as required for the CMAKE_PREFIX_PATH *environment* variable.
export CMAKE_PREFIX_PATH=/lustre/orion/cph102/proj-shared/epd/spack/opt/spack/linux-sles15-zen3/gcc-12.2.0/fftw-3.3.10-tajdtzkealhold4bmpuq7wiwzurnclr4:/lustre/orion/cph102/proj-shared/epd/spack/opt/spack/linux-sles15-zen3/gcc-12.2.0/hdf5-1.14.3-3so3g5x2roywum3edvjun7jbhwisei6p:/lustre/orion/cph102/proj-shared/epd/spack/opt/spack/linux-sles15-zen3/gcc-12.2.0/magma-2.7.2-n5sjmunbzqcm5d4rsp2mrkxxvxd6lnfd:/lustre/orion/cph102/proj-shared/epd/spack/opt/spack/linux-sles15-zen3/gcc-12.2.0/openblas-0.3.25-t62dxdtaqba6lzrwoy4uddswlprgma6n

# The CMake *variable* CMAKE_PREFIX_PATH is a ';'-separated list, so the
# ':'-separated environment value is converted before being passed with -D;
# passing it verbatim would register the whole string as one nonexistent prefix.
# Expansions are quoted so that paths survive word splitting and the ';' is not
# interpreted by the shell.
cmake -DDCA_WITH_CUDA=off -DDCA_WITH_HIP=ON \
"-DCMAKE_PREFIX_PATH=$(printf '%s' "${CMAKE_PREFIX_PATH}" | tr ':' ';')" \
"-DROCM_ROOT=${ROCM_PATH}" \
-DDCA_WITH_TESTS_FAST=ON \
-DTEST_RUNNER="srun" \
-DGPU_TARGETS=gfx90a \
-DAMDGPU_TARGETS=gfx90a \
-DCMAKE_HIP_COMPILER=/opt/rocm-5.6.0/llvm/bin/clang++ \
-DDCA_FIX_BROKEN_MPICH=1 \
-DCMAKE_C_COMPILER=mpicc \
-DCMAKE_CXX_COMPILER=mpic++ \
-GNinja ..
22 changes: 22 additions & 0 deletions build-aux/frontier_rocm56_load_modules.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/bin/bash
#
# Loads all modules that are required to build DCA++ on ORNL's Frontier.
# A reset is done at the beginning to restore the default programming environment on Frontier.
# This is for development only at this point.
#
# Usage: source frontier_rocm56_load_modules.sh
# (The script must be sourced, not executed, so that the module and export
# changes apply to the calling shell.)

# Return to the default module set first, then add the toolchain on top of it.
module reset
module load gcc/12.2.0
module load rocm/5.6.0
module load cmake
module load ninja

# cray-libsci is deliberately removed from the environment.
# After 2 weeks of digging through opaque linking and runtime errors,
# I have concluded that cray-libsci causes such a mess
# that it's much easier to compile your own OpenBLAS
# and MAGMA rather than fuss with it. I did the latter in 1 day.
module unload cray-libsci

# Default C and C++ compilers for subsequent builds in this shell.
export CC=mpicc
export CXX=mpicxx
47 changes: 27 additions & 20 deletions include/dca/linalg/matrixop.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -208,24 +208,23 @@ auto difference(const Matrix<Scalar, CPU, ALLOC>& a, const Matrix<Scalar, CPU, A
}

if (max_diff > diff_threshold) {
#ifndef NDEBUG
std::stringstream s;
for (int i = 0; i < a.nrRows(); ++i) {
for (int j = 0; j < a.nrCols(); ++j) {
if (std::abs(a(i, j) - b(i, j)) <= diff_threshold)
s << 0. << "\t";
else
s << a(i, j) - b(i, j) << "\t";
}
s << "\n";
}
s << std::endl;
std::cout << s.str();
#endif // NDEBUG
std::cerr << "matrix difference in excess of threshold!\n";
std::stringstream estr;
estr << "matrix difference in excess of threshold!\n";
// #ifndef NDEBUG
// for (int i = 0; i < a.nrRows(); ++i) {
// for (int j = 0; j < a.nrCols(); ++j) {
// if (std::abs(a(i, j) - b(i, j)) <= diff_threshold)
// s << 0. << "\t";
// else
// s << a(i, j) - b(i, j) << "\t";
// }
// s << "\n";
// }
// s << std::endl;
// std::cout << s.str();
// #endif // NDEBUG
throw std::logic_error(__FUNCTION__);
}

return max_diff;
}
template <typename Scalar, DeviceType device_name, class ALLOC>
Expand Down Expand Up @@ -295,21 +294,29 @@ void insertRow(Matrix<Scalar, CPU, ALLOC>& mat, int i) {
// Preconditions: mat is a square matrix.
// Postconditions: ipiv and work are resized to the needed dimension.
// \todo consider doing inverse at full precision regardless of incoming Scalar precision
template <typename Scalar, DeviceType device_name, class ALLOC, template <typename, DeviceType, class> class MatrixType>
void inverse(MatrixType<Scalar, device_name, ALLOC>& mat, Vector<int, CPU>& ipiv,
template <typename Scalar, DeviceType device_name, class ALLOC, template <typename, DeviceType, class> class MatrixType>
void inverse(MatrixType<Scalar, device_name, ALLOC>& mat, Vector<int, CPU>& ipiv,
Vector<Scalar, device_name>& work) {
assert(mat.is_square());

ipiv.resizeNoCopy(mat.nrRows());

// This pivot vector has long been a host side vector which seems wrong,
// but this had no apparent effect on the frontier segfault issue
Vector<int,device_name> device_ipiv;
device_ipiv = ipiv;
device_ipiv.resizeNoCopy(mat.nrRows());

lapack::UseDevice<device_name>::getrf(mat.nrRows(), mat.nrCols(), mat.ptr(),
mat.leadingDimension(), ipiv.ptr());
mat.leadingDimension(), device_ipiv.ptr());
// Get optimal worksize.
int lwork = util::getInverseWorkSize(mat);
work.resizeNoCopy(lwork);

lapack::UseDevice<device_name>::getri(mat.nrRows(), mat.ptr(), mat.leadingDimension(), ipiv.ptr(),
lapack::UseDevice<device_name>::getri(mat.nrRows(), mat.ptr(), mat.leadingDimension(), device_ipiv.ptr(),
work.ptr(), lwork);

ipiv = device_ipiv;
}

template <typename Scalar, DeviceType device_name, class ALLOC, template <typename, DeviceType, class> class MatrixType>
Expand Down
Loading