Skip to content

Commit

Permalink
Merge branch 'ICLDisco:master' into update/parsec623
Browse files Browse the repository at this point in the history
  • Loading branch information
abouteiller authored Oct 24, 2024
2 parents cdc8984 + 4ad5c19 commit 092e0e8
Show file tree
Hide file tree
Showing 56 changed files with 1,710 additions and 516 deletions.
26 changes: 24 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ set(DPLASMA_VERSION "${DPLASMA_VERSION_MAJOR}.${DPLASMA_VERSION_MINOR}")

############################################################################
# CMake Policies Tuning
# CMP0144: when the running CMake knows this policy, opt in to the NEW
# behavior so that find_package() honors upper-case <PACKAGENAME>_ROOT
# variables in addition to <PackageName>_ROOT.
if(POLICY CMP0144)
  cmake_policy(SET CMP0144 NEW)
endif()
set(CMAKE_NO_SYSTEM_FROM_IMPORTED True)

############################################################################
Expand Down Expand Up @@ -231,12 +235,30 @@ endif(NOT TARGET PaRSEC::parsec AND NOT TARGET PaRSEC::parsec_ptgpp)

############################################################################
# Resume configuring dplasma
option(DPLASMA_HAVE_CUDA "Use CUDA to accelerate DPLASMA routines" ${PARSEC_HAVE_CUDA})
if(DPLASMA_HAVE_CUDA)
# Optional CUDA acceleration; the default follows what the detected PaRSEC
# reports through PARSEC_HAVE_CUDA.
option(DPLASMA_GPU_WITH_CUDA
       "Use CUDA to accelerate DPLASMA routines"
       ${PARSEC_HAVE_CUDA})
if(DPLASMA_GPU_WITH_CUDA)
  # Requesting CUDA on top of a PaRSEC without CUDA support is a hard error.
  if(NOT PARSEC_HAVE_CUDA)
    message(FATAL_ERROR "CUDA support for DPLASMA requested, but detected PaRSEC does not support it")
  endif()
  message(STATUS "CUDA support for DPLASMA enabled")

  # Only look for the CUDA toolkit when the CUDA::cusolver target is not
  # already defined.
  if(NOT TARGET CUDA::cusolver)
    find_package(CUDAToolkit REQUIRED)
  endif()

  # Record the outcome in the cache; no FORCE, so a value already present in
  # the cache is left untouched.
  set(DPLASMA_HAVE_CUDA ${PARSEC_HAVE_CUDA} CACHE BOOL "True if DPLASMA provide support for CUDA")
endif()
# Optional HIP acceleration; the default follows what the detected PaRSEC
# reports through PARSEC_HAVE_HIP.
option(DPLASMA_GPU_WITH_HIP
       "Use HIP to accelerate DPLASMA routines"
       ${PARSEC_HAVE_HIP})
if(DPLASMA_GPU_WITH_HIP)
  # Requesting HIP on top of a PaRSEC without HIP support is a hard error.
  if(NOT PARSEC_HAVE_HIP)
    message(FATAL_ERROR "HIP support for DPLASMA requested, but detected PaRSEC does not support it")
  endif()
  message(STATUS "HIP support for DPLASMA enabled")

  # PATH and HINTS are not transmitted to sub-dependents, so /opt/rocm is
  # temporarily appended to the system prefix path while hipblas and
  # rocsolver are located; the previous value is restored right after.
  set(CMAKE_SYSTEM_PREFIX_PATH_save ${CMAKE_SYSTEM_PREFIX_PATH})
  list(APPEND CMAKE_SYSTEM_PREFIX_PATH /opt/rocm)
  find_package(hipblas REQUIRED)
  find_package(rocsolver REQUIRED)
  set(CMAKE_SYSTEM_PREFIX_PATH ${CMAKE_SYSTEM_PREFIX_PATH_save})

  # Record the outcome in the cache; no FORCE, so a value already present in
  # the cache is left untouched.
  set(DPLASMA_HAVE_HIP ${PARSEC_HAVE_HIP} CACHE BOOL "True if DPLASMA provide support for HIP")
endif()

############################################################################
Expand Down
2 changes: 1 addition & 1 deletion cmake_modules/FindLAPACKE.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ foreach(_comp ${LAPACKE_FIND_COMPONENTS})
elseif(_comp STREQUAL "BLAS")
if(NOT BLAS_FOUND)
_find_library_with_header(${_comp} "" blas refblas)
set(BLA_VENDOR CACHE "Generic")
set(BLA_VENDOR "Generic" CACHE STRING "BLAS vendor")
else()
set(LAPACKE_BLAS_FOUND 1)
set(LAPACKE_BLAS_LIB_FOUND 1)
Expand Down
20 changes: 17 additions & 3 deletions configure
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,9 @@ cat <<EOF
compile kernels optimized for the CUDA SM model x, y and z
where x,y,z are two digit numbers representing a valid CUDA architecture (e.g. 35,37,60) (default=autodetect)
--with-hip[=DIR]
use the AMD ROCm accelerator library [installed in DIR] (default=autodetect)
Some influential environment variables:
CC C compiler command
Expand Down Expand Up @@ -296,6 +299,11 @@ while [ "x$1" != x ]; do
--with-cuda-sm-targets) with_cuda_sm_targets=yes; shift;;
--without-cuda-sm-targets) with_cuda_sm_targets=no; shift;;

# ROCm options
--with-hip=*) with_hip="${1#*=}"; shift;;
--with-hip) with_hip=yes; shift;;
--without-hip) with_hip=no; shift;;

# Python options
--with-python=*) with_python="${1#*=}"; shift;;
--with-python) with_python=yes; shift;;
Expand Down Expand Up @@ -399,12 +407,12 @@ _EOF
mkdir -p "$NATIVE_DIR" && pushd "$NATIVE_DIR"
rm -rf CMakeCache.txt CMakeFiles

# Disable MPI, CUDA, HWLOC when creating the build-tools
# Disable MPI, GPU, HWLOC when creating the build-tools
local NATIVE_MPI="-DPARSEC_DIST_WITH_MPI=OFF"
local NATIVE_CUDA="-DPARSEC_GPU_WITH_CUDA=OFF"
local NATIVE_GPU="-DPARSEC_GPU_WITH_CUDA=OFF -DPARSEC_GPU_WITH_HIP=OFF"
local NATIVE_HWLOC=""
local NATIVE_COMPILERS="-DSUPPORT_FORTRAN=OFF"
local NATIVE_OPTS="-DBUILD_TESTING=OFF -DBUILD_TOOLS=ON -DBUILD_PARSEC=ON -DCMAKE_INSTALL_PREFIX=$NATIVE_PREFIX $NATIVE_MPI $NATIVE_CUDA $NATIVE_HWLOC $NATIVE_COMPILERS"
local NATIVE_OPTS="-DBUILD_TESTING=OFF -DBUILD_TOOLS=ON -DBUILD_PARSEC=ON -DCMAKE_INSTALL_PREFIX=$NATIVE_PREFIX $NATIVE_MPI $NATIVE_GPU $NATIVE_HWLOC $NATIVE_COMPILERS"

set_cmake_executable #may have been changed in the platform file
echo "CC=\"${NATIVE_CC}\" CFLAGS=\"${NATIVE_CFLAGS}\" CXX=\"${NATIVE_CXX}\" CXXFLAGS=\"${NATIVE_CXXFLAGS}\" LDFLAGS=\"${NATIVE_LDFLAGS}\" ${cmake_executable} -G\"${cmake_generator}\" ${NATIVE_OPTS} ${PARSEC_TOOLCHAIN_OPTIONS} $(for i in "$@"; do printf ' %q' "$i"; done) ${srcdir}"
Expand Down Expand Up @@ -621,6 +629,12 @@ x) ;;
# Any other value is a comma-separated list of SM targets (e.g. 35,37,60):
# turn EVERY comma into a CMake list separator. The '//' form is required;
# a single '/' would only replace the first comma.
*) CMAKE_DEFINES+=" -DCUDA_SM_TARGETS='${with_cuda_sm_targets//,/;}'";;
esac
# Translate --with-hip[=DIR] / --without-hip into CMake definitions for both
# PaRSEC and DPLASMA. When the option was not given, nothing is added and
# CMake is left to autodetect.
case "x${with_hip}" in
x) ;; # option not specified
xyes) CMAKE_DEFINES+=" -DPARSEC_GPU_WITH_HIP=ON -DDPLASMA_GPU_WITH_HIP=ON";;
xno) CMAKE_DEFINES+=" -DPARSEC_GPU_WITH_HIP=OFF -DDPLASMA_GPU_WITH_HIP=OFF";;
# any other value is an installation directory, forwarded as HIP_ROOT
*) CMAKE_DEFINES+=" -DPARSEC_GPU_WITH_HIP=ON -DHIP_ROOT=$(printf %q "$with_hip") -DDPLASMA_GPU_WITH_HIP=ON";;
esac
case x$with_python in
xno) echo >&2 "Python is required. Please provide a path to the python executable."; exit 3;;
Expand Down
58 changes: 40 additions & 18 deletions contrib/platforms/macosx
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,27 @@ for this reason the Apple VecLib remains the default option.
EOF
with_blas=Apple

# On OSX, OpenBLAS lives outside of the system directories. For every
# package manager whose OpenBLAS CMake package directory exists, append the
# matching lib directory to CMAKE_PREFIX_PATH so that OpenBLAS gets
# auto-detected when it is requested through with_blas.

# Homebrew: arm prefix first, intel prefix otherwise
if [[ -d /opt/homebrew/opt/openblas/lib/cmake/OpenBLAS ]]; then
    ENVVARS+=" CMAKE_PREFIX_PATH+=':/opt/homebrew/opt/openblas/lib'"
elif [[ -d /usr/local/opt/openblas/lib/cmake/OpenBLAS ]]; then
    ENVVARS+=" CMAKE_PREFIX_PATH+=':/usr/local/opt/openblas/lib'"
fi

# MacPorts
if [[ -d /opt/local/lib/cmake/OpenBLAS ]]; then
    ENVVARS+=" CMAKE_PREFIX_PATH+=':/opt/local/lib'"
fi

# Fink: /opt/sw (10.15 and later) first, /sw (10.14 and earlier) otherwise
if [[ -d /opt/sw/lib/cmake/OpenBLAS ]]; then
    ENVVARS+=" CMAKE_PREFIX_PATH+=':/opt/sw/lib'"
elif [[ -d /sw/lib/cmake/OpenBLAS ]]; then
    ENVVARS+=" CMAKE_PREFIX_PATH+=':/sw/lib'"
fi

#####################################################################
# Everything below is imported from the parsec contrib platform file.
# If you need to modify something below, it probably needs to be modified
# in parsec first, and backported here second

# if icc/CLang is not set, detect the matching gcc/gfortran pair
# OS X has no fortran by default, and flavor varies between fink, port, self installed, etc
# Lets try to guess and use the same version for all 3 compilers
Expand All @@ -30,24 +51,25 @@ if [ "x$enable_fortran" != xno -a ! -x "$(command -v "$CC")" ]; then
done
fi

# We have Brew OpenBLAS, lets make sure it gets auto-detected, if requested
if [ -d /usr/local/opt/openblas/lib ]; then
ENVVARS+=" CMAKE_PREFIX_PATH+=':/usr/local/opt/openblas/lib'"
fi

# OS-X 12.2 provides Bison 2.3, we need Bison 3 or better
# Try to get the 'brew' Bison if installed
if [ -d /usr/local/opt/bison ]; then
ENVVARS+=" BISON_ROOT=${BISON_ROOT:-/usr/local/opt/bison}"
# Major version of the bison found in PATH; 0 when bison is missing or its
# version banner cannot be parsed. The pattern anchors on the first run of
# digits following "bison", so that e.g. 3.0.4 yields 3 (a greedy "bison.*"
# would capture a later digit instead and flag a valid bison as too old).
local bison_version=$(expr "$(bison --version 2>/dev/null)" : "bison[^0-9]*\([0-9][0-9]*\)\.")
bison_version=${bison_version:-0}
# Start from a clean slate so a stray bison_dir from the environment is not
# picked up by the test below.
local bison_dir=""
# do not override valid version in PATH or user choice
if [ "$bison_version" -lt 3 ] && [ -z "$BISON_ROOT" ]; then
    # Try to find Bison if installed with brew, ports, fink
    if [ -d /opt/homebrew/opt/bison ]; then # brew (arm)
        bison_dir=/opt/homebrew/opt/bison
    elif [ -d /usr/local/opt/bison ]; then # brew (intel)
        bison_dir=/usr/local/opt/bison
    elif [ -x /opt/local/bin/bison ]; then # macports
        bison_dir=/opt/local
    elif [ -x /opt/sw/bin/bison ]; then # fink (10.15..)
        bison_dir=/opt/sw
    elif [ -x /sw/bin/bison ]; then # fink (..10.14)
        bison_dir=/sw
    fi
    # Only export BISON_ROOT when one of the candidate locations exists
    if [ -n "${bison_dir}" ]; then
        echo "Default Bison is too old, will search in ${bison_dir}; override by setting BISON_ROOT"
        ENVVARS+=" BISON_ROOT=${bison_dir}"
    fi
fi
# Try to get the 'MacPort' Bison if installed
if [ -x /opt/local/bin/bison ]; then
ENVVARS+=" BISON_ROOT=${BISON_ROOT:-/opt/local}"
fi
# Try to get the 'Fink' Bison if installed
if [ -x /sw/bin/bison ]; then
ENVVARS+=" BISON_ROOT=${BISON_ROOT:-/sw}"
fi
# If Bison still not found, please set BISON_ROOT by hand


11 changes: 6 additions & 5 deletions share/help-dplasma.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
[cu*_alloc_failed]
There was not enough memory available on a CUDA device
[gpu_alloc_failed]
There was not enough memory available on a GPU device
while trying to allocate a %s handle to manage tasks on
this device, or another CUDA device on the node. The
this device, or another GPU device on the node. The
PaRSEC runtime system may be configured to reserve too
much memory on CUDA devices. Try reducing the amount of
much memory on GPU devices. Try reducing the amount of
reserved memory by setting the PaRSEC MCA parameter
'device_cuda_memory_use' to a lower value.
'device_cuda_memory_use' (or similar for the type of
device) to a lower value.
25 changes: 23 additions & 2 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,27 @@ if( NOT DPLASMA_HAVE_COMPLEX_H )
list(APPEND EXTRA_SOURCES complex.c)
endif()
if( DPLASMA_HAVE_CUDA )
list(APPEND EXTRA_SOURCES dplasmaaux_cuda.c)
list(APPEND EXTRA_SOURCES dplasmaaux_cuda.c cuda/lapack_cuda_stage_in.c)
endif()
# HIP backend: add the HIP helper source, then register build rules that
# produce a HIP counterpart (under ${CMAKE_CURRENT_BINARY_DIR}/hip/) of every
# .c/.h file found in the cuda/ directory by running hipify-perl.
if( DPLASMA_HAVE_HIP )
list(APPEND EXTRA_SOURCES dplasmaaux_hip.c)
# Collect cuda/*.c and cuda/*.h, skipping names that start with a dot.
# NOTE(review): the glob is evaluated at configure time only (no
# CONFIGURE_DEPENDS), so new files in cuda/ presumably require re-running CMake.
FILE(GLOB cuda_sources cuda/[^\\.]*.[ch])
# hipify-perl is invoked through the Perl interpreter; both are mandatory.
find_package(Perl REQUIRED)
find_program(HIPIFY_PERL_COMMAND NAMES hipify-perl HINTS ${HIP_BIN_INSTALL_DIR} REQUIRED)
foreach(cuda_file ${cuda_sources})
# Output path: file name relative to cuda/, with every "cuda" replaced by
# "hip", placed in the hip/ subdirectory of the current binary directory.
file(RELATIVE_PATH cuda_filename ${CMAKE_CURRENT_SOURCE_DIR}/cuda ${cuda_file})
string(REPLACE cuda hip hip_file ${cuda_filename})
string(PREPEND hip_file "${CMAKE_CURRENT_BINARY_DIR}/hip/")
# Build rule in three steps:
#  1. copy the CUDA file to "<hip_file>.prehip";
#  2. run hipify-perl --inplace on "<hip_file>" -- presumably hipify-perl uses
#     the existing .prehip file as its input/backup and writes <hip_file>;
#     TODO confirm against the hipify-perl documentation;
#  3. run a Perl substitution over <hip_file> that rewrites the remaining
#     occurrences of "cuda" (any letter case) to "hip"; the expression appears
#     to carry over the letter case of the matched text.
add_custom_command(OUTPUT ${hip_file}
DEPENDS ${cuda_file} # do not use MAIN_DEPENDENCY, that overrides the default .c.o rule
COMMAND ${CMAKE_COMMAND} -E copy "${cuda_file}" "${hip_file}.prehip"
COMMAND ${PERL_EXECUTABLE} ${HIPIFY_PERL_COMMAND} --inplace --print-stats "${hip_file}"
COMMAND ${PERL_EXECUTABLE} -i -pe "s{(cuda)}{ substr uc hip | (uc \$1 ^ \$1), 0, 3 }egi" "${hip_file}" VERBATIM) # Convert all remaining cuda/CUDA
# NOTE(review): this pattern is not anchored and is matched against the full
# output path, so it may also accept .h files despite the comment below.
# Before tightening it, confirm how generated headers would otherwise be
# attached to a target so that their custom command still runs.
if(${hip_file} MATCHES [^\\.]*.c) # do not add .h to sources
list(APPEND EXTRA_SOURCES ${hip_file})
endif()
endforeach()
endif( DPLASMA_HAVE_HIP )

### Generate .c files from .jdf for all required precisions
set(JDF
Expand Down Expand Up @@ -236,7 +255,9 @@ target_link_libraries(dplasma
PaRSEC::parsec
LAPACKE::LAPACKE
$<$<BOOL:${DPLASMA_HAVE_CUDA}>:CUDA::cublas>
$<$<BOOL:${DPLASMA_HAVE_CUDA}>:CUDA::cusolver>)
$<$<BOOL:${DPLASMA_HAVE_CUDA}>:CUDA::cusolver>
$<$<BOOL:${DPLASMA_HAVE_HIP}>:roc::hipblas>
$<$<BOOL:${DPLASMA_HAVE_HIP}>:roc::rocsolver>)
set_target_properties(dplasma PROPERTIES VERSION ${DPLASMA_VERSION_MAJOR}.${DPLASMA_VERSION_MINOR}
SOVERSION ${DPLASMA_VERSION_MAJOR})

Expand Down
3 changes: 3 additions & 0 deletions src/cuda/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
This directory contains files that are automatically converted from CUDA to HIP using Hipify.
If your file is not automatically convertible, put it somewhere else.

Loading

0 comments on commit 092e0e8

Please sign in to comment.