From 0e76fd82ac96eda9668fe862b58782ccf234fcb0 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Mon, 5 Sep 2016 11:18:48 +0300 Subject: [PATCH 01/33] [HIPIFY][LEGACY] Do not warn on warp shuffle functions. --- bin/hipify | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/bin/hipify b/bin/hipify index 1d1d071a78..7ecfccbd88 100755 --- a/bin/hipify +++ b/bin/hipify @@ -699,12 +699,12 @@ sub warnUnsupportedSpecialFunctions #"__ballot", #"__popc", - #"__clz", + #"__clz", - "__shfl", - "__shfl_up", - "__shfl_down", - "__shfl_xor", + #"__shfl", + #"__shfl_up", + #"__shfl_down", + #"__shfl_xor", "__prof_trigger", From 53de91dd599d095defd71de8dd27b03ad0d1a79e Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Mon, 5 Sep 2016 13:15:49 +0300 Subject: [PATCH 02/33] [HIPIFY] CUDA Driver API porting to HIP : Context functions support. --- clang-hipify/src/Cuda2Hip.cpp | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/clang-hipify/src/Cuda2Hip.cpp b/clang-hipify/src/Cuda2Hip.cpp index 8959c1bc95..edc29832d9 100644 --- a/clang-hipify/src/Cuda2Hip.cpp +++ b/clang-hipify/src/Cuda2Hip.cpp @@ -310,6 +310,27 @@ struct cuda2hipMap { cuda2hipRename["CUevent"] = {"hipEvent_t", CONV_TYPE, API_DRIVER}; cuda2hipRename["CUstream"] = {"hipStream_t", CONV_TYPE, API_DRIVER}; + // Context + cuda2hipRename["cuCtxCreate_v2"] = {"hipCtxCreate", CONV_TYPE, API_DRIVER}; + cuda2hipRename["cuCtxDestroy_v2"] = {"hipCtxDestroy", CONV_TYPE, API_DRIVER}; + cuda2hipRename["cuCtxPopCurrent_v2"] = {"hipCtxPopCurrent", CONV_DEV, API_DRIVER}; + cuda2hipRename["cuCtxPushCurrent_v2"] = {"hipCtxPushCurrent", CONV_DEV, API_DRIVER}; + cuda2hipRename["cuCtxSetCurrent"] = {"hipCtxSetCurrent", CONV_DEV, API_DRIVER}; + cuda2hipRename["cuCtxGetCurrent"] = {"hipCtxGetCurrent", CONV_DEV, API_DRIVER}; + cuda2hipRename["cuCtxGetDevice"] = {"hipCtxGetDevice", CONV_DEV, API_DRIVER}; + cuda2hipRename["cuCtxGetApiVersion"] = {"hipCtxGetApiVersion", CONV_DEV, API_DRIVER}; + cuda2hipRename["cuCtxGetCacheConfig"] = {"hipCtxGetCacheConfig", CONV_DEV, API_DRIVER}; + cuda2hipRename["cuCtxSetCacheConfig"] = {"hipCtxSetCacheConfig", CONV_DEV, API_DRIVER}; + cuda2hipRename["cuCtxSetSharedMemConfig"] = {"hipCtxSetSharedMemConfig", CONV_DEV, API_DRIVER}; + cuda2hipRename["cuCtxGetSharedMemConfig"] = {"hipCtxGetSharedMemConfig", CONV_DEV, API_DRIVER}; + cuda2hipRename["cuCtxSynchronize"] = {"hipCtxSynchronize", CONV_DEV, API_DRIVER}; + cuda2hipRename["cuCtxGetFlags"] = {"hipCtxGetFlags", CONV_DEV, API_DRIVER}; + cuda2hipRename["cuCtxEnablePeerAccess"] = {"hipCtxEnablePeerAccess", CONV_DEV, API_DRIVER}; + cuda2hipRename["cuCtxDisablePeerAccess"] = {"hipCtxDisablePeerAccess", CONV_DEV, API_DRIVER}; + // unsupported yet by HIP + // cuda2hipRename["cuCtxSetLimit"] = {"hipCtxSetLimit", CONV_DEV, API_DRIVER}; + // cuda2hipRename["cuCtxGetLimit"] = {"hipCtxGetLimit", CONV_DEV, API_DRIVER}; + /////////////////////////////// CUDA RT API /////////////////////////////// // Error API cuda2hipRename["cudaGetLastError"] = {"hipGetLastError", CONV_ERR, API_RUNTIME}; From 75af21952c333de4a3747586f67d4b7b8aeb439e Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Mon, 5 Sep 2016 09:29:42 -0500 Subject: [PATCH 03/33] Doc update. - Add link to new driver porting guide. - small typo Change-Id: Ia8e25e371e17f8b4207b99e3fdfb575e59c2ebf2 --- README.md | 1 + cmake/FindHIP.cmake | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 6d6c1b2553..f9bdcba9a5 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,7 @@ HIP releases are typically of two types. The tag naming convention is different - [HIP Kernel Language](docs/markdown/hip_kernel_language.md) - [HIP Runtime API (Doxygen)](http://gpuopen-professionalcompute-tools.github.io/HIP) - [HIP Porting Guide](docs/markdown/hip_porting_guide.md) +- [HIP Porting Driver Guide](docs/markdown/hip_porting_driver_api.md) - [HIP Terminology](docs/markdown/hip_terms.md) (including Rosetta Stone of GPU computing terms across CUDA/HIP/HC/AMP/OpenL) - [clang-hipify](clang-hipify/README.md) - [Developer/CONTRIBUTING Info](CONTRIBUTING.md) diff --git a/cmake/FindHIP.cmake b/cmake/FindHIP.cmake index 7b129d1550..1367a12630 100644 --- a/cmake/FindHIP.cmake +++ b/cmake/FindHIP.cmake @@ -179,7 +179,7 @@ hip_find_helper_file(run_hipcc cmake) ############################################################################### ############################################################################### -# MACRO: Seperate the options from the sources +# MACRO: Separate the options from the sources ############################################################################### macro(HIP_GET_SOURCES_AND_OPTIONS _sources _cmake_options _hipcc_options _hcc_options _nvcc_options) set(${_sources}) From 2a1c84b4f9dacea14470488048ca038975ba24f4 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Mon, 5 Sep 2016 18:05:16 +0300 Subject: [PATCH 04/33] [HIPIFY] CUDA Driver API porting to HIP : Device functions support. --- clang-hipify/src/Cuda2Hip.cpp | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/clang-hipify/src/Cuda2Hip.cpp b/clang-hipify/src/Cuda2Hip.cpp index edc29832d9..715c4b4056 100644 --- a/clang-hipify/src/Cuda2Hip.cpp +++ b/clang-hipify/src/Cuda2Hip.cpp @@ -328,8 +328,21 @@ struct cuda2hipMap { cuda2hipRename["cuCtxEnablePeerAccess"] = {"hipCtxEnablePeerAccess", CONV_DEV, API_DRIVER}; cuda2hipRename["cuCtxDisablePeerAccess"] = {"hipCtxDisablePeerAccess", CONV_DEV, API_DRIVER}; // unsupported yet by HIP - // cuda2hipRename["cuCtxSetLimit"] = {"hipCtxSetLimit", CONV_DEV, API_DRIVER}; - // cuda2hipRename["cuCtxGetLimit"] = {"hipCtxGetLimit", CONV_DEV, API_DRIVER}; + // cuda2hipRename["cuCtxSetLimit"] = {"hipCtxSetLimit", CONV_DEV, API_DRIVER}; + // cuda2hipRename["cuCtxGetLimit"] = {"hipCtxGetLimit", CONV_DEV, API_DRIVER}; + + // Device + cuda2hipRename["cuDeviceGet"] = {"hipGetDevice", CONV_TYPE, API_DRIVER}; + cuda2hipRename["cuDeviceGetName"] = {"hipDeviceGetName", CONV_TYPE, API_DRIVER}; + cuda2hipRename["cuDeviceGetCount"] = {"hipGetDeviceCount", CONV_TYPE, API_DRIVER}; + cuda2hipRename["cuDeviceGetAttribute"] = {"hipDeviceGetAttribute", CONV_TYPE, API_DRIVER}; + cuda2hipRename["cuDeviceGetProperties"] = {"hipGetDeviceProperties", CONV_TYPE, API_DRIVER}; + cuda2hipRename["cuDeviceGetPCIBusId"] = {"hipDeviceGetPCIBusId", CONV_TYPE, API_DRIVER}; + // unsupported yet by HIP + // cuda2hipRename["cuDeviceGetByPCIBusId"] = {"hipDeviceGetByPCIBusId", CONV_DEV, API_DRIVER}; + cuda2hipRename["cuDeviceTotalMem_v2"] = {"hipDeviceTotalMem", CONV_TYPE, API_DRIVER}; + cuda2hipRename["cuDeviceComputeCapability"] = {"hipDeviceComputeCapability", CONV_TYPE, API_DRIVER}; + cuda2hipRename["cuDeviceCanAccessPeer"] = {"hipDeviceCanAccessPeer", CONV_TYPE, API_DRIVER}; /////////////////////////////// CUDA RT API /////////////////////////////// // Error API From 5e0323d6c04cbb43d0637f4116670c8f25ef7e1e Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Mon, 5 Sep 2016 20:26:21 +0300 Subject: [PATCH 05/33] [HIPIFY] CUDA Driver API porting to HIP : Conversion types Context and Cache are added. --- clang-hipify/src/Cuda2Hip.cpp | 98 +++++++++++++++++++---------------- 1 file changed, 53 insertions(+), 45 deletions(-) diff --git a/clang-hipify/src/Cuda2Hip.cpp b/clang-hipify/src/Cuda2Hip.cpp index 715c4b4056..e9d51b81a8 100644 --- a/clang-hipify/src/Cuda2Hip.cpp +++ b/clang-hipify/src/Cuda2Hip.cpp @@ -64,6 +64,8 @@ enum ConvTypes { CONV_SPECIAL_FUNC, CONV_STREAM, CONV_EVENT, + CONV_CONTEXT, + CONV_CACHE, CONV_ERR, CONV_DEF, CONV_TEX, @@ -77,10 +79,10 @@ enum ConvTypes { }; const char *counterNames[CONV_LAST] = { - "dev", "mem", "kern", "coord_func", "math_func", - "special_func", "stream", "event", "err", "def", - "tex", "other", "include", "include_cuda_main_header", - "type", "literal", "numeric_literal"}; + "dev", "mem", "kern", "coord_func", "math_func", + "special_func", "stream", "event", "ctx", "cache", + "err", "def", "tex", "other", "include", + "include_cuda_main_header", "type", "literal", "numeric_literal"}; enum ApiTypes { API_DRIVER = 0, @@ -294,10 +296,10 @@ struct cuda2hipMap { cuda2hipRename["CUfunc_cache_enum"] = {"hipFuncCache", CONV_TYPE, API_DRIVER}; cuda2hipRename["CUfunc_cache"] = {"hipFuncCache", CONV_TYPE, API_DRIVER}; - cuda2hipRename["CU_FUNC_CACHE_PREFER_NONE"] = {"hipFuncCachePreferNone", CONV_DEV, API_DRIVER}; - cuda2hipRename["CU_FUNC_CACHE_PREFER_SHARED"] = {"hipFuncCachePreferShared", CONV_DEV, API_DRIVER}; - cuda2hipRename["CU_FUNC_CACHE_PREFER_L1"] = {"hipFuncCachePreferL1", CONV_DEV, API_DRIVER}; - cuda2hipRename["CU_FUNC_CACHE_PREFER_EQUAL"] = {"hipFuncCachePreferEqual", CONV_DEV, API_DRIVER}; + cuda2hipRename["CU_FUNC_CACHE_PREFER_NONE"] = {"hipFuncCachePreferNone", CONV_CACHE, API_DRIVER}; + cuda2hipRename["CU_FUNC_CACHE_PREFER_SHARED"] = {"hipFuncCachePreferShared", CONV_CACHE, API_DRIVER}; + cuda2hipRename["CU_FUNC_CACHE_PREFER_L1"] = {"hipFuncCachePreferL1", CONV_CACHE, API_DRIVER}; + cuda2hipRename["CU_FUNC_CACHE_PREFER_EQUAL"] = {"hipFuncCachePreferEqual", CONV_CACHE, API_DRIVER}; cuda2hipRename["CUsharedconfig_enum"] = {"hipSharedMemConfig", CONV_TYPE, API_DRIVER}; cuda2hipRename["CUsharedconfig"] = {"hipSharedMemConfig", CONV_TYPE, API_DRIVER}; @@ -308,41 +310,47 @@ struct cuda2hipMap { cuda2hipRename["CUcontext"] = {"hipCtx_t", CONV_TYPE, API_DRIVER}; cuda2hipRename["CUmodule"] = {"hipModule_t", CONV_TYPE, API_DRIVER}; cuda2hipRename["CUevent"] = {"hipEvent_t", CONV_TYPE, API_DRIVER}; + // Event Flags + cuda2hipRename["CU_EVENT_DEFAULT"] = {"hipEventDefault", CONV_EVENT, API_DRIVER}; + cuda2hipRename["CU_EVENT_BLOCKING_SYNC"] = {"hipEventBlockingSync", CONV_EVENT, API_DRIVER}; + cuda2hipRename["CU_EVENT_DISABLE_TIMING"] = {"hipEventDisableTiming", CONV_EVENT, API_DRIVER}; + cuda2hipRename["CU_EVENT_INTERPROCESS"] = {"hipEventInterprocess", CONV_EVENT, API_DRIVER}; + cuda2hipRename["CUstream"] = {"hipStream_t", CONV_TYPE, API_DRIVER}; // Context - cuda2hipRename["cuCtxCreate_v2"] = {"hipCtxCreate", CONV_TYPE, API_DRIVER}; - cuda2hipRename["cuCtxDestroy_v2"] = {"hipCtxDestroy", CONV_TYPE, API_DRIVER}; - cuda2hipRename["cuCtxPopCurrent_v2"] = {"hipCtxPopCurrent", CONV_DEV, API_DRIVER}; - cuda2hipRename["cuCtxPushCurrent_v2"] = {"hipCtxPushCurrent", CONV_DEV, API_DRIVER}; - cuda2hipRename["cuCtxSetCurrent"] = {"hipCtxSetCurrent", CONV_DEV, API_DRIVER}; - cuda2hipRename["cuCtxGetCurrent"] = {"hipCtxGetCurrent", CONV_DEV, API_DRIVER}; - cuda2hipRename["cuCtxGetDevice"] = {"hipCtxGetDevice", CONV_DEV, API_DRIVER}; - cuda2hipRename["cuCtxGetApiVersion"] = {"hipCtxGetApiVersion", CONV_DEV, API_DRIVER}; - cuda2hipRename["cuCtxGetCacheConfig"] = {"hipCtxGetCacheConfig", CONV_DEV, API_DRIVER}; - cuda2hipRename["cuCtxSetCacheConfig"] = {"hipCtxSetCacheConfig", CONV_DEV, API_DRIVER}; - cuda2hipRename["cuCtxSetSharedMemConfig"] = {"hipCtxSetSharedMemConfig", CONV_DEV, API_DRIVER}; - cuda2hipRename["cuCtxGetSharedMemConfig"] = {"hipCtxGetSharedMemConfig", CONV_DEV, API_DRIVER}; - cuda2hipRename["cuCtxSynchronize"] = {"hipCtxSynchronize", CONV_DEV, API_DRIVER}; - cuda2hipRename["cuCtxGetFlags"] = {"hipCtxGetFlags", CONV_DEV, API_DRIVER}; - cuda2hipRename["cuCtxEnablePeerAccess"] = {"hipCtxEnablePeerAccess", CONV_DEV, API_DRIVER}; - cuda2hipRename["cuCtxDisablePeerAccess"] = {"hipCtxDisablePeerAccess", CONV_DEV, API_DRIVER}; + cuda2hipRename["cuCtxCreate_v2"] = {"hipCtxCreate", CONV_CONTEXT, API_DRIVER}; + cuda2hipRename["cuCtxDestroy_v2"] = {"hipCtxDestroy", CONV_CONTEXT, API_DRIVER}; + cuda2hipRename["cuCtxPopCurrent_v2"] = {"hipCtxPopCurrent", CONV_CONTEXT, API_DRIVER}; + cuda2hipRename["cuCtxPushCurrent_v2"] = {"hipCtxPushCurrent", CONV_CONTEXT, API_DRIVER}; + cuda2hipRename["cuCtxSetCurrent"] = {"hipCtxSetCurrent", CONV_CONTEXT, API_DRIVER}; + cuda2hipRename["cuCtxGetCurrent"] = {"hipCtxGetCurrent", CONV_CONTEXT, API_DRIVER}; + cuda2hipRename["cuCtxGetDevice"] = {"hipCtxGetDevice", CONV_CONTEXT, API_DRIVER}; + cuda2hipRename["cuCtxGetApiVersion"] = {"hipCtxGetApiVersion", CONV_CONTEXT, API_DRIVER}; + cuda2hipRename["cuCtxGetCacheConfig"] = {"hipCtxGetCacheConfig", CONV_CONTEXT, API_DRIVER}; + cuda2hipRename["cuCtxSetCacheConfig"] = {"hipCtxSetCacheConfig", CONV_CONTEXT, API_DRIVER}; + cuda2hipRename["cuCtxSetSharedMemConfig"] = {"hipCtxSetSharedMemConfig", CONV_CONTEXT, API_DRIVER}; + cuda2hipRename["cuCtxGetSharedMemConfig"] = {"hipCtxGetSharedMemConfig", CONV_CONTEXT, API_DRIVER}; + cuda2hipRename["cuCtxSynchronize"] = {"hipCtxSynchronize", CONV_CONTEXT, API_DRIVER}; + cuda2hipRename["cuCtxGetFlags"] = {"hipCtxGetFlags", CONV_CONTEXT, API_DRIVER}; + cuda2hipRename["cuCtxEnablePeerAccess"] = {"hipCtxEnablePeerAccess", CONV_CONTEXT, API_DRIVER}; + cuda2hipRename["cuCtxDisablePeerAccess"] = {"hipCtxDisablePeerAccess", CONV_CONTEXT, API_DRIVER}; // unsupported yet by HIP - // cuda2hipRename["cuCtxSetLimit"] = {"hipCtxSetLimit", CONV_DEV, API_DRIVER}; - // cuda2hipRename["cuCtxGetLimit"] = {"hipCtxGetLimit", CONV_DEV, API_DRIVER}; + // cuda2hipRename["cuCtxSetLimit"] = {"hipCtxSetLimit", CONV_CONTEXT, API_DRIVER}; + // cuda2hipRename["cuCtxGetLimit"] = {"hipCtxGetLimit", CONV_CONTEXT, API_DRIVER}; // Device - cuda2hipRename["cuDeviceGet"] = {"hipGetDevice", CONV_TYPE, API_DRIVER}; - cuda2hipRename["cuDeviceGetName"] = {"hipDeviceGetName", CONV_TYPE, API_DRIVER}; - cuda2hipRename["cuDeviceGetCount"] = {"hipGetDeviceCount", CONV_TYPE, API_DRIVER}; - cuda2hipRename["cuDeviceGetAttribute"] = {"hipDeviceGetAttribute", CONV_TYPE, API_DRIVER}; - cuda2hipRename["cuDeviceGetProperties"] = {"hipGetDeviceProperties", CONV_TYPE, API_DRIVER}; - cuda2hipRename["cuDeviceGetPCIBusId"] = {"hipDeviceGetPCIBusId", CONV_TYPE, API_DRIVER}; + cuda2hipRename["cuDeviceGet"] = {"hipGetDevice", CONV_DEV, API_DRIVER}; + cuda2hipRename["cuDeviceGetName"] = {"hipDeviceGetName", CONV_DEV, API_DRIVER}; + cuda2hipRename["cuDeviceGetCount"] = {"hipGetDeviceCount", CONV_DEV, API_DRIVER}; + cuda2hipRename["cuDeviceGetAttribute"] = {"hipDeviceGetAttribute", CONV_DEV, API_DRIVER}; + cuda2hipRename["cuDeviceGetProperties"] = {"hipGetDeviceProperties", CONV_DEV, API_DRIVER}; + cuda2hipRename["cuDeviceGetPCIBusId"] = {"hipDeviceGetPCIBusId", CONV_DEV, API_DRIVER}; // unsupported yet by HIP // cuda2hipRename["cuDeviceGetByPCIBusId"] = {"hipDeviceGetByPCIBusId", CONV_DEV, API_DRIVER}; - cuda2hipRename["cuDeviceTotalMem_v2"] = {"hipDeviceTotalMem", CONV_TYPE, API_DRIVER}; - cuda2hipRename["cuDeviceComputeCapability"] = {"hipDeviceComputeCapability", CONV_TYPE, API_DRIVER}; - cuda2hipRename["cuDeviceCanAccessPeer"] = {"hipDeviceCanAccessPeer", CONV_TYPE, API_DRIVER}; + cuda2hipRename["cuDeviceTotalMem_v2"] = {"hipDeviceTotalMem", CONV_DEV, API_DRIVER}; + cuda2hipRename["cuDeviceComputeCapability"] = {"hipDeviceComputeCapability", CONV_DEV, API_DRIVER}; + cuda2hipRename["cuDeviceCanAccessPeer"] = {"hipDeviceCanAccessPeer", CONV_DEV, API_DRIVER}; /////////////////////////////// CUDA RT API /////////////////////////////// // Error API @@ -516,18 +524,18 @@ struct cuda2hipMap { //cuda2hipRename["cudaDeviceMask"] = {"hipDeviceMask", CONV_DEV, API_RUNTIME}; // Cache config - cuda2hipRename["cudaDeviceSetCacheConfig"] = {"hipDeviceSetCacheConfig", CONV_DEV, API_RUNTIME}; + cuda2hipRename["cudaDeviceSetCacheConfig"] = {"hipDeviceSetCacheConfig", CONV_CACHE, API_RUNTIME}; // translate deprecated - cuda2hipRename["cudaThreadSetCacheConfig"] = {"hipDeviceSetCacheConfig", CONV_DEV, API_RUNTIME}; - cuda2hipRename["cudaDeviceGetCacheConfig"] = {"hipDeviceGetCacheConfig", CONV_DEV, API_RUNTIME}; + cuda2hipRename["cudaThreadSetCacheConfig"] = {"hipDeviceSetCacheConfig", CONV_CACHE, API_RUNTIME}; + cuda2hipRename["cudaDeviceGetCacheConfig"] = {"hipDeviceGetCacheConfig", CONV_CACHE, API_RUNTIME}; // translate deprecated - cuda2hipRename["cudaThreadGetCacheConfig"] = {"hipDeviceGetCacheConfig", CONV_DEV, API_RUNTIME}; - cuda2hipRename["cudaFuncCache"] = {"hipFuncCache", CONV_DEV, API_RUNTIME}; - cuda2hipRename["cudaFuncCachePreferNone"] = {"hipFuncCachePreferNone", CONV_DEV, API_RUNTIME}; - cuda2hipRename["cudaFuncCachePreferShared"] = {"hipFuncCachePreferShared", CONV_DEV, API_RUNTIME}; - cuda2hipRename["cudaFuncCachePreferL1"] = {"hipFuncCachePreferL1", CONV_DEV, API_RUNTIME}; - cuda2hipRename["cudaFuncCachePreferEqual"] = {"hipFuncCachePreferEqual", CONV_DEV, API_RUNTIME}; - cuda2hipRename["cudaFuncSetCacheConfig"] = {"hipFuncSetCacheConfig", CONV_DEV, API_RUNTIME}; + cuda2hipRename["cudaThreadGetCacheConfig"] = {"hipDeviceGetCacheConfig", CONV_CACHE, API_RUNTIME}; + cuda2hipRename["cudaFuncCache"] = {"hipFuncCache", CONV_CACHE, API_RUNTIME}; + cuda2hipRename["cudaFuncCachePreferNone"] = {"hipFuncCachePreferNone", CONV_CACHE, API_RUNTIME}; + cuda2hipRename["cudaFuncCachePreferShared"] = {"hipFuncCachePreferShared", CONV_CACHE, API_RUNTIME}; + cuda2hipRename["cudaFuncCachePreferL1"] = {"hipFuncCachePreferL1", CONV_CACHE, API_RUNTIME}; + cuda2hipRename["cudaFuncCachePreferEqual"] = {"hipFuncCachePreferEqual", CONV_CACHE, API_RUNTIME}; + cuda2hipRename["cudaFuncSetCacheConfig"] = {"hipFuncSetCacheConfig", CONV_CACHE, API_RUNTIME}; // Driver/Runtime cuda2hipRename["cudaDriverGetVersion"] = {"hipDriverGetVersion", CONV_DEV, API_RUNTIME}; From 48e8b83e20917f113d4eadd033fe97912697eb80 Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Tue, 6 Sep 2016 15:10:07 +0530 Subject: [PATCH 06/33] hipgenisa.sh: Look for ROCM_PATH in env or use default Change-Id: I31733cb059c82d3315376f8f65a280c5737cd2a8 --- bin/hipgenisa.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/bin/hipgenisa.sh b/bin/hipgenisa.sh index f52b1c5f4e..5c6b786fcc 100755 --- a/bin/hipgenisa.sh +++ b/bin/hipgenisa.sh @@ -5,11 +5,11 @@ then exit fi -ROCM_PATH=$1 -GEN_ISA=$2 -FILE_NAMES=$3 -OUT=$4 -OUTPUT_FILE=$5 +: ${ROCM_PATH:=/opt/rocm} +GEN_ISA=$1 +FILE_NAMES=$2 +OUT=$3 +OUTPUT_FILE=$4 TARGET="" if [ ${GEN_ISA:0:12} = "--target-isa" ] then From 23068b4ac51cd8d8edd77b645619c2349db62c30 Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Tue, 6 Sep 2016 15:11:17 +0530 Subject: [PATCH 07/33] hipcc: Pass ROCM_PATH via env to hipgenisa.sh Change-Id: Ibc2dd8ce65645fd946f1a749effcd9ca0828c453 --- bin/hipcc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/bin/hipcc b/bin/hipcc index c3d46bfcce..cb9ac81118 100755 --- a/bin/hipcc +++ b/bin/hipcc @@ -181,8 +181,7 @@ if ($verbose & 0x4) { # Handle code object generation my $ISACMD=""; if($HIP_PLATFORM eq "hcc"){ - $ISACMD .= "$HIP_PATH/bin/hipgenisa.sh "; - $ISACMD .= $ROCM_PATH; + $ISACMD .= "set ROCM_PATH=$ROCM_PATH && $HIP_PATH/bin/hipgenisa.sh "; if($ARGV[0] eq "--genco"){ foreach $isaarg (@ARGV[1..$#ARGV]){ $ISACMD .= " "; From 2bb7ad5eba163c563dda8e53e478e729f2204db7 Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Tue, 6 Sep 2016 15:23:56 +0530 Subject: [PATCH 08/33] hipgenisa.sh: exit if no command-line args specified Change-Id: Ic1b532927fdbb5b9dfe6b2dd144ffad1ee2e6b16 --- bin/hipgenisa.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/bin/hipgenisa.sh b/bin/hipgenisa.sh index 5c6b786fcc..4833750e53 100755 --- a/bin/hipgenisa.sh +++ b/bin/hipgenisa.sh @@ -1,8 +1,7 @@ #!/bin/bash -if [ $1 = " " ] -then -exit +if [ $# = 0 ]; then + >&2 echo "$(basename $0): Invalid number of arguments" && exit 1 fi : ${ROCM_PATH:=/opt/rocm} From 2d9cf60127f13c1c8fef99f7b75603aaad00b97b Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Tue, 6 Sep 2016 15:45:11 +0530 Subject: [PATCH 09/33] hipgenisa.sh: Honors ROCM_TARGET env or uses default Change-Id: Idabf77ff1610ba4adbc78cfa69eb6e87d7e94ee9 --- bin/hipgenisa.sh | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/bin/hipgenisa.sh b/bin/hipgenisa.sh index 4833750e53..ffad52393c 100755 --- a/bin/hipgenisa.sh +++ b/bin/hipgenisa.sh @@ -5,15 +5,12 @@ if [ $# = 0 ]; then fi : ${ROCM_PATH:=/opt/rocm} +: ${ROCM_TARGET:=fiji} + GEN_ISA=$1 FILE_NAMES=$2 OUT=$3 OUTPUT_FILE=$4 -TARGET="" -if [ ${GEN_ISA:0:12} = "--target-isa" ] -then - TARGET=${GEN_ISA:13:12} -fi SOURCE="${BASH_SOURCE[0]}" HIP_PATH="$( command cd -P "$( dirname "$SOURCE" )/.." && pwd )" @@ -27,7 +24,7 @@ int main(){} " >> $FILE_NAMES.kernel.tmp.cpp $HIP_PATH/bin/hipcc $FILE_NAMES.kernel.tmp.cpp -o $hipgenisa_dir/a.out mv dump.* $hipgenisa_dir -$ROCM_PATH/hcc-lc/bin/llvm-mc -arch=amdgcn -mcpu=$TARGET -filetype=obj $hipgenisa_dir/dump.isa -o $hipgenisa_dir/dump.o +$ROCM_PATH/hcc-lc/bin/llvm-mc -arch=amdgcn -mcpu=$ROCM_TARGET -filetype=obj $hipgenisa_dir/dump.isa -o $hipgenisa_dir/dump.o $ROCM_PATH/llvm/bin/clang -target amdgcn--amdhsa $hipgenisa_dir/dump.o -o $hipgenisa_dir/dump.co map_sym="" kernels=$(objdump -t $hipgenisa_dir/dump.co | grep grid_launch_parm | sed 's/ \+/ /g; s/\t/ /g' | cut -d" " -f6) From 380a6c412b5c000017d855a4fc087cffad8b4da7 Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Tue, 6 Sep 2016 15:46:18 +0530 Subject: [PATCH 10/33] hipcc: Pass ROCM_TARGET as env to hipgenisa.sh Change-Id: Ib55b6366ae392ca10266ffae38722776ba91f274 --- bin/hipcc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/hipcc b/bin/hipcc index cb9ac81118..790238b6c5 100755 --- a/bin/hipcc +++ b/bin/hipcc @@ -181,7 +181,7 @@ if ($verbose & 0x4) { # Handle code object generation my $ISACMD=""; if($HIP_PLATFORM eq "hcc"){ - $ISACMD .= "set ROCM_PATH=$ROCM_PATH && $HIP_PATH/bin/hipgenisa.sh "; + $ISACMD .= "set ROCM_PATH=$ROCM_PATH && set ROCM_TARGET=$ROCM_TARGET && $HIP_PATH/bin/hipgenisa.sh "; if($ARGV[0] eq "--genco"){ foreach $isaarg (@ARGV[1..$#ARGV]){ $ISACMD .= " "; From 2c69d602c499faa99c15dbcabf39f4e08dcb0a3e Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Tue, 6 Sep 2016 17:45:59 +0530 Subject: [PATCH 11/33] hipgenisa.sh: Refactor to handle multiple source files Change-Id: Icc212c43b3bf8c33fb856cd6fc5be7bbebb38d42 --- bin/hipgenisa.sh | 52 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 14 deletions(-) diff --git a/bin/hipgenisa.sh b/bin/hipgenisa.sh index ffad52393c..dcedfa174d 100755 --- a/bin/hipgenisa.sh +++ b/bin/hipgenisa.sh @@ -1,38 +1,62 @@ #!/bin/bash +function die { + echo "${1-Died}." >&2 + exit 1 +} + if [ $# = 0 ]; then - >&2 echo "$(basename $0): Invalid number of arguments" && exit 1 + die "$(basename $0): Invalid number of arguments" fi : ${ROCM_PATH:=/opt/rocm} : ${ROCM_TARGET:=fiji} -GEN_ISA=$1 -FILE_NAMES=$2 -OUT=$3 -OUTPUT_FILE=$4 +INPUT_FILES="" +OUTPUT_FILE="" +while [[ $# -gt 1 ]]; do + key="$1" + case $key in + -o) + OUTPUT_FILE="$2" + shift + ;; + *) + INPUT_FILES="$INPUT_FILES $key" + esac + shift +done + +[ INPUT_FILES != "" ] || die "No source files specified" +[ OUTPUT_FILE != "" ] || die "Output file not specified" SOURCE="${BASH_SOURCE[0]}" HIP_PATH="$( command cd -P "$( dirname "$SOURCE" )/.." && pwd )" export KMDUMPISA=1 export KMDUMPLLVM=1 -hipgenisa_dir=`mktemp -d --tmpdir=/tmp hip.XXXXXXXX`; -sed 's/extern \+"C" \+//g' $FILE_NAMES > $FILE_NAMES.kernel.tmp.cpp -echo " -int main(){} -" >> $FILE_NAMES.kernel.tmp.cpp -$HIP_PATH/bin/hipcc $FILE_NAMES.kernel.tmp.cpp -o $hipgenisa_dir/a.out +hipgenisa_dir=`mktemp -d --tmpdir=/tmp hip.XXXXXXXX` +hipgenisa_main=`mktemp src.XXXXXXXX.cpp` +hipgenisa_files="$hipgenisa_main" + +for inputfile in $INPUT_FILES; do + sed 's/extern \+"C" \+//g' $inputfile > $inputfile.kernel.tmp.cpp + hipgenisa_files="$hipgenisa_files $inputfile.kernel.tmp.cpp" +done +printf "\nint main(){}\n" >> $hipgenisa_main + +$HIP_PATH/bin/hipcc $hipgenisa_files -o $hipgenisa_dir/a.out mv dump.* $hipgenisa_dir $ROCM_PATH/hcc-lc/bin/llvm-mc -arch=amdgcn -mcpu=$ROCM_TARGET -filetype=obj $hipgenisa_dir/dump.isa -o $hipgenisa_dir/dump.o $ROCM_PATH/llvm/bin/clang -target amdgcn--amdhsa $hipgenisa_dir/dump.o -o $hipgenisa_dir/dump.co + map_sym="" kernels=$(objdump -t $hipgenisa_dir/dump.co | grep grid_launch_parm | sed 's/ \+/ /g; s/\t/ /g' | cut -d" " -f6) -for mangled_sym in $kernels -do +for mangled_sym in $kernels; do real_sym=$(c++filt $(c++filt _$mangled_sym | cut -d: -f3 | sed 's/_functor//g') | cut -d\( -f1) map_sym="--redefine-sym $mangled_sym=$real_sym $map_sym" done objcopy -F elf64-little $map_sym $hipgenisa_dir/dump.co $OUTPUT_FILE -rm $FILE_NAMES.kernel.tmp.cpp + +rm $hipgenisa_files rm -r $hipgenisa_dir From 9b93847628c299b74f5539b3043ab097d13c59a0 Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Tue, 6 Sep 2016 17:47:10 +0530 Subject: [PATCH 12/33] module_api/Makefile: Update as per newer hipgenisa.sh Change-Id: I479c74eae00d7521434f2740ce5930e326ea05cf --- samples/0_Intro/module_api/Makefile | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/samples/0_Intro/module_api/Makefile b/samples/0_Intro/module_api/Makefile index 81e876ddbf..f2c0ce555a 100644 --- a/samples/0_Intro/module_api/Makefile +++ b/samples/0_Intro/module_api/Makefile @@ -5,17 +5,13 @@ endif HIPCC=$(HIP_PATH)/bin/hipcc HIP_PLATFORM=$(shell $(HIP_PATH)/bin/hipconfig --compiler) -ifeq (${HIP_PLATFORM}, hcc) - GENCO_FLAGS=--target-isa=fiji -endif - all: vcpy_kernel.code runKernel.hip.out runKernel.hip.out: runKernel.cpp $(HIPCC) $(HIPCC_FLAGS) $< -o $@ vcpy_kernel.code: vcpy_kernel.cpp - $(HIPCC) --genco $(GENCO_FLAGS) $< -o $@ + $(HIPCC) --genco $(GENCO_FLAGS) $^ -o $@ clean: rm -f *.code *.out From 27b549453760ce1bd7d0289faa544c4ede7a913e Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Wed, 7 Sep 2016 10:23:02 +0530 Subject: [PATCH 13/33] Rename hipgenisa.sh -> hccgenco.sh Change-Id: Icfdbb35acd7e84881bd1ab7ef3c85a3109902c6a --- bin/{hipgenisa.sh => hccgenco.sh} | 0 bin/hipcc | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename bin/{hipgenisa.sh => hccgenco.sh} (100%) diff --git a/bin/hipgenisa.sh b/bin/hccgenco.sh similarity index 100% rename from bin/hipgenisa.sh rename to bin/hccgenco.sh diff --git a/bin/hipcc b/bin/hipcc index 790238b6c5..b1f2bab552 100755 --- a/bin/hipcc +++ b/bin/hipcc @@ -181,7 +181,7 @@ if ($verbose & 0x4) { # Handle code object generation my $ISACMD=""; if($HIP_PLATFORM eq "hcc"){ - $ISACMD .= "set ROCM_PATH=$ROCM_PATH && set ROCM_TARGET=$ROCM_TARGET && $HIP_PATH/bin/hipgenisa.sh "; + $ISACMD .= "set ROCM_PATH=$ROCM_PATH && set ROCM_TARGET=$ROCM_TARGET && $HIP_PATH/bin/hccgenco.sh "; if($ARGV[0] eq "--genco"){ foreach $isaarg (@ARGV[1..$#ARGV]){ $ISACMD .= " "; From c4735224df536e6db553e15cb18c318a6d6e4e49 Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Wed, 7 Sep 2016 12:38:25 +0530 Subject: [PATCH 14/33] FindHIP: don't clobber CXX linker and fix some typos Change-Id: Ie92e9d7c212491ce54f0709d291fafaeb1c479de --- cmake/FindHIP.cmake | 10 ++++------ cmake/FindHIP/run_hipcc.cmake | 2 +- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/cmake/FindHIP.cmake b/cmake/FindHIP.cmake index 1367a12630..2541cf6ec6 100644 --- a/cmake/FindHIP.cmake +++ b/cmake/FindHIP.cmake @@ -303,7 +303,7 @@ macro(HIP_INCLUDE_HIPCC_DEPENDENCIES dependency_file) endforeach() else() # No dependencies, so regenerate the file - set(CUDA_NVCC_DEPEND_REGENERATE TRUE) + set(HIP_HIPCC_DEPEND_REGENERATE TRUE) endif() # Regenerate the dependency file if needed @@ -416,7 +416,7 @@ macro(HIP_PREPARE_TARGET_COMMANDS _target _format _generated_files) # Create up the comment string file(RELATIVE_PATH generated_file_relative_path "${CMAKE_BINARY_DIR}" "${generated_file}") - set(hip_build_comment_string "Building HIPCC (${cuda_build_type}) object ${generated_file_relative_path}") + set(hip_build_comment_string "Building HIPCC (using ${HIP_PLATFORM}) object ${generated_file_relative_path}") # Build the generated file and dependency file add_custom_command( @@ -453,11 +453,9 @@ macro(HIP_ADD_EXECUTABLE hip_target) # Separate the sources from the options HIP_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _hipcc_options _hcc_options _nvcc_options ${ARGN}) HIP_PREPARE_TARGET_COMMANDS(${hip_target} OBJ _generated_files ${_sources} HIPCC_OPTIONS ${_hipcc_options} HCC_OPTIONS ${_hcc_options} NVCC_OPTIONS ${_nvcc_options}) - set(HIP_CMAKE_CXX_LINK_EXECUTABLE ${CMAKE_CXX_LINK_EXECUTABLE}) - set(CMAKE_CXX_LINK_EXECUTABLE "${HIP_HIPCC_EXECUTABLE} -o ") + set(CMAKE_HIP_LINK_EXECUTABLE "${HIP_HIPCC_EXECUTABLE} -o ") add_executable(${hip_target} ${_cmake_options} ${_generated_files} ${_sources}) - set_target_properties(${hip_target} PROPERTIES LINKER_LANGUAGE ${HIP_C_OR_CXX}) - #set(CMAKE_CXX_COMPILER ${ORIGINAL_CMAKE_CXX_COMPILER}) + set_target_properties(${hip_target} PROPERTIES LINKER_LANGUAGE HIP) endmacro() # vim: ts=4:sw=4:expandtab:smartindent diff --git a/cmake/FindHIP/run_hipcc.cmake b/cmake/FindHIP/run_hipcc.cmake index 8af1c72700..52ad57f532 100644 --- a/cmake/FindHIP/run_hipcc.cmake +++ b/cmake/FindHIP/run_hipcc.cmake @@ -22,7 +22,7 @@ endif() # Set these up as variables to make reading the generated file easier set(HIP_HIPCC_EXECUTABLE "@HIP_HIPCC_EXECUTABLE@") # path -set(HIP_HOST_COMPILER "@CUDA_HOST_COMPILER@") # path +set(HIP_HOST_COMPILER "@HIP_HOST_COMPILER@") # path set(HIP_PLATFORM "@HIP_PLATFORM@") #string set(CMAKE_COMMAND "@CMAKE_COMMAND@") # path set(HIP_run_make2cmake "@HIP_run_make2cmake@") # path From 87d7e956936765806c13bc9efc7bb2520ba494da Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Wed, 7 Sep 2016 19:22:38 +0530 Subject: [PATCH 15/33] CMakeLists.txt: Change default to HIP static library Change-Id: I9b233c6873d0a69d9aedeb1f9f3773e4258c6f44 --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e418a184ae..1aa5d3f342 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -116,7 +116,7 @@ endif() # Set if we need to build shared or static library if(NOT DEFINED HIP_LIB_TYPE) if(NOT DEFINED ENV{HIP_LIB_TYPE}) - set(HIP_LIB_TYPE 0) + set(HIP_LIB_TYPE 1) else() set(HIP_LIB_TYPE $ENV{HIP_LIB_TYPE}) endif() From 2c2f6ab078e53176356c95313afbe294073e69eb Mon Sep 17 00:00:00 2001 From: Aditya Atluri Date: Wed, 7 Sep 2016 12:57:18 -0500 Subject: [PATCH 16/33] Fixed group and private memory size to AQL Change-Id: I6e721f63fe5697b7b90a7d25add9aa024d9dc429 --- include/hcc_detail/hip_hcc.h | 3 ++- src/hip_hcc.cpp | 7 ++++--- src/hip_module.cpp | 17 +++++++++++++++-- 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/include/hcc_detail/hip_hcc.h b/include/hcc_detail/hip_hcc.h index 8b9f1db97b..e10b68695e 100644 --- a/include/hcc_detail/hip_hcc.h +++ b/include/hcc_detail/hip_hcc.h @@ -460,7 +460,8 @@ typedef uint64_t SeqNum_t ; void launchModuleKernel(hc::accelerator_view av, hsa_signal_t signal, uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ, uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ, - uint32_t sharedMemBytes, void *kernarg, size_t kernSize, uint64_t kernel); + uint32_t groupSegmentSize, uint32_t sharedMemBytes, + void *kernarg, size_t kernSize, uint64_t kernel); // Non-threadsafe accessors - must be protected by high-level stream lock with accessor passed to function. SIGSEQNUM lastCopySeqId (LockedAccessor_StreamCrit_t &crit) const { return crit->_last_copy_signal ? crit->_last_copy_signal->_sigId : 0; }; diff --git a/src/hip_hcc.cpp b/src/hip_hcc.cpp index 1954b31c70..97911d08eb 100644 --- a/src/hip_hcc.cpp +++ b/src/hip_hcc.cpp @@ -522,7 +522,8 @@ void ihipStream_t::launchModuleKernel( uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ, - uint32_t sharedMemBytes, + uint32_t groupSegmentSize, + uint32_t privateSegmentSize, void *kernarg, size_t kernSize, uint64_t kernel){ @@ -545,8 +546,8 @@ void ihipStream_t::launchModuleKernel( dispatch_packet->grid_size_x = blockDimX * gridDimX; dispatch_packet->grid_size_y = blockDimY * gridDimY; dispatch_packet->grid_size_z = blockDimZ * gridDimZ; - dispatch_packet->group_segment_size = 0; - dispatch_packet->private_segment_size = sharedMemBytes; + dispatch_packet->group_segment_size = groupSegmentSize; + dispatch_packet->private_segment_size = privateSegmentSize; dispatch_packet->kernarg_address = kern; dispatch_packet->kernel_object = kernel; uint16_t header = (HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE) | diff --git a/src/hip_module.cpp b/src/hip_module.cpp index 28c65b6669..640b2bb7c4 100644 --- a/src/hip_module.cpp +++ b/src/hip_module.cpp @@ -252,10 +252,23 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, }else{ return ihipLogStatus(hipErrorInvalidValue); } + + uint32_t groupSegmentSize; + hsa_status_t status = hsa_executable_symbol_get_info(f->kernel_symbol, + HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, + &groupSegmentSize); + + uint32_t privateSegmentSize; + status = hsa_executable_symbol_get_info(f->kernel_symbol, + HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, + &privateSegmentSize); + + privateSegmentSize += sharedMemBytes; + + /* Kernel argument preparation. */ - hsa_status_t status; grid_launch_parm lp; hStream = ihipPreLaunchKernel(hStream, 0, 0, &lp); @@ -270,7 +283,7 @@ Kernel argument preparation. Launch AQL packet */ hStream->launchModuleKernel(*lp.av, signal, blockDimX, blockDimY, blockDimZ, - gridDimX, gridDimY, gridDimZ, sharedMemBytes, config[1], kernSize, f->kernel); + gridDimX, gridDimY, gridDimZ, groupSegmentSize, privateSegmentSize, config[1], kernSize, f->kernel); /* Wait for signal From 48b1f7a6ea863936ebaa88eec531e322577880ce Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Fri, 2 Sep 2016 15:49:22 -0500 Subject: [PATCH 17/33] refactor ihipPreLaunchKernel phase#1 - Fix calls to HIP_INIT_API to pass all function arguments. - Change ihipFunction to follow coding convention: - leading underscore for member fields, - camelCase for member fields. - move kernel print function inside ihipPreLaunchKernel. - add HIP_TRACE_API_COLOR, control color of messages. - add ihipLogStatus wrapper to hipDeviceSynchronize() Change-Id: I20bbb644da213f821404648945197254e3648fc9 --- include/hcc_detail/hip_hcc.h | 38 ++++++-- include/hcc_detail/hip_runtime.h | 12 +-- src/hip_device.cpp | 4 +- src/hip_hcc.cpp | 85 ++++++++++++++++-- src/hip_module.cpp | 86 ++++++++++--------- tests/src/hipLaunchParm.cpp | 6 +- .../runtimeApi/stream/hipStreamWaitEvent.cpp | 4 +- 7 files changed, 170 insertions(+), 65 deletions(-) diff --git a/include/hcc_detail/hip_hcc.h b/include/hcc_detail/hip_hcc.h index e10b68695e..1a4e9780aa 100644 --- a/include/hcc_detail/hip_hcc.h +++ b/include/hcc_detail/hip_hcc.h @@ -87,7 +87,8 @@ class ihipCtx_t; #define KCYN "\x1B[36m" #define KWHT "\x1B[37m" -#define API_COLOR KGRN +extern const char *API_COLOR; +extern const char *API_COLOR_END; // If set, thread-safety is enforced on all stream functions. @@ -149,7 +150,7 @@ class ihipCtx_t; if (HIP_ATP_MARKER || (COMPILE_HIP_DB && HIP_TRACE_API)) {\ std::string s = std::string(__func__) + " (" + ToString(__VA_ARGS__) + ')';\ if (COMPILE_HIP_DB && HIP_TRACE_API) {\ - fprintf (stderr, API_COLOR "<>\n" KNRM, (localHipStatus == 0) ? API_COLOR:KRED, __func__, localHipStatus, ihipErrorString(localHipStatus));\ + fprintf(stderr, " %ship-api: %-30s ret=%2d (%s)>>%s\n", (localHipStatus == 0) ? API_COLOR:KRED, __func__, localHipStatus, ihipErrorString(localHipStatus), API_COLOR_END);\ }\ localHipStatus;\ }) @@ -365,8 +366,23 @@ class ihipModule_t{ class ihipFunction_t{ public: - hsa_executable_symbol_t kernel_symbol; - uint64_t kernel; + ihipFunction_t(const char *name) { + size_t nameSz = strlen(name); + char *kernelName = (char*)malloc(nameSz); + strncpy(kernelName, name, nameSz); + _kernelName = kernelName; + }; + + ~ihipFunction_t() { + if (_kernelName) { + free((void*)_kernelName); + _kernelName = NULL; + }; + }; +public: + const char *_kernelName; + hsa_executable_symbol_t _kernelSymbol; + uint64_t _kernel; }; @@ -719,6 +735,18 @@ inline std::ostream & operator<<(std::ostream& os, const dim3& s) return os; } +inline std::ostream & operator<<(std::ostream& os, const gl_dim3& s) +{ + os << '{'; + os << s.x; + os << ','; + os << s.y; + os << ','; + os << s.z; + os << '}'; + return os; +} + // Stream printf functions: inline std::ostream& operator<<(std::ostream& os, const hipEvent_t& e) { diff --git a/include/hcc_detail/hip_runtime.h b/include/hcc_detail/hip_runtime.h index f8166e7897..4e67cb7292 100644 --- a/include/hcc_detail/hip_runtime.h +++ b/include/hcc_detail/hip_runtime.h @@ -621,25 +621,19 @@ __device__ static inline void* memset(void* ptr, uint8_t val, size_t size) #define HIP_KERNEL_NAME(...) __VA_ARGS__ #ifdef __HCC_CPP__ -extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, dim3 block, grid_launch_parm *lp); +extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, dim3 block, grid_launch_parm *lp, const char *kernelNameStr); extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, size_t block, grid_launch_parm *lp); extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, dim3 block, grid_launch_parm *lp); extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, size_t block, grid_launch_parm *lp); -extern void ihipPrintKernelLaunch(const char *kernelName, const grid_launch_parm *lp, const hipStream_t stream); extern void ihipPostLaunchKernel(hipStream_t stream, grid_launch_parm &lp); -// TODO - move to common header file. -#define KNRM "\x1B[0m" -#define KGRN "\x1B[32m" +// Due to multiple overloaded versions of ihipPreLaunchKernel, the numBlocks3D and blockDim3D can be either size_t or dim3 types #define hipLaunchKernel(_kernelName, _numBlocks3D, _blockDim3D, _groupMemBytes, _stream, ...) \ do {\ grid_launch_parm lp;\ lp.dynamic_group_mem_bytes = _groupMemBytes; \ - hipStream_t trueStream = (ihipPreLaunchKernel(_stream, _numBlocks3D, _blockDim3D, &lp)); \ - if (HIP_TRACE_API) {\ - ihipPrintKernelLaunch(#_kernelName, &lp, _stream); \ - }\ + hipStream_t trueStream = (ihipPreLaunchKernel(_stream, _numBlocks3D, _blockDim3D, &lp, #_kernelName)); \ _kernelName (lp, ##__VA_ARGS__);\ ihipPostLaunchKernel(trueStream, lp);\ } while(0) diff --git a/src/hip_device.cpp b/src/hip_device.cpp index 72c92ac76f..61221e64eb 100644 --- a/src/hip_device.cpp +++ b/src/hip_device.cpp @@ -161,7 +161,7 @@ hipError_t hipSetDevice(int deviceId) hipError_t hipDeviceSynchronize(void) { HIP_INIT_API(); - return ihipSynchronize(); + return ihipLogStatus(ihipSynchronize()); } @@ -182,7 +182,7 @@ hipError_t hipDeviceReset(void) if (ctx) { // Release ctx resources (streams and memory): - ctx->locked_reset(); + ctx->locked_reset(); } return ihipLogStatus(hipSuccess); diff --git a/src/hip_hcc.cpp b/src/hip_hcc.cpp index 97911d08eb..f4d20021a9 100644 --- a/src/hip_hcc.cpp +++ b/src/hip_hcc.cpp @@ -57,10 +57,14 @@ const int release = 1; #define MEMCPY_H2D_DIRECT_VS_STAGING_COPY_THRESHOLD 65336 #define MEMCPY_H2D_STAGING_VS_PININPLACE_COPY_THRESHOLD 1048576 +const char *API_COLOR = KGRN; +const char *API_COLOR_END = KNRM; + int HIP_LAUNCH_BLOCKING = 0; int HIP_PRINT_ENV = 0; int HIP_TRACE_API= 0; +std::string HIP_TRACE_API_COLOR("green"); int HIP_ATP_MARKER= 0; int HIP_DB= 0; int HIP_STAGING_SIZE = 64; /* size of staging buffers, in KB */ @@ -1123,6 +1127,7 @@ void ihipCtx_t::locked_waitAllStreams() +//--- // Read environment variables. void ihipReadEnv_I(int *var_ptr, const char *var_name1, const char *var_name2, const char *description) { @@ -1133,6 +1138,7 @@ void ihipReadEnv_I(int *var_ptr, const char *var_name1, const char *var_name2, c env = getenv(var_name2); } + // TODO: Refactor this code so it is a separate call rather than being part of ihipReadEnv_I, which should only read integers. // Check if the environment variable is either HIP_VISIBLE_DEVICES or CUDA_LAUNCH_BLOCKING, which // contains a sequence of comma-separated device IDs if (!(strcmp(var_name1,"HIP_VISIBLE_DEVICES") && strcmp(var_name2, "CUDA_VISIBLE_DEVICES")) && env){ @@ -1170,15 +1176,37 @@ void ihipReadEnv_I(int *var_ptr, const char *var_name1, const char *var_name2, c printf ("%-30s = %2d : %s\n", var_name1, *var_ptr, description); } } +} + + +void ihipReadEnv_S(std::string *var_ptr, const char *var_name1, const char *var_name2, const char *description) +{ + char * env = getenv(var_name1); + + // Check second name if first not defined, used to allow HIP_ or CUDA_ env vars. + if ((env == NULL) && strcmp(var_name2, "0")) { + env = getenv(var_name2); + } + if (env) { + *var_ptr = env; + } + if (HIP_PRINT_ENV) { + printf ("%-30s = %s : %s\n", var_name1, var_ptr->c_str(), description); + } } + #if defined (DEBUG) #define READ_ENV_I(_build, _ENV_VAR, _ENV_VAR2, _description) \ if ((_build == release) || (_build == debug) {\ ihipReadEnv_I(&_ENV_VAR, #_ENV_VAR, #_ENV_VAR2, _description);\ }; +#define READ_ENV_S(_build, _ENV_VAR, _ENV_VAR2, _description) \ + if ((_build == release) || (_build == debug) {\ + ihipReadEnv_S(&_ENV_VAR, #_ENV_VAR, #_ENV_VAR2, _description);\ + }; #else @@ -1187,6 +1215,11 @@ void ihipReadEnv_I(int *var_ptr, const char *var_name1, const char *var_name2, c ihipReadEnv_I(&_ENV_VAR, #_ENV_VAR, #_ENV_VAR2, _description);\ }; +#define READ_ENV_S(_build, _ENV_VAR, _ENV_VAR2, _description) \ + if (_build == release) {\ + ihipReadEnv_S(&_ENV_VAR, #_ENV_VAR, #_ENV_VAR2, _description);\ + }; + #endif @@ -1219,6 +1252,7 @@ void ihipInit() } READ_ENV_I(release, HIP_TRACE_API, 0, "Trace each HIP API call. Print function name and return code to stderr as program executes."); + READ_ENV_S(release, HIP_TRACE_API_COLOR, 0, "Color to use for HIP_API. None/Red/Green/Yellow/Blue/Magenta/Cyan/White"); READ_ENV_I(release, HIP_ATP_MARKER, 0, "Add HIP function begin/end to ATP file generated with CodeXL"); READ_ENV_I(release, HIP_STAGING_SIZE, 0, "Size of each staging buffer (in KB)" ); READ_ENV_I(release, HIP_STAGING_BUFFERS, 0, "Number of staging buffers to use in each direction. 0=use hsa_memory_copy."); @@ -1262,6 +1296,31 @@ void ihipInit() fprintf (stderr, "warning: env var HIP_ATP_MARKER=0x%x but COMPILE_HIP_ATP_MARKER=0. (perhaps enable COMPILE_HIP_DB in src code before compiling?)", HIP_ATP_MARKER); } + std::transform(HIP_TRACE_API_COLOR.begin(), HIP_TRACE_API_COLOR.end(), HIP_TRACE_API_COLOR.begin(), ::tolower); + + if (HIP_TRACE_API_COLOR == "none") { + API_COLOR = ""; + API_COLOR_END = ""; + } else if (HIP_TRACE_API_COLOR == "red") { + API_COLOR = KRED; + } else if (HIP_TRACE_API_COLOR == "green") { + API_COLOR = KGRN; + } else if (HIP_TRACE_API_COLOR == "yellow") { + API_COLOR = KYEL; + } else if (HIP_TRACE_API_COLOR == "blue") { + API_COLOR = KBLU; + } else if (HIP_TRACE_API_COLOR == "magenta") { + API_COLOR = KMAG; + } else if (HIP_TRACE_API_COLOR == "cyan") { + API_COLOR = KCYN; + } else if (HIP_TRACE_API_COLOR == "white") { + API_COLOR = KWHT; + } else { + fprintf (stderr, "warning: env var HIP_TRACE_API_COLOR=%s must be None/Red/Green/Yellow/Blue/Magenta/Cyan/White", HIP_TRACE_API_COLOR.c_str()); + }; + + + /* * Build a table of valid compute devices. @@ -1333,7 +1392,7 @@ hipStream_t ihipSyncAndResolveStream(hipStream_t stream) #endif return device->_defaultStream; } else { - // Have to wait for legacy default stream to be empty: + // ALl streams have to wait for legacy default stream to be empty: if (!(stream->_flags & hipStreamNonBlocking)) { tprintf(DB_SYNC, "stream %p wait default stream\n", stream); stream->getCtx()->_defaultStream->locked_wait(); @@ -1345,16 +1404,25 @@ hipStream_t ihipSyncAndResolveStream(hipStream_t stream) void ihipPrintKernelLaunch(const char *kernelName, const grid_launch_parm *lp, const hipStream_t stream) { - std::string streamString = ToString(stream); - fprintf(stderr, KGRN "<grid_dim.x, lp->grid_dim.y, lp->grid_dim.z, lp->group_dim.x, lp->group_dim.y, lp->group_dim.z, - lp->dynamic_group_mem_bytes, streamString.c_str());\ + std::stringstream os; + os << API_COLOR << "<grid_dim + << " groupDim:" << lp->group_dim + << " sharedMem:+" << lp->dynamic_group_mem_bytes + << " " << *stream + << API_COLOR_END << std::endl; + + std::cerr << os.str(); + + //fprintf(stderr, KGRN "<grid_dim.x, lp->grid_dim.y, lp->grid_dim.z, lp->group_dim.x, lp->group_dim.y, lp->group_dim.z, + // lp->dynamic_group_mem_bytes, streamString.c_str()); } // TODO - data-up to data-down: // Called just before a kernel is launched from hipLaunchKernel. // Allows runtime to track some information about the stream. -hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, dim3 block, grid_launch_parm *lp) +hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, dim3 block, grid_launch_parm *lp, const char *kernelNameStr) { HIP_INIT(); stream = ihipSyncAndResolveStream(stream); @@ -1370,6 +1438,11 @@ hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, dim3 block, grid_ auto crit = stream->lockopen_preKernelCommand(); lp->av = &(crit->_av); lp->cf = new hc::completion_future; + + if (HIP_TRACE_API) { + ihipPrintKernelLaunch(kernelNameStr, lp, stream); + } + return (stream); } diff --git a/src/hip_module.cpp b/src/hip_module.cpp index 640b2bb7c4..594ddde2f9 100644 --- a/src/hip_module.cpp +++ b/src/hip_module.cpp @@ -30,7 +30,7 @@ THE SOFTWARE. //TODO Use Pool APIs from HCC to get memory regions. -namespace hipdrv{ +namespace hipdrv { hsa_status_t findSystemRegions(hsa_region_t region, void *data){ hsa_region_segment_t segment_id; @@ -99,7 +99,7 @@ uint64_t ElfSize(const void *emi){ } hipError_t hipModuleLoad(hipModule_t *module, const char *fname){ - HIP_INIT_API(fname); + HIP_INIT_API(module, fname); hipError_t ret = hipSuccess; *module = new ihipModule_t; @@ -187,7 +187,7 @@ hipError_t ihipModuleGetFunction(hipFunction_t *func, hipModule_t hmod, const ch ret = hipErrorInvalidContext; }else{ - *func = new ihipFunction_t; + *func = new ihipFunction_t(name); int deviceId = ctx->getDevice()->_deviceId; ihipDevice_t *currentDevice = ihipGetDevice(deviceId); hsa_agent_t gpuAgent = (hsa_agent_t)currentDevice->_hsaAgent; @@ -199,14 +199,14 @@ hipError_t ihipModuleGetFunction(hipFunction_t *func, hipModule_t hmod, const ch } status = hsa_executable_freeze(hmod->executable, NULL); - status = hsa_executable_get_symbol(hmod->executable, NULL, name, gpuAgent, 0, &(*func)->kernel_symbol); + status = hsa_executable_get_symbol(hmod->executable, NULL, name, gpuAgent, 0, &(*func)->_kernelSymbol); if(status != HSA_STATUS_SUCCESS){ return ihipLogStatus(hipErrorNotFound); } - status = hsa_executable_symbol_get_info((*func)->kernel_symbol, + status = hsa_executable_symbol_get_info((*func)->_kernelSymbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, - &(*func)->kernel); + &(*func)->_kernel); if(status != HSA_STATUS_SUCCESS){ return ihipLogStatus(hipErrorNotFound); @@ -215,9 +215,10 @@ hipError_t ihipModuleGetFunction(hipFunction_t *func, hipModule_t hmod, const ch return ihipLogStatus(ret); } + hipError_t hipModuleGetFunction(hipFunction_t *hfunc, hipModule_t hmod, const char *name){ - HIP_INIT_API(name); + HIP_INIT_API(hfunc, hmod, name); return ihipModuleGetFunction(hfunc, hmod, name); } @@ -226,8 +227,13 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ, uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ, uint32_t sharedMemBytes, hipStream_t hStream, - void **kernelParams, void **extra){ - HIP_INIT_API(f->kernel); + void **kernelParams, void **extra) +{ + HIP_INIT_API(f, gridDimX, gridDimY, gridDimZ, + blockDimX, blockDimY, blockDimZ, + sharedMemBytes, hStream, + kernelParams, extra); + auto ctx = ihipGetTlsDefaultCtx(); hipError_t ret = hipSuccess; @@ -246,48 +252,47 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, memcpy(config, extra, sizeof(size_t)*5); if(config[0] == HIP_LAUNCH_PARAM_BUFFER_POINTER && config[2] == HIP_LAUNCH_PARAM_BUFFER_SIZE && config[4] == HIP_LAUNCH_PARAM_END){ kernSize = *(size_t*)(config[3]); - }else{ + } else { return ihipLogStatus(hipErrorNotInitialized); } }else{ return ihipLogStatus(hipErrorInvalidValue); } - uint32_t groupSegmentSize; - hsa_status_t status = hsa_executable_symbol_get_info(f->kernel_symbol, - HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, - &groupSegmentSize); - - uint32_t privateSegmentSize; - status = hsa_executable_symbol_get_info(f->kernel_symbol, - HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, - &privateSegmentSize); + uint32_t groupSegmentSize; + hsa_status_t status = hsa_executable_symbol_get_info(f->kernel_symbol, + HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, + &groupSegmentSize); - privateSegmentSize += sharedMemBytes; + uint32_t privateSegmentSize; + status = hsa_executable_symbol_get_info(f->kernel_symbol, + HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, + &privateSegmentSize); + privateSegmentSize += sharedMemBytes; -/* -Kernel argument preparation. -*/ + /* + Kernel argument preparation. + */ grid_launch_parm lp; - hStream = ihipPreLaunchKernel(hStream, 0, 0, &lp); + hStream = ihipPreLaunchKernel(hStream, 0, 0, &lp, f->_kernelName); -/* - Create signal -*/ + /* + Create signal + */ hsa_signal_t signal; status = hsa_signal_create(1, 0, NULL, &signal); -/* - Launch AQL packet -*/ + /* + Launch AQL packet + */ hStream->launchModuleKernel(*lp.av, signal, blockDimX, blockDimY, blockDimZ, - gridDimX, gridDimY, gridDimZ, groupSegmentSize, privateSegmentSize, config[1], kernSize, f->kernel); + gridDimX, gridDimY, gridDimZ, groupSegmentSize, privateSegmentSize, config[1], kernSize, f->_kernel); -/* - Wait for signal -*/ + /* + Wait for signal + */ hsa_signal_value_t value = hsa_signal_wait_acquire(signal, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); @@ -301,8 +306,9 @@ Kernel argument preparation. hipError_t hipModuleGetGlobal(hipDeviceptr_t *dptr, size_t *bytes, - hipModule_t hmod, const char* name){ - HIP_INIT_API(name); + hipModule_t hmod, const char* name) +{ + HIP_INIT_API(dptr, bytes, hmod, name); hipError_t ret = hipSuccess; if(dptr == NULL || bytes == NULL){ return ihipLogStatus(hipErrorInvalidValue); @@ -314,13 +320,15 @@ hipError_t hipModuleGetGlobal(hipDeviceptr_t *dptr, size_t *bytes, hipFunction_t func; ihipModuleGetFunction(&func, hmod, name); *bytes = PrintSymbolSizes(hmod->ptr, name) + sizeof(amd_kernel_code_t); - *dptr = reinterpret_cast(func->kernel); + *dptr = reinterpret_cast(func->_kernel); return ihipLogStatus(ret); } } -hipError_t hipModuleLoadData(hipModule_t *module, const void *image){ - HIP_INIT_API(image); + +hipError_t hipModuleLoadData(hipModule_t *module, const void *image) +{ + HIP_INIT_API(module, image); hipError_t ret = hipSuccess; if(image == NULL || module == NULL){ return ihipLogStatus(hipErrorNotInitialized); diff --git a/tests/src/hipLaunchParm.cpp b/tests/src/hipLaunchParm.cpp index 2f4bf11ea2..26ad94f182 100644 --- a/tests/src/hipLaunchParm.cpp +++ b/tests/src/hipLaunchParm.cpp @@ -38,7 +38,7 @@ __global__ void vAdd(hipLaunchParm lp, float *a){} cmd;\ hipDeviceSynchronize();\ gettimeofday(&stop, NULL);\ - } while(0); + } while(0); @@ -61,7 +61,9 @@ int main() { float *Ad; hipMalloc((void**)&Ad, 1024); - hipLaunchKernel(vAdd, 1024, 1, 0, 0, Ad); + + // Test the different hipLaunchParm options: + hipLaunchKernel(vAdd, size_t(1024), 1, 0, 0, Ad); hipLaunchKernel(vAdd, 1024, dim3(1), 0, 0, Ad); hipLaunchKernel(vAdd, dim3(1024), 1, 0, 0, Ad); hipLaunchKernel(vAdd, dim3(1024), dim3(1), 0, 0, Ad); diff --git a/tests/src/runtimeApi/stream/hipStreamWaitEvent.cpp b/tests/src/runtimeApi/stream/hipStreamWaitEvent.cpp index c5a74b2bc0..7148f50628 100644 --- a/tests/src/runtimeApi/stream/hipStreamWaitEvent.cpp +++ b/tests/src/runtimeApi/stream/hipStreamWaitEvent.cpp @@ -27,7 +27,7 @@ unsigned p_streams = 6; //------ // Structure for one stream; -template +template class Streamer { public: Streamer(size_t numElements); @@ -99,7 +99,7 @@ void parseMyArguments(int argc, char *argv[]) //--- int main(int argc, char *argv[]) { - HipTest::parseStandardArguments(argc, argv, true); + HipTest::parseStandardArguments(argc, argv, false); parseMyArguments(argc, argv); typedef Streamer FloatStreamer; From 4e994a30252ac7d15353cbdd187f6afd3a98128e Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Sun, 4 Sep 2016 07:00:59 -0500 Subject: [PATCH 18/33] Add hipStreamQuery Change-Id: Ib0813b1065feba4fe9ae861d24cfc6f9c5f580be --- docs/markdown/hip_terms2.md | 2 + include/hcc_detail/hip_hcc.h | 1 + include/hcc_detail/hip_runtime.h | 6 +-- include/hcc_detail/hip_runtime_api.h | 24 ++++++++--- src/hip_hcc.cpp | 41 ++++++++++--------- src/hip_stream.cpp | 21 ++++++++++ .../runtimeApi/stream/hipStreamWaitEvent.cpp | 41 ++++++++++++++++--- 7 files changed, 104 insertions(+), 32 deletions(-) diff --git a/docs/markdown/hip_terms2.md b/docs/markdown/hip_terms2.md index 82174405cd..9603807925 100644 --- a/docs/markdown/hip_terms2.md +++ b/docs/markdown/hip_terms2.md @@ -15,3 +15,5 @@ The default device can be set with hipSetDevice. - hipify - tool to convert CUDA(R) code to portable C++ code. - hipconfig - tool to report various confoguration properties of the target platform. +- nvcc = nvcc compiler, do not capitalize. +- hcc = heterogeneous compute compiler, do not capitalize. diff --git a/include/hcc_detail/hip_hcc.h b/include/hcc_detail/hip_hcc.h index 1a4e9780aa..939d57c062 100644 --- a/include/hcc_detail/hip_hcc.h +++ b/include/hcc_detail/hip_hcc.h @@ -515,6 +515,7 @@ typedef uint64_t SeqNum_t ; // Friends: friend std::ostream& operator<<(std::ostream& os, const ihipStream_t& s); + friend hipError_t hipStreamQuery(hipStream_t); }; diff --git a/include/hcc_detail/hip_runtime.h b/include/hcc_detail/hip_runtime.h index 4e67cb7292..547df405a2 100644 --- a/include/hcc_detail/hip_runtime.h +++ b/include/hcc_detail/hip_runtime.h @@ -622,9 +622,9 @@ __device__ static inline void* memset(void* ptr, uint8_t val, size_t size) #ifdef __HCC_CPP__ extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, dim3 block, grid_launch_parm *lp, const char *kernelNameStr); -extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, size_t block, grid_launch_parm *lp); -extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, dim3 block, grid_launch_parm *lp); -extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, size_t block, grid_launch_parm *lp); +extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, size_t block, grid_launch_parm *lp, const char *kernelNameStr); +extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, dim3 block, grid_launch_parm *lp, const char *kernelNameStr); +extern hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, size_t block, grid_launch_parm *lp, const char *kernelNameStr); extern void ihipPostLaunchKernel(hipStream_t stream, grid_launch_parm &lp); diff --git a/include/hcc_detail/hip_runtime_api.h b/include/hcc_detail/hip_runtime_api.h index fc390bbd23..1f4a1fb8a9 100644 --- a/include/hcc_detail/hip_runtime_api.h +++ b/include/hcc_detail/hip_runtime_api.h @@ -474,6 +474,20 @@ hipError_t hipStreamCreate(hipStream_t *stream); hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int flags); +/** + * @brief Return #hipSuccess if all of the operations in the specified @p stream have completed, or #hipErrorNotReady if not. + * + * @param[in] stream stream to query + * + * @return #hipSuccess, #hipErrorNotReady + * + * This is thread-safe and returns a snapshot of the current state of the queue. However, if other host threads are sending work to the stream, + * the status may change immediately after the function is called. It is typically used for debug. + */ +hipError_t hipStreamQuery(hipStream_t stream); + + + /** * @brief Wait for all commands in stream to complete. * @@ -726,7 +740,7 @@ hipError_t hipHostAlloc(void** ptr, size_t size, unsigned int flags) __attribute hipError_t hipHostGetDevicePointer(void** devPtr, void* hstPtr, unsigned int flags) ; /** - * @brief Return flags associated with host pointer + * @brief Return flags associated with host pointer * * @param[out] flagsPtr Memory location to store flags * @param[in] hostPtr Host Pointer allocated through hipHostMalloc @@ -1299,10 +1313,10 @@ hipError_t hipDriverGetVersion(int *driverVersion) ; * * @param [in] fname * @param [out] module - * + * * @returns hipSuccess, hipErrorInvalidValue, hipErrorInvalidContext, hipErrorFileNotFound, hipErrorOutOfMemory, hipErrorSharedObjectInitFailed, hipErrorNotInitialized * - * + * */ hipError_t hipModuleLoad(hipModule_t *module, const char *fname); @@ -1313,7 +1327,7 @@ hipError_t hipModuleLoad(hipModule_t *module, const char *fname); * * @returns hipSuccess, hipInvalidValue * module is freed and the code objects associated with it are destroyed - * + * */ hipError_t hipModuleUnload(hipModule_t module); @@ -1325,7 +1339,7 @@ hipError_t hipModuleUnload(hipModule_t module); * @param [in] kname * @param [out] function * - * @returns hipSuccess, hipErrorInvalidValue, hipErrorInvalidContext, hipErrorNotInitialized, hipErrorNotFound, + * @returns hipSuccess, hipErrorInvalidValue, hipErrorInvalidContext, hipErrorNotInitialized, hipErrorNotFound, */ hipError_t hipModuleGetFunction(hipFunction_t *function, hipModule_t module, const char *kname); diff --git a/src/hip_hcc.cpp b/src/hip_hcc.cpp index f4d20021a9..9d9b667f8b 100644 --- a/src/hip_hcc.cpp +++ b/src/hip_hcc.cpp @@ -1404,19 +1404,20 @@ hipStream_t ihipSyncAndResolveStream(hipStream_t stream) void ihipPrintKernelLaunch(const char *kernelName, const grid_launch_parm *lp, const hipStream_t stream) { - std::stringstream os; - os << API_COLOR << "<grid_dim - << " groupDim:" << lp->group_dim - << " sharedMem:+" << lp->dynamic_group_mem_bytes - << " " << *stream - << API_COLOR_END << std::endl; - - std::cerr << os.str(); - - //fprintf(stderr, KGRN "<grid_dim.x, lp->grid_dim.y, lp->grid_dim.z, lp->group_dim.x, lp->group_dim.y, lp->group_dim.z, - // lp->dynamic_group_mem_bytes, streamString.c_str()); + if (HIP_ATP_MARKER || (COMPILE_HIP_DB && HIP_TRACE_API)) { + std::stringstream os; + os << "<grid_dim + << " groupDim:" << lp->group_dim + << " sharedMem:+" << lp->dynamic_group_mem_bytes + << " " << *stream; + + + if (COMPILE_HIP_DB && HIP_TRACE_API) { + std::cerr << API_COLOR << os.str() << API_COLOR_END << std::endl; + } + SCOPED_MARKER(os.str().c_str(), "HIP", NULL); + } } // TODO - data-up to data-down: @@ -1439,15 +1440,13 @@ hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, dim3 block, grid_ lp->av = &(crit->_av); lp->cf = new hc::completion_future; - if (HIP_TRACE_API) { - ihipPrintKernelLaunch(kernelNameStr, lp, stream); - } + ihipPrintKernelLaunch(kernelNameStr, lp, stream); return (stream); } -hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, dim3 block, grid_launch_parm *lp) +hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, dim3 block, grid_launch_parm *lp, const char *kernelNameStr) { HIP_INIT(); stream = ihipSyncAndResolveStream(stream); @@ -1463,11 +1462,13 @@ hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, dim3 block, gri auto crit = stream->lockopen_preKernelCommand(); lp->av = &(crit->_av); lp->cf = new hc::completion_future; + + ihipPrintKernelLaunch(kernelNameStr, lp, stream); return (stream); } -hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, size_t block, grid_launch_parm *lp) +hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, size_t block, grid_launch_parm *lp, const char *kernelNameStr) { HIP_INIT(); stream = ihipSyncAndResolveStream(stream); @@ -1483,11 +1484,12 @@ hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, size_t block, gri auto crit = stream->lockopen_preKernelCommand(); lp->av = &(crit->_av); lp->cf = new hc::completion_future; + ihipPrintKernelLaunch(kernelNameStr, lp, stream); return (stream); } -hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, size_t block, grid_launch_parm *lp) +hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, size_t block, grid_launch_parm *lp, const char *kernelNameStr) { HIP_INIT(); stream = ihipSyncAndResolveStream(stream); @@ -1503,6 +1505,7 @@ hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, size_t block, g auto crit = stream->lockopen_preKernelCommand(); lp->av = &(crit->_av); lp->cf = new hc::completion_future; // TODO, is this necessary? + ihipPrintKernelLaunch(kernelNameStr, lp, stream); return (stream); } diff --git a/src/hip_stream.cpp b/src/hip_stream.cpp index b5d7d8cf5b..7b3dc31f07 100644 --- a/src/hip_stream.cpp +++ b/src/hip_stream.cpp @@ -115,6 +115,27 @@ hipError_t hipStreamWaitEvent(hipStream_t stream, hipEvent_t event, unsigned int }; +//--- +hipError_t hipStreamQuery(hipStream_t stream) +{ + HIP_INIT_API(stream); + + // Use default stream if 0 specified: + if (stream == hipStreamNull) { + ihipCtx_t *device = ihipGetTlsDefaultCtx(); + stream = device->_defaultStream; + } + + LockedAccessor_StreamCrit_t crit(stream->_criticalData); + int pendingOps = crit->_av.get_pending_async_ops(); + + + hipError_t e = (pendingOps > 0) ? hipErrorNotReady : hipSuccess; + + return ihipLogStatus(e); +} + + //--- hipError_t hipStreamSynchronize(hipStream_t stream) { diff --git a/tests/src/runtimeApi/stream/hipStreamWaitEvent.cpp b/tests/src/runtimeApi/stream/hipStreamWaitEvent.cpp index 7148f50628..4ad093a16e 100644 --- a/tests/src/runtimeApi/stream/hipStreamWaitEvent.cpp +++ b/tests/src/runtimeApi/stream/hipStreamWaitEvent.cpp @@ -33,7 +33,8 @@ class Streamer { Streamer(size_t numElements); ~Streamer(); void runAsync(); - void waitComplete(); + void queryUntilComplete(); + private: T *_A_h; @@ -66,11 +67,24 @@ void Streamer::runAsync() printf ("testing: %s numElements=%zu size=%6.2fMB\n", __func__, _numElements, _numElements * sizeof(T) / 1024.0/1024.0); unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, _numElements); hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, _stream, _A_d, _B_d, _C_d, _numElements); + + // Test case where hipStreamWaitEvent waits on same event we just placed into the queue. HIPCHECK(hipEventRecord(_event, _stream)); HIPCHECK(hipStreamWaitEvent(_stream, _event, 0)); - } +template +void Streamer::queryUntilComplete() +{ + int numQueries = 0; + hipError_t e = hipSuccess; + do { + numQueries++; + e = hipStreamQuery(_stream); + } while (e != hipSuccess) ; + + printf ("completed after %d queries\n", numQueries); +}; @@ -113,11 +127,28 @@ int main(int argc, char *argv[]) streamers.push_back(s); } - for (int i=0; irunAsync(); + if (p_tests & 0x1) { + printf ("==> Test 0x1 runAsnc\n"); + for (int i=0; irunAsync(); + } + HIPCHECK(hipDeviceSynchronize()); + } + + if (p_tests & 0x2) { + printf ("==> Test 0x2 queryUntilComplete\n"); + for (int i=0; irunAsync(); + streamers[i]->queryUntilComplete(); + } + HIPCHECK(hipDeviceSynchronize()); + } + + if (p_tests & 0x4) { + hipStreamQuery(0/* try null stream*/); + } - HIPCHECK(hipDeviceSynchronize()); passed(); } From 172939e397591cc0a770493d3acd0530c0c91100 Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Wed, 7 Sep 2016 15:15:45 -0500 Subject: [PATCH 19/33] Bump min required HCC version (need feature for hipStreamQuery) Change-Id: I3d51de0527b73a88948b0263a4ba6cb90d71a280 --- include/hcc_detail/hip_hcc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/hcc_detail/hip_hcc.h b/include/hcc_detail/hip_hcc.h index 939d57c062..62989fa9e2 100644 --- a/include/hcc_detail/hip_hcc.h +++ b/include/hcc_detail/hip_hcc.h @@ -25,7 +25,7 @@ THE SOFTWARE. #include "hip/hcc_detail/unpinned_copy_engine.h" -#if defined(__HCC__) && (__hcc_workweek__ < 16186) +#if defined(__HCC__) && (__hcc_workweek__ < 16354) #error("This version of HIP requires a newer version of HCC."); #endif From 0693b1693f5ed0dbb8e6c6c59dc4be4fe7c4638c Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Wed, 7 Sep 2016 15:48:40 -0500 Subject: [PATCH 20/33] Adapt to _kernelSymbol Change-Id: Idebb7d7e895286ce5423afdcf391e00fa8b2b94f --- src/hip_module.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/hip_module.cpp b/src/hip_module.cpp index 594ddde2f9..7ef3ca9933 100644 --- a/src/hip_module.cpp +++ b/src/hip_module.cpp @@ -260,14 +260,14 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, } uint32_t groupSegmentSize; - hsa_status_t status = hsa_executable_symbol_get_info(f->kernel_symbol, - HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, - &groupSegmentSize); + hsa_status_t status = hsa_executable_symbol_get_info(f->_kernelSymbol, + HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, + &groupSegmentSize); uint32_t privateSegmentSize; - status = hsa_executable_symbol_get_info(f->kernel_symbol, - HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, - &privateSegmentSize); + status = hsa_executable_symbol_get_info(f->_kernelSymbol, + HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, + &privateSegmentSize); privateSegmentSize += sharedMemBytes; From 6db6dbaaaf7db9492cd6764bb83ff58496059298 Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Thu, 8 Sep 2016 17:20:38 +0530 Subject: [PATCH 21/33] Directed tests: Refactor phase 1 - build HIP the right way Change-Id: I8fcd2bcb01b12878878f50777e2cf0095fae61a1 --- tests/src/CMakeLists.txt | 144 +++++++++++++++++++-------------------- 1 file changed, 72 insertions(+), 72 deletions(-) diff --git a/tests/src/CMakeLists.txt b/tests/src/CMakeLists.txt index eea43659dc..73083a4973 100644 --- a/tests/src/CMakeLists.txt +++ b/tests/src/CMakeLists.txt @@ -2,74 +2,86 @@ cmake_minimum_required (VERSION 2.6) # remove CMAKE_CXX_COMPILER entry from cache since it will be pointing to hipcc unset(CMAKE_CXX_COMPILER CACHE) -message (CMAKE_CXX_COMPILER = ${CMAKE_CXX_COMPILER} ) +# remove HIP_PATH entry from cache since we might be running tests with a different configuration +unset(HIP_PATH CACHE) -project (HIP_Unit_Tests) +project(HIP_Unit_Tests) include(CTest) +set(HIPTEST_SOURCE_DIR ${PROJECT_SOURCE_DIR}) - -#include_directories( ${PROJECT_SOURCE_DIR}/include ) -set (HIPTEST_SOURCE_DIR ${PROJECT_SOURCE_DIR} ) - -# The version number. -set (HIP_Unit_Test_VERSION_MAJOR 1) -set (HIP_Unit_Test_VERSION_MINOR 0) - +# Enable multi-gpu tests if(NOT DEFINED HIP_MULTI_GPU) set(HIP_MULTI_GPU 0 CACHE BOOL "Run tests requiring more than one GPU") endif() -if(NOT DEFINED HIP_BUILD_LOCAL) - if(NOT DEFINED ENV{HIP_BUILD_LOCAL}) - set(HIP_BUILD_LOCAL 1 CACHE BOOL "Build HIP in local folder") +# Determine HIP_PATH +if(NOT DEFINED HIP_PATH) + if(NOT DEFINED ENV{HIP_PATH}) + # We are going to use HIP source... + get_filename_component(HIP_SRC_PATH ${CMAKE_CURRENT_SOURCE_DIR}/../.. ABSOLUTE) + execute_process( + COMMAND "${CMAKE_COMMAND}" -E remove_directory hip + OUTPUT_QUIET + ERROR_QUIET + ) + execute_process( + COMMAND "${CMAKE_COMMAND}" -E make_directory hip + OUTPUT_QUIET + ERROR_QUIET + ) + message(STATUS "Configuring HIP") + # ...so need to build HIP locally. + execute_process( + COMMAND "${CMAKE_COMMAND}" -DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_BINARY_DIR}/hip/localbuild ${HIP_SRC_PATH} + WORKING_DIRECTORY hip + RESULT_VARIABLE hip_build_result + OUTPUT_QUIET + ERROR_QUIET + ) + if(hip_build_result) + message(FATAL_ERROR "Error configuring HIP") + else() + message(STATUS "Configuring HIP - done") + message(STATUS "Building HIP") + endif() + execute_process( + COMMAND "${CMAKE_COMMAND}" --build . --target install + WORKING_DIRECTORY hip + RESULT_VARIABLE hip_build_result + OUTPUT_QUIET + ERROR_QUIET + ) + if(hip_build_result) + message(FATAL_ERROR "Error building HIP") + else() + # Building HIP is successful. Point HIP_PATH to this location. + message(STATUS "Building HIP - done") + get_filename_component(HIP_PATH ${CMAKE_CURRENT_BINARY_DIR}/hip/localbuild ABSOLUTE) + endif() + # Add a target to rebuild HIP if HIP source changes. + add_custom_target( + hip ALL + COMMAND "${CMAKE_COMMAND}" --build . --target install + WORKING_DIRECTORY hip + ) else() - set(HIP_BUILD_LOCAL $ENV{HIP_BUILD_LOCAL} CACHE BOOL "Build HIP in local folder") + # We are using HIP_PATH from env. So just create a fake target. + set(HIP_PATH $ENV{HIP_PATH} CACHE PATH "Path to installed HIP") + add_custom_target(hip ALL) endif() +else() + # We are using HIP_PATH passed to cmake. So just create a fake target. + add_custom_target(hip ALL) endif() +MESSAGE("HIP_PATH=" ${HIP_PATH}) -set(HIP_PATH $ENV{HIP_PATH}) -if (NOT DEFINED HIP_PATH) - get_filename_component (HIP_PATH ../.. ABSOLUTE) -endif() - +# Determine HIP_PLATFORM execute_process(COMMAND ${HIP_PATH}/bin/hipconfig --platform OUTPUT_VARIABLE HIP_PLATFORM) -MESSAGE ("HIP_PATH=" ${HIP_PATH}) - -if (${HIP_PLATFORM} STREQUAL "hcc") - MESSAGE ("HIP_PLATFORM=hcc") - - set (HSA_PATH $ENV{HSA_PATH}) - if (NOT DEFINED HSA_PATH) - set (HSA_PATH /opt/rocm/hsa) - endif() - - set (CODEXL_PATH $ENV{CODEXL_PATH}) - if (NOT DEFINED CODEXL_PATH) - set (CODEXL_PATH /opt/AMD/CodeXL) - endif() - set (CODEXL_SDK_ATAL_PATH ${CODEXL_PATH}/SDK/AMDTActivityLogger) - - #--- - # Add HSA library: - add_library(hsa-runtime64 SHARED IMPORTED) - set_property(TARGET hsa-runtime64 PROPERTY IMPORTED_LOCATION "${HSA_PATH}/lib/libhsa-runtime64.so") - - #These includes are used for all files. - #Include HIP and HC since the tests need both of these: - include_directories(${HIP_PATH}/include) - - # This will create a subdir "hip_hcc" in the test build directory - # Any changes to hip_hcc source will be detected and force the library and then the tests to be rebuilt. - if (${HIP_BUILD_LOCAL}) - add_subdirectory(${HIP_PATH} build.hip_hcc) - #link_directories(${CMAKE_CURRENT_BINARY_DIR}/build.hip_hcc) # search the local hip_hcc for libhip_hcc.a - set (CMAKE_CXX_FLAGS --hipcc_explicit_lib) - endif() - - -elseif (${HIP_PLATFORM} STREQUAL "nvcc") - MESSAGE ("HIP_PLATFORM=nvcc") +if(${HIP_PLATFORM} STREQUAL "hcc") + MESSAGE("HIP_PLATFORM=hcc") +elseif(${HIP_PLATFORM} STREQUAL "nvcc") + MESSAGE("HIP_PLATFORM=nvcc") #Need C++11 for threads in some of the tests. add_definitions(-std=c++11) @@ -77,33 +89,21 @@ elseif (${HIP_PLATFORM} STREQUAL "nvcc") # NVCC does not not support -rdynamic option set(CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS ) set(CMAKE_SHARED_LIBRARY_LINK_C_FLAGS ) - else() - MESSAGE (FATAL_ERROR "UNKNOWN HIP_PLATFORM=" ${HIP_PLATFORM}) + MESSAGE(FATAL_ERROR "UNKNOWN HIP_PLATFORM=" ${HIP_PLATFORM}) endif() - -set (HIPCC ${HIP_PATH}/bin/hipcc) -set (CMAKE_CXX_COMPILER ${HIPCC} CACHE FILEPATH "CXX Compiler" FORCE) +set(HIPCC ${HIP_PATH}/bin/hipcc) +set(CMAKE_CXX_COMPILER ${HIPCC} CACHE FILEPATH "CXX Compiler" FORCE) add_library(test_common OBJECT test_common.cpp ) - # usage : build_hip_executable (exe_name CPP_FILES) macro (build_hip_executable exe cpp) - if (${HIP_PLATFORM} STREQUAL "hcc") - if (${HIP_BUILD_LOCAL}) - #target_link_libraries(${exe} hip_hcc) - add_executable (${exe} ${cpp} ${ARGN} $ $ ) - else() - add_executable (${exe} ${cpp} ${ARGN} $ ) - endif() - else() - add_executable (${exe} ${cpp} ${ARGN} $ ) - endif() + add_executable (${exe} ${cpp} ${ARGN} $ ) + add_dependencies(${exe} hip) endmacro() - # Make a hip executable, using libc++ macro (build_hip_executable_libcpp exe cpp) build_hip_executable( ${exe} ${cpp} ${ARGN} ) From 9e05375acf3a5a4859f70de377b87c78b8db8249 Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Thu, 8 Sep 2016 22:37:24 +0530 Subject: [PATCH 22/33] Initial support for hipChooseDevice function Change-Id: Iedbf5f98c96673ab701dd7539d80a77b994d296f --- include/hcc_detail/hip_runtime_api.h | 8 ++++++ src/hip_device.cpp | 37 ++++++++++++++++++++++++++++ tests/src/hipChooseDevice.cpp | 17 +++++++++++++ 3 files changed, 62 insertions(+) create mode 100644 tests/src/hipChooseDevice.cpp diff --git a/include/hcc_detail/hip_runtime_api.h b/include/hcc_detail/hip_runtime_api.h index 1f4a1fb8a9..bcb2053a70 100644 --- a/include/hcc_detail/hip_runtime_api.h +++ b/include/hcc_detail/hip_runtime_api.h @@ -345,6 +345,14 @@ hipError_t hipSetDeviceFlags ( unsigned flags); * @} */ +/** + * @brief Select compute-device which best matches criteria. + * + * @param [out] device ID + * @param [in] device properties pointer + * + */ +hipError_t hipChooseDevice(int *device,hipDeviceProp_t* prop); /** *------------------------------------------------------------------------------------------------- diff --git a/src/hip_device.cpp b/src/hip_device.cpp index 61221e64eb..5abafe6748 100644 --- a/src/hip_device.cpp +++ b/src/hip_device.cpp @@ -361,3 +361,40 @@ hipError_t hipDeviceTotalMem (size_t *bytes,hipDevice_t device) *bytes= device->_props.totalGlobalMem; return ihipLogStatus(e); } + +hipError_t hipChooseDevice( int* device, const hipDeviceProp_t* prop ) +{ + hipDeviceProp_t tempProp; + int deviceCount; + int inPropCount=0; + int matchedPropCount=0; + hipError_t e = hipSuccess; + hipGetDeviceCount( &deviceCount ); + *device = 0; + for (int i=0; i< deviceCount; i++) { + hipGetDeviceProperties( &tempProp, i ); + if(prop->major !=0) { + inPropCount++; + if(tempProp.major >= prop->major) { + matchedPropCount++; + } + if(prop->minor !=0) { + inPropCount++; + if(tempProp.minor >= prop->minor) { + matchedPropCount++; + } + } + } + + if(inPropCount == matchedPropCount) { + *device = i; + } +#if 0 + else{ + e= hipErrorInvalidValue; + } +#endif + } + return ihipLogStatus(e); +} + diff --git a/tests/src/hipChooseDevice.cpp b/tests/src/hipChooseDevice.cpp new file mode 100644 index 0000000000..b1cd73ee87 --- /dev/null +++ b/tests/src/hipChooseDevice.cpp @@ -0,0 +1,17 @@ +#include +#include +int main( void ) { + hipDeviceProp_t prop; + int dev; + + hipGetDevice( &dev ) ; + printf( "ID of current HIP device: %d\n", dev ); + + memset( &prop, 0, sizeof( hipDeviceProp_t ) ); + prop.major = 1; + prop.minor = 3; + hipChooseDevice( &dev, &prop ); + printf( "ID of hip device closest to revision 1.3: %d\n", dev ); + + hipSetDevice( dev ); +} From a52cb887c2db551496367da2009e0fef14dfa44e Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Thu, 8 Sep 2016 23:12:12 +0530 Subject: [PATCH 23/33] Removed return code related doxygen from hip_device src file Change-Id: Iffe51b69dd6be064b7898d06e537a505e27edb0b --- include/hcc_detail/hip_runtime_api.h | 11 +++++ src/hip_device.cpp | 64 ---------------------------- 2 files changed, 11 insertions(+), 64 deletions(-) diff --git a/include/hcc_detail/hip_runtime_api.h b/include/hcc_detail/hip_runtime_api.h index bcb2053a70..e447fb609c 100644 --- a/include/hcc_detail/hip_runtime_api.h +++ b/include/hcc_detail/hip_runtime_api.h @@ -278,6 +278,11 @@ hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int deviceI * @param [out] prop written with device properties * @param [in] deviceId which device to query for information * + * @return #hipSuccess, #hipErrorInvalidDevice + * @bug HCC always returns 0 for maxThreadsPerMultiProcessor + * @bug HCC always returns 0 for regsPerBlock + * @bug HCC always returns 0 for l2CacheSize + * * Populates hipGetDeviceProperties with information for the specified device. */ hipError_t hipGetDeviceProperties(hipDeviceProp_t* prop, int deviceId); @@ -289,6 +294,7 @@ hipError_t hipGetDeviceProperties(hipDeviceProp_t* prop, int deviceId); /** * @brief Set L1/Shared cache partition. * + * @returns #hipSuccess * Note: AMD devices and recent Nvidia GPUS do not support reconfigurable cache. This hint is ignored on those architectures. * */ @@ -298,6 +304,7 @@ hipError_t hipDeviceSetCacheConfig ( hipFuncCache cacheConfig ); /** * @brief Set Cache configuration for a specific function * + * @returns #hipSuccess * Note: AMD devices and recent Nvidia GPUS do not support reconfigurable cache. This hint is ignored on those architectures. * */ @@ -307,6 +314,7 @@ hipError_t hipDeviceGetCacheConfig ( hipFuncCache *cacheConfig ); /** * @brief Set Cache configuration for a specific function * + * @returns #hipSuccess * Note: AMD devices and recent Nvidia GPUS do not support reconfigurable cache. This hint is ignored on those architectures. * */ @@ -318,6 +326,7 @@ hipError_t hipFuncSetCacheConfig ( hipFuncCache config ); /** * @brief Get Shared memory bank configuration. * + * @returns #hipSuccess * Note: AMD devices and recent Nvidia GPUS do not support shared cache banking, and the hint is ignored on those architectures. * */ @@ -327,6 +336,7 @@ hipError_t hipDeviceGetSharedMemConfig ( hipSharedMemConfig * pConfig ); /** * @brief Set Shared memory bank configuration. * + * @returns #hipSuccess * Note: AMD devices and recent Nvidia GPUS do not support shared cache banking, and the hint is ignored on those architectures. * */ @@ -335,6 +345,7 @@ hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config ); /** * @brief Set Device flags * + * @returns #hipSuccess * Note: Only hipDeviceScheduleAuto and hipDeviceMapHost are supported * */ diff --git a/src/hip_device.cpp b/src/hip_device.cpp index 5abafe6748..fdfb25d7b0 100644 --- a/src/hip_device.cpp +++ b/src/hip_device.cpp @@ -24,10 +24,6 @@ THE SOFTWARE. //------------------------------------------------------------------------------------------------- //Devices //------------------------------------------------------------------------------------------------- -//--- -/** - * @return #hipSuccess, hipErrorInvalidDevice - */ // TODO - does this initialize HIP runtime? hipError_t hipGetDevice(int *deviceId) { @@ -47,11 +43,6 @@ hipError_t hipGetDevice(int *deviceId) return ihipLogStatus(e); } - -//--- -/** - * @return #hipSuccess, #hipErrorNoDevice - */ // TODO - does this initialize HIP runtime? hipError_t hipGetDeviceCount(int *count) { @@ -66,11 +57,6 @@ hipError_t hipGetDeviceCount(int *count) } } - -//--- -/** - * @returns #hipSuccess - */ hipError_t hipDeviceSetCacheConfig ( hipFuncCache cacheConfig ) { HIP_INIT_API(cacheConfig); @@ -80,11 +66,6 @@ hipError_t hipDeviceSetCacheConfig ( hipFuncCache cacheConfig ) return ihipLogStatus(hipSuccess); } - -//--- -/** - * @returns #hipSuccess - */ hipError_t hipDeviceGetCacheConfig ( hipFuncCache *cacheConfig ) { HIP_INIT_API(cacheConfig); @@ -94,11 +75,6 @@ hipError_t hipDeviceGetCacheConfig ( hipFuncCache *cacheConfig ) return ihipLogStatus(hipSuccess); } - -//--- -/** - * @returns #hipSuccess - */ hipError_t hipFuncSetCacheConfig ( hipFuncCache cacheConfig ) { HIP_INIT_API(cacheConfig); @@ -108,12 +84,6 @@ hipError_t hipFuncSetCacheConfig ( hipFuncCache cacheConfig ) return ihipLogStatus(hipSuccess); } - - -//--- -/** - * @returns #hipSuccess - */ hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config ) { HIP_INIT_API(config); @@ -123,12 +93,6 @@ hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config ) return ihipLogStatus(hipSuccess); } - - -//--- -/** - * @returns #hipSuccess - */ hipError_t hipDeviceGetSharedMemConfig ( hipSharedMemConfig * pConfig ) { HIP_INIT_API(pConfig); @@ -138,10 +102,6 @@ hipError_t hipDeviceGetSharedMemConfig ( hipSharedMemConfig * pConfig ) return ihipLogStatus(hipSuccess); } -//--- -/** - * @return #hipSuccess, #hipErrorInvalidDevice - */ hipError_t hipSetDevice(int deviceId) { HIP_INIT_API(deviceId); @@ -153,22 +113,12 @@ hipError_t hipSetDevice(int deviceId) } } - -//--- -/** - * @return #hipSuccess - */ hipError_t hipDeviceSynchronize(void) { HIP_INIT_API(); return ihipLogStatus(ihipSynchronize()); } - -//--- -/** - * @return @ref hipSuccess - */ hipError_t hipDeviceReset(void) { HIP_INIT_API(); @@ -188,9 +138,6 @@ hipError_t hipDeviceReset(void) return ihipLogStatus(hipSuccess); } -/** - * - */ hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) { HIP_INIT_API(pi, attr, device); @@ -260,13 +207,6 @@ hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device) return ihipLogStatus(e); } - -/** - * @return #hipSuccess, #hipErrorInvalidDevice - * @bug HCC always returns 0 for maxThreadsPerMultiProcessor - * @bug HCC always returns 0 for regsPerBlock - * @bug HCC always returns 0 for l2CacheSize - */ hipError_t hipGetDeviceProperties(hipDeviceProp_t* props, int device) { HIP_INIT_API(props, device); @@ -285,7 +225,6 @@ hipError_t hipGetDeviceProperties(hipDeviceProp_t* props, int device) return ihipLogStatus(e); } - hipError_t hipSetDeviceFlags( unsigned int flags) { HIP_INIT_API(flags); @@ -306,9 +245,6 @@ hipError_t hipSetDeviceFlags( unsigned int flags) return ihipLogStatus(e); }; - - - hipError_t hipDeviceGetFromId(hipDevice_t *device, int deviceId) { HIP_INIT_API(device, deviceId); From f03570d8cc981257992aaea2c378579f378301d9 Mon Sep 17 00:00:00 2001 From: Aditya Atluri Date: Thu, 8 Sep 2016 14:52:51 -0500 Subject: [PATCH 24/33] Added signal management which passes stress tests Change-Id: I7e1660a8ca2c5ee580a91f76eae9a58ca49f0457 --- include/hcc_detail/hip_hcc.h | 6 ++++++ src/hip_hcc.cpp | 26 +++++++++++++++++++++++--- 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/include/hcc_detail/hip_hcc.h b/include/hcc_detail/hip_hcc.h index 62989fa9e2..94df169e07 100644 --- a/include/hcc_detail/hip_hcc.h +++ b/include/hcc_detail/hip_hcc.h @@ -431,6 +431,9 @@ class ihipStreamCriticalBase_t : public LockedBase SIGSEQNUM _streamSigId; // Monotonically increasing unique signal id. hc::accelerator_view _av; + + std::vector _cfs; + }; @@ -468,6 +471,9 @@ typedef uint64_t SeqNum_t ; void locked_waitEvent(hipEvent_t event); void locked_recordEvent(hipEvent_t event); + void addCFtoStream(LockedAccessor_StreamCrit_t &crit, hc::completion_future* cf); + void waitOnAllCFs(LockedAccessor_StreamCrit_t &crit); + //--- // Use this if we already have the stream critical data mutex: diff --git a/src/hip_hcc.cpp b/src/hip_hcc.cpp index 9d9b667f8b..e2fb952a5b 100644 --- a/src/hip_hcc.cpp +++ b/src/hip_hcc.cpp @@ -250,7 +250,8 @@ void ihipStream_t::wait(LockedAccessor_StreamCrit_t &crit, bool assertQueueEmpty { if (! assertQueueEmpty) { tprintf (DB_SYNC, "stream %p wait for queue-empty..\n", this); - crit->_av.wait(); +// crit->_av.wait(); + waitOnAllCFs(crit); } if (crit->_last_copy_signal) { @@ -266,6 +267,21 @@ void ihipStream_t::wait(LockedAccessor_StreamCrit_t &crit, bool assertQueueEmpty // crit->_signalCnt = 0; } +void ihipStream_t::addCFtoStream(LockedAccessor_StreamCrit_t &crit, hc::completion_future *cf) +{ + crit->_cfs.push_back(cf); +} + +void ihipStream_t::waitOnAllCFs(LockedAccessor_StreamCrit_t &crit) +{ + for(uint32_t i=0;i_cfs.size();i++){ + if(crit->_cfs[i] != NULL){ + crit->_cfs[i]->wait(); + delete crit->_cfs[i]; + } + } + crit->_cfs.clear(); +} //--- //Wait for all kernel and data copy commands in this stream to complete. @@ -1439,7 +1455,7 @@ hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, dim3 block, grid_ auto crit = stream->lockopen_preKernelCommand(); lp->av = &(crit->_av); lp->cf = new hc::completion_future; - + stream->addCFtoStream(crit, lp->cf); ihipPrintKernelLaunch(kernelNameStr, lp, stream); return (stream); @@ -1462,7 +1478,7 @@ hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, dim3 block, gri auto crit = stream->lockopen_preKernelCommand(); lp->av = &(crit->_av); lp->cf = new hc::completion_future; - + stream->addCFtoStream(crit, lp->cf); ihipPrintKernelLaunch(kernelNameStr, lp, stream); return (stream); } @@ -1484,6 +1500,7 @@ hipStream_t ihipPreLaunchKernel(hipStream_t stream, dim3 grid, size_t block, gri auto crit = stream->lockopen_preKernelCommand(); lp->av = &(crit->_av); lp->cf = new hc::completion_future; + stream->addCFtoStream(crit, lp->cf); ihipPrintKernelLaunch(kernelNameStr, lp, stream); return (stream); } @@ -1505,6 +1522,9 @@ hipStream_t ihipPreLaunchKernel(hipStream_t stream, size_t grid, size_t block, g auto crit = stream->lockopen_preKernelCommand(); lp->av = &(crit->_av); lp->cf = new hc::completion_future; // TODO, is this necessary? + + stream->addCFtoStream(crit, lp->cf); + ihipPrintKernelLaunch(kernelNameStr, lp, stream); return (stream); } From a3f892e69a4b3346de3c15e0fd704c851233b93d Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Fri, 9 Sep 2016 11:01:10 +0530 Subject: [PATCH 25/33] directed tests: Show HIP build errors during configure stage Change-Id: I068b668902cf54286ce5bb4c6c718b643bfa5754 --- tests/src/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/src/CMakeLists.txt b/tests/src/CMakeLists.txt index 73083a4973..0a2cd1321b 100644 --- a/tests/src/CMakeLists.txt +++ b/tests/src/CMakeLists.txt @@ -48,10 +48,11 @@ if(NOT DEFINED HIP_PATH) COMMAND "${CMAKE_COMMAND}" --build . --target install WORKING_DIRECTORY hip RESULT_VARIABLE hip_build_result - OUTPUT_QUIET + OUTPUT_VARIABLE hip_build_log ERROR_QUIET ) if(hip_build_result) + message(${hip_build_log}) message(FATAL_ERROR "Error building HIP") else() # Building HIP is successful. Point HIP_PATH to this location. From 59b6ffbe70adf57068697df93cbf3f76f87cd8ca Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Fri, 9 Sep 2016 12:01:41 +0530 Subject: [PATCH 26/33] Directed tests: hipDynamicShared now compiles on NVCC NVCC does not support template in extern __shared__. Compilation is fixed but test still does not run on NVCC. Change-Id: I427c9170812401460d60ef8e3246525eeda38514 --- tests/src/CMakeLists.txt | 2 +- tests/src/hipDynamicShared.cpp | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/src/CMakeLists.txt b/tests/src/CMakeLists.txt index 0a2cd1321b..43a74910b2 100644 --- a/tests/src/CMakeLists.txt +++ b/tests/src/CMakeLists.txt @@ -214,11 +214,11 @@ endif() if (${HIP_PLATFORM} STREQUAL "hcc") make_test(hipArray " ") make_test(hipFuncSetDevice " ") + make_test(hipDynamicShared " ") endif() make_hipify_test(specialFunc.cu ) -make_test(hipDynamicShared " ") # Add subdirs here: add_subdirectory(context) diff --git a/tests/src/hipDynamicShared.cpp b/tests/src/hipDynamicShared.cpp index 329529281c..5686f9ee88 100644 --- a/tests/src/hipDynamicShared.cpp +++ b/tests/src/hipDynamicShared.cpp @@ -27,7 +27,12 @@ template __global__ void testExternSharedKernel(hipLaunchParm lp, const T* A_d, const T* B_d, T* C_d, size_t numElements, size_t groupElements) { // declare dynamic shared memory +#if defined(__HIP_PLATFORM_HCC__) HIP_DYNAMIC_SHARED(T, sdata) +#else + HIP_DYNAMIC_SHARED(__align__(sizeof(T)) unsigned char, my_sdata) + T *sdata = reinterpret_cast(my_sdata); +#endif size_t gid = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x); size_t tid = hipThreadIdx_x; From 8efae1e488bab7502154bfbecb469fb0cd4f0624 Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Fri, 9 Sep 2016 12:04:22 +0530 Subject: [PATCH 27/33] Directed tests: Fix hipCtx_simple on NVCC The test was using hipDeviceGetFromId instead of hipDeviceGet Change-Id: Ia035ded5212f2659d5c8f01e9f6fcec514fe7ccb --- tests/src/context/hipCtx_simple.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/src/context/hipCtx_simple.cpp b/tests/src/context/hipCtx_simple.cpp index 882cf44f6d..a5e08a4551 100644 --- a/tests/src/context/hipCtx_simple.cpp +++ b/tests/src/context/hipCtx_simple.cpp @@ -34,7 +34,7 @@ int main(int argc, char *argv[]) hipCtx_t ctx; hipCtx_t ctx1; - HIPCHECK(hipDeviceGetFromId(&device, 0)); + HIPCHECK(hipDeviceGet(&device, 0)); HIPCHECK(hipCtxCreate(&ctx, 0, device)); HIPCHECK(hipCtxGetCurrent(&ctx1)); HIPCHECK(hipCtxGetDevice(&device1)); From 51f25f92716c55fa9af3ac5a89f6948c41630c94 Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Fri, 9 Sep 2016 12:06:19 +0530 Subject: [PATCH 28/33] Remove redundant API hipDeviceGetFromId from HCC path Change-Id: Id6b4f1374b12cb3949d725f0859425cd8de6d868 --- include/hcc_detail/hip_runtime_api.h | 7 ------- src/hip_device.cpp | 16 ---------------- 2 files changed, 23 deletions(-) diff --git a/include/hcc_detail/hip_runtime_api.h b/include/hcc_detail/hip_runtime_api.h index e447fb609c..71e46612de 100644 --- a/include/hcc_detail/hip_runtime_api.h +++ b/include/hcc_detail/hip_runtime_api.h @@ -1260,13 +1260,6 @@ hipError_t hipCtxDisablePeerAccess (hipCtx_t peerCtx); * @} */ - -// TODO-ctx -/** - * @return hipSuccess, hipErrorInvalidDevice - */ -hipError_t hipDeviceGetFromId(hipDevice_t *device, int deviceId); - /** * @brief Returns a handle to a compute device * @param [out] device diff --git a/src/hip_device.cpp b/src/hip_device.cpp index fdfb25d7b0..fe7ad6ecd7 100644 --- a/src/hip_device.cpp +++ b/src/hip_device.cpp @@ -245,22 +245,6 @@ hipError_t hipSetDeviceFlags( unsigned int flags) return ihipLogStatus(e); }; -hipError_t hipDeviceGetFromId(hipDevice_t *device, int deviceId) -{ - HIP_INIT_API(device, deviceId); - - hipError_t e = hipSuccess; - - *device = ihipGetDevice(deviceId); - - if (device == nullptr) { - e = hipErrorInvalidDevice; - } - - - return ihipLogStatus(e); -} - hipError_t hipDeviceComputeCapability(int *major, int *minor, hipDevice_t device) { HIP_INIT_API(major,minor, device); From a07d51d5b9a57151ff72cb5fd9ddd8275ae2d836 Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Fri, 9 Sep 2016 12:07:15 +0530 Subject: [PATCH 29/33] Directed tests: Guard unsupported host math functions on NVCC Change-Id: I1145c0e45a913167c82ddc593d8a9027f237c7ba --- tests/src/deviceLib/hipDoublePrecisionMathHost.cpp | 6 ++++++ tests/src/deviceLib/hipSinglePrecisionMathHost.cpp | 8 ++++++++ 2 files changed, 14 insertions(+) diff --git a/tests/src/deviceLib/hipDoublePrecisionMathHost.cpp b/tests/src/deviceLib/hipDoublePrecisionMathHost.cpp index 94fe912e08..00815768ea 100644 --- a/tests/src/deviceLib/hipDoublePrecisionMathHost.cpp +++ b/tests/src/deviceLib/hipDoublePrecisionMathHost.cpp @@ -86,19 +86,25 @@ __host__ void double_precision_math_functions() nearbyint(0.0); //nextafter(0.0); //fX = 1.0; norm(1, &fX); +#if defined(__HIP_PLATFORM_HCC__) norm3d(1.0, 0.0, 0.0); norm4d(1.0, 0.0, 0.0, 0.0); +#endif normcdf(0.0); normcdfinv(1.0); pow(1.0, 0.0); rcbrt(1.0); remainder(2.0, 1.0); remquo(1.0, 2.0, &iX); +#if defined(__HIP_PLATFORM_HCC__) rhypot(0.0, 1.0); +#endif rint(1.0); +#if defined(__HIP_PLATFORM_HCC__) fX = 1.0; rnorm(1, &fX); rnorm3d(0.0, 0.0, 1.0); rnorm4d(0.0, 0.0, 0.0, 1.0); +#endif round(0.0); rsqrt(1.0); scalbln(0.0, 1); diff --git a/tests/src/deviceLib/hipSinglePrecisionMathHost.cpp b/tests/src/deviceLib/hipSinglePrecisionMathHost.cpp index 8a95bcaad2..51f09a9c0b 100644 --- a/tests/src/deviceLib/hipSinglePrecisionMathHost.cpp +++ b/tests/src/deviceLib/hipSinglePrecisionMathHost.cpp @@ -56,7 +56,9 @@ __host__ void single_precision_math_functions() expm1f(0.0f); fabsf(1.0f); fdimf(1.0f, 0.0f); +#if defined(__HIP_PLATFORM_HCC__) fdividef(0.0f, 1.0f); +#endif floorf(0.0f); fmaf(1.0f, 2.0f, 3.0f); fmaxf(0.0f, 0.0f); @@ -86,8 +88,10 @@ __host__ void single_precision_math_functions() nanf("1"); nearbyintf(0.0f); //nextafterf(0.0f); +#if defined(__HIP_PLATFORM_HCC__) norm3df(1.0f, 0.0f, 0.0f); norm4df(1.0f, 0.0f, 0.0f, 0.0f); +#endif normcdff(0.0f); normcdfinvf(1.0f); //fX = 1.0f; normf(1, &fX); @@ -95,11 +99,15 @@ __host__ void single_precision_math_functions() rcbrtf(1.0f); remainderf(2.0f, 1.0f); remquof(1.0f, 2.0f, &iX); +#if defined(__HIP_PLATFORM_HCC__) rhypotf(0.0f, 1.0f); +#endif rintf(1.0f); +#if defined(__HIP_PLATFORM_HCC__) rnorm3df(0.0f, 0.0f, 1.0f); rnorm4df(0.0f, 0.0f, 0.0f, 1.0f); fX = 1.0f; rnormf(1, &fX); +#endif roundf(0.0f); rsqrtf(1.0f); scalblnf(0.0f, 1); From 3cbd5d88fe976dde21ede7a5f8b45b99c7a96759 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Fri, 9 Sep 2016 11:10:19 +0300 Subject: [PATCH 30/33] [HIPIFY] CUDA Driver API porting to HIP : Conversion type Driver and driver functions are added. --- clang-hipify/src/Cuda2Hip.cpp | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/clang-hipify/src/Cuda2Hip.cpp b/clang-hipify/src/Cuda2Hip.cpp index e9d51b81a8..8be6b6ab82 100644 --- a/clang-hipify/src/Cuda2Hip.cpp +++ b/clang-hipify/src/Cuda2Hip.cpp @@ -56,7 +56,8 @@ using namespace llvm; #define DEBUG_TYPE "cuda2hip" enum ConvTypes { - CONV_DEV = 0, + CONV_DRIVER = 0, + CONV_DEV, CONV_MEM, CONV_KERN, CONV_COORD_FUNC, @@ -79,11 +80,10 @@ enum ConvTypes { }; const char *counterNames[CONV_LAST] = { - "dev", "mem", "kern", "coord_func", "math_func", - "special_func", "stream", "event", "ctx", "cache", - "err", "def", "tex", "other", "include", - "include_cuda_main_header", "type", "literal", "numeric_literal"}; - + "driver", "dev", "mem", "kern", "coord_func", "math_func", + "special_func", "stream", "event", "ctx", "cache", "err", + "def", "tex", "other", "include", "include_cuda_main_header", + "type", "literal", "numeric_literal"}; enum ApiTypes { API_DRIVER = 0, API_RUNTIME, @@ -92,7 +92,7 @@ enum ApiTypes { }; const char *apiNames[API_LAST] = { - "CUDA API", "CUDA RT API", "CUDA BLAS API"}; + "CUDA", "CUDA RT", "CUBLAS"}; namespace { @@ -352,6 +352,9 @@ struct cuda2hipMap { cuda2hipRename["cuDeviceComputeCapability"] = {"hipDeviceComputeCapability", CONV_DEV, API_DRIVER}; cuda2hipRename["cuDeviceCanAccessPeer"] = {"hipDeviceCanAccessPeer", CONV_DEV, API_DRIVER}; + // Driver + cuda2hipRename["cuDriverGetVersion"] = {"hipDriverGetVersion", CONV_DRIVER, API_DRIVER}; + /////////////////////////////// CUDA RT API /////////////////////////////// // Error API cuda2hipRename["cudaGetLastError"] = {"hipGetLastError", CONV_ERR, API_RUNTIME}; @@ -467,6 +470,7 @@ struct cuda2hipMap { cuda2hipRename["cudaThreadExit"] = {"hipDeviceReset", CONV_DEV, API_RUNTIME}; cuda2hipRename["cudaSetDevice"] = {"hipSetDevice", CONV_DEV, API_RUNTIME}; cuda2hipRename["cudaGetDevice"] = {"hipGetDevice", CONV_DEV, API_RUNTIME}; + cuda2hipRename["cudaGetDeviceCount"] = {"hipGetDeviceCount", CONV_DEV, API_RUNTIME}; // Attributes cuda2hipRename["cudaDeviceAttr"] = {"hipDeviceAttribute_t", CONV_TYPE, API_RUNTIME}; @@ -538,8 +542,7 @@ struct cuda2hipMap { cuda2hipRename["cudaFuncSetCacheConfig"] = {"hipFuncSetCacheConfig", CONV_CACHE, API_RUNTIME}; // Driver/Runtime - cuda2hipRename["cudaDriverGetVersion"] = {"hipDriverGetVersion", CONV_DEV, API_RUNTIME}; - cuda2hipRename["cudaGetDeviceCount"] = {"hipGetDeviceCount", CONV_DEV, API_RUNTIME}; + cuda2hipRename["cudaDriverGetVersion"] = {"hipDriverGetVersion", CONV_DRIVER, API_RUNTIME}; // unsupported yet //cuda2hipRename["cudaRuntimeGetVersion"] = {"hipRuntimeGetVersion", CONV_DEV, API_RUNTIME}; From 84f8d97ba5729a8b9e78d6765f2da7c8dc34748d Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Fri, 9 Sep 2016 11:15:11 +0300 Subject: [PATCH 31/33] [HIPIFY] CUDA Driver API porting to HIP : Event functions support. --- clang-hipify/src/Cuda2Hip.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/clang-hipify/src/Cuda2Hip.cpp b/clang-hipify/src/Cuda2Hip.cpp index 8be6b6ab82..76937d81f7 100644 --- a/clang-hipify/src/Cuda2Hip.cpp +++ b/clang-hipify/src/Cuda2Hip.cpp @@ -355,6 +355,14 @@ struct cuda2hipMap { // Driver cuda2hipRename["cuDriverGetVersion"] = {"hipDriverGetVersion", CONV_DRIVER, API_DRIVER}; + // Events + cuda2hipRename["cuEventCreate"] = {"hipEventCreate", CONV_EVENT, API_DRIVER}; + cuda2hipRename["cuEventDestroy_v2"] = {"hipEventDestroy", CONV_EVENT, API_DRIVER}; + cuda2hipRename["cuEventElapsedTime"] = {"hipEventElapsedTime", CONV_EVENT, API_DRIVER}; + cuda2hipRename["cuEventQuery"] = {"hipEventQuery", CONV_EVENT, API_DRIVER}; + cuda2hipRename["cuEventRecord"] = {"hipEventRecord", CONV_EVENT, API_DRIVER}; + cuda2hipRename["cuEventSynchronize"] = {"hipEventSynchronize", CONV_EVENT, API_DRIVER}; + /////////////////////////////// CUDA RT API /////////////////////////////// // Error API cuda2hipRename["cudaGetLastError"] = {"hipGetLastError", CONV_ERR, API_RUNTIME}; From bc28c37654050def817f589cc6bb4cb35be28d78 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Fri, 9 Sep 2016 11:31:50 +0300 Subject: [PATCH 32/33] [HIPIFY] CUDA Driver API porting to HIP : Module functions support. --- clang-hipify/src/Cuda2Hip.cpp | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/clang-hipify/src/Cuda2Hip.cpp b/clang-hipify/src/Cuda2Hip.cpp index 76937d81f7..4b69204afd 100644 --- a/clang-hipify/src/Cuda2Hip.cpp +++ b/clang-hipify/src/Cuda2Hip.cpp @@ -66,6 +66,7 @@ enum ConvTypes { CONV_STREAM, CONV_EVENT, CONV_CONTEXT, + CONV_MODULE, CONV_CACHE, CONV_ERR, CONV_DEF, @@ -80,10 +81,10 @@ enum ConvTypes { }; const char *counterNames[CONV_LAST] = { - "driver", "dev", "mem", "kern", "coord_func", "math_func", - "special_func", "stream", "event", "ctx", "cache", "err", - "def", "tex", "other", "include", "include_cuda_main_header", - "type", "literal", "numeric_literal"}; + "driver", "dev", "mem", "kern", "coord_func", "math_func", + "special_func", "stream", "event", "ctx", "module", "cache", + "err", "def", "tex", "other", "include", "include_cuda_main_header", + "type", "literal", "numeric_literal"}; enum ApiTypes { API_DRIVER = 0, API_RUNTIME, @@ -363,6 +364,17 @@ struct cuda2hipMap { cuda2hipRename["cuEventRecord"] = {"hipEventRecord", CONV_EVENT, API_DRIVER}; cuda2hipRename["cuEventSynchronize"] = {"hipEventSynchronize", CONV_EVENT, API_DRIVER}; + // Module + cuda2hipRename["cuModuleGetFunction"] = {"hipModuleGetFunction", CONV_MODULE, API_DRIVER}; + cuda2hipRename["cuModuleGetGlobal_v2"] = {"hipModuleGetGlobal", CONV_MODULE, API_DRIVER}; + cuda2hipRename["cuModuleLoad"] = {"hipModuleLoad", CONV_MODULE, API_DRIVER}; + cuda2hipRename["cuModuleLoadData"] = {"hipModuleLoadData", CONV_MODULE, API_DRIVER}; + // unsupported yet by HIP + // cuda2hipRename["cuModuleLoadDataEx"] = {"hipModuleLoadDataEx", CONV_MODULE, API_DRIVER}; + // cuda2hipRename["cuModuleLoadFatBinary"] = {"hipModuleLoadFatBinary", CONV_MODULE, API_DRIVER}; + cuda2hipRename["cuModuleUnload"] = {"hipModuleUnload", CONV_MODULE, API_DRIVER}; + cuda2hipRename["cuLaunchKernel"] = {"hipModuleLaunchKernel", CONV_MODULE, API_DRIVER}; + /////////////////////////////// CUDA RT API /////////////////////////////// // Error API cuda2hipRename["cudaGetLastError"] = {"hipGetLastError", CONV_ERR, API_RUNTIME}; From 9145df8b6300e9315f8eb58196c7ee3e2110201f Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Fri, 9 Sep 2016 11:46:35 +0300 Subject: [PATCH 33/33] [HIPIFY] CUDA Driver API porting to HIP : Stream functions support. --- clang-hipify/src/Cuda2Hip.cpp | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/clang-hipify/src/Cuda2Hip.cpp b/clang-hipify/src/Cuda2Hip.cpp index 4b69204afd..77d8b0259e 100644 --- a/clang-hipify/src/Cuda2Hip.cpp +++ b/clang-hipify/src/Cuda2Hip.cpp @@ -318,6 +318,9 @@ struct cuda2hipMap { cuda2hipRename["CU_EVENT_INTERPROCESS"] = {"hipEventInterprocess", CONV_EVENT, API_DRIVER}; cuda2hipRename["CUstream"] = {"hipStream_t", CONV_TYPE, API_DRIVER}; + // Stream Flags + cuda2hipRename["CU_STREAM_DEFAULT"] = {"hipStreamDefault", CONV_STREAM, API_DRIVER}; + cuda2hipRename["CU_STREAM_NON_BLOCKING"] = {"hipStreamNonBlocking", CONV_STREAM, API_DRIVER}; // Context cuda2hipRename["cuCtxCreate_v2"] = {"hipCtxCreate", CONV_CONTEXT, API_DRIVER}; @@ -375,6 +378,15 @@ struct cuda2hipMap { cuda2hipRename["cuModuleUnload"] = {"hipModuleUnload", CONV_MODULE, API_DRIVER}; cuda2hipRename["cuLaunchKernel"] = {"hipModuleLaunchKernel", CONV_MODULE, API_DRIVER}; + // Streams + // unsupported yet by HIP + // cuda2hipRename["cuStreamAddCallback"] = {"hipStreamAddCallback", CONV_STREAM, API_DRIVER}; + cuda2hipRename["cuStreamCreate"] = {"hipStreamCreate", CONV_STREAM, API_DRIVER}; + cuda2hipRename["cuStreamDestroy_v2"] = {"hipStreamDestroy", CONV_STREAM, API_DRIVER}; + cuda2hipRename["cuStreamQuery"] = {"hipStreamQuery", CONV_STREAM, API_DRIVER}; + cuda2hipRename["cuStreamSynchronize"] = {"hipStreamSynchronize", CONV_STREAM, API_DRIVER}; + cuda2hipRename["cuStreamWaitEvent"] = {"hipStreamWaitEvent", CONV_STREAM, API_DRIVER}; + /////////////////////////////// CUDA RT API /////////////////////////////// // Error API cuda2hipRename["cudaGetLastError"] = {"hipGetLastError", CONV_ERR, API_RUNTIME}; @@ -476,8 +488,8 @@ struct cuda2hipMap { cuda2hipRename["cudaStreamDestroy"] = {"hipStreamDestroy", CONV_STREAM, API_RUNTIME}; cuda2hipRename["cudaStreamWaitEvent"] = {"hipStreamWaitEvent", CONV_STREAM, API_RUNTIME}; cuda2hipRename["cudaStreamSynchronize"] = {"hipStreamSynchronize", CONV_STREAM, API_RUNTIME}; - // Stream Flags cuda2hipRename["cudaStreamGetFlags"] = {"hipStreamGetFlags", CONV_STREAM, API_RUNTIME}; + // Stream Flags cuda2hipRename["cudaStreamDefault"] = {"hipStreamDefault", CONV_STREAM, API_RUNTIME}; cuda2hipRename["cudaStreamNonBlocking"] = {"hipStreamNonBlocking", CONV_STREAM, API_RUNTIME};