-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
zpotrf_dtd working for the GPU, although results still suspicious
code refactoring, removed useless includes, changed redundancy in lapack to cublas converstions reverted previous refactoring, added gpu support for gemm_dtd added dtd sources and changed to cusolverDnXpotrf refactoring, cusolver workspaces are allocated per streams fixed typo and changed comment changed to devices
- Loading branch information
Showing
16 changed files
with
1,102 additions
and
550 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
/* | ||
* Copyright (c) 2023- The University of Tennessee and The University | ||
* of Tennessee Research Foundation. All rights | ||
* reserved. | ||
* $COPYRIGHT | ||
* | ||
*/ | ||
#include <cublas_v2.h> | ||
#include <cusolverDn.h> | ||
#include "potrf_cublas_utils.h" | ||
#include "parsec/utils/zone_malloc.h" | ||
#include "dplasmaaux_cuda.h" | ||
|
||
/* | ||
* Global info ID's for cublas handles and workspaces | ||
* Should be initialized in the tests | ||
* with the return of parsec_info_register | ||
* or parsec_info_lookup | ||
*/ | ||
parsec_info_id_t CuHI = -1; | ||
parsec_info_id_t WoSI = -1; | ||
|
||
/* Unfortunately, CUBLAS does not provide a error to string function */ | ||
static char *dplasma_cublas_error_to_string(cublasStatus_t cublas_status) | ||
{ | ||
switch(cublas_status) | ||
{ | ||
case CUBLAS_STATUS_SUCCESS: return "CUBLAS_STATUS_SUCCESS"; | ||
case CUBLAS_STATUS_NOT_INITIALIZED: return "CUBLAS_STATUS_NOT_INITIALIZED"; | ||
case CUBLAS_STATUS_ALLOC_FAILED: return "CUBLAS_STATUS_ALLOC_FAILED"; | ||
case CUBLAS_STATUS_INVALID_VALUE: return "CUBLAS_STATUS_INVALID_VALUE"; | ||
case CUBLAS_STATUS_ARCH_MISMATCH: return "CUBLAS_STATUS_ARCH_MISMATCH"; | ||
case CUBLAS_STATUS_MAPPING_ERROR: return "CUBLAS_STATUS_MAPPING_ERROR"; | ||
case CUBLAS_STATUS_EXECUTION_FAILED: return "CUBLAS_STATUS_EXECUTION_FAILED"; | ||
case CUBLAS_STATUS_INTERNAL_ERROR: return "CUBLAS_STATUS_INTERNAL_ERROR"; | ||
default: return "unknown CUBLAS error"; | ||
} | ||
} | ||
|
||
/* Unfortunately, cuSolver does not provide a error to string function */ | ||
char *dplasma_cusolver_error_to_string(cusolverStatus_t cusolver_status) | ||
{ | ||
switch(cusolver_status) { | ||
case CUSOLVER_STATUS_SUCCESS: return "CUSOLVER_STATUS_SUCCESS"; | ||
case CUSOLVER_STATUS_NOT_INITIALIZED: return "CUSOLVER_STATUS_NOT_INITIALIZED"; | ||
case CUSOLVER_STATUS_ALLOC_FAILED: return "CUSOLVER_STATUS_ALLOC_FAILED"; | ||
case CUSOLVER_STATUS_INVALID_VALUE: return "CUSOLVER_STATUS_INVALID_VALUE"; | ||
case CUSOLVER_STATUS_ARCH_MISMATCH: return "CUSOLVER_STATUS_ARCH_MISMATCH"; | ||
case CUSOLVER_STATUS_EXECUTION_FAILED: return "CUSOLVER_STATUS_EXECUTION_FAILED"; | ||
case CUSOLVER_STATUS_INTERNAL_ERROR: return "CUSOLVER_STATUS_INTERNAL_ERROR"; | ||
case CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED: return "CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED"; | ||
default: return "unknown cusolver error"; | ||
} | ||
} | ||
|
||
void *dplasma_create_cuda_handles(void *obj, void *_n) | ||
{ | ||
parsec_cuda_exec_stream_t *cuda_stream = (parsec_cuda_exec_stream_t *)obj; | ||
dplasma_cuda_handles_t *new; | ||
cublasHandle_t cublas_handle; | ||
cublasStatus_t cublas_status; | ||
|
||
(void)_n; | ||
|
||
/* No need to call cudaSetDevice, as this has been done by PaRSEC before calling the task body */ | ||
cublas_status = cublasCreate(&cublas_handle); | ||
if(CUBLAS_STATUS_SUCCESS != cublas_status) { | ||
if( CUBLAS_STATUS_ALLOC_FAILED == cublas_status ) { | ||
parsec_show_help("help-dplasma.txt", "cu*_alloc_failed", 1, "CUBLAS"); | ||
} | ||
parsec_fatal("Unable to create CUBLAS Handle: %s", | ||
dplasma_cublas_error_to_string(cublas_status)); | ||
return NULL; | ||
} | ||
cublas_status = cublasSetStream(cublas_handle, cuda_stream->cuda_stream); | ||
assert(CUBLAS_STATUS_SUCCESS == cublas_status); | ||
|
||
cusolverDnHandle_t cusolver_handle; | ||
cusolverStatus_t cusolver_status; | ||
cusolver_status = cusolverDnCreate(&cusolver_handle); | ||
if(CUSOLVER_STATUS_SUCCESS != cusolver_status) { | ||
cublasDestroy(cublas_handle); | ||
if( CUSOLVER_STATUS_ALLOC_FAILED == cusolver_status ) { | ||
parsec_show_help("help-dplasma.txt", "cu*_alloc_failed", 1, "cusolver"); | ||
} | ||
parsec_fatal("Unable to create a cuSolver handle: %s", | ||
dplasma_cusolver_error_to_string(cusolver_status)); | ||
return NULL; | ||
} | ||
cusolver_status = cusolverDnSetStream(cusolver_handle, cuda_stream->cuda_stream); | ||
assert(CUSOLVER_STATUS_SUCCESS == cusolver_status); | ||
|
||
new = malloc(sizeof(dplasma_cuda_handles_t)); | ||
new->cublas_handle = cublas_handle; | ||
new->cusolverDn_handle = cusolver_handle; | ||
|
||
return new; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
/* | ||
* Copyright (c) 2023- The University of Tennessee and The University | ||
* of Tennessee Research Foundation. All rights | ||
* reserved. | ||
* $COPYRIGHT | ||
* | ||
*/ | ||
|
||
#ifndef _DPLASMAAAUX_CUDA_H_ | ||
#define _DPLASMAAAUX_CUDA_H_ | ||
|
||
#include "parsec/mca/device/cuda/device_cuda.h" | ||
|
||
extern parsec_info_id_t CuHI; | ||
extern parsec_info_id_t WoSI; | ||
|
||
typedef struct { | ||
cublasHandle_t cublas_handle; | ||
void * cusolverDn_handle; | ||
} dplasma_cuda_handles_t; | ||
|
||
void *dplasma_create_cuda_handles(void *obj, void *user); | ||
|
||
#define DPLASMA_CUBLAS_CHECK_STATUS( STR, STATUS, CODE ) \ | ||
do { \ | ||
cublasStatus_t __cublas_status = (cublasStatus_t) (STATUS); \ | ||
if( CUBLAS_STATUS_SUCCESS != __cublas_status ) { \ | ||
parsec_warning( "%s:%d %s%s", __FILE__, __LINE__, \ | ||
(STR), cublasGetStatusString(__cublas_status) ); \ | ||
CODE; \ | ||
} \ | ||
} while(0) | ||
|
||
#define DPLASMA_CUSOLVER_CHECK_STATUS( STR, STATUS, CODE ) \ | ||
do { \ | ||
cusolverStatus_t __cusolver_status = (cusolverStatus_t) (STATUS); \ | ||
if( CUSOLVER_STATUS_SUCCESS != __cusolver_status ) { \ | ||
parsec_warning( "%s:%d %s%s", __FILE__, __LINE__, \ | ||
(STR), dplasma_cusolver_error_to_string(__cusolver_status) ); \ | ||
CODE; \ | ||
} \ | ||
} while(0) | ||
|
||
#endif /* __DPLAMAAUX_CUDA_H__ */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
include(PrecisionGenerator) | ||
|
||
set(DTD_HEADERS | ||
dplasma_z_dtd.h | ||
) | ||
set(DTD_SOURCES | ||
zgemm_dtd.c | ||
ztrsm_dtd.c | ||
zherk_dtd.c | ||
zpotrf_dtd.c | ||
) | ||
|
||
set(generated_headers "") | ||
precisions_rules_py(generated_dtd_headers | ||
"${DTD_HEADERS}" | ||
PRECISIONS "${DPLASMA_PRECISIONS}") | ||
|
||
set(generated_files "") | ||
precisions_rules_py(generated_dtd_cores | ||
"${DTD_SOURCES}" | ||
PRECISIONS "${DPLASMA_PRECISIONS}") | ||
|
||
add_custom_target(dplasma_dtd_includes ALL SOURCES | ||
${generated_dtd_headers} ) | ||
|
||
### Publish the documented files | ||
#add_documented_files(PROJECT DPLASMA DIR ${CMAKE_CURRENT_BINARY_DIR} FILES ${generated_dtd_files} ${generated_dtd_headers}) | ||
|
||
### Generate the dplasma_dtd object library | ||
# We cannot do a simple target_sources because of a bug in CMake <3.18 where the | ||
# GENERATED property has a directory visibility | ||
if(NOT TARGET dplasma_dtd) | ||
add_library(dplasma_dtd OBJECT ${generated_dtd_cores}) | ||
endif(NOT TARGET dplasma_dtd) | ||
|
||
add_dependencies(dplasma_dtd dplasma_includes dplasma_cores_includes dplasma_dtd_includes) | ||
|
||
set_target_properties(dplasma_dtd PROPERTIES ENABLE_EXPORTS True) | ||
set_target_properties(dplasma_dtd PROPERTIES POSITION_INDEPENDENT_CODE ${BUILD_SHARED_LIBS}) | ||
target_include_directories(dplasma_dtd | ||
PRIVATE | ||
${CMAKE_CURRENT_BINARY_DIR} | ||
$<$<NOT:${DPLASMA_BUILD_INPLACE}>:${CMAKE_CURRENT_SOURCE_DIR}>) | ||
target_link_libraries(dplasma_dtd | ||
PUBLIC | ||
dplasma_cores | ||
PaRSEC::parsec | ||
LAPACKE::LAPACKE | ||
CUDA::cublas | ||
) | ||
|
||
# Integrate the output into the main library | ||
target_sources(dplasma PRIVATE $<TARGET_OBJECTS:dplasma_dtd>) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
/* | ||
* Copyright (c) 2023- The University of Tennessee and The University | ||
* of Tennessee Research Foundation. All rights | ||
* reserved. | ||
* | ||
* @precisions normal z -> s d c | ||
* | ||
*/ | ||
#ifndef __DTD_WRAPPERS_Z_H__ | ||
#define __DTD_WRAPPERS_Z_H__ | ||
|
||
#include "dplasma/types.h" | ||
#include "parsec/interfaces/dtd/insert_function.h" | ||
#include "cores/core_blas.h" | ||
|
||
#if defined(DPLASMA_HAVE_CUDA) | ||
#include "parsec/execution_stream.h" | ||
#include "parsec/parsec_internal.h" | ||
#include "parsec/mca/device/cuda/device_cuda.h" | ||
#include "parsec/utils/zone_malloc.h" | ||
#include "dplasmaaux.h" | ||
#include "potrf_cublas_utils.h" | ||
#include <cublas_v2.h> | ||
|
||
/* probably need to add this to substitions */ | ||
#if defined(PRECISION_s) | ||
#define CUSOLVER_COMPUTE_TYPE CUDA_R_32F | ||
#elif defined(PRECISION_d) | ||
#define CUSOLVER_COMPUTE_TYPE CUDA_R_64F | ||
#elif defined(PRECISION_c) | ||
#define CUSOLVER_COMPUTE_TYPE CUDA_C_32F | ||
#elif defined(PRECISION_z) | ||
#define CUSOLVER_COMPUTE_TYPE CUDA_C_64F | ||
#endif | ||
|
||
typedef struct zpotrf_dtd_workspace_info_s { | ||
int mb; | ||
int nb; | ||
dplasma_enum_t uplo; | ||
} zpotrf_dtd_workspace_info_t; | ||
|
||
void* zpotrf_dtd_create_workspace(void *obj, void *user); | ||
void zpotrf_dtd_destroy_workspace(void *_ws, void *_n); | ||
|
||
void* zpotrf_dtd_create_params(void *obj, void *user); | ||
void zpotrf_dtd_destroy_params(void *params, void *_n); | ||
|
||
#endif | ||
|
||
parsec_task_class_t * parsec_dtd_create_zpotrf_task_class(parsec_taskpool_t * dtd_tp, int tile_full, int devices); | ||
parsec_task_class_t * parsec_dtd_create_ztrsm_task_class(parsec_taskpool_t * dtd_tp, int tile_full, int devices); | ||
parsec_task_class_t * parsec_dtd_create_zherk_task_class(parsec_taskpool_t * dtd_tp, int tile_full, int devices); | ||
parsec_task_class_t * parsec_dtd_create_zgemm_task_class(parsec_taskpool_t * dtd_tp, int tile_full, int devices); | ||
|
||
#endif /* __DTD_WRAPPERS_Z_H__ */ |
Oops, something went wrong.