From 3a5348dea4d0784145ce6d7cd728f442d69b6957 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Wed, 6 Dec 2023 18:56:32 +0100 Subject: [PATCH] [HIPIFY][#675][#677][SOLVER][feature] `cuSOLVER` support - Step 27 - Functions (DN) + `cusolverDn(S|D)ormqr(_bufferSize)?` and `cusolverDn(C|Z)unmqr(_bufferSize)?` are `SUPPORTED` by `hipSOLVER` only + [NOTE] `rocsolver_(s|d)ormqr` and `rocsolver_(c|z)unmqr` have a harness of other HIP and ROC API calls, thus `UNSUPPORTED` + Updated `SOLVER` synthetic tests, the regenerated `hipify-perl`, and `SOLVER` `CUDA2HIP` documentation --- bin/hipify-perl | 16 ++++++ docs/tables/CUSOLVER_API_supported_by_HIP.md | 8 +++ .../CUSOLVER_API_supported_by_HIP_and_ROC.md | 8 +++ docs/tables/CUSOLVER_API_supported_by_ROC.md | 8 +++ src/CUDA2HIP_SOLVER_API_functions.cpp | 22 ++++++++ .../synthetic/libraries/cusolver2hipsolver.cu | 54 +++++++++++++++++-- 6 files changed, 112 insertions(+), 4 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index e01178be..b7592e20 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -1080,6 +1080,8 @@ my %experimental_funcs = ( "cusolverEigType_t" => "6.1.0", "cusolverEigRange_t" => "6.1.0", "cusolverEigMode_t" => "6.1.0", + "cusolverDnZunmqr_bufferSize" => "6.1.0", + "cusolverDnZunmqr" => "6.1.0", "cusolverDnZungqr_bufferSize" => "6.1.0", "cusolverDnZungqr" => "6.1.0", "cusolverDnZpotrsBatched" => "6.1.0", @@ -1105,6 +1107,8 @@ my %experimental_funcs = ( "cusolverDnSpotrf_bufferSize" => "6.1.0", "cusolverDnSpotrfBatched" => "6.1.0", "cusolverDnSpotrf" => "6.1.0", + "cusolverDnSormqr_bufferSize" => "6.1.0", + "cusolverDnSormqr" => "6.1.0", "cusolverDnSorgqr_bufferSize" => "6.1.0", "cusolverDnSorgqr" => "6.1.0", "cusolverDnSgetrs" => "6.1.0", @@ -1126,6 +1130,8 @@ my %experimental_funcs = ( "cusolverDnDpotrf_bufferSize" => "6.1.0", "cusolverDnDpotrfBatched" => "6.1.0", "cusolverDnDpotrf" => "6.1.0", + "cusolverDnDormqr_bufferSize" => "6.1.0", + "cusolverDnDormqr" => "6.1.0", "cusolverDnDorgqr_bufferSize" => "6.1.0", "cusolverDnDorgqr" => "6.1.0", "cusolverDnDgetrs" => "6.1.0", @@ -1138,6 +1144,8 @@ my %experimental_funcs = ( "cusolverDnDDgesv" => "6.1.0", "cusolverDnDDgels_bufferSize" => "6.1.0", "cusolverDnDDgels" => "6.1.0", + "cusolverDnCunmqr_bufferSize" => "6.1.0", + "cusolverDnCunmqr" => "6.1.0", "cusolverDnCungqr_bufferSize" => "6.1.0", "cusolverDnCungqr" => "6.1.0", "cusolverDnCreate" => "6.1.0", @@ -1331,6 +1339,8 @@ sub experimentalSubstitutions { subst("cusolverDnCreate", "hipsolverDnCreate", "library"); subst("cusolverDnCungqr", "hipsolverDnCungqr", "library"); subst("cusolverDnCungqr_bufferSize", "hipsolverDnCungqr_bufferSize", "library"); + subst("cusolverDnCunmqr", "hipsolverDnCunmqr", "library"); + subst("cusolverDnCunmqr_bufferSize", "hipsolverDnCunmqr_bufferSize", "library"); subst("cusolverDnDDgels", "hipsolverDnDDgels", "library"); subst("cusolverDnDDgels_bufferSize", "hipsolverDnDDgels_bufferSize", "library"); subst("cusolverDnDDgesv", "hipsolverDnDDgesv", "library"); @@ -1343,6 +1353,8 @@ sub experimentalSubstitutions { subst("cusolverDnDgetrs", "hipsolverDnDgetrs", "library"); subst("cusolverDnDorgqr", "hipsolverDnDorgqr", "library"); subst("cusolverDnDorgqr_bufferSize", "hipsolverDnDorgqr_bufferSize", "library"); + subst("cusolverDnDormqr", "hipsolverDnDormqr", "library"); + subst("cusolverDnDormqr_bufferSize", "hipsolverDnDormqr_bufferSize", "library"); subst("cusolverDnDpotrf", "hipsolverDnDpotrf", "library"); subst("cusolverDnDpotrfBatched", "hipsolverDnDpotrfBatched", "library"); subst("cusolverDnDpotrf_bufferSize", "hipsolverDnDpotrf_bufferSize", "library"); @@ -1363,6 +1375,8 @@ sub experimentalSubstitutions { subst("cusolverDnSgetrs", "hipsolverDnSgetrs", "library"); subst("cusolverDnSorgqr", "hipsolverDnSorgqr", "library"); subst("cusolverDnSorgqr_bufferSize", "hipsolverDnSorgqr_bufferSize", "library"); + subst("cusolverDnSormqr", "hipsolverDnSormqr", "library"); + subst("cusolverDnSormqr_bufferSize", "hipsolverDnSormqr_bufferSize", "library"); subst("cusolverDnSpotrf", "hipsolverDnSpotrf", "library"); subst("cusolverDnSpotrfBatched", "hipsolverDnSpotrfBatched", "library"); subst("cusolverDnSpotrf_bufferSize", "hipsolverDnSpotrf_bufferSize", "library"); @@ -1388,6 +1402,8 @@ sub experimentalSubstitutions { subst("cusolverDnZpotrsBatched", "hipsolverDnZpotrsBatched", "library"); subst("cusolverDnZungqr", "hipsolverDnZungqr", "library"); subst("cusolverDnZungqr_bufferSize", "hipsolverDnZungqr_bufferSize", "library"); + subst("cusolverDnZunmqr", "hipsolverDnZunmqr", "library"); + subst("cusolverDnZunmqr_bufferSize", "hipsolverDnZunmqr_bufferSize", "library"); subst("cusolverDnHandle_t", "hipsolverHandle_t", "type"); subst("cusolverEigMode_t", "hipsolverEigMode_t", "type"); subst("cusolverEigRange_t", "hipsolverEigRange_t", "type"); diff --git a/docs/tables/CUSOLVER_API_supported_by_HIP.md b/docs/tables/CUSOLVER_API_supported_by_HIP.md index 2b57a16e..950caf2a 100644 --- a/docs/tables/CUSOLVER_API_supported_by_HIP.md +++ b/docs/tables/CUSOLVER_API_supported_by_HIP.md @@ -143,6 +143,8 @@ |`cusolverDnCreateParams`|11.0| | | | | | | | | | |`cusolverDnCungqr`|8.0| | | |`hipsolverDnCungqr`|5.1.0| | | |6.1.0| |`cusolverDnCungqr_bufferSize`|8.0| | | |`hipsolverDnCungqr_bufferSize`|5.1.0| | | |6.1.0| +|`cusolverDnCunmqr`| | | | |`hipsolverDnCunmqr`|5.1.0| | | |6.1.0| +|`cusolverDnCunmqr_bufferSize`|8.0| | | |`hipsolverDnCunmqr_bufferSize`|5.1.0| | | |6.1.0| |`cusolverDnDBgels`|11.0| | | | | | | | | | |`cusolverDnDBgels_bufferSize`|11.0| | | | | | | | | | |`cusolverDnDBgesv`|11.0| | | | | | | | | | @@ -174,6 +176,8 @@ |`cusolverDnDlauum_bufferSize`|10.1| | | | | | | | | | |`cusolverDnDorgqr`|8.0| | | |`hipsolverDnDorgqr`|5.1.0| | | |6.1.0| |`cusolverDnDorgqr_bufferSize`|8.0| | | |`hipsolverDnDorgqr_bufferSize`|5.1.0| | | |6.1.0| +|`cusolverDnDormqr`| | | | |`hipsolverDnDormqr`|5.1.0| | | |6.1.0| +|`cusolverDnDormqr_bufferSize`|8.0| | | |`hipsolverDnDormqr_bufferSize`|5.1.0| | | |6.1.0| |`cusolverDnDpotrf`| | | | |`hipsolverDnDpotrf`|5.1.0| | | |6.1.0| |`cusolverDnDpotrfBatched`|9.1| | | |`hipsolverDnDpotrfBatched`|5.1.0| | | |6.1.0| |`cusolverDnDpotrf_bufferSize`| | | | |`hipsolverDnDpotrf_bufferSize`|5.1.0| | | |6.1.0| @@ -236,6 +240,8 @@ |`cusolverDnSlauum_bufferSize`|10.1| | | | | | | | | | |`cusolverDnSorgqr`|8.0| | | |`hipsolverDnSorgqr`|5.1.0| | | |6.1.0| |`cusolverDnSorgqr_bufferSize`|8.0| | | |`hipsolverDnSorgqr_bufferSize`|5.1.0| | | |6.1.0| +|`cusolverDnSormqr`| | | | |`hipsolverDnSormqr`|5.1.0| | | |6.1.0| +|`cusolverDnSormqr_bufferSize`|8.0| | | |`hipsolverDnSormqr_bufferSize`|5.1.0| | | |6.1.0| |`cusolverDnSpotrf`| | | | |`hipsolverDnSpotrf`|5.1.0| | | |6.1.0| |`cusolverDnSpotrfBatched`|9.1| | | |`hipsolverDnSpotrfBatched`|5.1.0| | | |6.1.0| |`cusolverDnSpotrf_bufferSize`| | | | |`hipsolverDnSpotrf_bufferSize`|5.1.0| | | |6.1.0| @@ -285,6 +291,8 @@ |`cusolverDnZpotrsBatched`|9.1| | | |`hipsolverDnZpotrsBatched`|5.1.0| | | |6.1.0| |`cusolverDnZungqr`|8.0| | | |`hipsolverDnZungqr`|5.1.0| | | |6.1.0| |`cusolverDnZungqr_bufferSize`|8.0| | | |`hipsolverDnZungqr_bufferSize`|5.1.0| | | |6.1.0| +|`cusolverDnZunmqr`| | | | |`hipsolverDnZunmqr`|5.1.0| | | |6.1.0| +|`cusolverDnZunmqr_bufferSize`|8.0| | | |`hipsolverDnZunmqr_bufferSize`|5.1.0| | | |6.1.0| \*A - Added; D - Deprecated; C - Changed; R - Removed; E - Experimental \ No newline at end of file diff --git a/docs/tables/CUSOLVER_API_supported_by_HIP_and_ROC.md b/docs/tables/CUSOLVER_API_supported_by_HIP_and_ROC.md index 732112b6..a5fc4bd7 100644 --- a/docs/tables/CUSOLVER_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUSOLVER_API_supported_by_HIP_and_ROC.md @@ -143,6 +143,8 @@ |`cusolverDnCreateParams`|11.0| | | | | | | | | | | | | | | | |`cusolverDnCungqr`|8.0| | | |`hipsolverDnCungqr`|5.1.0| | | |6.1.0| | | | | | | |`cusolverDnCungqr_bufferSize`|8.0| | | |`hipsolverDnCungqr_bufferSize`|5.1.0| | | |6.1.0| | | | | | | +|`cusolverDnCunmqr`| | | | |`hipsolverDnCunmqr`|5.1.0| | | |6.1.0| | | | | | | +|`cusolverDnCunmqr_bufferSize`|8.0| | | |`hipsolverDnCunmqr_bufferSize`|5.1.0| | | |6.1.0| | | | | | | |`cusolverDnDBgels`|11.0| | | | | | | | | | | | | | | | |`cusolverDnDBgels_bufferSize`|11.0| | | | | | | | | | | | | | | | |`cusolverDnDBgesv`|11.0| | | | | | | | | | | | | | | | @@ -174,6 +176,8 @@ |`cusolverDnDlauum_bufferSize`|10.1| | | | | | | | | | | | | | | | |`cusolverDnDorgqr`|8.0| | | |`hipsolverDnDorgqr`|5.1.0| | | |6.1.0| | | | | | | |`cusolverDnDorgqr_bufferSize`|8.0| | | |`hipsolverDnDorgqr_bufferSize`|5.1.0| | | |6.1.0| | | | | | | +|`cusolverDnDormqr`| | | | |`hipsolverDnDormqr`|5.1.0| | | |6.1.0| | | | | | | +|`cusolverDnDormqr_bufferSize`|8.0| | | |`hipsolverDnDormqr_bufferSize`|5.1.0| | | |6.1.0| | | | | | | |`cusolverDnDpotrf`| | | | |`hipsolverDnDpotrf`|5.1.0| | | |6.1.0|`rocsolver_dpotrf`|3.2.0| | | |6.1.0| |`cusolverDnDpotrfBatched`|9.1| | | |`hipsolverDnDpotrfBatched`|5.1.0| | | |6.1.0| | | | | | | |`cusolverDnDpotrf_bufferSize`| | | | |`hipsolverDnDpotrf_bufferSize`|5.1.0| | | |6.1.0| | | | | | | @@ -236,6 +240,8 @@ |`cusolverDnSlauum_bufferSize`|10.1| | | | | | | | | | | | | | | | |`cusolverDnSorgqr`|8.0| | | |`hipsolverDnSorgqr`|5.1.0| | | |6.1.0| | | | | | | |`cusolverDnSorgqr_bufferSize`|8.0| | | |`hipsolverDnSorgqr_bufferSize`|5.1.0| | | |6.1.0| | | | | | | +|`cusolverDnSormqr`| | | | |`hipsolverDnSormqr`|5.1.0| | | |6.1.0| | | | | | | +|`cusolverDnSormqr_bufferSize`|8.0| | | |`hipsolverDnSormqr_bufferSize`|5.1.0| | | |6.1.0| | | | | | | |`cusolverDnSpotrf`| | | | |`hipsolverDnSpotrf`|5.1.0| | | |6.1.0|`rocsolver_spotrf`|3.2.0| | | |6.1.0| |`cusolverDnSpotrfBatched`|9.1| | | |`hipsolverDnSpotrfBatched`|5.1.0| | | |6.1.0| | | | | | | |`cusolverDnSpotrf_bufferSize`| | | | |`hipsolverDnSpotrf_bufferSize`|5.1.0| | | |6.1.0| | | | | | | @@ -285,6 +291,8 @@ |`cusolverDnZpotrsBatched`|9.1| | | |`hipsolverDnZpotrsBatched`|5.1.0| | | |6.1.0| | | | | | | |`cusolverDnZungqr`|8.0| | | |`hipsolverDnZungqr`|5.1.0| | | |6.1.0| | | | | | | |`cusolverDnZungqr_bufferSize`|8.0| | | |`hipsolverDnZungqr_bufferSize`|5.1.0| | | |6.1.0| | | | | | | +|`cusolverDnZunmqr`| | | | |`hipsolverDnZunmqr`|5.1.0| | | |6.1.0| | | | | | | +|`cusolverDnZunmqr_bufferSize`|8.0| | | |`hipsolverDnZunmqr_bufferSize`|5.1.0| | | |6.1.0| | | | | | | \*A - Added; D - Deprecated; C - Changed; R - Removed; E - Experimental \ No newline at end of file diff --git a/docs/tables/CUSOLVER_API_supported_by_ROC.md b/docs/tables/CUSOLVER_API_supported_by_ROC.md index 9be539f3..a1ae33e5 100644 --- a/docs/tables/CUSOLVER_API_supported_by_ROC.md +++ b/docs/tables/CUSOLVER_API_supported_by_ROC.md @@ -143,6 +143,8 @@ |`cusolverDnCreateParams`|11.0| | | | | | | | | | |`cusolverDnCungqr`|8.0| | | | | | | | | | |`cusolverDnCungqr_bufferSize`|8.0| | | | | | | | | | +|`cusolverDnCunmqr`| | | | | | | | | | | +|`cusolverDnCunmqr_bufferSize`|8.0| | | | | | | | | | |`cusolverDnDBgels`|11.0| | | | | | | | | | |`cusolverDnDBgels_bufferSize`|11.0| | | | | | | | | | |`cusolverDnDBgesv`|11.0| | | | | | | | | | @@ -174,6 +176,8 @@ |`cusolverDnDlauum_bufferSize`|10.1| | | | | | | | | | |`cusolverDnDorgqr`|8.0| | | | | | | | | | |`cusolverDnDorgqr_bufferSize`|8.0| | | | | | | | | | +|`cusolverDnDormqr`| | | | | | | | | | | +|`cusolverDnDormqr_bufferSize`|8.0| | | | | | | | | | |`cusolverDnDpotrf`| | | | |`rocsolver_dpotrf`|3.2.0| | | |6.1.0| |`cusolverDnDpotrfBatched`|9.1| | | | | | | | | | |`cusolverDnDpotrf_bufferSize`| | | | | | | | | | | @@ -236,6 +240,8 @@ |`cusolverDnSlauum_bufferSize`|10.1| | | | | | | | | | |`cusolverDnSorgqr`|8.0| | | | | | | | | | |`cusolverDnSorgqr_bufferSize`|8.0| | | | | | | | | | +|`cusolverDnSormqr`| | | | | | | | | | | +|`cusolverDnSormqr_bufferSize`|8.0| | | | | | | | | | |`cusolverDnSpotrf`| | | | |`rocsolver_spotrf`|3.2.0| | | |6.1.0| |`cusolverDnSpotrfBatched`|9.1| | | | | | | | | | |`cusolverDnSpotrf_bufferSize`| | | | | | | | | | | @@ -285,6 +291,8 @@ |`cusolverDnZpotrsBatched`|9.1| | | | | | | | | | |`cusolverDnZungqr`|8.0| | | | | | | | | | |`cusolverDnZungqr_bufferSize`|8.0| | | | | | | | | | +|`cusolverDnZunmqr`| | | | | | | | | | | +|`cusolverDnZunmqr_bufferSize`|8.0| | | | | | | | | | \*A - Added; D - Deprecated; C - Changed; R - Removed; E - Experimental \ No newline at end of file diff --git a/src/CUDA2HIP_SOLVER_API_functions.cpp b/src/CUDA2HIP_SOLVER_API_functions.cpp index 25122727..bba905b2 100644 --- a/src/CUDA2HIP_SOLVER_API_functions.cpp +++ b/src/CUDA2HIP_SOLVER_API_functions.cpp @@ -231,6 +231,16 @@ const std::map CUDA_SOLVER_FUNCTION_MAP { {"cusolverDnDorgqr", {"hipsolverDnDorgqr", "", CONV_LIB_FUNC, API_SOLVER, 2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cusolverDnCungqr", {"hipsolverDnCungqr", "", CONV_LIB_FUNC, API_SOLVER, 2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cusolverDnZungqr", {"hipsolverDnZungqr", "", CONV_LIB_FUNC, API_SOLVER, 2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, + // NOTE: rocsolver_(s|d)ormqr and rocsolver_(c|z)unmqr have a harness of other HIP and ROC API calls + {"cusolverDnSormqr_bufferSize", {"hipsolverDnSormqr_bufferSize", "", CONV_LIB_FUNC, API_SOLVER, 2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, + {"cusolverDnDormqr_bufferSize", {"hipsolverDnDormqr_bufferSize", "", CONV_LIB_FUNC, API_SOLVER, 2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, + {"cusolverDnCunmqr_bufferSize", {"hipsolverDnCunmqr_bufferSize", "", CONV_LIB_FUNC, API_SOLVER, 2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, + {"cusolverDnZunmqr_bufferSize", {"hipsolverDnZunmqr_bufferSize", "", CONV_LIB_FUNC, API_SOLVER, 2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, + // NOTE: rocsolver_(s|d)ormqr and rocsolver_(c|z)unmqr have a harness of other HIP and ROC API calls + {"cusolverDnSormqr", {"hipsolverDnSormqr", "", CONV_LIB_FUNC, API_SOLVER, 2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, + {"cusolverDnDormqr", {"hipsolverDnDormqr", "", CONV_LIB_FUNC, API_SOLVER, 2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, + {"cusolverDnCunmqr", {"hipsolverDnCunmqr", "", CONV_LIB_FUNC, API_SOLVER, 2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, + {"cusolverDnZunmqr", {"hipsolverDnZunmqr", "", CONV_LIB_FUNC, API_SOLVER, 2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, }; const std::map CUDA_SOLVER_FUNCTION_VER_MAP { @@ -371,6 +381,10 @@ const std::map CUDA_SOLVER_FUNCTION_VER_MAP { {"cusolverDnDorgqr", {CUDA_80, CUDA_0, CUDA_0}}, {"cusolverDnCungqr", {CUDA_80, CUDA_0, CUDA_0}}, {"cusolverDnZungqr", {CUDA_80, CUDA_0, CUDA_0}}, + {"cusolverDnSormqr_bufferSize", {CUDA_80, CUDA_0, CUDA_0}}, + {"cusolverDnDormqr_bufferSize", {CUDA_80, CUDA_0, CUDA_0}}, + {"cusolverDnCunmqr_bufferSize", {CUDA_80, CUDA_0, CUDA_0}}, + {"cusolverDnZunmqr_bufferSize", {CUDA_80, CUDA_0, CUDA_0}}, }; const std::map HIP_SOLVER_FUNCTION_VER_MAP { @@ -450,6 +464,14 @@ const std::map HIP_SOLVER_FUNCTION_VER_MAP { {"hipsolverDnDorgqr", {HIP_5010, HIP_0, HIP_0, HIP_LATEST}}, {"hipsolverDnCungqr", {HIP_5010, HIP_0, HIP_0, HIP_LATEST}}, {"hipsolverDnZungqr", {HIP_5010, HIP_0, HIP_0, HIP_LATEST}}, + {"hipsolverDnSormqr_bufferSize", {HIP_5010, HIP_0, HIP_0, HIP_LATEST}}, + {"hipsolverDnDormqr_bufferSize", {HIP_5010, HIP_0, HIP_0, HIP_LATEST}}, + {"hipsolverDnCunmqr_bufferSize", {HIP_5010, HIP_0, HIP_0, HIP_LATEST}}, + {"hipsolverDnZunmqr_bufferSize", {HIP_5010, HIP_0, HIP_0, HIP_LATEST}}, + {"hipsolverDnSormqr", {HIP_5010, HIP_0, HIP_0, HIP_LATEST}}, + {"hipsolverDnDormqr", {HIP_5010, HIP_0, HIP_0, HIP_LATEST}}, + {"hipsolverDnCunmqr", {HIP_5010, HIP_0, HIP_0, HIP_LATEST}}, + {"hipsolverDnZunmqr", {HIP_5010, HIP_0, HIP_0, HIP_LATEST}}, {"rocsolver_spotrf", {HIP_3020, HIP_0, HIP_0, HIP_LATEST}}, {"rocsolver_dpotrf", {HIP_3020, HIP_0, HIP_0, HIP_LATEST}}, diff --git a/tests/unit_tests/synthetic/libraries/cusolver2hipsolver.cu b/tests/unit_tests/synthetic/libraries/cusolver2hipsolver.cu index 116ae0c8..3e48c01a 100644 --- a/tests/unit_tests/synthetic/libraries/cusolver2hipsolver.cu +++ b/tests/unit_tests/synthetic/libraries/cusolver2hipsolver.cu @@ -15,6 +15,7 @@ int main() { int nrhs = 0; int lda = 0; int ldb = 0; + int ldc = 0; int Lwork = 0; int devIpiv = 0; int devInfo = 0; @@ -23,10 +24,12 @@ int main() { int batchSize = 0; float fA = 0.f; float fB = 0.f; + float fC = 0.f; float fX = 0.f; float fTAU = 0.f; double dA = 0.f; double dB = 0.f; + double dC = 0.f; double dX = 0.f; double dTAU = 0.f; float fWorkspace = 0.f; @@ -39,11 +42,11 @@ int main() { double** dAarray = 0; double** dBarray = 0; - // CHECK: hipDoubleComplex dComplexA, dComplexB, dComplexX, dComplexWorkspace, dComplexTAU; - cuDoubleComplex dComplexA, dComplexB, dComplexX, dComplexWorkspace, dComplexTAU; + // CHECK: hipDoubleComplex dComplexA, dComplexB, dComplexC, dComplexX, dComplexWorkspace, dComplexTAU; + cuDoubleComplex dComplexA, dComplexB, dComplexC, dComplexX, dComplexWorkspace, dComplexTAU; - // CHECK: hipComplex complexA, complexB, complexX, complexWorkspace, complexTAU; - cuComplex complexA, complexB, complexX, complexWorkspace, complexTAU; + // CHECK: hipComplex complexA, complexB, complexC, complexX, complexWorkspace, complexTAU; + cuComplex complexA, complexB, complexC, complexX, complexWorkspace, complexTAU; // CHECK: hipDoubleComplex** dcomplexAarray = 0; // CHECK-NEXT: hipDoubleComplex** dcomplexBarray = 0; @@ -87,6 +90,9 @@ int main() { // CHECK: hipblasOperation_t blasOperation; cublasOperation_t blasOperation; + // CHECK: hipblasSideMode_t blasSideMode; + cublasSideMode_t blasSideMode; + // CHECK: hipStream_t stream_t; cudaStream_t stream_t; @@ -326,6 +332,46 @@ int main() { // HIP: HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDnZungqr(hipsolverHandle_t handle, int m, int n, int k, hipDoubleComplex* A, int lda, const hipDoubleComplex* tau, hipDoubleComplex* work, int lwork, int* devInfo); // CHECK: status = hipsolverDnZungqr(handle, m, n, k, &dComplexA, lda, &dComplexTAU, &dComplexWorkspace, Lwork, &info); status = cusolverDnZungqr(handle, m, n, k, &dComplexA, lda, &dComplexTAU, &dComplexWorkspace, Lwork, &info); + + // CUDA: cusolverStatus_t CUSOLVERAPI cusolverDnSormqr_bufferSize(cusolverDnHandle_t handle, cublasSideMode_t side, cublasOperation_t trans, int m, int n, int k, const float * A, int lda, const float * tau, const float * C, int ldc, int * lwork); + // HIP: HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDnSormqr_bufferSize(hipsolverHandle_t handle, hipblasSideMode_t side, hipblasOperation_t trans, int m, int n, int k, const float* A, int lda, const float* tau, const float* C, int ldc, int* lwork); + // CHECK: status = hipsolverDnSormqr_bufferSize(handle, blasSideMode, blasOperation, m, n, k, &fA, lda, &fTAU, &fC, ldc, &Lwork); + status = cusolverDnSormqr_bufferSize(handle, blasSideMode, blasOperation, m, n, k, &fA, lda, &fTAU, &fC, ldc, &Lwork); + + // CUDA: cusolverStatus_t CUSOLVERAPI cusolverDnDormqr_bufferSize(cusolverDnHandle_t handle, cublasSideMode_t side, cublasOperation_t trans, int m, int n, int k, const double * A, int lda, const double * tau, const double * C, int ldc, int * lwork); + // HIP: HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDnDormqr_bufferSize(hipsolverHandle_t handle, hipblasSideMode_t side, hipblasOperation_t trans, int m, int n, int k, const double* A, int lda, const double* tau, const double* C, int ldc, int* lwork); + // CHECK: status = hipsolverDnDormqr_bufferSize(handle, blasSideMode, blasOperation, m, n, k, &dA, lda, &dTAU, &dC, ldc, &Lwork); + status = cusolverDnDormqr_bufferSize(handle, blasSideMode, blasOperation, m, n, k, &dA, lda, &dTAU, &dC, ldc, &Lwork); + + // CUDA: cusolverStatus_t CUSOLVERAPI cusolverDnCunmqr_bufferSize(cusolverDnHandle_t handle, cublasSideMode_t side, cublasOperation_t trans, int m, int n, int k, const cuComplex * A, int lda, const cuComplex * tau, const cuComplex * C, int ldc, int * lwork); + // HIP: HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDnCunmqr_bufferSize(hipsolverHandle_t handle, hipblasSideMode_t side, hipblasOperation_t trans, int m, int n, int k, const hipFloatComplex* A, int lda, const hipFloatComplex* tau, const hipFloatComplex* C, int ldc, int* lwork); + // CHECK: status = hipsolverDnCunmqr_bufferSize(handle, blasSideMode, blasOperation, m, n, k, &complexA, lda, &complexTAU, &complexC, ldc, &Lwork); + status = cusolverDnCunmqr_bufferSize(handle, blasSideMode, blasOperation, m, n, k, &complexA, lda, &complexTAU, &complexC, ldc, &Lwork); + + // CUDA: cusolverStatus_t CUSOLVERAPI cusolverDnZunmqr_bufferSize(cusolverDnHandle_t handle, cublasSideMode_t side, cublasOperation_t trans, int m, int n, int k, const cuDoubleComplex *A, int lda, const cuDoubleComplex *tau, const cuDoubleComplex *C, int ldc, int * lwork); + // HIP: HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDnZunmqr_bufferSize(hipsolverHandle_t handle, hipblasSideMode_t side, hipblasOperation_t trans, int m, int n, int k, const hipDoubleComplex* A, int lda, const hipDoubleComplex* tau, const hipDoubleComplex* C, int ldc, int* lwork); + // CHECK: status = hipsolverDnZunmqr_bufferSize(handle, blasSideMode, blasOperation, m, n, k, &dComplexA, lda, &dComplexTAU, &dComplexC, ldc, &Lwork); + status = cusolverDnZunmqr_bufferSize(handle, blasSideMode, blasOperation, m, n, k, &dComplexA, lda, &dComplexTAU, &dComplexC, ldc, &Lwork); + + // CUDA: cusolverStatus_t CUSOLVERAPI cusolverDnSormqr(cusolverDnHandle_t handle, cublasSideMode_t side, cublasOperation_t trans, int m, int n, int k, const float * A, int lda, const float * tau, float * C, int ldc, float * work, int lwork, int * devInfo); + // HIP: HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDnSormqr(hipsolverHandle_t handle, hipblasSideMode_t side, hipblasOperation_t trans, int m, int n, int k, const float* A, int lda, const float* tau, float* C, int ldc, float* work, int lwork, int* devInfo); + // CHECK: status = hipsolverDnSormqr(handle, blasSideMode, blasOperation, m, n, k, &fA, lda, &fTAU, &fC, ldc, &fWorkspace, Lwork, &devInfo); + status = cusolverDnSormqr(handle, blasSideMode, blasOperation, m, n, k, &fA, lda, &fTAU, &fC, ldc, &fWorkspace, Lwork, &devInfo); + + // CUDA: cusolverStatus_t CUSOLVERAPI cusolverDnDormqr(cusolverDnHandle_t handle, cublasSideMode_t side, cublasOperation_t trans, int m, int n, int k, const double * A, int lda, const double * tau, double * C, int ldc, double * work, int lwork, int * devInfo); + // HIP: HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDnDormqr(hipsolverHandle_t handle, hipblasSideMode_t side, hipblasOperation_t trans, int m, int n, int k, const double* A, int lda, const double* tau, double* C, int ldc, double* work, int lwork, int* devInfo); + // CHECK: status = hipsolverDnDormqr(handle, blasSideMode, blasOperation, m, n, k, &dA, lda, &dTAU, &dC, ldc, &dWorkspace, Lwork, &devInfo); + status = cusolverDnDormqr(handle, blasSideMode, blasOperation, m, n, k, &dA, lda, &dTAU, &dC, ldc, &dWorkspace, Lwork, &devInfo); + + // CUDA: cusolverStatus_t CUSOLVERAPI cusolverDnCunmqr(cusolverDnHandle_t handle, cublasSideMode_t side, cublasOperation_t trans, int m, int n, int k, const cuComplex * A, int lda, const cuComplex * tau, cuComplex * C, int ldc, cuComplex * work, int lwork, int * devInfo); + // HIP: HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDnCunmqr(hipsolverHandle_t handle, hipblasSideMode_t side, hipblasOperation_t trans, int m, int n, int k, const hipFloatComplex* A, int lda, const hipFloatComplex* tau, hipFloatComplex* C, int ldc, hipFloatComplex* work, int lwork, int* devInfo); + // CHECK: status = hipsolverDnCunmqr(handle, blasSideMode, blasOperation, m, n, k, &complexA, lda, &complexTAU, &complexC, ldc, &complexWorkspace, Lwork, &devInfo); + status = cusolverDnCunmqr(handle, blasSideMode, blasOperation, m, n, k, &complexA, lda, &complexTAU, &complexC, ldc, &complexWorkspace, Lwork, &devInfo); + + // CUDA: cusolverStatus_t CUSOLVERAPI cusolverDnZunmqr(cusolverDnHandle_t handle, cublasSideMode_t side, cublasOperation_t trans, int m, int n, int k, const cuDoubleComplex *A, int lda, const cuDoubleComplex *tau, cuDoubleComplex * C, int ldc, cuDoubleComplex * work, int lwork, int * devInfo); + // HIP: HIPSOLVER_EXPORT hipsolverStatus_t hipsolverDnZunmqr(hipsolverHandle_t handle, hipblasSideMode_t side, hipblasOperation_t trans, int m, int n, int k, const hipDoubleComplex* A, int lda, const hipDoubleComplex* tau, hipDoubleComplex* C, int ldc, hipDoubleComplex* work, int lwork, int* devInfo); + // CHECK: status = hipsolverDnZunmqr(handle, blasSideMode, blasOperation, m, n, k, &dComplexA, lda, &dComplexTAU, &dComplexC, ldc, &dComplexWorkspace, Lwork, &devInfo); + status = cusolverDnZunmqr(handle, blasSideMode, blasOperation, m, n, k, &dComplexA, lda, &dComplexTAU, &dComplexC, ldc, &dComplexWorkspace, Lwork, &devInfo); #endif #if CUDA_VERSION >= 9000