Skip to content

Commit

Permalink
update ck
Browse files Browse the repository at this point in the history
  • Loading branch information
rocking5566 committed Nov 21, 2024
1 parent 7153673 commit 63a64b5
Showing 1 changed file with 1 addition and 1 deletion.
2 changes: 1 addition & 1 deletion csrc/composable_kernel
Submodule composable_kernel updated 83 files
+6 −6 .github/CODEOWNERS
+5 −3 CMakeLists.txt
+36 −63 Dockerfile
+2 −2 client_example/24_grouped_conv_activation/CMakeLists.txt
+1 −1 client_example/CMakeLists.txt
+1 −1 docs/sphinx/requirements.in
+1 −1 docs/sphinx/requirements.txt
+4 −0 example/01_gemm/CMakeLists.txt
+84 −0 example/01_gemm/gemm_wmma_bf16.cpp
+84 −0 example/01_gemm/gemm_wmma_int8.cpp
+1 −1 example/10_convnd_fwd_multiple_d_multiple_reduce/common.hpp
+41 −16 example/10_convnd_fwd_multiple_d_multiple_reduce/run_convnd_fwd_max_example.inc
+1 −1 example/16_gemm_multi_d_multi_reduces/gemm_add_add_mean_meansquare_xdl_fp16.cpp
+6 −0 example/24_batched_gemm/CMakeLists.txt
+99 −0 example/24_batched_gemm/batched_gemm_xdl_bf16_v3.cpp
+106 −0 example/24_batched_gemm/batched_gemm_xdl_fp8_rowwise_v3.cpp
+26 −10 example/24_batched_gemm/run_batched_gemm_example.inc
+280 −0 example/24_batched_gemm/run_batched_gemm_example_rowwise.inc
+0 −3 example/ck_tile/01_fmha/codegen/ops/fmha_fwd_splitkv.py
+38 −21 example/ck_tile/01_fmha/fmha_fwd.cpp
+8 −2 example/ck_tile/01_fmha/fmha_fwd.hpp
+2 −2 example/ck_tile/01_fmha/utils.hpp
+40 −23 example/ck_tile/02_layernorm2d/layernorm2d_fwd.cpp
+3 −0 example/ck_tile/03_gemm/README.md
+10 −9 example/ck_tile/03_gemm/gemm_basic.cpp
+5 −5 example/ck_tile/03_gemm/gemm_mem_pipeline.cpp
+42 −1 include/ck/tensor_operation/gpu/device/device_batched_gemm_multi_d.hpp
+1,014 −0 include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multiple_d_xdl_cshuffle_v3.hpp
+6 −4 include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_wmma_cshuffle.hpp
+6 −5 include/ck/utility/amd_wmma.hpp
+1 −1 include/ck_tile/core/tensor/shuffle_tile.hpp
+35 −14 include/ck_tile/ops/fmha/kernel/fmha_fwd_splitkv_kernel.hpp
+2 −0 include/ck_tile/ops/fmha/pipeline/block_fmha_pipeline_qx_ks_vs_custom_policy.hpp
+11 −0 include/ck_tile/ops/fused_moe.hpp
+50 −20 include/ck_tile/ops/gemm/kernel/gemm_kernel.hpp
+3 −3 include/ck_tile/ops/gemm/pipeline/gemm_pipeline_ag_bg_cr_mem.hpp
+44 −19 include/ck_tile/ops/gemm/pipeline/gemm_pipeline_agmem_bgmem_creg_v1.hpp
+267 −63 include/ck_tile/ops/gemm/pipeline/gemm_pipeline_agmem_bgmem_creg_v1_default_policy.hpp
+116 −38 include/ck_tile/ops/gemm/pipeline/gemm_pipeline_problem.hpp
+357 −312 include/ck_tile/ops/gemm/pipeline/gemm_universal_pipeline_ag_bg_cr_policy.hpp
+10 −6 include/ck_tile/ops/gemm/pipeline/tile_gemm_traits.hpp
+16 −7 include/ck_tile/ops/layernorm2d/kernel/layernorm2d_fwd_kernel.hpp
+1 −1 include/ck_tile/ops/layernorm2d/pipeline/layernorm2d_fwd_pipeline_one_pass.hpp
+11 −3 include/ck_tile/ops/layernorm2d/pipeline/layernorm2d_fwd_pipeline_two_pass.hpp
+9 −4 include/ck_tile/ops/welford/block/block_welford.hpp
+52 −0 library/include/ck/library/tensor_operation_instance/gpu/gemm.hpp
+185 −0 library/include/ck/library/tensor_operation_instance/gpu/gemm_universal_batched.hpp
+40 −0 library/include/ck/library/tensor_operation_instance/gpu/gemm_wmma.inc
+1 −1 library/include/ck/library/utility/check_err.hpp
+9 −0 library/src/tensor_operation_instance/gpu/CMakeLists.txt
+13 −20 library/src/tensor_operation_instance/gpu/gemm/CMakeLists.txt
+35 −6 library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_km_kn_mn_instance.cpp
+35 −6 library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_km_nk_mn_instance.cpp
+35 −6 library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_mk_kn_mn_instance.cpp
+35 −6 library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_mk_nk_mn_instance.cpp
+77 −0 library/src/tensor_operation_instance/gpu/gemm/device_gemm_wmma_bf16_bf16_bf16_km_kn_mn_instance.cpp
+77 −0 library/src/tensor_operation_instance/gpu/gemm/device_gemm_wmma_bf16_bf16_bf16_km_nk_mn_instance.cpp
+77 −0 library/src/tensor_operation_instance/gpu/gemm/device_gemm_wmma_bf16_bf16_bf16_mk_kn_mn_instance.cpp
+77 −0 library/src/tensor_operation_instance/gpu/gemm/device_gemm_wmma_bf16_bf16_bf16_mk_nk_mn_instance.cpp
+76 −0 library/src/tensor_operation_instance/gpu/gemm/device_gemm_wmma_int8_int8_int8_km_kn_mn_instance.cpp
+76 −0 library/src/tensor_operation_instance/gpu/gemm/device_gemm_wmma_int8_int8_int8_km_nk_mn_instance.cpp
+76 −0 library/src/tensor_operation_instance/gpu/gemm/device_gemm_wmma_int8_int8_int8_mk_kn_mn_instance.cpp
+76 −0 library/src/tensor_operation_instance/gpu/gemm/device_gemm_wmma_int8_int8_int8_mk_nk_mn_instance.cpp
+2 −2 ...iply/device_gemm_multiply_multiply_xdl_f8_f8_bf16/device_gemm_multiply_multiply_xdl_f8_f8_bf16_mk_nk_mn.hpp
+2 −2 ...e/gpu/gemm_universal/device_gemm_xdl_universal_f8_f8_bf16/device_gemm_xdl_universal_f8_f8_bf16_mk_kn_mn.hpp
+2 −2 ...e/gpu/gemm_universal/device_gemm_xdl_universal_f8_f8_bf16/device_gemm_xdl_universal_f8_f8_bf16_mk_nk_mn.hpp
+19 −0 library/src/tensor_operation_instance/gpu/gemm_universal_batched/CMakeLists.txt
+95 −0 ...ice_batched_gemm_xdl_universal_bf16_bf16_bf16/device_batched_gemm_xdl_universal_bf16_bf16_bf16_mk_nk_mn.hpp
+32 −0 ...niversal_bf16_bf16_bf16/device_batched_gemm_xdl_universal_bf16_bf16_bf16_mk_nk_mn_comp_default_instance.cpp
+33 −0 ...versal_bf16_bf16_bf16/device_batched_gemm_xdl_universal_bf16_bf16_bf16_mk_nk_mn_mem_v1_default_instance.cpp
+33 −0 ...versal_bf16_bf16_bf16/device_batched_gemm_xdl_universal_bf16_bf16_bf16_mk_nk_mn_mem_v2_default_instance.cpp
+109 −0 ...ched/device_batched_gemm_xdl_universal_f8_f8_bf16/device_batched_gemm_xdl_universal_f8_f8_bf16_mk_nk_mn.hpp
+32 −0 ...mm_xdl_universal_f8_f8_bf16/device_batched_gemm_xdl_universal_f8_f8_bf16_mk_nk_mn_comp_default_instance.cpp
+33 −0 ..._xdl_universal_f8_f8_bf16/device_batched_gemm_xdl_universal_f8_f8_bf16_mk_nk_mn_mem_v1_default_instance.cpp
+33 −0 ..._xdl_universal_f8_f8_bf16/device_batched_gemm_xdl_universal_f8_f8_bf16_mk_nk_mn_mem_v2_default_instance.cpp
+280 −0 profiler/include/profiler/profile_gemm_universal_batched_impl.hpp
+2 −0 profiler/src/CMakeLists.txt
+3 −3 profiler/src/profile_gemm_universal.cpp
+187 −0 profiler/src/profile_gemm_universal_batched.cpp
+2 −2 script/process_perf_data.py
+1 −0 script/process_qa_data.sh
+6 −6 test/ck_tile/gemm/test_gemm_mem_pipeline_util.hpp
+2 −2 test/gemm_universal/test_gemm_universal_xdl.cpp

0 comments on commit 63a64b5

Please sign in to comment.