Skip to content

Commit

Permalink
Break up cutlass_extensions.cu, pt 4
Browse files (browse the repository at this point in the history)
Summary: Break up cutlass_extensions.cu, pt 4

Differential Revision: D61170170
  • Loading branch information
q10 authored and facebook-github-bot committed Aug 12, 2024
1 parent 9b22d07 commit 3d145e9
Show file tree
Hide file tree
Showing 4 changed files with 4 additions and 40 deletions.
2 changes: 1 addition & 1 deletion fbgemm_gpu/experimental/gen_ai/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,11 @@ if(USE_ROCM)
src/quantize/quantize.cpp)
else()
set(quantize_ops_sources
src/quantize/cutlass_extensions.cu
src/quantize/cutlass_extensions/f8f8bf16.cu
src/quantize/cutlass_extensions/f8f8bf16_blockwise.cu
src/quantize/cutlass_extensions/f8f8bf16_cublas.cu
src/quantize/cutlass_extensions/f8f8bf16_rowwise.cu
src/quantize/cutlass_extensions/f8f8bf16_tensorwise.cu
src/quantize/cutlass_extensions/i8i8bf16.cu
src/quantize/cutlass_extensions/f8i4bf16_rowwise.cu
src/quantize/cutlass_extensions/i8i8bf16_dynamic.cu
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@
#include <cutlass/epilogue/collective/collective_builder.hpp> // @manual
// clang-format on

#include "cutlass_extensions/include/fp8_blockwise_cutlass_helpers.h"
#include "cutlass_extensions/include/kernel_mode.h"
#include "fp8_blockwise_cutlass_helpers.h"

namespace {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,38 +7,9 @@
*/

#include <ATen/ATen.h>
#include <ATen/DeviceGuard.h>
#include <ATen/Dispatch.h>
#include <ATen/cuda/CUDAContext.h>
#include <ATen/cuda/Exceptions.h>
#include <ATen/cuda/Atomic.cuh>
#if !( \
defined(USE_ROCM) || \
((defined(CUDA_VERSION) && CUDA_VERSION < 11000) || \
(defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 800))))
#include <cublasLt.h>
#include <cuda_bf16.h>
#include <cuda_fp16.h>
#include <cuda/atomic>
#elif (defined(USE_ROCM))
#include <hip/hip_bf16.h>
#include <hip/hip_fp16.h>
#include <hipblaslt/hipblaslt.h>
#endif
#include <c10/core/ScalarType.h>
#include <c10/cuda/CUDAGuard.h>
#include <cutlass/core_io.h>
#include <cutlass/cutlass.h>
#include <cutlass/gemm/device/gemm.h>
#include <cutlass/half.h>
#include <cutlass/numeric_types.h>
#include <cutlass/trace.h>
#include <cutlass/util/host_tensor.h>
#include "cublas_utils.h"

#if CUDART_VERSION >= 12000
#include <cuda_fp8.h>
#endif
#include <cutlass/util/device_memory.h>
#include <cutlass/util/packed_stride.hpp>

// clang-format off
// The fixed ordering of the headers is required for CUTLASS 3.2+
Expand All @@ -48,14 +19,7 @@
#include <cutlass/epilogue/collective/collective_builder.hpp> // @manual
// clang-format on

#include <cute/atom/mma_atom.hpp>
#include <cutlass/gemm/dispatch_policy.hpp>
#include <cutlass/gemm/kernel/gemm_universal.hpp>
#include <cutlass/util/packed_stride.hpp>

#include "cutlass_extensions/include/kernel_mode.h"
#include "cutlass_extensions/include/threadblock.h"
#include "fp8_blockwise_cutlass_helpers.h"

namespace fbgemm_gpu {

Expand Down

0 comments on commit 3d145e9

Please sign in to comment.