Skip to content

Commit

Permalink
2024-05-25 nightly release (5dd515e)
Browse files Browse the repository at this point in the history
  • Loading branch information
pytorchbot committed May 25, 2024
1 parent b896105 commit 5b01c53
Show file tree
Hide file tree
Showing 27 changed files with 1,202 additions and 521 deletions.
52 changes: 44 additions & 8 deletions fbgemm_gpu/FbgemmGpu.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,10 @@ set(GWD_OPTIMIZERS
set(DEFUSED_OPTIMIZERS
rowwise_adagrad)

# Optimizers with the SSD support
set(SSD_OPTIMIZERS
rowwise_adagrad)

set(WEIGHT_OPTIONS
weighted
unweighted_nobag
Expand Down Expand Up @@ -146,6 +150,7 @@ set(gen_gpu_kernel_source_files
"gen_embedding_forward_split_unweighted_codegen_cuda.cu"
"gen_embedding_backward_dense_indice_weights_codegen_cuda.cu"
"gen_embedding_backward_split_indice_weights_codegen_cuda.cu"
"gen_embedding_backward_ssd_indice_weights_codegen_cuda.cu"
"gen_embedding_forward_split_weighted_vbe_codegen_cuda.cu"
"gen_embedding_forward_split_unweighted_vbe_codegen_cuda.cu"
"gen_batch_index_select_dim0_forward_codegen_cuda.cu"
Expand All @@ -156,10 +161,13 @@ set(gen_gpu_kernel_source_files
"gen_batch_index_select_dim0_backward_kernel_warp.cu"
"gen_embedding_backward_split_grad_embedding_ops.cu"
"gen_embedding_backward_split_grad_index_select.cu"
"gen_embedding_backward_common_split_device_kernel.cuh"
"gen_embedding_backward_batch_index_select_split_device_kernel.cuh"
"gen_embedding_backward_split_common_device_kernel.cuh"
"gen_embedding_backward_split_batch_index_select_device_kernel.cuh"
"gen_embedding_forward_split_weighted_gwd_codegen_cuda.cu"
"gen_embedding_forward_split_unweighted_gwd_codegen_cuda.cu"
"gen_embedding_forward_ssd_weighted_codegen_cuda.cu"
"gen_embedding_forward_ssd_unweighted_codegen_cuda.cu"
"gen_embedding_forward_ssd_unweighted_nobag_kernel_small.cu"
)

if(NOT USE_ROCM)
Expand All @@ -182,7 +190,8 @@ foreach(wdesc ${WEIGHT_OPTIONS})
"gen_embedding_backward_dense_split_${wdesc}_kernel_cta.cu"
"gen_embedding_backward_dense_split_${wdesc}_kernel_warp.cu"
"gen_embedding_forward_split_${wdesc}_kernel.cu"
"gen_embedding_backward_${wdesc}_split_device_kernel.cuh")
"gen_embedding_forward_ssd_${wdesc}_kernel.cu"
"gen_embedding_backward_split_${wdesc}_device_kernel.cuh")

foreach(etype fp32 fp16 fp8 int8 int4 int2)
list(APPEND gen_gpu_kernel_source_files
Expand All @@ -194,7 +203,7 @@ endforeach()
foreach(wdesc weighted unweighted)
list(APPEND gen_gpu_kernel_source_files
"gen_embedding_forward_split_${wdesc}_vbe_kernel.cu"
"gen_embedding_backward_${wdesc}_vbe_split_device_kernel.cuh")
"gen_embedding_backward_split_${wdesc}_vbe_device_kernel.cuh")
endforeach()

# Generate GWD files
Expand All @@ -210,22 +219,31 @@ set(gen_cpu_source_files

set(gen_python_source_files
${CMAKE_BINARY_DIR}/__init__.py
${CMAKE_BINARY_DIR}/lookup_args.py)
${CMAKE_BINARY_DIR}/lookup_args.py
${CMAKE_BINARY_DIR}/lookup_args_ssd.py
)

# For each of the optimizers, generate the backward split variant by adding
# the Python, CPU-only, GPU host, and GPU kernel source files

# Generate the Python functions only if there is the backend support
# Generate the Python functions only if there is the backend support (for all
# optimizers)
foreach(optimizer
${COMMON_OPTIMIZERS}
${CPU_ONLY_OPTIMIZERS}
${GPU_ONLY_OPTIMIZERS})
list(APPEND gen_python_source_files
"${CMAKE_BINARY_DIR}/lookup_${optimizer}.py")
list(APPEND gen_python_source_files
"${CMAKE_BINARY_DIR}/lookup_${optimizer}.py"
"${CMAKE_BINARY_DIR}/lookup_${optimizer}_pt2.py")
endforeach()

# Generate the Python functions only if there is the backend support (for SSD
# optimizers)
foreach(optimizer ${SSD_OPTIMIZERS})
list(APPEND gen_python_source_files
"${CMAKE_BINARY_DIR}/lookup_${optimizer}_ssd.py")
endforeach()

# Generate the backend API for all optimizers to preserve the backward
# compatibility
list(APPEND gen_cpu_source_files
Expand Down Expand Up @@ -288,6 +306,24 @@ foreach(optimizer ${DEFUSED_OPTIMIZERS})
"${CMAKE_BINARY_DIR}/split_embedding_optimizer_${optimizer}.py")
endforeach()

foreach(optimizer ${SSD_OPTIMIZERS})
list(APPEND gen_gpu_kernel_source_files
"gen_embedding_optimizer_${optimizer}_ssd_device_kernel.cuh"
)

list(APPEND gen_gpu_host_source_files
"gen_embedding_backward_ssd_${optimizer}.cpp"
)

foreach(wdesc weighted unweighted unweighted_nobag)
list(APPEND gen_gpu_kernel_source_files
"gen_embedding_backward_${optimizer}_ssd_${wdesc}_cuda.cu"
"gen_embedding_backward_${optimizer}_ssd_${wdesc}_kernel_cta.cu"
"gen_embedding_backward_${optimizer}_ssd_${wdesc}_kernel_warp.cu")
endforeach()

endforeach()

list(APPEND gen_defused_optim_py_files
${CMAKE_BINARY_DIR}/optimizer_args.py)

Expand Down
Loading

0 comments on commit 5b01c53

Please sign in to comment.