Skip to content

Commit

Permalink
Merge pull request #47 from ROCmSoftwarePlatform/IFU-2023-10-26
Browse files Browse the repository at this point in the history
FBGEMM Ifu 2023 10 26
  • Loading branch information
liligwu authored Nov 1, 2023
2 parents 289af57 + f24435d commit 83f4183
Show file tree
Hide file tree
Showing 145 changed files with 9,431 additions and 4,587 deletions.
130 changes: 74 additions & 56 deletions .github/scripts/fbgemm_gpu_build.bash
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,13 @@ prepare_fbgemm_gpu_build () {
echo "################################################################################"
echo "# Prepare FBGEMM-GPU Build"
echo "#"
echo "# [TIMESTAMP] $(date --utc +%FT%T.%3NZ)"
echo "# [$(date --utc +%FT%T.%3NZ)] + ${FUNCNAME[0]} ${*}"
echo "################################################################################"
echo ""
fi

test_network_connection || return 1

if [[ "${GITHUB_WORKSPACE}" ]]; then
# https://github.com/actions/checkout/issues/841
git config --global --add safe.directory "${GITHUB_WORKSPACE}"
Expand All @@ -38,19 +40,27 @@ prepare_fbgemm_gpu_build () {
git submodule sync
git submodule update --init --recursive

# shellcheck disable=SC2155
local env_prefix=$(env_name_or_prefix "${env_name}")

echo "[BUILD] Installing other build dependencies ..."
(exec_with_retries conda run -n "${env_name}" python -m pip install -r requirements.txt) || return 1
# shellcheck disable=SC2086
(exec_with_retries 3 conda run --no-capture-output ${env_prefix} python -m pip install -r requirements.txt) || return 1

(test_python_import "${env_name}" numpy) || return 1
(test_python_import "${env_name}" skbuild) || return 1
# shellcheck disable=SC2086
(test_python_import_package "${env_name}" numpy) || return 1
# shellcheck disable=SC2086
(test_python_import_package "${env_name}" skbuild) || return 1

echo "[BUILD] Successfully ran git submodules update"
}

__configure_fbgemm_gpu_build_cpu () {
# Update the package name and build args depending on if CUDA is specified
echo "[BUILD] Setting CPU-only build args ..."
build_args=(--cpu_only)
build_args=(
--package_variant=cpu
)
}

__configure_fbgemm_gpu_build_rocm () {
Expand All @@ -76,10 +86,14 @@ __configure_fbgemm_gpu_build_rocm () {
fi

echo "[BUILD] Setting the following ROCm targets: ${arch_list}"
print_exec conda env config vars set -n "${env_name}" PYTORCH_ROCM_ARCH="${arch_list}"
# shellcheck disable=SC2086
print_exec conda env config vars set ${env_prefix} PYTORCH_ROCM_ARCH="${arch_list}"

echo "[BUILD] Setting ROCm build args ..."
build_args=()
build_args=(
--package_variant=rocm
-DTORCH_USE_HIP_DSA=1
)
}

__configure_fbgemm_gpu_build_cuda () {
Expand All @@ -103,7 +117,14 @@ __configure_fbgemm_gpu_build_cuda () {

else
echo "[BUILD] Using the default CUDA targets ..."
local arch_list="7.0;8.0"
# For cuda version 12.1, enable sm 9.0
cuda_version_nvcc=$(conda run -n "${env_name}" nvcc --version)
echo "$cuda_version_nvcc"
if [[ $cuda_version_nvcc == *"V12.1"* ]]; then
local arch_list="7.0;8.0;9.0"
else
local arch_list="7.0;8.0"
fi
fi

# Unset the environment-supplied TORCH_CUDA_ARCH_LIST because it will take
Expand All @@ -114,9 +135,10 @@ __configure_fbgemm_gpu_build_cuda () {

# Build only CUDA 7.0 and 8.0 (i.e. V100 and A100) because of 100 MB binary size limits from PyPI.
echo "[BUILD] Setting CUDA build args ..."
# shellcheck disable=SC2155
local nvml_lib_path=$(conda run -n "${env_name}" printenv NVML_LIB_PATH)
# shellcheck disable=SC2155,SC2086
local nvml_lib_path=$(conda run --no-capture-output ${env_prefix} printenv NVML_LIB_PATH)
build_args=(
--package_variant=cuda
--nvml_lib_path="${nvml_lib_path}"
-DTORCH_CUDA_ARCH_LIST="'${arch_list}'"
)
Expand All @@ -138,7 +160,7 @@ __configure_fbgemm_gpu_build () {
echo "################################################################################"
echo "# Configure FBGEMM-GPU Build"
echo "#"
echo "# [TIMESTAMP] $(date --utc +%FT%T.%3NZ)"
echo "# [$(date --utc +%FT%T.%3NZ)] + ${FUNCNAME[0]} ${*}"
echo "################################################################################"
echo ""
fi
Expand Down Expand Up @@ -185,16 +207,17 @@ __build_fbgemm_gpu_common_pre_steps () {
echo "[BUILD] Determined Python package name to use: ${package_name}"

# Extract the Python tag
# shellcheck disable=SC2207
python_version=($(conda run -n "${env_name}" python --version))
# shellcheck disable=SC2207,SC2086
python_version=($(conda run --no-capture-output ${env_prefix} python --version))
# shellcheck disable=SC2206
python_version_arr=(${python_version[1]//./ })
python_tag="py${python_version_arr[0]}${python_version_arr[1]}"
echo "[BUILD] Extracted Python tag: ${python_tag}"

echo "[BUILD] Running pre-build cleanups ..."
print_exec rm -rf dist
print_exec conda run -n "${env_name}" python setup.py clean
# shellcheck disable=SC2086
print_exec conda run --no-capture-output ${env_prefix} python setup.py clean

echo "[BUILD] Printing git status ..."
print_exec git status
Expand Down Expand Up @@ -273,7 +296,7 @@ build_fbgemm_gpu_package () {
fbgemm_variant="$3"
fbgemm_variant_targets="$4"
if [ "$fbgemm_variant" == "" ]; then
echo "Usage: ${FUNCNAME[0]} ENV_NAME PACKAGE_NAME VARIANT [TARGETS]"
echo "Usage: ${FUNCNAME[0]} ENV_NAME RELEASE_TYPE VARIANT [VARIANT_TARGETS]"
echo "Example(s):"
echo " ${FUNCNAME[0]} build_env nightly cpu # Nightly CPU-only variant"
echo " ${FUNCNAME[0]} build_env nightly cuda # Nightly CUDA variant for default target(s)"
Expand All @@ -283,25 +306,44 @@ build_fbgemm_gpu_package () {
return 1
fi

# shellcheck disable=SC2155
local env_prefix=$(env_name_or_prefix "${env_name}")

# Set up and configure the build
__build_fbgemm_gpu_common_pre_steps || return 1
__configure_fbgemm_gpu_build "${fbgemm_variant}" "${fbgemm_variant_targets}" || return 1

echo "################################################################################"
echo "# Build FBGEMM-GPU Package (Wheel)"
echo "#"
echo "# [TIMESTAMP] $(date --utc +%FT%T.%3NZ)"
echo "# [$(date --utc +%FT%T.%3NZ)] + ${FUNCNAME[0]} ${*}"
echo "################################################################################"
echo ""

# manylinux2014 is specified, bc manylinux1 does not support aarch64
# See https://github.com/pypa/manylinux
local plat_name="manylinux2014_${MACHINE_NAME}"

echo "[BUILD] Checking arch_list = ${arch_list}"
echo "[BUILD] Checking build_args:"
echo "${build_args[@]}"

# shellcheck disable=SC2155
local core=$(lscpu | grep "Core(s)" | awk '{print $NF}') && echo "core = ${core}" || echo "core not found"
# shellcheck disable=SC2155
local sockets=$(lscpu | grep "Socket(s)" | awk '{print $NF}') && echo "sockets = ${sockets}" || echo "sockets not found"
local re='^[0-9]+$'
local run_multicore=""
if [[ $core =~ $re && $sockets =~ $re ]] ; then
local n_core=$((core * sockets))
local run_multicore=" -j ${n_core}"
fi

# Distribute Python extensions as wheels on Linux
echo "[BUILD] Building FBGEMM-GPU wheel (VARIANT=${fbgemm_variant}) ..."
print_exec conda run -n "${env_name}" \
python setup.py bdist_wheel \
# shellcheck disable=SC2086
print_exec conda run --no-capture-output ${env_prefix} \
python setup.py "${run_multicore}" bdist_wheel \
--package_name="${package_name}" \
--python-tag="${python_tag}" \
--plat-name="${plat_name}" \
Expand Down Expand Up @@ -336,21 +378,25 @@ build_fbgemm_gpu_install () {
return 1
fi

# shellcheck disable=SC2155
local env_prefix=$(env_name_or_prefix "${env_name}")

# Set up and configure the build
__build_fbgemm_gpu_common_pre_steps || return 1
__configure_fbgemm_gpu_build "${fbgemm_variant}" "${fbgemm_variant_targets}" || return 1

echo "################################################################################"
echo "# Build + Install FBGEMM-GPU Package"
echo "#"
echo "# [TIMESTAMP] $(date --utc +%FT%T.%3NZ)"
echo "# [$(date --utc +%FT%T.%3NZ)] + ${FUNCNAME[0]} ${*}"
echo "################################################################################"
echo ""

# Parallelism may need to be limited to prevent the build from being
# canceled for going over ulimits
echo "[BUILD] Building + installing FBGEMM-GPU (VARIANT=${fbgemm_variant}) ..."
print_exec conda run -n "${env_name}" \
# shellcheck disable=SC2086
print_exec conda run --no-capture-output ${env_prefix} \
python setup.py install "${build_args[@]}"

# Run checks on the built libraries
Expand All @@ -360,7 +406,7 @@ build_fbgemm_gpu_install () {
# Exit this directory to prevent import clashing, since there is an
# fbgemm_gpu/ subdirectory present
cd - || return 1
(test_python_import "${env_name}" fbgemm_gpu) || return 1
(test_python_import_package "${env_name}" fbgemm_gpu) || return 1

echo "[BUILD] FBGEMM-GPU build + install completed"
}
Expand All @@ -380,57 +426,29 @@ build_fbgemm_gpu_develop () {
return 1
fi

# shellcheck disable=SC2155
local env_prefix=$(env_name_or_prefix "${env_name}")

# Set up and configure the build
__build_fbgemm_gpu_common_pre_steps || return 1
__configure_fbgemm_gpu_build "${fbgemm_variant}" "${fbgemm_variant_targets}" || return 1

echo "################################################################################"
echo "# Build + Install FBGEMM-GPU Package"
echo "# Build + Install FBGEMM-GPU Package (Develop)"
echo "#"
echo "# [TIMESTAMP] $(date --utc +%FT%T.%3NZ)"
echo "# [$(date --utc +%FT%T.%3NZ)] + ${FUNCNAME[0]} ${*}"
echo "################################################################################"
echo ""

# Parallelism may need to be limited to prevent the build from being
# canceled for going over ulimits
echo "[BUILD] Building (develop) FBGEMM-GPU (VARIANT=${fbgemm_variant}) ..."
print_exec conda run -n "${env_name}" \
# shellcheck disable=SC2086
print_exec conda run --no-capture-output ${env_prefix} \
python setup.py build develop "${build_args[@]}"

# Run checks on the built libraries
(run_fbgemm_gpu_postbuild_checks "${fbgemm_variant}") || return 1

echo "[BUILD] FBGEMM-GPU build + develop completed"
}

install_fbgemm_gpu_package () {
local env_name="$1"
local package_name="$2"
if [ "$package_name" == "" ]; then
echo "Usage: ${FUNCNAME[0]} ENV_NAME WHEEL_NAME"
echo "Example(s):"
echo " ${FUNCNAME[0]} build_env fbgemm_gpu.whl # Install the package (wheel)"
return 1
else
echo "################################################################################"
echo "# Install FBGEMM-GPU Package (Wheel)"
echo "#"
echo "# [TIMESTAMP] $(date --utc +%FT%T.%3NZ)"
echo "################################################################################"
echo ""
fi

echo "[INSTALL] Printing out FBGEMM-GPU wheel SHA: ${package_name}"
print_exec sha1sum "${package_name}"
print_exec sha256sum "${package_name}"
print_exec md5sum "${package_name}"

echo "[INSTALL] Installing FBGEMM-GPU wheel: ${package_name} ..."
(exec_with_retries conda run -n "${env_name}" python -m pip install "${package_name}") || return 1

echo "[INSTALL] Checking imports ..."
(test_python_import "${env_name}" fbgemm_gpu) || return 1
(test_python_import "${env_name}" fbgemm_gpu.split_embedding_codegen_lookup_invokers) || return 1

echo "[INSTALL] Wheel installation completed ..."
}
26 changes: 19 additions & 7 deletions .github/scripts/fbgemm_gpu_docs.bash
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,22 @@ install_docs_tools () {
echo "################################################################################"
echo "# Install Documentation Tools"
echo "#"
echo "# [TIMESTAMP] $(date --utc +%FT%T.%3NZ)"
echo "# [$(date --utc +%FT%T.%3NZ)] + ${FUNCNAME[0]} ${*}"
echo "################################################################################"
echo ""
fi

test_network_connection || return 1

# shellcheck disable=SC2155
local env_prefix=$(env_name_or_prefix "${env_name}")

echo "[INSTALL] Installing docs tools ..."
(exec_with_retries conda install -n "${env_name}" -c conda-forge -y \
# shellcheck disable=SC2086
(exec_with_retries 3 conda install ${env_prefix} -c conda-forge -y \
doxygen) || return 1

# Check binaries are visible in the PAATH
# Check binaries are visible in the PATH
(test_binpath "${env_name}" doxygen) || return 1

echo "[INSTALL] Successfully installed all the docs tools"
Expand All @@ -55,19 +61,25 @@ build_fbgemm_gpu_docs () {
echo "################################################################################"
echo "# Build FBGEMM-GPU Documentation"
echo "#"
echo "# [TIMESTAMP] $(date --utc +%FT%T.%3NZ)"
echo "# [$(date --utc +%FT%T.%3NZ)] + ${FUNCNAME[0]} ${*}"
echo "################################################################################"
echo ""
fi

# shellcheck disable=SC2155
local env_prefix=$(env_name_or_prefix "${env_name}")

echo "[BUILD] Installing docs-build dependencies ..."
(exec_with_retries conda run -n "${env_name}" python -m pip install -r requirements.txt) || return 1
# shellcheck disable=SC2086
(exec_with_retries 3 conda run ${env_prefix} python -m pip install -r requirements.txt) || return 1

echo "[BUILD] Running Doxygen build ..."
(exec_with_retries conda run -n "${env_name}" doxygen Doxyfile.in) || return 1
# shellcheck disable=SC2086
(exec_with_retries 3 conda run ${env_prefix} doxygen Doxyfile.in) || return 1

echo "[BUILD] Building HTML pages ..."
(exec_with_retries conda run -n "${env_name}" make html) || return 1
# shellcheck disable=SC2086
(exec_with_retries 3 conda run ${env_prefix} make html) || return 1

echo "[INSTALL] FBGEMM-GPU documentation build completed"
}
Loading

0 comments on commit 83f4183

Please sign in to comment.