
Commit

Merge branch 'inference' into attn-qkv-proj
goliaro authored Oct 9, 2024
2 parents c5264c4 + ca3dabf commit 342f6a8
Showing 14 changed files with 152 additions and 128 deletions.
12 changes: 6 additions & 6 deletions .github/workflows/build.yml
@@ -79,13 +79,13 @@ jobs:
export FF_CUDA_ARCH=70
export FF_HIP_ARCH=gfx1100,gfx1036
export hip_version=5.6
export FF_BUILD_ALL_INFERENCE_EXAMPLES=ON
export FF_BUILD_INFERENCE=ON
if [[ "${FF_GPU_BACKEND}" == "cuda" ]]; then
export FF_BUILD_ALL_EXAMPLES=ON
export FF_BUILD_TRAINING_EXAMPLES=ON
export FF_BUILD_UNIT_TESTS=ON
else
export FF_BUILD_ALL_EXAMPLES=OFF
export FF_BUILD_TRAINING_EXAMPLES=OFF
export FF_BUILD_UNIT_TESTS=OFF
fi
@@ -106,13 +106,13 @@ jobs:
export FF_CUDA_ARCH=70
export FF_HIP_ARCH=gfx1100,gfx1036
export hip_version=5.6
export FF_BUILD_ALL_INFERENCE_EXAMPLES=ON
export FF_BUILD_INFERENCE=ON
if [[ "${FF_GPU_BACKEND}" == "cuda" ]]; then
export FF_BUILD_ALL_EXAMPLES=ON
export FF_BUILD_TRAINING_EXAMPLES=ON
export FF_BUILD_UNIT_TESTS=ON
else
export FF_BUILD_ALL_EXAMPLES=OFF
export FF_BUILD_TRAINING_EXAMPLES=OFF
export FF_BUILD_UNIT_TESTS=OFF
fi
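For reference, a minimal local sketch of the configuration these workflow steps export, using the renamed flags. It assumes the usual FlexFlow flow of running config/config.linux from a fresh build directory (as in the gpu-ci.yml hunk below); the final make invocation is illustrative rather than what CI actually runs.

# Hypothetical local build mirroring the CUDA branch of the CI matrix above.
export FF_GPU_BACKEND=cuda
export FF_CUDA_ARCH=70
export FF_BUILD_INFERENCE=ON          # renamed from FF_BUILD_ALL_INFERENCE_EXAMPLES
export FF_BUILD_TRAINING_EXAMPLES=ON  # renamed from FF_BUILD_ALL_EXAMPLES
export FF_BUILD_UNIT_TESTS=ON
mkdir -p build && cd build
../config/config.linux                # maps the FF_* variables to -D CMake flags
make -j"$(nproc)"                     # illustrative; adjust to your setup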
6 changes: 3 additions & 3 deletions .github/workflows/gpu-ci.yml
@@ -175,7 +175,7 @@ jobs:
export PATH=$CONDA_PREFIX/bin:$PATH
export FF_HOME=$(pwd)
export FF_USE_PREBUILT_LEGION=OFF #remove this after fixing python path issue in Legion
export FF_BUILD_ALL_INFERENCE_EXAMPLES=ON
export FF_BUILD_INFERENCE=ON
mkdir build
cd build
../config/config.linux
@@ -262,8 +262,8 @@ jobs:
run: |
export PATH=$CONDA_PREFIX/bin:$PATH
export FF_HOME=$(pwd)
export FF_BUILD_ALL_EXAMPLES=ON
export FF_BUILD_ALL_INFERENCE_EXAMPLES=ON
export FF_BUILD_TRAINING_EXAMPLES=ON
export FF_BUILD_INFERENCE=ON
export FF_USE_PREBUILT_LEGION=OFF #remove this after fixing python path issue in Legion
pip install . --verbose
105 changes: 36 additions & 69 deletions CMakeLists.txt
@@ -181,6 +181,14 @@ include(legion)

# Not build FlexFlow if BUILD_LEGION_ONLY is ON
if(NOT BUILD_LEGION_ONLY)

# build binary options
option(FF_BUILD_INFERENCE "build all inference code and examples." ON)
option(FF_BUILD_TRAINING_EXAMPLES "build all training examples." OFF)
option(FF_BUILD_UNIT_TESTS "build non-operator unit tests" OFF)
option(FF_BUILD_SUBSTITUTION_TOOL "build substitution conversion tool" OFF)
option(FF_BUILD_VISUALIZATION_TOOL "build substitution visualization tool" OFF)

# NCCL
if(FF_USE_NCCL)
if(FF_GPU_BACKEND STREQUAL "hip_cuda" OR FF_GPU_BACKEND STREQUAL "cuda")
@@ -271,18 +279,23 @@ if(NOT BUILD_LEGION_ONLY)
file(GLOB_RECURSE FLEXFLOW_HDR
LIST_DIRECTORIES False
${FLEXFLOW_ROOT}/include/*.h)

#list(APPEND FLEXFLOW_HDR ${FLEXFLOW_ROOT}/inference/file_loader.h)

file(GLOB_RECURSE FLEXFLOW_SRC
LIST_DIRECTORIES False
${FLEXFLOW_ROOT}/src/*.cc)

list(REMOVE_ITEM FLEXFLOW_SRC "${FLEXFLOW_ROOT}/src/runtime/cpp_driver.cc")
#list(APPEND FLEXFLOW_SRC ${FLEXFLOW_ROOT}/inference/file_loader.cc)

set(FLEXFLOW_CPP_DRV_SRC
${FLEXFLOW_ROOT}/src/runtime/cpp_driver.cc)
# exclude inference files if FF_BUILD_INFERENCE is off
if(NOT FF_BUILD_INFERENCE)
list(REMOVE_ITEM FLEXFLOW_HDR "${FLEXFLOW_ROOT}/include/request_manager.h")
list(REMOVE_ITEM FLEXFLOW_SRC "${FLEXFLOW_ROOT}/src/runtime/request_manager.cc")
list(REMOVE_ITEM FLEXFLOW_SRC "${FLEXFLOW_ROOT}/src/runtime/inference_manager.cc")
list(REMOVE_ITEM FLEXFLOW_SRC "${FLEXFLOW_ROOT}/src/runtime/batch_config.cc")
list(REMOVE_ITEM FLEXFLOW_SRC "${FLEXFLOW_ROOT}/src/runtime/beam_search_batch_config.cc")
list(REMOVE_ITEM FLEXFLOW_SRC "${FLEXFLOW_ROOT}/src/runtime/tree_verify_batch_config.cc")
endif()

set(FLEXFLOW_CPP_DRV_SRC ${FLEXFLOW_ROOT}/src/runtime/cpp_driver.cc)

add_library(substitution_loader SHARED
${FLEXFLOW_ROOT}/src/runtime/substitution_loader.cc)
@@ -297,6 +310,10 @@ if(NOT BUILD_LEGION_ONLY)
file(GLOB_RECURSE FLEXFLOW_GPU_SRC
LIST_DIRECTORIES False
${FLEXFLOW_ROOT}/src/*.cu)

if(NOT FF_BUILD_INFERENCE)
list(REMOVE_ITEM FLEXFLOW_GPU_SRC "${FLEXFLOW_ROOT}/src/runtime/request_manager.cu")
endif()

add_compile_definitions(FF_USE_CUDA)

@@ -452,27 +469,6 @@ if(NOT BUILD_LEGION_ONLY)
set_property(TARGET flexflow PROPERTY CXX_STANDARD 14)
endif()

# build binary
option(FF_BUILD_TOKENIZER "build tokenizer=cpp for LLM serving" OFF)
option(FF_BUILD_RESNET "build resnet example" OFF)
option(FF_BUILD_RESNEXT "build resnext example" OFF)
option(FF_BUILD_ALEXNET "build alexnet example" OFF)
option(FF_BUILD_DLRM "build DLRM example" OFF)
option(FF_BUILD_XDL "build XDL example" OFF)
option(FF_BUILD_INCEPTION "build inception example" OFF)
option(FF_BUILD_CANDLE_UNO "build candle uno example" OFF)
option(FF_BUILD_TRANSFORMER "build transformer example" OFF)
option(FF_BUILD_MOE "build mixture of experts example" OFF)
option(FF_BUILD_MLP_UNIFY "build mlp unify example" OFF)
option(FF_BUILD_SPLIT_TEST "build split test example" OFF)
option(FF_BUILD_SPLIT_TEST_2 "build split test 2 example" OFF)
option(FF_BUILD_MLP_UNIFY_INFERENCE "build mlp unify inference example" OFF)
option(FF_BUILD_ALL_INFERENCE_EXAMPLES "build all inference examples. Overrides others" OFF)
option(FF_BUILD_ALL_EXAMPLES "build all examples. Overrides others" OFF)
option(FF_BUILD_UNIT_TESTS "build non-operator unit tests" OFF)
option(FF_BUILD_SUBSTITUTION_TOOL "build substitution conversion tool" OFF)
option(FF_BUILD_VISUALIZATION_TOOL "build substitution visualization tool" OFF)

if(FF_BUILD_UNIT_TESTS)
set(BUILD_GMOCK OFF)
add_subdirectory(deps/googletest)
@@ -488,89 +484,60 @@ if(NOT BUILD_LEGION_ONLY)
add_subdirectory(tools/substitutions_to_dot)
endif()

if(FF_BUILD_ALL_INFERENCE_EXAMPLES OR FF_BUILD_TOKENIZER)
if(FF_BUILD_INFERENCE)
add_compile_definitions(FF_BUILD_INFERENCE)
# Ensure Rust is installed
execute_process(COMMAND rustc --version
RESULT_VARIABLE RUST_COMMAND_RESULT
OUTPUT_VARIABLE RUSTC_OUTPUT
ERROR_QUIET)
if(NOT RUST_COMMAND_RESULT EQUAL 0)
message(FATAL_ERROR "Rust is not installed on the system. Please install it by running: 'curl https://sh.rustup.rs -sSf | sh -s -- -y' and following the instructions on the screen.")
message(FATAL_ERROR
"Rust is not installed on the system. Please install it by running: \n"
"'curl https://sh.rustup.rs -sSf | sh -s -- -y' \n"
"and follow the instructions on the screen.")
endif()
# Ensure Cargo is installed
execute_process(COMMAND cargo --version
RESULT_VARIABLE CARGO_RESULT
OUTPUT_QUIET ERROR_QUIET)
if(NOT CARGO_RESULT EQUAL 0)
message(FATAL_ERROR "Rust is installed, but cargo is not. Please install it by running: 'curl https://sh.rustup.rs -sSf | sh -s -- -y' and following the instructions on the screen.")
message(FATAL_ERROR
"Rust is installed, but cargo is not. Please install it by running: \n"
"'curl https://sh.rustup.rs -sSf | sh -s -- -y' \n"
"and follow the instructions on the screen.")
endif()
set(MLC_ENABLE_SENTENCEPIECE_TOKENIZER ON)
add_subdirectory(deps/tokenizers-cpp tokenizers EXCLUDE_FROM_ALL)
target_include_directories(flexflow PUBLIC deps/tokenizers-cpp/include)
target_link_libraries(flexflow tokenizers_cpp)
endif()
if(FF_BUILD_RESNET OR FF_BUILD_ALL_EXAMPLES)

if (FF_BUILD_TRAINING_EXAMPLES)
add_subdirectory(examples/cpp/ResNet)
endif()

if(FF_BUILD_RESNEXT OR FF_BUILD_ALL_EXAMPLES)
add_subdirectory(examples/cpp/resnext50)
endif()

if(FF_BUILD_ALEXNET OR FF_BUILD_ALL_EXAMPLES)
add_subdirectory(examples/cpp/AlexNet)
endif()

if(FF_BUILD_MLP_UNIFY OR FF_BUILD_ALL_EXAMPLES)
add_subdirectory(examples/cpp/MLP_Unify)
endif()

if(FF_BUILD_SPLIT_TEST OR FF_BUILD_ALL_EXAMPLES)
add_subdirectory(examples/cpp/split_test)
endif()

if(FF_BUILD_SPLIT_TEST_2 OR FF_BUILD_ALL_EXAMPLES)
add_subdirectory(examples/cpp/split_test_2)
endif()

if(FF_BUILD_INCEPTION OR FF_BUILD_ALL_EXAMPLES)
add_subdirectory(examples/cpp/InceptionV3)
endif()

#TODO: Once functional add to BUILD_ALL_EXAMPLES
if(FF_BUILD_CANDLE_UNO OR FF_BUILD_ALL_EXAMPLES)
add_subdirectory(examples/cpp/candle_uno)
endif()

if(FF_BUILD_DLRM OR FF_BUILD_ALL_EXAMPLES)
add_subdirectory(examples/cpp/DLRM)

#add_executable(generate_dlrm_hetero_strategy src/runtime/dlrm_strategy_hetero.cc)
#target_include_directories(generate_dlrm_hetero_strategy PUBLIC ${FLEXFLOW_INCLUDE_DIRS})

#add_executable(generate_dlrm_strategy src/runtime/dlrm_strategy.cc)
#target_include_directories(generate_dlrm_strategy PUBLIC ${FLEXFLOW_INCLUDE_DIRS})
endif()

if(FF_BUILD_XDL OR FF_BUILD_ALL_EXAMPLES)
add_subdirectory(examples/cpp/XDL)
endif()

if(FF_BUILD_TRANSFORMER OR FF_BUILD_ALL_EXAMPLES)
add_subdirectory(examples/cpp/Transformer)
endif()

if(FF_BUILD_MOE OR FF_BUILD_ALL_EXAMPLES)
add_subdirectory(examples/cpp/mixture_of_experts)
endif()

if(FF_BUILD_ALL_INFERENCE_EXAMPLES OR FF_BUILD_ALL_EXAMPLES)
if(FF_BUILD_INFERENCE)
add_subdirectory(inference/spec_infer)
add_subdirectory(inference/incr_decoding)
add_subdirectory(inference/peft)
endif()


# installation
set(INCLUDE_DEST "include")
set(LIB_DEST "lib")
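The same options can be handed straight to CMake when configuring manually. A sketch using the option names declared above; the trailing source path is a placeholder, not part of this diff.

# Hypothetical manual configure with the renamed build options.
cmake -DFF_BUILD_INFERENCE=ON \
      -DFF_BUILD_TRAINING_EXAMPLES=OFF \
      -DFF_BUILD_UNIT_TESTS=OFF \
      /path/to/flexflow        # placeholder source directory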
20 changes: 10 additions & 10 deletions config/config.inc
@@ -128,19 +128,19 @@ elif [ "$FF_LEGION_NETWORKS" = "ucx" ]; then
fi

# build C++ examples
if [ "$FF_BUILD_ALL_EXAMPLES" = "ON" ]; then
SET_EXAMPLES="-DFF_BUILD_ALL_EXAMPLES=ON"
elif [ "$FF_BUILD_ALL_EXAMPLES" = "OFF" ]; then
SET_EXAMPLES="-DFF_BUILD_ALL_EXAMPLES=OFF"
if [ "$FF_BUILD_TRAINING_EXAMPLES" = "ON" ]; then
SET_EXAMPLES="-DFF_BUILD_TRAINING_EXAMPLES=ON"
elif [ "$FF_BUILD_TRAINING_EXAMPLES" = "OFF" ]; then
SET_EXAMPLES="-DFF_BUILD_TRAINING_EXAMPLES=OFF"
else
SET_EXAMPLES="-DFF_BUILD_ALL_EXAMPLES=ON"
SET_EXAMPLES="-DFF_BUILD_TRAINING_EXAMPLES=ON"
fi
if [ "$FF_BUILD_ALL_INFERENCE_EXAMPLES" = "ON" ]; then
SET_INFERENCE_EXAMPLES="-DFF_BUILD_ALL_INFERENCE_EXAMPLES=ON"
elif [ "$FF_BUILD_ALL_INFERENCE_EXAMPLES" = "OFF" ]; then
SET_INFERENCE_EXAMPLES="-DFF_BUILD_ALL_INFERENCE_EXAMPLES=OFF"
if [ "$FF_BUILD_INFERENCE" = "ON" ]; then
SET_INFERENCE_EXAMPLES="-DFF_BUILD_INFERENCE=ON"
elif [ "$FF_BUILD_INFERENCE" = "OFF" ]; then
SET_INFERENCE_EXAMPLES="-DFF_BUILD_INFERENCE=OFF"
else
SET_INFERENCE_EXAMPLES="-DFF_BUILD_ALL_INFERENCE_EXAMPLES=ON"
SET_INFERENCE_EXAMPLES="-DFF_BUILD_INFERENCE=ON"
fi

# enable C++ unit tests
6 changes: 3 additions & 3 deletions config/config.linux
@@ -65,8 +65,8 @@ FF_GASNET_CONDUIT=${FF_GASNET_CONDUIT:-ibv}
UCX_DIR=${UCX_DIR:-""}

# build C++ examples
FF_BUILD_ALL_EXAMPLES=${FF_BUILD_ALL_EXAMPLES:-OFF}
FF_BUILD_ALL_INFERENCE_EXAMPLES=${FF_BUILD_ALL_INFERENCE_EXAMPLES:-ON}
FF_BUILD_TRAINING_EXAMPLES=${FF_BUILD_TRAINING_EXAMPLES:-OFF}
FF_BUILD_INFERENCE=${FF_BUILD_INFERENCE:-ON}

# build C++ unit tests
FF_BUILD_UNIT_TESTS=${FF_BUILD_UNIT_TESTS:-OFF}
@@ -108,7 +108,7 @@ fi

function get_build_configs() {
# Create a string with the values of the variables set in this script
BUILD_CONFIGS="FF_CUDA_ARCH=${FF_CUDA_ARCH} FF_HIP_ARCH=${FF_HIP_ARCH} CUDA_DIR=${CUDA_DIR} CUDNN_DIR=${CUDNN_DIR} CUBLAS_DIR=${CUBLAS_DIR} CURAND_DIR=${CURAND_DIR} NCCL_DIR=${NCCL_DIR} FF_USE_PYTHON=${FF_USE_PYTHON} BUILD_LEGION_ONLY=${BUILD_LEGION_ONLY} FF_GASNET_CONDUIT=${FF_GASNET_CONDUIT} UCX_DIR=${UCX_DIR} FF_LEGION_NETWORKS=${FF_LEGION_NETWORKS} FF_BUILD_ALL_EXAMPLES=${FF_BUILD_ALL_EXAMPLES} FF_BUILD_ALL_INFERENCE_EXAMPLES=${FF_BUILD_ALL_INFERENCE_EXAMPLES} FF_BUILD_UNIT_TESTS=${FF_BUILD_UNIT_TESTS} FF_USE_PREBUILT_NCCL=${FF_USE_PREBUILT_NCCL} FF_USE_PREBUILT_LEGION=${FF_USE_PREBUILT_LEGION} FF_USE_ALL_PREBUILT_LIBRARIES=${FF_USE_ALL_PREBUILT_LIBRARIES} FF_USE_AVX2=${FF_USE_AVX2} FF_MAX_DIM=${FF_MAX_DIM} ROCM_PATH=${ROCM_PATH} FF_GPU_BACKEND=${FF_GPU_BACKEND} INSTALL_DIR=${INSTALL_DIR}"
BUILD_CONFIGS="FF_CUDA_ARCH=${FF_CUDA_ARCH} FF_HIP_ARCH=${FF_HIP_ARCH} CUDA_DIR=${CUDA_DIR} CUDNN_DIR=${CUDNN_DIR} CUBLAS_DIR=${CUBLAS_DIR} CURAND_DIR=${CURAND_DIR} NCCL_DIR=${NCCL_DIR} FF_USE_PYTHON=${FF_USE_PYTHON} BUILD_LEGION_ONLY=${BUILD_LEGION_ONLY} FF_GASNET_CONDUIT=${FF_GASNET_CONDUIT} UCX_DIR=${UCX_DIR} FF_LEGION_NETWORKS=${FF_LEGION_NETWORKS} FF_BUILD_TRAINING_EXAMPLES=${FF_BUILD_TRAINING_EXAMPLES} FF_BUILD_INFERENCE=${FF_BUILD_INFERENCE} FF_BUILD_UNIT_TESTS=${FF_BUILD_UNIT_TESTS} FF_USE_PREBUILT_NCCL=${FF_USE_PREBUILT_NCCL} FF_USE_PREBUILT_LEGION=${FF_USE_PREBUILT_LEGION} FF_USE_ALL_PREBUILT_LIBRARIES=${FF_USE_ALL_PREBUILT_LIBRARIES} FF_USE_AVX2=${FF_USE_AVX2} FF_MAX_DIM=${FF_MAX_DIM} ROCM_PATH=${ROCM_PATH} FF_GPU_BACKEND=${FF_GPU_BACKEND} INSTALL_DIR=${INSTALL_DIR}"
}

if [[ -n "$1" && ( "$1" == "CMAKE_FLAGS" || "$1" == "CUDA_PATH" ) ]]; then
4 changes: 2 additions & 2 deletions spack/package.py
@@ -91,9 +91,9 @@ def cmake_args(self):
options.append('-DFF_USE_NCCL=OFF')

if '+examples' in spec:
options.append('-DFF_BUILD_ALL_EXAMPLES=ON')
options.append('-DFF_BUILD_TRAINING_EXAMPLES=ON')
else:
options.append('-DFF_BUILD_ALL_EXAMPLES=OFF')
options.append('-DFF_BUILD_TRAINING_EXAMPLES=OFF')

if '+avx2' in spec:
options.append('-DFF_USE_AVX2=ON')
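For completeness, a hedged example of how the +examples variant above is typically consumed; the spec syntax is standard spack, and the package name flexflow is assumed from this repository's spack/package.py rather than stated in the diff.

# Hypothetical: +examples maps to -DFF_BUILD_TRAINING_EXAMPLES=ON per package.py.
spack install flexflow+examples
# Default build (variant off) configures with -DFF_BUILD_TRAINING_EXAMPLES=OFF.
spack install flexflow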
12 changes: 12 additions & 0 deletions src/c/flexflow_c.cc
@@ -16,7 +16,9 @@
#include "flexflow/flexflow_c.h"
#include "flexflow/dataloader.h"
#include "flexflow/mapper.h"
#ifdef FF_BUILD_INFERENCE
#include "flexflow/request_manager.h"
#endif
#include "flexflow/utils/file_loader.h"

using namespace Legion;
@@ -58,6 +60,7 @@ class FFCObjectWrapper {
FF_NEW_OPAQUE_WRAPPER(flexflow_dlrm_config_t, DLRMConfig *);
FF_NEW_OPAQUE_WRAPPER(flexflow_single_dataloader_t, SingleDataLoader *);
// inference
#ifdef FF_BUILD_INFERENCE
FF_NEW_OPAQUE_WRAPPER(flexflow_batch_config_t, BatchConfig *);
FF_NEW_OPAQUE_WRAPPER(flexflow_tree_verify_batch_config_t,
TreeVerifyBatchConfig *);
@@ -74,6 +77,7 @@ class FFCObjectWrapper {
// LoraAdamOptimizerConfig *);
FF_NEW_OPAQUE_WRAPPER(flexflow_lora_linear_config_t, LoraLinearConfig *);
FF_NEW_OPAQUE_WRAPPER(flexflow_peft_model_id_t, PEFTModelID *);
#endif
};

Logger ffc_log("flexflow_c");
@@ -1603,6 +1607,7 @@ flexflow_tensor_t flexflow_model_add_argmax(flexflow_model_t handle_,
return FFCObjectWrapper::wrap(tensor);
}

#ifdef FF_BUILD_INFERENCE
flexflow_peft_model_id_t flexflow_model_add_lora_layer(
flexflow_model_t handle_,
const flexflow_lora_linear_config_t peft_config_) {
@@ -1617,6 +1622,7 @@ flexflow_peft_model_id_t flexflow_model_add_lora_layer(
peft_model_id);
return FFCObjectWrapper::wrap(peft_model_id);
}
#endif

void flexflow_model_set_sgd_optimizer(flexflow_model_t handle_,
flexflow_sgd_optimizer_t optimizer_) {
@@ -1671,6 +1677,7 @@ void flexflow_model_set_transformer_layer_id(flexflow_model_t handle_, int id) {
handle->set_transformer_layer_id(id);
}

#ifdef FF_BUILD_INFERENCE
void flexflow_model_generate(flexflow_model_t handle_,
int num_requests,
enum RequestType *request_types,
@@ -1751,6 +1758,7 @@ void flexflow_model_generate(
}
}
}
#endif

void flexflow_model_set_position_offset(flexflow_model_t handle_,
int const offset) {
@@ -2638,6 +2646,8 @@ void flexflow_perform_registration(void) {
true /*global*/);
}

#ifdef FF_BUILD_INFERENCE

// -----------------------------------------------------------------------
// BatchConfig
// -----------------------------------------------------------------------
@@ -3106,3 +3116,5 @@ void flexflow_peft_model_id_destroy(flexflow_peft_model_id_t handle_) {
DEBUG_PRINT("[PEFTModelID] delete %p", peft_model_id);
delete peft_model_id;
}

#endif
2 changes: 1 addition & 1 deletion src/ops/beam_topk.cu
@@ -15,7 +15,7 @@

#include "flexflow/ffconst_utils.h"
#include "flexflow/ops/beam_topk.h"
#include "flexflow/request_manager.h"
// #include "flexflow/request_manager.h"
#include "flexflow/utils/cuda_helper.h"

namespace FlexFlow {