Merge branch 'inference' into bug_fixes
goliaro authored Jan 9, 2024
2 parents c8d2cd1 + ba4af39 commit d37f5c5
Showing 8 changed files with 36 additions and 17 deletions.
2 changes: 2 additions & 0 deletions .dockerignore
@@ -17,3 +17,5 @@ python/flexflow/core/legion_cffi_header.py
 /inference/tokenizer/*
 /inference/prompt/*
 /inference/output/*
+
+/tests/inference/python_test_configs/*.json
3 changes: 2 additions & 1 deletion .gitignore
@@ -186,4 +186,5 @@ gpt_tokenizer
 # pip version
 python/flexflow/version.txt
 
-inference_tensors
+inference_tensors
+tests/inference/python_test_configs/*.json
23 changes: 14 additions & 9 deletions include/flexflow/batch_config.h
@@ -167,9 +167,10 @@ class BeamSearchBatchConfig : public BatchConfig {
     int current_depth = -1;
     int max_depth = MAX_BEAM_DEPTH;
 
-    BatchConfig::TokenId tokens[BeamSearchBatchConfig::MAX_BEAM_WIDTH];
-    float probs[BeamSearchBatchConfig::MAX_BEAM_WIDTH];
-    int parent_id[BeamSearchBatchConfig::MAX_BEAM_WIDTH];
+    BatchConfig::TokenId
+        tokens[BeamSearchBatchConfig::MAX_SPECULATIVE_TREE_BRANCHES];
+    float probs[BeamSearchBatchConfig::MAX_SPECULATIVE_TREE_BRANCHES];
+    int parent_id[BeamSearchBatchConfig::MAX_SPECULATIVE_TREE_BRANCHES];
+    int sub_request_num;
   };
 
@@ -178,10 +179,11 @@ class BeamSearchBatchConfig : public BatchConfig {
   };
 
   BeamSearchPerRequestInfo beamRequestsInfo[MAX_NUM_REQUESTS];
-  BeamSearchPerTokenInfo beamTokenInfo[MAX_NUM_TOKENS * MAX_BEAM_WIDTH];
+  BeamSearchPerTokenInfo
+      beamTokenInfo[MAX_NUM_TOKENS +
+                    MAX_SPEC_TREE_TOKEN_NUM * MAX_NUM_REQUESTS];
 
-  // why is this == MAX_NUM_REQUESTS * MAX_BEAM_WIDTH?
-  int sub_requests[MAX_NUM_REQUESTS * MAX_BEAM_WIDTH];
+  int sub_requests[MAX_NUM_REQUESTS];
 
 private:
   size_t current_iteration;
@@ -190,9 +192,12 @@ class BeamSearchBatchConfig : public BatchConfig {
 struct BeamInferenceResult {
   static int const MAX_NUM_TOKENS = BatchConfig::MAX_NUM_TOKENS;
   BatchConfig::TokenId
-      token_ids[MAX_NUM_TOKENS * BeamSearchBatchConfig::MAX_BEAM_WIDTH];
-  float probs[MAX_NUM_TOKENS * BeamSearchBatchConfig::MAX_BEAM_WIDTH];
-  int parent_id[MAX_NUM_TOKENS * BeamSearchBatchConfig::MAX_BEAM_WIDTH];
+      token_ids[MAX_NUM_TOKENS *
+                BeamSearchBatchConfig::MAX_SPECULATIVE_TREE_BRANCHES];
+  float probs[MAX_NUM_TOKENS *
+              BeamSearchBatchConfig::MAX_SPECULATIVE_TREE_BRANCHES];
+  int parent_id[MAX_NUM_TOKENS *
+                BeamSearchBatchConfig::MAX_SPECULATIVE_TREE_BRANCHES];
 };
 
 }; // namespace FlexFlow
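Net effect of this header change: every per-request beam array is now sized by the speculative-tree branch count rather than the beam width, and beamTokenInfo grows additively (a token budget plus a per-request tree budget) rather than multiplicatively. A minimal Python sketch of the capacity arithmetic, using illustrative constants (assumed values, not FlexFlow's actual configuration):

    # All four constants are assumptions, chosen only to show the arithmetic.
    MAX_NUM_TOKENS = 1024
    MAX_NUM_REQUESTS = 64
    MAX_BEAM_WIDTH = 3
    MAX_SPEC_TREE_TOKEN_NUM = 64

    old_capacity = MAX_NUM_TOKENS * MAX_BEAM_WIDTH  # 3072 entries
    new_capacity = MAX_NUM_TOKENS + MAX_SPEC_TREE_TOKEN_NUM * MAX_NUM_REQUESTS  # 5120 entries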
2 changes: 1 addition & 1 deletion include/flexflow/request_manager.h
@@ -76,7 +76,7 @@ struct BeamTree {
   struct treeLayer {
     BeamSearchBatchConfig::TokenId
         tokens[BeamSearchBatchConfig::MAX_SPECULATIVE_TREE_BRANCHES];
-    int parent_ids[BeamSearchBatchConfig::MAX_BEAM_WIDTH];
+    int parent_ids[BeamSearchBatchConfig::MAX_SPECULATIVE_TREE_BRANCHES];
     float probs[BeamSearchBatchConfig::MAX_SPECULATIVE_TREE_BRANCHES];
     int nodes_num_this_layer = 0;
   };
2 changes: 1 addition & 1 deletion python/flexflow/core/flexflow_cffi.py
@@ -56,7 +56,7 @@ def get_c_name(name):
     if name is None:
         return ffi.NULL
     else:
-        return ffi.new("char[]", name.encode("ascii"))
+        return ffi.new("char[]", name.encode("utf-8"))
 
 
 def get_datatype_size(datatype):
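The encoding switch matters for any name containing non-ASCII characters: "ascii" raises where "utf-8" succeeds. A quick standalone sketch (plain Python, independent of FlexFlow):

    name = "café"
    print(name.encode("utf-8"))   # b'caf\xc3\xa9' -- any Python str encodes cleanly
    try:
        name.encode("ascii")      # the old behavior
    except UnicodeEncodeError as e:
        print(e)                  # 'ascii' codec can't encode character '\xe9' ...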
6 changes: 5 additions & 1 deletion src/c/flexflow_c.cc
@@ -1596,7 +1596,11 @@ flexflow_generation_result_t
   GenerationResult result = handle->generate(prompts, max_seq_length);
   DEBUG_PRINT(
       "[Model] generate %p %s %i", handle, text_str.c_str(), max_seq_length);
-  assert(result.output_tokens.size() <= max_seq_length);
+  // If the prompt exceeds max seq len, check that we return the prompt with no
+  // additional token. Otherwise, check that the output does not exceed the max
+  // sequence length.
+  assert(result.output_tokens.size() <= max_seq_length ||
+         result.output_tokens.size() == result.input_tokens.size());
   output_length_and_tokens[0] = result.output_tokens.size();
   std::copy(result.output_tokens.begin(),
             result.output_tokens.end(),
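The relaxed assertion accepts two outcomes: generation that fits the budget, or an over-long prompt returned verbatim with nothing appended. The same predicate restated as a small Python check (a sketch of the logic, not FlexFlow code):

    def output_length_ok(input_tokens, output_tokens, max_seq_length):
        # Normal case: generation stayed within the sequence-length budget.
        within_budget = len(output_tokens) <= max_seq_length
        # Overflow case: the prompt exceeded max_seq_length, so the output
        # is exactly the prompt with no generated tokens appended.
        prompt_echoed = len(output_tokens) == len(input_tokens)
        return within_budget or prompt_echoed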
14 changes: 10 additions & 4 deletions src/runtime/request_manager.cc
@@ -43,7 +43,8 @@ std::string LoadBytesFromFile(std::string const &path) {
 }
 
 RequestManager::RequestManager()
-    : verbose(false), next_available_guid(1000000), num_processed_requests(0) {
+    : verbose(false), next_available_guid(1000000), num_processed_requests(0),
+      total_request_run_time(0.0f) {
   // The following config parameters are set
   // during ffmodel.compile()
   // Initialize them to -1 to make sure no one
@@ -767,7 +768,9 @@ BeamSearchBatchConfig
             : 1;
     new_bc.beamRequestsInfo[i].max_depth =
         std::min(new_max_depth, BeamSearchBatchConfig::MAX_BEAM_DEPTH);
-    for (int j = 0; j < BeamSearchBatchConfig::MAX_BEAM_WIDTH; j++) {
+    for (int j = 0;
+         j < BeamSearchBatchConfig::MAX_SPECULATIVE_TREE_BRANCHES;
+         j++) {
       new_bc.beamRequestsInfo[i].parent_id[j] = 0;
       new_bc.beamRequestsInfo[i].probs[j] = 1;
     }
@@ -840,7 +843,8 @@ BeamSearchBatchConfig
             ? spec_infer_tree_width[ssm_decoding_steps]
             : 1;
     new_bc.beamRequestsInfo[i].max_depth = 0;
-    for (int j = 0; j < BeamSearchBatchConfig::MAX_BEAM_WIDTH; j++) {
+    for (int j = 0; j < BeamSearchBatchConfig::MAX_SPECULATIVE_TREE_BRANCHES;
+         j++) {
       new_bc.beamRequestsInfo[i].parent_id[j] = 0;
       new_bc.beamRequestsInfo[i].probs[j] = 1;
     }
@@ -900,7 +904,9 @@ BeamSearchBatchConfig
         std::min(BeamSearchBatchConfig::MAX_BEAM_DEPTH,
                  get_max_tokens_per_batch() -
                      new_bc.requestsInfo[i].num_tokens_in_batch - 1);
-    for (int j = 0; j < BeamSearchBatchConfig::MAX_BEAM_WIDTH; j++) {
+    for (int j = 0;
+         j < BeamSearchBatchConfig::MAX_SPECULATIVE_TREE_BRANCHES;
+         j++) {
       new_bc.beamRequestsInfo[i].parent_id[j] = 0;
       new_bc.beamRequestsInfo[i].probs[j] = 1;
     }
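All three loops now reset per-branch state with the same bound. A sketch of the shared pattern they implement (hypothetical names and value, not the FlexFlow API):

    MAX_SPECULATIVE_TREE_BRANCHES = 3  # assumed value, for illustration

    def reset_branch_state(request_info):
        # Zero the parent pointers and reset probabilities for every
        # speculative branch, mirroring the three initialization loops above.
        for j in range(MAX_SPECULATIVE_TREE_BRANCHES):
            request_info["parent_id"][j] = 0
            request_info["probs"][j] = 1.0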
1 change: 1 addition & 0 deletions tests/inference/python_inference_tests.sh
@@ -6,6 +6,7 @@ set -e
 cd "${BASH_SOURCE[0]%/*}"
 
 # Generate test configs
+rm -rf python_test_configs/*.json
 python python_test_configs/generate_configs.py
 
 # Run all tests
