Skip to content

Commit

Permalink
separate prefilling for incremental decoding
Browse files Browse the repository at this point in the history
  • Loading branch information
zwang86 committed Mar 29, 2024
1 parent 0479a64 commit 4ef856f
Show file tree
Hide file tree
Showing 4 changed files with 930 additions and 371 deletions.
2 changes: 2 additions & 0 deletions include/flexflow/request_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ struct Request {
RUNNING = 102, // running inference
COMPLETED = 103, // finished and verified
FINISHING = 104, // finishing request, but not yet verified
PREFILLING = 105 // prefilling the tree
};
BatchConfig::RequestGuid guid;
int max_sequence_length;
Expand Down Expand Up @@ -307,6 +308,7 @@ class RequestManager {
};
std::unordered_map<RequestGuid, ProfileInfo> profiling_requests;
double total_request_run_time;
BatchConfig buffer_bc = nullptr;
};

}; // namespace FlexFlow
Loading

0 comments on commit 4ef856f

Please sign in to comment.