Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
goliaro committed Oct 7, 2024
1 parent 673184d commit 4a3d1bd
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions src/runtime/request_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3053,15 +3053,15 @@ void RequestManager::serve_suffix_decoding(FFModel *llm) {
auto const &next_batch = batch_pipeline.back();

BeamSearchBatchConfigFuture beam_bcf = prepare_next_batch_init(next_batch.first, next_batch.second, 0, ctx, runtime);
- FutureMap fm = im->inference(ssm, 0, beam_bcf);
+ FutureMap fm = im->suffix_decode(llm, 0, beam_bcf);
assert(fm.get_future_map_domain().get_volume() == 1);
BeamInferenceResultFuture beam_irf = fm.get_future(0);
beam_bcf = prepare_next_batch_beam(beam_bcf, beam_irf, ctx, runtime);
std::vector<BeamSearchBatchConfigFuture> beam_bcf_vec(1);
beam_bcf_vec[0] = beam_bcf;
// Token Tree Verification
{
- TreeVerifyBatchConfigFuture tree_bcf = prepare_next_batch_verify(beam_bcf, ctx, runtime);
+ TreeVerifyBatchConfigFuture tree_bcf = prepare_next_batch_verify(beam_bcf_vec, ctx, runtime);
FutureMap fm = im->inference(llm, 0, tree_bcf);
assert(fm.get_future_map_domain().get_volume() == 1);
InferenceResultFuture tree_irf = fm.get_future(0);
Expand Down

0 comments on commit 4a3d1bd

Please sign in to comment.