From 093b29d3ed0e159bf1702de024370ee24b92a755 Mon Sep 17 00:00:00 2001 From: Zhihao Jia Date: Tue, 2 Jan 2024 18:50:49 -0500 Subject: [PATCH] bug fixes and update Legion version --- deps/legion | 2 +- src/ops/linear.cc | 8 ++------ src/runtime/model.cu | 1 - src/runtime/request_manager.cc | 7 ++----- 4 files changed, 5 insertions(+), 13 deletions(-) diff --git a/deps/legion b/deps/legion index 626b55689c..d065278678 160000 --- a/deps/legion +++ b/deps/legion @@ -1 +1 @@ -Subproject commit 626b55689c77848b246e1da19678c7ad58899f0c +Subproject commit d0652786784249e933dd62f675591da99a5e960d diff --git a/src/ops/linear.cc b/src/ops/linear.cc index 63b26bfe7d..93e93a5953 100644 --- a/src/ops/linear.cc +++ b/src/ops/linear.cc @@ -467,12 +467,8 @@ OpMeta *Linear::init_task_with_dim(Task const *task, ctx, runtime, false /*readOutput*/); - TensorAccessorW acc_kernel(regions[2], - task->regions[2], - FID_DATA, - ctx, - runtime, - false /*readOutput*/); + TensorAccessorR acc_kernel( + regions[2], task->regions[2], FID_DATA, ctx, runtime); // TensorAccessorR acc_bias( // regions[3], task->regions[3], FID_DATA, ctx, runtime); diff --git a/src/runtime/model.cu b/src/runtime/model.cu index c885b29db2..23b7f0efbe 100644 --- a/src/runtime/model.cu +++ b/src/runtime/model.cu @@ -175,7 +175,6 @@ FFHandler } else { handle.batch_config_metadata = nullptr; } - // checkCUDA(cudaMalloc(&handle.workSpace, handle.workSpaceSize)); #ifdef FF_USE_NCCL diff --git a/src/runtime/request_manager.cc b/src/runtime/request_manager.cc index 88754f5a82..a285932b7f 100644 --- a/src/runtime/request_manager.cc +++ b/src/runtime/request_manager.cc @@ -1188,10 +1188,7 @@ BeamSearchBatchConfig int ssm_decoding_steps = profiling_requests[request.guid].ssm_decoding_steps; - new_bc.beamRequestsInfo[i].beam_size = - spec_infer_tree_width.size() > ssm_decoding_steps - ? spec_infer_tree_width[ssm_decoding_steps] - : 1; + new_bc.beamRequestsInfo[i].beam_size = 1; // printf("beam size: %d, %d\n", // new_bc.beamRequestsInfo[i].beam_size, // ssm_decoding_steps); @@ -1820,7 +1817,7 @@ void RequestManager::updateBitMask(BatchConfig::BitMask &bitmask, void RequestManager::appendPendingRequest(BatchConfig::BitMask &bitmask, int initLength) { assert(initLength > 0); - std::cout << "append pending bit mask: " << initLength << "\n"; + // std::cout << "append pending bit mask: " << initLength << "\n"; // eg. 4 tokens: t1: 0000000..1111, t2: 0000000..1110, t3: 0000000..1100, t4: // 0000000..1000 bitmask.non_tree_cache_size = 0;