fix
xinhaoc committed Dec 21, 2023
1 parent 40d830c commit e825526
Showing 2 changed files with 16 additions and 17 deletions.
14 changes: 10 additions & 4 deletions src/runtime/optimizer.cc
@@ -522,7 +522,7 @@ void AdamOptimizer::unified_update(std::vector<ParallelTensor> const parameters)
}

int offset = 0;
printf("param size: %d, %d\n", parameters.size(), parameters_num);
// printf("param size: %d, %d\n", parameters.size(), parameters_num);

while(processed_parameters_num < parameters.size()){

@@ -540,7 +540,7 @@ void AdamOptimizer::unified_update(std::vector<ParallelTensor> const parameters)
assert(p->parallel_is != IndexSpace::NO_SPACE);
}

printf("parameters_num: %d %d, %d\n", parameters_num, reservedWorkSpaceSize, model->handlers->workSpaceSize);
// printf("parameters_num: %d %d, %d\n", parameters_num, reservedWorkSpaceSize, model->handlers->workSpaceSize);
assert(parameters_num <= parameters.size());

IndexLauncher launcher(ADAM_UNIFY_UPD_NCCL_TASK_ID,
@@ -729,7 +729,12 @@ void AdamOptimizer::nccl_unified_update_task(Task const *task,

float const *w_grad_ptr[op->parameters_num];
float *w_ptr[op->parameters_num], *v_ptr[op->parameters_num], *m_ptr[op->parameters_num];
-  size_t size[op->parameters_num];

+  hipMalloc(w_grad_ptr, sizeof(float*) * op->parameters_num);
+  hipMalloc(w_ptr, sizeof(float*) * op->parameters_num);
+  hipMalloc(v_ptr, sizeof(float*) * op->parameters_num);
+  hipMalloc(m_ptr, sizeof(float*) * op->parameters_num);
+  size_t *size = new size_t[op->parameters_num];
int offset = 0;

printf("parameters_num: %d\n", op->parameters_num);
@@ -741,10 +746,11 @@ void AdamOptimizer::nccl_unified_update_task(Task const *task,
GenericTensorAccessorW accM = helperGetGenericTensorAccessorWO(DataType::DT_FLOAT, regions[offset+3], task->regions[offset+3], FID_DATA, ctx, runtime);
offset += 4;

-    size[i] = accW.domain.get_volume();
+    size[i] = accWGrad.domain.get_volume();
// assert(accWGrad.rect == accW.rect);
// assert(accWGrad.rect == accV.rect);
// assert(accWGrad.rect == accM.rect);
+    w_grad_ptr[i] = accWGrad.get_float_ptr();
w_ptr[i] = accW.get_float_ptr();
v_ptr[i] = accV.get_float_ptr();
m_ptr[i] = accM.get_float_ptr();
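Aside: the task body above pulls four regions per parameter (W, W_grad, V, M, in the order the accessors suggest) and records each parameter's element count from the gradient accessor. Below is a minimal standalone sketch of that bookkeeping, assuming that region order; ParamSlots and plan_slots are illustrative names, not FlexFlow API.

#include <cstddef>
#include <vector>

// One parameter's region indices plus its byte offset in the shared workspace.
struct ParamSlots {
  int w, w_grad, v, m;
  size_t workspace_offset;
};

// Mirror of the "offset += 4" walk in nccl_unified_update_task: parameter i
// owns regions [4i, 4i+3], and its gradient lands at the running byte offset.
std::vector<ParamSlots> plan_slots(std::vector<size_t> const &sizes) {
  std::vector<ParamSlots> slots;
  int offset = 0;
  size_t bytes = 0;
  for (size_t i = 0; i < sizes.size(); i++) {
    slots.push_back({offset, offset + 1, offset + 2, offset + 3, bytes});
    offset += 4;
    bytes += sizes[i] * sizeof(float); // size[i] = accWGrad.domain.get_volume()
  }
  return slots;
}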
19 changes: 6 additions & 13 deletions src/runtime/optimizer_kernel.cpp
@@ -262,18 +262,11 @@ __host__ void AdamOptimizer::nccl_unified_update_task_gpu(AdamOptimizer const *o
void *workSpace_ptr = meta->handle.workSpace;

for(int i = 0; i < op->parameters_num; i++){
-    // hipMemcpyAsync(static_cast<float*>(workSpace_ptr),
-    //                w_grad_ptr[i],
-    //                size[i] * sizeof(float),
-    //                hipMemcpyDeviceToDevice,
-    //                stream);
-    // hipError_t error = hipGetLastError();
-    // if(error != hipSuccess)
-    // {
-    //   // print the CUDA error message and exit
-    //   printf("CUDA error: %s\n", hipGetErrorString(error));
-    // }
-
+    hipMemcpyAsync(workSpace_ptr,
+                   w_grad_ptr[i],
+                   size[i] * sizeof(float),
+                   hipMemcpyDeviceToDevice,
+                   stream);
workSpace_ptr = static_cast<char *>(workSpace_ptr) + size[i] * sizeof(float);
}

@@ -292,7 +285,7 @@ __host__ void AdamOptimizer::nccl_unified_update_task_gpu(AdamOptimizer const *o
float beta2_t = op->beta2_t;
for(int i = 0; i < op->parameters_num; i++){
// update
-    std::cout<<"update"<<"\n";
+    // printf("update %d\n", i);
hipLaunchKernelGGL(HIP_KERNEL_NAME(adam_update),
GET_BLOCKS(size[i]),
CUDA_NUM_THREADS,
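Aside: the activated loop packs every gradient into one contiguous workspace with hipMemcpyAsync, bumping the cursor by size[i] * sizeof(float), and a second loop then launches adam_update once per parameter. Below is a self-contained sketch of the same pack-then-launch pattern, assuming a HIP toolchain; scale_inplace is an illustrative stand-in for the real adam_update kernel, and kBlockSize for CUDA_NUM_THREADS.

#include <hip/hip_runtime.h>
#include <vector>

constexpr int kBlockSize = 256; // stand-in for CUDA_NUM_THREADS

// Illustrative kernel standing in for adam_update: touches each packed element.
__global__ void scale_inplace(float *buf, size_t n, float alpha) {
  size_t i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) {
    buf[i] *= alpha;
  }
}

// Pack each parameter's gradient into one contiguous workspace, then launch
// one kernel per parameter at its offset -- the same two-pass walk as above.
void pack_then_launch(void *workspace, std::vector<float const *> const &grads,
                      std::vector<size_t> const &sizes, hipStream_t stream) {
  char *cursor = static_cast<char *>(workspace);
  for (size_t i = 0; i < grads.size(); i++) {
    hipMemcpyAsync(cursor, grads[i], sizes[i] * sizeof(float),
                   hipMemcpyDeviceToDevice, stream);
    cursor += sizes[i] * sizeof(float); // advance past this parameter
  }
  cursor = static_cast<char *>(workspace);
  for (size_t i = 0; i < sizes.size(); i++) {
    int blocks = static_cast<int>((sizes[i] + kBlockSize - 1) / kBlockSize);
    hipLaunchKernelGGL(scale_inplace, dim3(blocks), dim3(kBlockSize), 0, stream,
                       reinterpret_cast<float *>(cursor), sizes[i], 0.9f);
    cursor += sizes[i] * sizeof(float);
  }
}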
