diff --git a/tensorflow/core/common_runtime/executor.cc b/tensorflow/core/common_runtime/executor.cc
index fd38329a1fa..3df0d2a15be 100644
--- a/tensorflow/core/common_runtime/executor.cc
+++ b/tensorflow/core/common_runtime/executor.cc
@@ -730,15 +730,23 @@ Status ExecutorState::ProcessSync(
     } else if (kernel_stats_->HasExpensiveMarker(item)) {
       KernelTimer timer;
+      // Per-thread invocation counter that drives cost-estimate sampling. It is
+      // thread_local rather than a plain static so that concurrent callers on
+      // different threads never read or write the same counter.
+      thread_local uint64 update_counter = 0;
       device->Compute(op_kernel, &ctx);
-      // For expensive kernels, always update the cost estimate. For inexpensive
-      // kernels, update the cost estimate with ~1/16 probability. This assumes
-      // that the last 4 bits of the CPU cycle count is uniformly distributed.
+      // For expensive kernels, always update the cost estimate. For inexpensive
+      // kernels, update the cost estimate on every 16th invocation counted on
+      // the calling thread.
+      constexpr int kKernelExecutionTrackingInvocationSkipCount = 16;
       if (is_expensive ||
-          timer.start_cycles % kKernelExecutionTrackingInvocationSkipCount == 0) {
+          update_counter % kKernelExecutionTrackingInvocationSkipCount == 0) {
         kernel_stats_->UpdateCostEstimate(item, timer.ElapsedCycles());
       }
+
+      // Count every invocation, whether or not it was sampled.
+      ++update_counter;
     } else {
       device->Compute(op_kernel, &ctx);
     }