Skip to content

Commit

Permalink
Add median, p5 and p95 latency to TFLite benchmark tool results.
Browse files: browse the repository at this point in the history
PiperOrigin-RevId: 693171996
  • Loading branch information
tensorflower-gardener committed Nov 5, 2024
1 parent 94ed3de commit eb62448
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 16 deletions.
15 changes: 8 additions & 7 deletions tensorflow/lite/tools/benchmark/benchmark_model.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ limitations under the License.

namespace tflite {
namespace benchmark {
using tensorflow::Stat;
using tensorflow::StatWithPercentiles;

constexpr int kMemoryCheckIntervalMs = 50;

Expand Down Expand Up @@ -218,10 +218,11 @@ TfLiteStatus BenchmarkModel::PrepareInputData() { return kTfLiteOk; }

// Performs no work and unconditionally reports success (kTfLiteOk).
TfLiteStatus BenchmarkModel::ResetInputsAndOutputs() {
  return kTfLiteOk;
}

Stat<int64_t> BenchmarkModel::Run(int min_num_times, float min_secs,
float max_secs, RunType run_type,
TfLiteStatus* invoke_status) {
Stat<int64_t> run_stats;
StatWithPercentiles<int64_t> BenchmarkModel::Run(int min_num_times,
float min_secs, float max_secs,
RunType run_type,
TfLiteStatus* invoke_status) {
StatWithPercentiles<int64_t> run_stats;
TFLITE_LOG(INFO) << "Running benchmark for at least " << min_num_times
<< " iterations and at least " << min_secs << " seconds but"
<< " terminate if exceeding " << max_secs << " seconds.";
Expand Down Expand Up @@ -335,15 +336,15 @@ TfLiteStatus BenchmarkModel::Run() {
}

listeners_.OnBenchmarkStart(params_);
Stat<int64_t> warmup_time_us =
StatWithPercentiles<int64_t> warmup_time_us =
Run(params_.Get<int32_t>("warmup_runs"),
params_.Get<float>("warmup_min_secs"), params_.Get<float>("max_secs"),
WARMUP, &status);
if (status != kTfLiteOk) {
return status;
}

Stat<int64_t> inference_time_us =
StatWithPercentiles<int64_t> inference_time_us =
Run(params_.Get<int32_t>("num_runs"), params_.Get<float>("min_secs"),
params_.Get<float>("max_secs"), REGULAR, &status);
const auto overall_mem_usage =
Expand Down
20 changes: 11 additions & 9 deletions tensorflow/lite/tools/benchmark/benchmark_model.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ class BenchmarkResults {
BenchmarkResults() {}
BenchmarkResults(double model_size_mb, int64_t startup_latency_us,
uint64_t input_bytes,
tensorflow::Stat<int64_t> warmup_time_us,
tensorflow::Stat<int64_t> inference_time_us,
tensorflow::StatWithPercentiles<int64_t> warmup_time_us,
tensorflow::StatWithPercentiles<int64_t> inference_time_us,
const profiling::memory::MemoryUsage& init_mem_usage,
const profiling::memory::MemoryUsage& overall_mem_usage,
float peak_mem_mb)
Expand All @@ -60,10 +60,12 @@ class BenchmarkResults {
peak_mem_mb_(peak_mem_mb) {}

const double model_size_mb() const { return model_size_mb_; }
tensorflow::Stat<int64_t> inference_time_us() const {
tensorflow::StatWithPercentiles<int64_t> inference_time_us() const {
return inference_time_us_;
}
tensorflow::Stat<int64_t> warmup_time_us() const { return warmup_time_us_; }
tensorflow::StatWithPercentiles<int64_t> warmup_time_us() const {
return warmup_time_us_;
}
// Accessor for the stored startup latency (microseconds, per the `_us`
// suffix).
int64_t startup_latency_us() const {
  return startup_latency_us_;
}
// Accessor for the stored input byte count.
uint64_t input_bytes() const {
  return input_bytes_;
}
double throughput_MB_per_second() const {
Expand All @@ -84,8 +86,8 @@ class BenchmarkResults {
double model_size_mb_ = 0.0;
int64_t startup_latency_us_ = 0;
uint64_t input_bytes_ = 0;
tensorflow::Stat<int64_t> warmup_time_us_;
tensorflow::Stat<int64_t> inference_time_us_;
tensorflow::StatWithPercentiles<int64_t> warmup_time_us_;
tensorflow::StatWithPercentiles<int64_t> inference_time_us_;
profiling::memory::MemoryUsage init_mem_usage_;
profiling::memory::MemoryUsage overall_mem_usage_;
// An invalid value could happen when we don't monitor memory footprint for
Expand Down Expand Up @@ -216,9 +218,9 @@ class BenchmarkModel {
// Returns the model file size when it is available. This implementation
// always returns -1; being virtual, it can be overridden by subclasses.
virtual int64_t MayGetModelFileSize() {
  return -1;
}
// Pure virtual: every concrete subclass must provide an implementation.
// NOTE(review): presumably returns the total size of the model inputs in
// bytes (inferred from the name) -- confirm against the implementations.
virtual uint64_t ComputeInputBytes() = 0;
virtual tensorflow::Stat<int64_t> Run(int min_num_times, float min_secs,
float max_secs, RunType run_type,
TfLiteStatus* invoke_status);
virtual tensorflow::StatWithPercentiles<int64_t> Run(
int min_num_times, float min_secs, float max_secs, RunType run_type,
TfLiteStatus* invoke_status);
// Prepares input data for benchmark. This can be used to initialize input
// data that has non-trivial cost.
virtual TfLiteStatus PrepareInputData();
Expand Down

0 comments on commit eb62448

Please sign in to comment.