From f7e777b2689b639ed7aaba1bf15640eb1f1ad1c5 Mon Sep 17 00:00:00 2001 From: Ravenwater Date: Mon, 14 Aug 2023 21:54:59 -0400 Subject: [PATCH] quantization benchmark for fp8/fp12 --- CMakeLists.txt | 1 + .../accuracy/quantization/CMakeLists.txt | 3 + benchmark/accuracy/quantization/mpfma.cpp | 127 ++++++++++++++++++ include/universal/blas/blas_l3.hpp | 4 +- 4 files changed, 133 insertions(+), 2 deletions(-) create mode 100644 benchmark/accuracy/quantization/CMakeLists.txt create mode 100644 benchmark/accuracy/quantization/mpfma.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 760d7aa18..fa8489341 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -830,6 +830,7 @@ endif(BUILD_BENCHMARK_ERROR) # accuracy benchmarks if(BUILD_BENCHMARK_ACCURACY) add_subdirectory("benchmark/accuracy/blas") +add_subdirectory("benchmark/accuracy/quantization") endif(BUILD_BENCHMARK_ACCURACY) # reproducibility benchmarks diff --git a/benchmark/accuracy/quantization/CMakeLists.txt b/benchmark/accuracy/quantization/CMakeLists.txt new file mode 100644 index 000000000..d7c651eb6 --- /dev/null +++ b/benchmark/accuracy/quantization/CMakeLists.txt @@ -0,0 +1,3 @@ +file (GLOB SOURCES "./*.cpp") + +compile_all("true" "accuracy" "Benchmarks/Accuracy/quantization" "${SOURCES}") diff --git a/benchmark/accuracy/quantization/mpfma.cpp b/benchmark/accuracy/quantization/mpfma.cpp new file mode 100644 index 000000000..195c40bee --- /dev/null +++ b/benchmark/accuracy/quantization/mpfma.cpp @@ -0,0 +1,127 @@ +// mpfma.cpp: accuracy/quantization measurement of mixed-precision dot products +// +// Copyright (C) 2017-2023 Stillwater Supercomputing, Inc. +// +// This file is part of the universal numbers project, which is released under an MIT Open Source license. +#include +#include +#include +#include + +constexpr unsigned FIELD_WIDTH = 8; + +template +void QuantizationExperiment(unsigned nrSamples, unsigned vectorSize, double mean = 0.0, double stddev = 1.0, bool verbose = false) { + using namespace sw::universal; + + std::cout << "Experiment: nrSamples(" << nrSamples << ") vectorSize(" << vectorSize << ") mean(" << mean << ") stddev(" << stddev << ")\n"; + int64_t L{ vectorSize }, N{ nrSamples }; + blas::vector reference_data(L); + blas::vector y_data(L); + gaussian_random(y_data, mean, stddev); + blas::vector sorted(L); + blas::vector quantized_data(L), quantized_sorted(L), quantized_y(L); + quantized_y = y_data; + blas::vector upSampled(L); + blas::vector y(L); + blas::vector ySorted(L); + y = quantized_y; // upsample the y vector for the dot product x * y + ySorted = quantized_y; + std::sort(ySorted.begin(), ySorted.end()); + auto ySortedAvg = sum(ySorted) / L; + if (verbose) { + std::cout << std::setw(8) << "" + << "[ " + << std::setw(FIELD_WIDTH) << ySorted[0] << " ... " + << std::setw(FIELD_WIDTH) << ySortedAvg << " ... " + << std::setw(FIELD_WIDTH) << ySorted[L - 1] << "]\n"; + } + + blas::vector dotProduct(N); + double experimentalMean{ 0.0 }; + double quantizedMean{ 0.0 }; + for (unsigned i = 0; i < N; ++i) { + gaussian_random(reference_data, mean, stddev); + sorted = reference_data; + std::sort(sorted.begin(), sorted.end()); + auto sorted_avg = blas::sum(sorted) / L; + experimentalMean += sorted_avg; + + quantized_data = reference_data; + auto quantized_avg = double(blas::sum(quantized_data)) / L; + quantizedMean += quantized_avg; + + // dot products in AccumulationType + upSampled = quantized_data; + dotProduct[i] = upSampled * y; + + if (verbose) { + quantized_sorted = sorted; + std::cout << std::setw(8) << i + << "[ " + << std::setw(FIELD_WIDTH) << sorted[0] << " ... " + << std::setw(FIELD_WIDTH) << sorted_avg << " ... " + << std::setw(FIELD_WIDTH) << sorted[L - 1] << "]\n"; + std::cout << std::setw(8) << "" + << "[ " + << std::setw(FIELD_WIDTH) << quantized_sorted[0] << " ... " + << std::setw(FIELD_WIDTH) << quantized_avg << " ... " + << std::setw(FIELD_WIDTH) << quantized_sorted[L - 1] << "]\n"; + } + } + std::cout << "experimental mean : " << (experimentalMean / N) << '\n'; + std::cout << "quantized mean : " << (quantizedMean / N) << '\n'; + + AccumulationType avg = sum(dotProduct) / N; + std::cout << "dot product mean : " << avg << '\n'; +} + +int main(int argc, char** argv) +try { + using namespace sw::universal; + + std::streamsize prec = std::cout.precision(); + std::cout << std::setprecision(3); + + // generate a set of N vectors of length L in double as reference + using fp8 = fp8e2m5; + using fp12 = cfloat<12, 5, uint16_t, true, true, false>; // accumulation type + + double mean{ 0.0 }, stddev{ 1.0 }; + std::cout << "representation type : " << symmetry_range() << '\n'; + std::cout << "accumulation type : " << symmetry_range() << '\n'; + unsigned nrSamples{ 10000 }; + QuantizationExperiment(nrSamples, 50, mean, stddev); + QuantizationExperiment(nrSamples, 100, mean, stddev); + QuantizationExperiment(nrSamples, 200, mean, stddev); + QuantizationExperiment(nrSamples, 400, mean, stddev); + QuantizationExperiment(nrSamples, 600, mean, stddev); + QuantizationExperiment(nrSamples, 800, mean, stddev); + QuantizationExperiment(nrSamples, 1000, mean, stddev); + QuantizationExperiment(nrSamples, 2000, mean, stddev); + QuantizationExperiment(nrSamples, 4000, mean, stddev); + + std::cout << std::setprecision(prec); + + return EXIT_SUCCESS; +} +catch (char const* msg) { + std::cerr << msg << std::endl; + return EXIT_FAILURE; +} +catch (const sw::universal::universal_arithmetic_exception& err) { + std::cerr << "Uncaught universal arithmetic exception: " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (const sw::universal::universal_internal_exception& err) { + std::cerr << "Uncaught universal internal exception: " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (const std::runtime_error& err) { + std::cerr << "Uncaught runtime exception: " << err.what() << std::endl; + return EXIT_FAILURE; +} +catch (...) { + std::cerr << "Caught unknown exception" << std::endl; + return EXIT_FAILURE; +} diff --git a/include/universal/blas/blas_l3.hpp b/include/universal/blas/blas_l3.hpp index b66fe98fb..ccd5af377 100644 --- a/include/universal/blas/blas_l3.hpp +++ b/include/universal/blas/blas_l3.hpp @@ -1,7 +1,7 @@ #pragma once // blas_l3.hpp: BLAS Level 3 functions // -// Copyright (C) 2017-2022 Stillwater Supercomputing, Inc. +// Copyright (C) 2017-2023 Stillwater Supercomputing, Inc. // // This file is part of the universal numbers project, which is released under an MIT Open Source license. @@ -9,7 +9,7 @@ namespace sw { namespace universal { namespace blas { // sum entire matrix (dim == 0), all rows (dim == 1), or all columns (dim == 2) template -vector sum(Matrix& A, int dim = 0) { +vector sumOfElements(Matrix& A, int dim = 0) { using value_type = typename Matrix::value_type; using size_type = typename Matrix::size_type;