From a2f695d1a93d1fb214074683df5f0b3f2c144fba Mon Sep 17 00:00:00 2001 From: Dane Wagner Date: Thu, 1 Feb 2024 22:36:21 -0600 Subject: [PATCH] Refactor waveform aligner and add test --- src/libprojectM/Audio/WaveformAligner.cpp | 212 +++++++++++++--------- src/libprojectM/Audio/WaveformAligner.hpp | 17 +- tests/libprojectM/CMakeLists.txt | 1 + tests/libprojectM/WaveformAlignerTest.cpp | 83 +++++++++ 4 files changed, 222 insertions(+), 91 deletions(-) create mode 100644 tests/libprojectM/WaveformAlignerTest.cpp diff --git a/src/libprojectM/Audio/WaveformAligner.cpp b/src/libprojectM/Audio/WaveformAligner.cpp index d0e3f1385..b784fb5bb 100644 --- a/src/libprojectM/Audio/WaveformAligner.cpp +++ b/src/libprojectM/Audio/WaveformAligner.cpp @@ -8,8 +8,9 @@ namespace Audio { WaveformAligner::WaveformAligner() { - static const size_t maxOctaves{10}; - static const size_t numOctaves{static_cast(std::floor(std::log(AudioBufferSamples - WaveformSamples) / std::log(2.0f)))}; + static const uint32_t maxOctaves{10}; + // floor(log2(96)) = log2(64) = 6 + static const uint32_t numOctaves{static_cast(std::floor(std::log2f(AudioBufferSamples - WaveformSamples)))}; m_octaves = numOctaves > maxOctaves ? maxOctaves : numOctaves; m_aligmentWeights.resize(m_octaves); @@ -21,110 +22,109 @@ WaveformAligner::WaveformAligner() m_octaveSamples[0] = AudioBufferSamples; m_octaveSampleSpacing[0] = AudioBufferSamples - WaveformSamples; - for (size_t octave = 1; octave < m_octaves; octave++) + for (uint32_t octave = 1; octave < m_octaves; octave++) { m_octaveSamples[octave] = m_octaveSamples[octave - 1] / 2; m_octaveSampleSpacing[octave] = m_octaveSampleSpacing[octave - 1] / 2; } } -void WaveformAligner::Align(WaveformBuffer& newWaveform) +void WaveformAligner::GenerateWeights() { - if (m_octaves < 4) - { - return; - } - - int alignOffset{}; - - std::vector newWaveformMips(m_octaves, WaveformBuffer()); - std::copy(newWaveform.begin(), newWaveform.end(), newWaveformMips[0].begin()); - - // Calculate mip levels - for (size_t octave = 1; octave < m_octaves; octave++) + // The below is performed only on the first fill. + for (uint32_t octave = 0; octave < m_octaves; octave++) { - for (size_t sample = 0; sample < m_octaveSamples[octave]; sample++) + // For example: + // m_octaveSampleSpacing[octave] == 4 + // m_octaveSamples[octave] == 36 + // (so we test 32 samples, w/4 offsets) + // this reduces by a factor of 2 each octave + uint32_t const compareSamples = m_octaveSamples[octave] - m_octaveSampleSpacing[octave]; + + for (uint32_t sample = 0; sample < compareSamples; sample++) { - newWaveformMips[octave][sample] = 0.5f * (newWaveformMips[octave - 1][sample * 2] + newWaveformMips[octave - 1][sample * 2 + 1]); - } - } + // Take a reference to the alignment weights and set them with the computation + // below. + auto& weightRef = m_aligmentWeights[octave][sample]; - if (!m_alignWaveReady) - { - m_alignWaveReady = true; - for (size_t octave = 0; octave < m_octaves; octave++) - { - // For example: - // m_octaveSampleSpacing[octave] == 4 - // m_octaveSamples[octave] == 36 - // (so we test 32 samples, w/4 offsets) - size_t const compareSamples = m_octaveSamples[octave] - m_octaveSampleSpacing[octave]; - - for (size_t sample = 0; sample < compareSamples; sample++) + // Start with pyramid-shaped PDF, from 0..1..0 + if (sample < compareSamples / 2) { - auto& tempVal = m_aligmentWeights[octave][sample]; - - // Start with pyramid-shaped PDF, from 0..1..0 - if (sample < compareSamples / 2) - { - tempVal = static_cast(sample * 2) / static_cast(compareSamples); - } - else - { - tempVal = static_cast((compareSamples - 1 - sample) * 2) / static_cast(compareSamples); - } - - // TWEAK how much the center matters, vs. the edges: - tempVal = (tempVal - 0.8f) * 5.0f + 0.8f; - - // Clip - if (tempVal > 1.0f) - { - tempVal = 1.0f; - } - if (tempVal < 0.0f) - { - tempVal = 0.0f; - } + weightRef = static_cast(sample * 2) / static_cast(compareSamples); } - - size_t sample{}; - while (m_aligmentWeights[octave][sample] == 0 && sample < compareSamples) + else { - sample++; + weightRef = static_cast((compareSamples - 1 - sample) * 2) / static_cast(compareSamples); } - m_firstNonzeroWeights[octave] = sample; - sample = compareSamples - 1; - while (m_aligmentWeights[octave][sample] == 0 && compareSamples > 1) + // TWEAK how much the center matters, vs. the edges: + + /* + * weight[i] = 5.0*((2*i/compareSamples) - 0.8) + 0.8 + * Solving for weight[i] == 0 we get + * 2*i/compareSamples = -0.8/5 + 0.8 + * i = 0.32*compareSamples + * The weight distribution is symmetric so the falling side gives + * i = 0.68*compareSamples + */ + weightRef = (weightRef - 0.8f) * 5.0f + 0.8f; + + // Clip (needed because the TWEAK above results in weights from -3.2 to 1.8) + if (weightRef > 1.0f) + { + weightRef = 1.0f; + } + if (weightRef < 0.0f) { - sample--; + weightRef = 0.0f; } - m_lastNonzeroWeights[octave] = sample; } + + uint32_t sample{}; + // The code below also is only needed because of the TWEAK above, which zeroes + // a total of 64% of the weights. + while (m_aligmentWeights[octave][sample] == 0 && sample < compareSamples) + { + sample++; + } + m_firstNonzeroWeights[octave] = sample; + + sample = compareSamples - 1; + while (m_aligmentWeights[octave][sample] == 0 && compareSamples > 1) + { + sample--; + } + m_lastNonzeroWeights[octave] = sample; } + m_alignWaveReady = true; +} - int sample1{}; - int sample2{static_cast(m_octaveSampleSpacing[m_octaves - 1])}; +int WaveformAligner::CalculateOffset(std::vector &newWaveformMips) +{ + int alignOffset{}; + + int offsetStart{}; + int offsetEnd{static_cast(m_octaveSampleSpacing[m_octaves - 1])}; // Find best match for alignment - for (int octave = static_cast(m_octaves) - 1; octave >= 0; octave--) + for (int octave = m_octaves - 1; octave >= 0; octave--) { int lowestErrorOffset{-1}; float lowestErrorAmount{}; - for (int sample = sample1; sample < sample2; sample++) + for (int offset = offsetStart; offset < offsetEnd; offset++) // 0 to 2 { float errorSum{}; - for (size_t i = m_firstNonzeroWeights[octave]; i <= m_lastNonzeroWeights[octave]; i++) + // perform the pseudo-autocorrelation + for (uint32_t i = m_firstNonzeroWeights[octave]; i <= m_lastNonzeroWeights[octave]; i++) { - errorSum += std::abs((newWaveformMips[octave][i + sample] - m_oldWaveformMips[octave][i + sample]) * m_aligmentWeights[octave][i]); + errorSum += std::abs((newWaveformMips[octave][i + offset] - m_oldWaveformMips[octave][i]) * m_aligmentWeights[octave][i]); } if (lowestErrorOffset == -1 || errorSum < lowestErrorAmount) { - lowestErrorOffset = static_cast(sample); + lowestErrorOffset = offset; lowestErrorAmount = errorSum; } } @@ -138,15 +138,21 @@ void WaveformAligner::Align(WaveformBuffer& newWaveform) // (so we'd test 64 samples, w/8->4 offsets) if (octave > 0) { - sample1 = lowestErrorOffset * 2 - 1; - sample2 = lowestErrorOffset * 2 + 2 + 1; - if (sample1 < 0) + offsetStart = lowestErrorOffset * 2 - 1; + offsetEnd = lowestErrorOffset * 2 + 2 + 1; + if (offsetStart < 0) { - sample1 = 0; + /* + * This line is what prevents us from checking negative offsets. + * There should be no impact to allowing offsetStart to be negative as long as + * its magnitude is less than m_firstNonzeroWeights[octave-1]. However, this + * is what the original milkdrop code does so we stick with that behavior. + */ + offsetStart = 0; } - if (sample2 > static_cast(m_octaveSampleSpacing[octave - 1])) + if (offsetEnd > static_cast(m_octaveSampleSpacing[octave - 1])) { - sample2 = static_cast(m_octaveSampleSpacing[octave - 1]); + offsetEnd = static_cast(m_octaveSampleSpacing[octave - 1]); } } else @@ -155,21 +161,57 @@ void WaveformAligner::Align(WaveformBuffer& newWaveform) } } - // Store mip levels for the next frame. - m_oldWaveformMips.clear(); - std::copy(newWaveformMips.begin(), newWaveformMips.end(), std::back_inserter(m_oldWaveformMips)); + return alignOffset; +} + +void WaveformAligner::ResampleOctaves(std::vector &dstWaveformMips, WaveformBuffer& newWaveform) +{ + std::copy(newWaveform.begin(), newWaveform.end(), dstWaveformMips[0].begin()); - // Finally, apply the results by scooting the aligned samples so that they start at index 0. - if (alignOffset > 0) + // "Calculate mip levels" resample 2x each octave + for (size_t octave = 1; octave < m_octaves; octave++) { - for (size_t sample = 0; sample < WaveformSamples; sample++) + for (size_t sample = 0; sample < m_octaveSamples[octave]; sample++) { - newWaveform[sample] = newWaveform[sample + alignOffset]; + // each "octave" resamples 2x using averaging but leaves the vector allocated + dstWaveformMips[octave][sample] = 0.5f * (dstWaveformMips[octave - 1][sample * 2] + dstWaveformMips[octave - 1][sample * 2 + 1]); } + } +} + +void WaveformAligner::Align(WaveformBuffer& newWaveform) +{ + if (m_octaves < 4) + { + // The original code does not align if there isn't enough margin for + // alignment but has no explanation for why the limit is 2**4. + return; + } + + // separate waveform copies for each interval + std::vector newWaveformMips(m_octaves, WaveformBuffer()); + ResampleOctaves(newWaveformMips, newWaveform); + + if (!m_alignWaveReady) + { + GenerateWeights(); + } + + int alignOffset = CalculateOffset(newWaveformMips); + + // Finally, apply the results by scooting the aligned samples so that they start at index 0. + // This is the second place where we limit negative offsets. + if (alignOffset > 0) + { + std::copy_n(newWaveform.begin() + alignOffset, WaveformSamples, newWaveform.begin()); // Set remaining samples to zero. std::fill_n(newWaveform.begin() + WaveformSamples, AudioBufferSamples - WaveformSamples, 0.0f); } + + // Store mip levels for the next frame. Note that we need to recalculate the mips for the *shifted* + // waveform, so we can't reuse the previous mips. + ResampleOctaves(m_oldWaveformMips, newWaveform); } diff --git a/src/libprojectM/Audio/WaveformAligner.hpp b/src/libprojectM/Audio/WaveformAligner.hpp index d8102726a..7e525a812 100644 --- a/src/libprojectM/Audio/WaveformAligner.hpp +++ b/src/libprojectM/Audio/WaveformAligner.hpp @@ -8,6 +8,7 @@ #include "AudioConstants.hpp" #include +#include #include namespace libprojectM { @@ -33,18 +34,22 @@ class WaveformAligner */ void Align(WaveformBuffer& newWaveform); -private: +protected: + void GenerateWeights(); + int CalculateOffset(std::vector &newWaveformMips); + void ResampleOctaves(std::vector &dstWaveformMips, WaveformBuffer& newWaveform); + bool m_alignWaveReady{false}; //!< Alignment needs special treatment for the first buffer fill. std::vector> m_aligmentWeights; //!< Sample weights per octave. - size_t m_octaves{}; //!< Number of mip-levels/octaves. - std::vector m_octaveSamples; //!< Samples per octave. - std::vector m_octaveSampleSpacing; //!< Space between samples per octave. + uint32_t m_octaves{}; //!< Number of mip-levels/octaves. + std::vector m_octaveSamples; //!< Samples per octave. + std::vector m_octaveSampleSpacing; //!< Space between samples per octave. std::vector m_oldWaveformMips; //!< Mip levels of the previous frame's waveform. - std::vector m_firstNonzeroWeights; //!< First non-zero weight sample index for each octave. - std::vector m_lastNonzeroWeights; //!< Last non-zero weight sample index for each octave. + std::vector m_firstNonzeroWeights; //!< First non-zero weight sample index for each octave. + std::vector m_lastNonzeroWeights; //!< Last non-zero weight sample index for each octave. }; } // namespace Audio diff --git a/tests/libprojectM/CMakeLists.txt b/tests/libprojectM/CMakeLists.txt index 963818ee2..f0d49f8c4 100644 --- a/tests/libprojectM/CMakeLists.txt +++ b/tests/libprojectM/CMakeLists.txt @@ -1,6 +1,7 @@ find_package(GTest 1.10 REQUIRED NO_MODULE) add_executable(projectM-unittest + WaveformAlignerTest.cpp PresetFileParserTest.cpp $ diff --git a/tests/libprojectM/WaveformAlignerTest.cpp b/tests/libprojectM/WaveformAlignerTest.cpp new file mode 100644 index 000000000..ebb943cf2 --- /dev/null +++ b/tests/libprojectM/WaveformAlignerTest.cpp @@ -0,0 +1,83 @@ +#include "Audio/WaveformAligner.hpp" + +#include + +using namespace libprojectM::Audio; + +/** + * Class to make protected function accessible to tests. + */ +class WaveformAlignerMock : public WaveformAligner +{ +private: + /* + * gtest docs discourage access to private members using this, but + * we're testing the modern port of legacy code here so we want to + * stick to the original structure as much as possible. + */ + FRIEND_TEST(projectMWaveformAligner, AlignDelta); +}; + +TEST(projectMWaveformAligner, AlignDelta) +{ + auto aligner = WaveformAlignerMock(); + ASSERT_EQ(aligner.m_octaves, 6); + + std::array wf; + std::fill(wf.begin(), wf.end(), 0.0f); + + // Create a delta waveform by setting a single value non-zero. + wf[AudioBufferSamples/2] = 1.0f; + + aligner.Align(wf); + // Ensure that the waveform has not shifted when first sampled. + EXPECT_FLOAT_EQ(wf[AudioBufferSamples/2], 1.0f); + + // Verify weights + for (uint32_t octave=0; octave < aligner.m_octaves; octave++) + { + size_t const compareSamples = aligner.m_octaveSamples[octave] - aligner.m_octaveSampleSpacing[octave]; + // Non-zero range should be (0.32, 0.68)*compareSamples based on + // the "TWEAK" calculation. If the weight calculation changes this + // range will need to change. + // Note that the test below is not a requirement for the algorithm. Instead + // of testing the correctness of the algorithm, this is testing that we + // understand the implications of the legacy code. + EXPECT_NEAR(aligner.m_firstNonzeroWeights[octave], 0.32*compareSamples, 2.0); + EXPECT_NEAR(aligner.m_lastNonzeroWeights[octave], 0.68*compareSamples, 2.0); + } + + // Reset test waveform to all zeroes + wf[AudioBufferSamples/2] = 0.0f; + + for (int i=-8; i<98; i++) + { + wf[AudioBufferSamples/2 + i] = 1.0f; + + std::vector newWaveformMips(aligner.m_octaves, WaveformBuffer()); + aligner.ResampleOctaves(newWaveformMips, wf); + int alignOffset = aligner.CalculateOffset(newWaveformMips); + if (i < 0 || i >= aligner.m_octaveSampleSpacing[0]) + { + // Only offsets between 0 and (AudioBufferSamples - WaveformSamples) are recognized + EXPECT_EQ(alignOffset, 0); + } else { + // Verify that the actual waveform offset matches the reported value + EXPECT_EQ(alignOffset, i); + } + + wf[AudioBufferSamples/2 + i] = 0.0f; + } + + for (int i=0; i<96; i++) + { + wf[AudioBufferSamples/2 + i] = 1.0f; + aligner.Align(wf); + // Verify that the new waveform has been shifted to match the first + EXPECT_EQ(wf[AudioBufferSamples/2], 1.0f); + if (i != 0) + EXPECT_EQ(wf[AudioBufferSamples/2 + i], 0.0f); + // Reset waveform + wf[AudioBufferSamples/2] = 0.0f; + } +}