Refactor waveform aligner and add test

projectM-visualizer · Feb 2, 2024 · a2f695d · a2f695d
1 parent 7e45cbe
commit a2f695d
Show file tree

Hide file tree

Showing 4 changed files with 222 additions and 91 deletions.
diff --git a/src/libprojectM/Audio/WaveformAligner.cpp b/src/libprojectM/Audio/WaveformAligner.cpp
@@ -8,8 +8,9 @@ namespace Audio {
 
 WaveformAligner::WaveformAligner()
 {
-    static const size_t maxOctaves{10};
-    static const size_t numOctaves{static_cast<size_t>(std::floor(std::log(AudioBufferSamples - WaveformSamples) / std::log(2.0f)))};
+    static const uint32_t maxOctaves{10};
+    // floor(log2(96)) = log2(64) = 6
+    static const uint32_t numOctaves{static_cast<uint32_t>(std::floor(std::log2f(AudioBufferSamples - WaveformSamples)))};
     m_octaves = numOctaves > maxOctaves ? maxOctaves : numOctaves;
 
     m_aligmentWeights.resize(m_octaves);
@@ -21,110 +22,109 @@ WaveformAligner::WaveformAligner()
 
     m_octaveSamples[0] = AudioBufferSamples;
     m_octaveSampleSpacing[0] = AudioBufferSamples - WaveformSamples;
-    for (size_t octave = 1; octave < m_octaves; octave++)
+    for (uint32_t octave = 1; octave < m_octaves; octave++)
     {
         m_octaveSamples[octave] = m_octaveSamples[octave - 1] / 2;
         m_octaveSampleSpacing[octave] = m_octaveSampleSpacing[octave - 1] / 2;
     }
 }
 
-void WaveformAligner::Align(WaveformBuffer& newWaveform)
+void WaveformAligner::GenerateWeights()
 {
-    if (m_octaves < 4)
-    {
-        return;
-    }
-
-    int alignOffset{};
-
-    std::vector<WaveformBuffer> newWaveformMips(m_octaves, WaveformBuffer());
-    std::copy(newWaveform.begin(), newWaveform.end(), newWaveformMips[0].begin());
-
-    // Calculate mip levels
-    for (size_t octave = 1; octave < m_octaves; octave++)
+    // Precomputes the per-octave alignment weights and the first/last non-zero weight indices. Align() runs this once, on the first fill.
+    for (uint32_t octave = 0; octave < m_octaves; octave++)
     {
-        for (size_t sample = 0; sample < m_octaveSamples[octave]; sample++)
+        // For example:
+        //  m_octaveSampleSpacing[octave] == 4
+        //  m_octaveSamples[octave] == 36
+        //  (so we test 32 samples, w/4 offsets)
+        // this reduces by a factor of 2 each octave
+        uint32_t const compareSamples = m_octaveSamples[octave] - m_octaveSampleSpacing[octave];
+
+        for (uint32_t sample = 0; sample < compareSamples; sample++)
         {
-            newWaveformMips[octave][sample] = 0.5f * (newWaveformMips[octave - 1][sample * 2] + newWaveformMips[octave - 1][sample * 2 + 1]);
-        }
-    }
+            // Take a reference to the alignment weights and set them with the computation
+            // below.
+            auto& weightRef = m_aligmentWeights[octave][sample];
 
-    if (!m_alignWaveReady)
-    {
-        m_alignWaveReady = true;
-        for (size_t octave = 0; octave < m_octaves; octave++)
-        {
-            // For example:
-            //  m_octaveSampleSpacing[octave] == 4
-            //  m_octaveSamples[octave] == 36
-            //  (so we test 32 samples, w/4 offsets)
-            size_t const compareSamples = m_octaveSamples[octave] - m_octaveSampleSpacing[octave];
-
-            for (size_t sample = 0; sample < compareSamples; sample++)
+            // Start with pyramid-shaped PDF, from 0..1..0
+            if (sample < compareSamples / 2)
             {
-                auto& tempVal = m_aligmentWeights[octave][sample];
-
-                // Start with pyramid-shaped PDF, from 0..1..0
-                if (sample < compareSamples / 2)
-                {
-                    tempVal = static_cast<float>(sample * 2) / static_cast<float>(compareSamples);
-                }
-                else
-                {
-                    tempVal = static_cast<float>((compareSamples - 1 - sample) * 2) / static_cast<float>(compareSamples);
-                }
-
-                // TWEAK how much the center matters, vs. the edges:
-                tempVal = (tempVal - 0.8f) * 5.0f + 0.8f;
-
-                // Clip
-                if (tempVal > 1.0f)
-                {
-                    tempVal = 1.0f;
-                }
-                if (tempVal < 0.0f)
-                {
-                    tempVal = 0.0f;
-                }
+                weightRef = static_cast<float>(sample * 2) / static_cast<float>(compareSamples);
             }
-
-            size_t sample{};
-            while (m_aligmentWeights[octave][sample] == 0 && sample < compareSamples)
+            else
             {
-                sample++;
+                weightRef = static_cast<float>((compareSamples - 1 - sample) * 2) / static_cast<float>(compareSamples);
             }
-            m_firstNonzeroWeights[octave] = sample;
 
-            sample = compareSamples - 1;
-            while (m_aligmentWeights[octave][sample] == 0 && compareSamples > 1)
+            // TWEAK how much the center matters, vs. the edges:
+
+            /*
+             * weight[i] = 5.0*((2*i/compareSamples) - 0.8) + 0.8
+             * Solving for weight[i] == 0 we get
+             * 2*i/compareSamples = -0.8/5 + 0.8
+             * i = 0.32*compareSamples
+             * The weight distribution is symmetric so the falling side gives
+             * i = 0.68*compareSamples
+             */
+            weightRef = (weightRef - 0.8f) * 5.0f + 0.8f;
+
+            // Clip (needed because the TWEAK above results in weights from -3.2 to 1.8)
+            if (weightRef > 1.0f)
+            {
+                weightRef = 1.0f;
+            }
+            if (weightRef < 0.0f)
             {
-                sample--;
+                weightRef = 0.0f;
             }
-            m_lastNonzeroWeights[octave] = sample;
         }
+
+        uint32_t sample{};
+        // The TWEAK above zeroes roughly 64% of the weights (32% at each edge), so locate the first
+        // non-zero weight. The bound is tested before the element is read.
+        while (sample < compareSamples && m_aligmentWeights[octave][sample] == 0)
+        {
+            sample++;
+        }
+        m_firstNonzeroWeights[octave] = sample;
+        // Scan backwards for the last non-zero weight; stopping at index 0 keeps the unsigned index from wrapping.
+        sample = compareSamples > 0 ? compareSamples - 1 : 0;
+        while (sample > 0 && m_aligmentWeights[octave][sample] == 0)
+        {
+            sample--;
+        }
+        m_lastNonzeroWeights[octave] = sample;
     }
+    m_alignWaveReady = true;
+}
 
-    int sample1{};
-    int sample2{static_cast<int>(m_octaveSampleSpacing[m_octaves - 1])};
+int WaveformAligner::CalculateOffset(std::vector<WaveformBuffer> &newWaveformMips)
+{
+    int alignOffset{};
+
+    int offsetStart{};
+    int offsetEnd{static_cast<int>(m_octaveSampleSpacing[m_octaves - 1])};
 
     // Find best match for alignment
-    for (int octave = static_cast<int>(m_octaves) - 1; octave >= 0; octave--)
+    for (int octave = m_octaves - 1; octave >= 0; octave--)
     {
         int lowestErrorOffset{-1};
         float lowestErrorAmount{};
 
-        for (int sample = sample1; sample < sample2; sample++)
+        for (int offset = offsetStart; offset < offsetEnd; offset++) // 0 to 2
         {
             float errorSum{};
 
-            for (size_t i = m_firstNonzeroWeights[octave]; i <= m_lastNonzeroWeights[octave]; i++)
+            // perform the pseudo-autocorrelation
+            for (uint32_t i = m_firstNonzeroWeights[octave]; i <= m_lastNonzeroWeights[octave]; i++)
             {
-                errorSum += std::abs((newWaveformMips[octave][i + sample] - m_oldWaveformMips[octave][i + sample]) * m_aligmentWeights[octave][i]);
+                errorSum += std::abs((newWaveformMips[octave][i + offset] - m_oldWaveformMips[octave][i]) * m_aligmentWeights[octave][i]);
             }
 
             if (lowestErrorOffset == -1 || errorSum < lowestErrorAmount)
             {
-                lowestErrorOffset = static_cast<int>(sample);
+                lowestErrorOffset = offset;
                 lowestErrorAmount = errorSum;
             }
         }
@@ -138,15 +138,21 @@ void WaveformAligner::Align(WaveformBuffer& newWaveform)
         //  (so we'd test 64 samples, w/8->4 offsets)
         if (octave > 0)
         {
-            sample1 = lowestErrorOffset * 2 - 1;
-            sample2 = lowestErrorOffset * 2 + 2 + 1;
-            if (sample1 < 0)
+            offsetStart = lowestErrorOffset * 2 - 1;
+            offsetEnd = lowestErrorOffset * 2 + 2 + 1;
+            if (offsetStart < 0)
             {
-                sample1 = 0;
+                /*
+                 * This line is what prevents us from checking negative offsets.
+                 * There should be no impact to allowing offsetStart to be negative as long as
+                 * its magnitude is less than m_firstNonzeroWeights[octave-1]. However, this
+                 * is what the original milkdrop code does so we stick with that behavior.
+                 */
+                offsetStart = 0;
             }
-            if (sample2 > static_cast<int>(m_octaveSampleSpacing[octave - 1]))
+            if (offsetEnd > static_cast<int>(m_octaveSampleSpacing[octave - 1]))
             {
-                sample2 = static_cast<int>(m_octaveSampleSpacing[octave - 1]);
+                offsetEnd = static_cast<int>(m_octaveSampleSpacing[octave - 1]);
             }
         }
         else
@@ -155,21 +161,57 @@ void WaveformAligner::Align(WaveformBuffer& newWaveform)
         }
     }
 
-    // Store mip levels for the next frame.
-    m_oldWaveformMips.clear();
-    std::copy(newWaveformMips.begin(), newWaveformMips.end(), std::back_inserter(m_oldWaveformMips));
+    return alignOffset;
+}
+
+void WaveformAligner::ResampleOctaves(std::vector<WaveformBuffer> &dstWaveformMips, WaveformBuffer& newWaveform)
+{
+    std::copy(newWaveform.begin(), newWaveform.end(), dstWaveformMips[0].begin());
 
-    // Finally, apply the results by scooting the aligned samples so that they start at index 0.
-    if (alignOffset > 0)
+    // Calculate mip levels: level 0 is the copy made above, each further octave is built from the previous one at half the sample count.
+    for (size_t octave = 1; octave < m_octaves; octave++)
     {
-        for (size_t sample = 0; sample < WaveformSamples; sample++)
+        for (size_t sample = 0; sample < m_octaveSamples[octave]; sample++)
         {
-            newWaveform[sample] = newWaveform[sample + alignOffset];
+            // Average each adjacent pair of the previous octave; only the first m_octaveSamples[octave] entries of this level are written.
+            dstWaveformMips[octave][sample] = 0.5f * (dstWaveformMips[octave - 1][sample * 2] + dstWaveformMips[octave - 1][sample * 2 + 1]);
         }
+    }
+}
+
+void WaveformAligner::Align(WaveformBuffer& newWaveform)
+{
+    if (m_octaves < 4)
+    {
+        // Skip alignment when fewer than 4 octaves are available (margin below 2**4 samples).
+        // The limit comes from the original code, which gives no rationale for it.
+        return;
+    }
+
+    // One working buffer per octave; ResampleOctaves() fills them from the new waveform.
+    std::vector<WaveformBuffer> newWaveformMips(m_octaves, WaveformBuffer());
+    ResampleOctaves(newWaveformMips, newWaveform);
+
+    if (!m_alignWaveReady)
+    {
+        GenerateWeights();
+    }
+
+    int alignOffset = CalculateOffset(newWaveformMips);
+
+    // Finally, apply the results by scooting the aligned samples so that they start at index 0.
+    // Offsets <= 0 leave the buffer untouched; this is the second place where negative offsets are limited.
+    if (alignOffset > 0)
+    {
+        std::copy_n(newWaveform.begin() + alignOffset, WaveformSamples, newWaveform.begin());
 
         // Set remaining samples to zero.
         std::fill_n(newWaveform.begin() + WaveformSamples, AudioBufferSamples - WaveformSamples, 0.0f);
     }
+
+    // Store mip levels for the next frame. They are recalculated from the *shifted* waveform,
+    // so the mips computed at the top of this function cannot be reused.
+    ResampleOctaves(m_oldWaveformMips, newWaveform);
 }
 
 

diff --git a/src/libprojectM/Audio/WaveformAligner.hpp b/src/libprojectM/Audio/WaveformAligner.hpp
@@ -8,6 +8,7 @@
 #include "AudioConstants.hpp"
 
 #include <cstddef>
+#include <cstdint>
 #include <vector>
 
 namespace libprojectM {
@@ -33,18 +34,22 @@ class WaveformAligner
      */
     void Align(WaveformBuffer& newWaveform);
 
-private:
+protected:
+    void GenerateWeights();
+    int CalculateOffset(std::vector<WaveformBuffer> &newWaveformMips);
+    void ResampleOctaves(std::vector<WaveformBuffer> &dstWaveformMips, WaveformBuffer& newWaveform);
+
     bool m_alignWaveReady{false}; //!< Alignment needs special treatment for the first buffer fill.
 
     std::vector<std::array<float, AudioBufferSamples>> m_aligmentWeights; //!< Sample weights per octave.
 
-    size_t m_octaves{};                        //!< Number of mip-levels/octaves.
-    std::vector<size_t> m_octaveSamples;       //!< Samples per octave.
-    std::vector<size_t> m_octaveSampleSpacing; //!< Space between samples per octave.
+    uint32_t m_octaves{};                        //!< Number of mip-levels/octaves.
+    std::vector<uint32_t> m_octaveSamples;       //!< Samples per octave.
+    std::vector<uint32_t> m_octaveSampleSpacing; //!< Space between samples per octave.
 
     std::vector<WaveformBuffer> m_oldWaveformMips; //!< Mip levels of the previous frame's waveform.
-    std::vector<size_t> m_firstNonzeroWeights;     //!< First non-zero weight sample index for each octave.
-    std::vector<size_t> m_lastNonzeroWeights;      //!< Last non-zero weight sample index for each octave.
+    std::vector<uint32_t> m_firstNonzeroWeights;     //!< First non-zero weight sample index for each octave.
+    std::vector<uint32_t> m_lastNonzeroWeights;      //!< Last non-zero weight sample index for each octave.
 };
 
 } // namespace Audio

diff --git a/tests/libprojectM/CMakeLists.txt b/tests/libprojectM/CMakeLists.txt
@@ -1,6 +1,7 @@
 find_package(GTest 1.10 REQUIRED NO_MODULE)
 
 add_executable(projectM-unittest
+        WaveformAlignerTest.cpp
         PresetFileParserTest.cpp
 
         $<TARGET_OBJECTS:Audio>