diff --git a/CHANGELOG.md b/CHANGELOG.md
index e66294794..b3f6fb850 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,12 @@
 # Changelog
 
+## develop
+
+### Fixes
+
+- fix: fix clipping issue in speech separation pipeline ([@joonaskalda](https://github.com/joonaskalda/))
+
+
 ## Version 3.3.2 (2024-09-11)
 
 ### Fixes
diff --git a/pyannote/audio/pipelines/speech_separation.py b/pyannote/audio/pipelines/speech_separation.py
index c1b9b036c..43c5b8a44 100644
--- a/pyannote/audio/pipelines/speech_separation.py
+++ b/pyannote/audio/pipelines/speech_separation.py
@@ -654,6 +654,12 @@ def apply(
                 sources.data * discrete_diarization.align(sources).data[:, :num_sources]
             )
 
+        # separated sources might be scaled up/down due to SI-SDR loss used when training
+        # so we peak-normalize them (all-zero sources are kept as is to avoid 0/0 = NaN)
+        peaks = np.max(np.abs(sources.data), axis=0, keepdims=True)
+        peaks[peaks == 0] = 1.0
+        sources.data = sources.data / peaks
+
         # convert to continuous diarization
         diarization = self.to_annotation(
             discrete_diarization,