From e5ecafc5413959695b5f80bbbadb3af9606912fe Mon Sep 17 00:00:00 2001 From: Lebourdais Date: Mon, 21 Oct 2024 16:11:07 +0200 Subject: [PATCH 1/5] Fix: Align output of separation pipeline with the diarization when using a reference annotation --- pyannote/audio/pipelines/speech_separation.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/pyannote/audio/pipelines/speech_separation.py b/pyannote/audio/pipelines/speech_separation.py index dacb637b1..6381db8c7 100644 --- a/pyannote/audio/pipelines/speech_separation.py +++ b/pyannote/audio/pipelines/speech_separation.py @@ -124,7 +124,7 @@ class SpeechSeparation(SpeakerDiarizationMixin, Pipeline): def __init__( self, - segmentation: PipelineModel = None, + segmentation: PipelineModel = "pyannote/separation-ami-1.0", segmentation_step: float = 0.1, embedding: PipelineModel = "speechbrain/spkrec-ecapa-voxceleb@5c0be3875fda05e81f3c004ed8c7c06be308de1e", embedding_exclude_overlap: bool = False, @@ -698,6 +698,15 @@ def apply( # strings and integers when reference is available and some hypothesis # speakers are not present in the reference) + # re-order sources so that they match + # the order given by diarization.labels() + inverse_mapping = {label: index for index, label in mapping.items()} + original_sliding_window = sources.sliding_window + data = sources.data[ + :, [inverse_mapping[label] for label in diarization.labels()] + ] + sources = SlidingWindowFeature(data, original_sliding_window) + if not return_embeddings: return diarization, sources From 3bf0cbd578a641708e1cb56ffb4bc3c64705301e Mon Sep 17 00:00:00 2001 From: Lebourdais Date: Mon, 21 Oct 2024 16:17:32 +0200 Subject: [PATCH 2/5] Fix: duplicate of a line --- pyannote/audio/pipelines/speech_separation.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pyannote/audio/pipelines/speech_separation.py b/pyannote/audio/pipelines/speech_separation.py index 6381db8c7..6b94e3151 100644 --- a/pyannote/audio/pipelines/speech_separation.py 
+++ b/pyannote/audio/pipelines/speech_separation.py @@ -726,7 +726,6 @@ def apply( # re-order centroids so that they match # the order given by diarization.labels() - inverse_mapping = {label: index for index, label in mapping.items()} centroids = centroids[ [inverse_mapping[label] for label in diarization.labels()] ] From 6de69526cdcbca46a051ea464727a5e1864991c9 Mon Sep 17 00:00:00 2001 From: Lebourdais Date: Wed, 23 Oct 2024 16:09:43 +0200 Subject: [PATCH 3/5] Update pyannote/audio/pipelines/speech_separation.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Hervé BREDIN --- pyannote/audio/pipelines/speech_separation.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pyannote/audio/pipelines/speech_separation.py b/pyannote/audio/pipelines/speech_separation.py index 6b94e3151..a129ea7a4 100644 --- a/pyannote/audio/pipelines/speech_separation.py +++ b/pyannote/audio/pipelines/speech_separation.py @@ -701,11 +701,9 @@ def apply( # re-order sources so that they match # the order given by diarization.labels() inverse_mapping = {label: index for index, label in mapping.items()} - original_sliding_window = sources.sliding_window - data = sources.data[ + sources.data = sources.data[ :, [inverse_mapping[label] for label in diarization.labels()] ] - sources = SlidingWindowFeature(data, original_sliding_window) if not return_embeddings: return diarization, sources From ca54f0ac85fe7d743510d71c3a0e9c1d67c6ab46 Mon Sep 17 00:00:00 2001 From: Lebourdais Date: Wed, 23 Oct 2024 16:24:07 +0200 Subject: [PATCH 4/5] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 100ef7278..7a1d5d304 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ ### Fixes - fix: fix clipping issue in speech separation pipeline ([@joonaskalda](https://github.com/joonaskalda/)) +- fix: fix alignment between separated sources and diarization when the 
diarization reference is available ## Version 3.3.2 (2024-09-11) From 38e51095116cd3d247b52a696f332405abf7a277 Mon Sep 17 00:00:00 2001 From: Lebourdais Date: Wed, 23 Oct 2024 16:24:48 +0200 Subject: [PATCH 5/5] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7a1d5d304..95e099a4b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ ### Fixes - fix: fix clipping issue in speech separation pipeline ([@joonaskalda](https://github.com/joonaskalda/)) -- fix: fix alignment between separated sources and diarization when the diarization reference is available +- fix: fix alignment between separated sources and diarization when the diarization reference is available ([@Lebourdais](https://github.com/Lebourdais/)) ## Version 3.3.2 (2024-09-11)