Skip to content

Commit

Permalink
Merge pull request #73 from fgnt/seglst-apply-assignment
Browse files Browse the repository at this point in the history
Seglst apply assignment
  • Loading branch information
thequilo authored Apr 14, 2024
2 parents 944dde2 + b324638 commit 0e022b0
Show file tree
Hide file tree
Showing 7 changed files with 367 additions and 126 deletions.
1 change: 1 addition & 0 deletions meeteval/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from . import io
from . import wer
from . import der
from . import viz

__version__ = '0.0.1'
7 changes: 6 additions & 1 deletion meeteval/wer/matching/cy_time_constrained_orc_matching.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ def time_constrained_orc_levenshtein_distance(
reference_timings,
hypothesis_timings
):
"""
Compute the time-constrained ORC Levenshtein distance between two sequences of
symbols and return the distance and the alignment.
"""
# Validate inputs
if len(reference) != len(reference_timings):
raise ValueError("reference and reference_timings must have the same length")
Expand All @@ -31,7 +35,8 @@ def time_constrained_orc_levenshtein_distance(
if len(reference) == 0:
return sum(len(h) for h in hypothesis), []
if len(hypothesis) == 0:
return sum(len(r) for r in reference), []
# 0 is a dummy stream index
return sum(len(r) for r in reference), [0] * len(reference)

# Translate symbols/words to integers for the cpp code
all_symbols = set()
Expand Down
87 changes: 82 additions & 5 deletions meeteval/wer/wer/cp.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
import string
from typing import Optional, Any, Iterable

import meeteval
from meeteval._typing import Literal
from meeteval.io.seglst import SegLST, asseglst
from meeteval.io.seglst import SegLST, asseglst, asseglistconvertible

from meeteval.wer.wer.error_rate import ErrorRate

Expand Down Expand Up @@ -279,15 +280,14 @@ def _cp_error_rate(

def apply_cp_assignment(
assignment: 'list[tuple[Any, ...]] | tuple[tuple[Any, ...], ...]',
reference: dict,
hypothesis: dict,
reference: 'dict | list | tuple | SegLST',
hypothesis: 'dict | list | tuple | SegLST',
style: 'Literal["hyp", "ref"]' = 'ref',
fallback_keys=string.ascii_letters,
missing='',
):
"""
Apply the assignment, so that reference and hypothesis have the same
keys.
Apply the assignment so that reference and hypothesis have the same keys.
The code is roughly:
if style == 'ref':
Expand Down Expand Up @@ -353,9 +353,86 @@ def apply_cp_assignment(
>>> test_list([(0, 0), (None, 1)])
(['0ref', ''], ['0hyp', '1hyp'])
(['0ref', ''], ['0hyp', '1hyp'])
Also works for anything convertible to SegLST
>>> r, h = apply_cp_assignment(
... [('rA', 'hB'), ('rB', 'hA')],
... meeteval.io.STM.parse(
... 'file1 0 rA 0 1 Hello World\\n'
... 'file1 0 rB 0 1 Goodbye'
... ),
... meeteval.io.STM.parse(
... 'file1 0 hB 0 1 Hello World\\n'
... 'file1 0 hA 0 1 Goodbye'
... ),
... style='ref'
... )
>>> print(r.dumps())
file1 0 rA 0 1 Hello World
file1 0 rB 0 1 Goodbye
<BLANKLINE>
>>> print(h.dumps())
file1 0 rA 0 1 Hello World
file1 0 rB 0 1 Goodbye
<BLANKLINE>
"""
assert assignment, assignment

try:
r_conv = asseglistconvertible(reference, py_convert=None)
h_conv = asseglistconvertible(hypothesis, py_convert=None)
except Exception:
# This is a Python structure
pass
else:
reference = r_conv.to_seglst()
hypothesis = h_conv.to_seglst()

# Check for valid keys
r_keys, h_keys = zip(*assignment)
r_keys = set(r_keys) - {None}
h_keys = set(h_keys) - {None}
assert r_keys == reference.unique('speaker'), (r_keys, reference.unique('speaker'), assignment)
assert h_keys == hypothesis.unique('speaker'), (h_keys, hypothesis.unique('speaker'), assignment)

fallback_keys_iter = iter([
k
for k in fallback_keys
if k not in r_keys
if k not in h_keys
])

try:
if style == 'hyp':
assignment = {
r: h if h is not None else next(fallback_keys_iter)
for r, h in assignment
if r is not None
}
# Change the keys of the reference to those of the hypothesis
reference = reference.map(lambda s: {
**s, 'speaker': assignment[s['speaker']]
})
elif style == 'ref':
assignment = {
h: r if r is not None else next(fallback_keys_iter)
for r, h in assignment
if h is not None
}
hypothesis = hypothesis.map(lambda s: {
**s, 'speaker': assignment[s['speaker']]
})
else:
raise ValueError(f'{style!r} not in ["ref", "hyp"]')
except StopIteration:
raise RuntimeError(
f'Too few fallback keys provided! '
f'There are more over-/under-estimated speakers '
f'than fallback_keys in {fallback_keys}'
)

return r_conv.new(reference), h_conv.new(hypothesis)

if isinstance(reference, dict) and isinstance(hypothesis, dict):
# Check for valid keys
assert None not in reference, reference.keys()
Expand Down
33 changes: 32 additions & 1 deletion meeteval/wer/wer/mimo.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import dataclasses
from typing import Iterable, Any

from meeteval.io.seglst import asseglst
from meeteval.io.seglst import asseglst, asseglistconvertible
from meeteval.wer.wer.error_rate import ErrorRate
from meeteval.wer.wer.siso import _siso_error_rate
from meeteval.wer.utils import _keys, _items, _values
Expand Down Expand Up @@ -162,7 +162,38 @@ def apply_mimo_assignment(
>>> hypothesis = {'O1': 'c d', 'O2': 'a b e f'}
>>> apply_mimo_assignment(assignment, reference, hypothesis)
({'O1': ['a b', 'c d'], 'O2': []}, {'O1': 'c d', 'O2': 'a b e f'})
>>> reference = STM.parse('X 1 A 0.0 1.0 a b\\nX 1 A 1.0 2.0 c d\\n')
>>> hypothesis = STM.parse('X 1 O1 0.0 2.0 c d\\nX 1 O0 0.0 2.0 a b e f\\n')
>>> reference, hypothesis = apply_mimo_assignment(assignment, reference, hypothesis)
>>> print(reference.dumps())
X 1 O1 0.0 1.0 a b
X 1 O1 1.0 2.0 c d
<BLANKLINE>
>>> print(hypothesis.dumps())
X 1 O1 0.0 2.0 c d
X 1 O0 0.0 2.0 a b e f
<BLANKLINE>
"""

try:
r_conv = asseglistconvertible(reference, py_convert=None)
except Exception:
pass
else:
reference = r_conv.to_seglst().sorted('start_time')
reference = {
k: list(v)
for k, v in reference.groupby('speaker').items()
}

reference_new = [
{**reference[r].pop(0), 'speaker': h}
for r, h in assignment
]

return r_conv.new(reference_new), hypothesis

reference_new = {k: [] for k in _keys(hypothesis)}
# convert to list and copy
reference = {k: list(v) for k, v in _items(reference)}
Expand Down
Loading

0 comments on commit 0e022b0

Please sign in to comment.