-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparse_xml.py
63 lines (51 loc) · 2.09 KB
/
parse_xml.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import argparse
import xml.etree.ElementTree as ET
from pyannote.core import Annotation, Segment
from pathlib import Path
"""
Parses a Pangloss XML annotation file and
returns the annotations as a pyannote.core.Annotation object.
Example of an S tag:
<S id='S1' who="Questions">
<AUDIO start='0.000' end='1.900'/>
<FORM kindOf='phono'>Quelles sont les langues que tu connais ?</FORM>
</S>
"""
def count_speakers(xml_file):
    """
    Count the distinct speakers named in a Pangloss XML annotation file.

    Only the 'S' elements that are direct children of the root element are
    inspected. A speaker name is the value of the element's 'who' attribute
    with surrounding whitespace removed, so who="A" and who=" A " are counted
    as the same speaker. Elements whose 'who' attribute is missing, empty or
    whitespace-only are ignored.

    The set of speaker names found is also printed to standard output.

    Args:
        xml_file: File name or file object, passed straight to ET.parse.

    Returns:
        The number of distinct speaker names, 0 when no element names one.

    Any error raised by ET.parse (unreadable file, malformed XML) propagates.

    TODO : check problem who='A' and 'B' in some files
    """
    root = ET.parse(xml_file).getroot()
    speakers = set()  # distinct, whitespace-normalized speaker names
    for segment in root.findall('S'):
        # get() returns None when the attribute is absent; treat that as empty.
        speaker = (segment.get('who') or '').strip()
        if speaker:
            speakers.add(speaker)
    print(speakers)
    return len(speakers)
def parse_xml(xml_file):
    """
    Convert a Pangloss XML annotation file into a pyannote Annotation.

    Each 'S' element that is a direct child of the root contributes one
    Segment built from the 'start' and 'end' attributes of its 'AUDIO' child.
    The segment is labelled with the element's 'who' attribute, which is
    None when the attribute is absent.

    'S' elements without an 'AUDIO' child, or whose 'AUDIO' lacks a 'start'
    or 'end' attribute, carry no usable timing and are skipped instead of
    aborting the whole conversion.

    Args:
        xml_file: File name or file object, passed straight to ET.parse.

    Returns:
        An Annotation holding one labelled Segment per timed 'S' element.

    Raises:
        ValueError: If a 'start' or 'end' value is not a valid float.

    Any error raised by ET.parse (unreadable file, malformed XML) propagates.
    """
    root = ET.parse(xml_file).getroot()
    annotations = Annotation()

    for segment in root.findall('S'):
        audio = segment.find('AUDIO')
        if audio is None:
            # No timing information for this sentence: nothing to annotate.
            continue

        start, end = audio.get('start'), audio.get('end')
        if start is None or end is None:
            # Incomplete time span; float(None) would raise TypeError.
            continue

        # add Segment to the Annotation object
        annotations[Segment(float(start), float(end))] = segment.get('who')

    return annotations
if __name__ == "__main__":
    # Command-line entry point: convert one Pangloss XML file to RTTM and
    # print how many distinct speakers it names.
    parser = argparse.ArgumentParser(description='Parse a Pangloss XML annotation file. Converts pangloss xml to rttm format, supported by pyannote-audio.')
    parser.add_argument('xml_file', type=str, help='Path to the Pangloss XML annotation file')
    args = parser.parse_args()

    # Parse the XML and return annotations in pyannote.core.Annotation format
    annotations = parse_xml(args.xml_file)

    # The RTTM file is written next to the input, with the suffix replaced.
    rttm_path = Path(args.xml_file).with_suffix('.rttm')
    # Explicit UTF-8 so non-ASCII speaker labels are written the same way on
    # every platform instead of depending on the locale's default encoding.
    with rttm_path.open("w", encoding="utf-8") as rttm:
        annotations.write_rttm(rttm)

    print(count_speakers(args.xml_file))