Skip to content

Commit

Permalink
Fix filenames for synthesized mixtures (TUT Rare sound events 2017 development dataset)
Browse files Browse the repository at this point in the history
  • Loading branch information
toni-heittola committed Mar 22, 2017
1 parent a91c4dc commit 29fb91a
Showing 1 changed file with 19 additions and 17 deletions.
36 changes: 19 additions & 17 deletions dcase_framework/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -1914,8 +1914,8 @@ def synthesize(self):
}
if item['event_present']:
event_list_item['event_label'] = item['event_class']
event_list_item['event_onset'] = float(item['event_offset_seconds'])
event_list_item['event_offset'] = float(item['event_offset_seconds'] + item['event_length_seconds'])
event_list_item['event_onset'] = float(item['event_start_in_mixture_seconds'])
event_list_item['event_offset'] = float(item['event_start_in_mixture_seconds'] + item['event_length_seconds'])

event_list.append(MetaDataItem(event_list_item))
event_list.save()
Expand Down Expand Up @@ -2026,20 +2026,20 @@ def rmse(y):
# For recipes, we have to provide amplitude scaling factors instead of SNRs, because the latter are more ambiguous.
# So, go through the files, measure their levels, and calculate the scaling factors.
mixture_recipes = ParameterListFile()
for mixture_id, (bg, event_presence_flag, event_offset_seconds, ebr, event_instance_id) in tqdm(
for mixture_id, (bg, event_presence_flag, event_start_in_mixture_seconds, ebr, event_instance_id) in tqdm(
enumerate(zip(bgs, event_presence_flags, event_offsets_seconds, target_ebrs, event_instance_ids)),
desc="{0: <25s}".format('Generate recipe'),
file=sys.stdout,
leave=False,
total=len(bgs)):

# Read the bgs and events, measure their energies, find amplitude scaling factors
mixture_recipe = DottedDict({
mixture_recipe = {
'bg_path': bg['filepath'],
'bg_classname': bg['classname'],
'event_present': bool(event_presence_flag),
'ebr': float(ebr)
})
}

if event_presence_flag:
# We have an event assigned
Expand All @@ -2065,7 +2065,7 @@ def rmse(y):
event_audio = event_audio[segment_start_samples:segment_end_samples]

# Let's calculate the levels of bgs also at the location of the event only
eventful_part_of_bg = bg_audio[int(event_offset_seconds * fs):int(event_offset_seconds * fs + len(event_audio))]
eventful_part_of_bg = bg_audio[int(event_start_in_mixture_seconds * fs):int(event_start_in_mixture_seconds * fs + len(event_audio))]

if eventful_part_of_bg.shape[0] == 0:
embed()
Expand All @@ -2075,7 +2075,7 @@ def rmse(y):
# Store information
mixture_recipe['event_path'] = events[int(event_instance_id)]['audio_filepath']
mixture_recipe['event_class'] = events[int(event_instance_id)]['classname']
mixture_recipe['event_offset_seconds'] = float(event_offset_seconds)
mixture_recipe['event_start_in_mixture_seconds'] = float(event_start_in_mixture_seconds)
mixture_recipe['event_length_seconds'] = float(events[int(event_instance_id)]['length_seconds'])
mixture_recipe['scaling_factor'] = float(scaling_factor)
mixture_recipe['segment_start_seconds'] = events[int(event_instance_id)]['segment'][0]
Expand All @@ -2088,9 +2088,11 @@ def rmse(y):
# Generate mixture annotation
if event_presence_flag:
mixture_recipe['annotation_string'] = \
mixture_recipe['mixture_audio_filename'] + '\t' + mixture_recipe['event_class'] + '\t' + \
str(mixture_recipe['event_offset_seconds']) + '\t' + \
str(mixture_recipe['event_offset_seconds'] + mixture_recipe['event_length_seconds'])
mixture_recipe['mixture_audio_filename'] + '\t' + \
"{0:.14f}".format(mixture_recipe['event_start_in_mixture_seconds']) + '\t' + \
"{0:.14f}".format(mixture_recipe['event_start_in_mixture_seconds'] + mixture_recipe['event_length_seconds']) + '\t' + \
mixture_recipe['event_class']

else:
mixture_recipe['annotation_string'] = mixture_recipe['mixture_audio_filename'] + '\t' + 'None' + '\t0\t30'

Expand Down Expand Up @@ -2129,29 +2131,29 @@ def _synthesize_mixture(self, mixture_recipe, params, background_audio_path, eve
segment_end_samples = int(mixture_recipe['segment_end_seconds'] * params['mixture']['fs'])
event_audio_data = event_audio_data[segment_start_samples:segment_end_samples]

event_offset_samples = int(mixture_recipe['event_offset_seconds'] * params['mixture']['fs'])
event_start_in_mixture_samples = int(mixture_recipe['event_start_in_mixture_seconds'] * params['mixture']['fs'])
scaling_factor = mixture_recipe['scaling_factor']

# Mix event into background audio
mixture = self._mix(bg_audio_data=bg_audio_data,
event_audio_data=event_audio_data,
event_offset_samples=event_offset_samples,
event_start_in_mixture_samples=event_start_in_mixture_samples,
scaling_factor=scaling_factor,
magic_anticlipping_factor=params['mixture']['anticlipping_factor'])
else:
mixture = params['mixture']['anticlipping_factor'] * bg_audio_data

return mixture

def _mix(self, bg_audio_data, event_audio_data, event_offset_samples, scaling_factor, magic_anticlipping_factor):
def _mix(self, bg_audio_data, event_audio_data, event_start_in_mixture_samples, scaling_factor, magic_anticlipping_factor):
"""Mix numpy arrays of background and event audio (mono, non-matching lengths supported, sampling frequency
better be the same, no operation in terms of seconds is performed though)
Parameters
----------
bg_audio_data : numpy.array
event_audio_data : numpy.array
event_offset_samples : float
event_start_in_mixture_samples : float
scaling_factor : float
magic_anticlipping_factor : float
Expand All @@ -2169,17 +2171,17 @@ def _mix(self, bg_audio_data, event_audio_data, event_offset_samples, scaling_fa

# Check that the offset is not too long
longest_possible_offset = len(bg_audio_data) - len(event_audio_data)
if event_offset_samples > longest_possible_offset:
if event_start_in_mixture_samples > longest_possible_offset:
message = '{name}: Wrongly generated event offset: event tries to go outside the boundaries of the bg.'.format(name=self.__class__.__name__)
self.logger.exception(message)
raise AssertionError(message)

# Measure how much to pad from the right
tail_length = len(bg_audio_data) - len(event_audio_data) - event_offset_samples
tail_length = len(bg_audio_data) - len(event_audio_data) - event_start_in_mixture_samples

# Pad zeros at the beginning of event signal
padded_event = numpy.pad(event_audio_data,
pad_width=((event_offset_samples, tail_length)),
pad_width=((event_start_in_mixture_samples, tail_length)),
mode='constant',
constant_values=0)

Expand Down

0 comments on commit 29fb91a

Please sign in to comment.