forked from CPJKU/madmom
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathSuperFluxNN
executable file
·119 lines (93 loc) · 4.35 KB
/
SuperFluxNN
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#!/usr/bin/env python
# encoding: utf-8
"""
SuperFlux with neural network based peak picking onset detection algorithm.
"""
from __future__ import absolute_import, division, print_function
import argparse
import numpy as np
from madmom.audio import (FramedSignalProcessor,
LogarithmicSpectrogramProcessor, SignalProcessor,
SpectrogramDifferenceProcessor)
from madmom.audio.filters import FilterbankProcessor, LogarithmicFilterbank
from madmom.features import (ActivationsProcessor, OnsetPeakPickingProcessor,
SpectralOnsetProcessor)
from madmom.io import write_onsets
from madmom.ml.nn import NeuralNetworkEnsemble
from madmom.models import ONSETS_BRNN_PP
from madmom.processors import IOProcessor, io_arguments
def _build_parser():
    """
    Create the command line parser of the SuperFluxNN program.

    The options are registered in this order: version flag, input/output
    options, activation options, signal processing options and finally the
    peak-picking options.

    Returns
    -------
    parser : argparse.ArgumentParser
        Parser with all SuperFluxNN options added.

    """
    # help text shown verbatim (RawDescriptionHelpFormatter keeps the layout)
    usage_text = '''
The SuperFluxNN program detects all onsets in an audio file with the
SuperFlux algorithm with neural network based peak-picking as described in:
"Enhanced Peak Picking for Onset Detection with Recurrent Neural Networks"
Sebastian Böck, Jan Schlüter and Gerhard Widmer.
Proceedings of the 6th International Workshop on Machine Learning and
Music (MML), 2013.
Please note that this implementation uses 100 frames per second (instead
of 200), because it is faster and produces highly comparable results.
This program can be run in 'single' file mode to process a single audio
file and write the detected onsets to STDOUT or the given output file.
$ SuperFluxNN single INFILE [-o OUTFILE]
If multiple audio files should be processed, the program can also be run
in 'batch' mode to save the detected onsets to files with the given suffix.
$ SuperFluxNN batch [-o OUTPUT_DIR] [-s OUTPUT_SUFFIX] FILES
If no output directory is given, the program writes the files with the
detected onsets to the same location as the audio files.
The 'pickle' mode can be used to store the used parameters to be able to
exactly reproduce experiments.
'''
    parser = argparse.ArgumentParser(
        description=usage_text,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    # version information
    parser.add_argument('--version', action='version',
                        version='SuperFluxNN.2016')
    # input/output related options
    io_arguments(parser, output_suffix='.onsets.txt')
    ActivationsProcessor.add_arguments(parser)
    # signal processing related options
    SignalProcessor.add_arguments(parser, norm=False, gain=0)
    FramedSignalProcessor.add_arguments(parser)
    FilterbankProcessor.add_arguments(
        parser, num_bands=24, fmin=30, fmax=17000, norm_filters=False)
    LogarithmicSpectrogramProcessor.add_arguments(
        parser, log=True, mul=1, add=1)
    SpectrogramDifferenceProcessor.add_arguments(
        parser, diff_ratio=0.5, diff_max_bins=3, positive_diffs=True)
    # peak-picking related options
    OnsetPeakPickingProcessor.add_arguments(
        parser, threshold=0.4, smooth=0.07, combine=0.04, delay=0)
    return parser


def main():
    """
    Run the SuperFluxNN command line program.

    Parses the command line, forces the settings which must not be changed
    by the user, builds the input and output processing chains and hands
    the resulting IOProcessor to the function selected on the command line
    (`args.func`).

    """
    args = _build_parser().parse_args()

    # `vars(args)` is the namespace's own attribute dict, so updating it is
    # the same as setting the attributes on `args` one by one
    options = vars(args)
    # immutable defaults, these always override the parsed values
    options.update(num_channels=1,
                   fps=100,
                   onset_method='superflux',
                   filterbank=LogarithmicFilterbank)

    # show the effective arguments if requested
    if args.verbose:
        print(args)

    # input side: either read stored activations or compute them
    if args.load:
        in_processor = ActivationsProcessor(mode='r', **options)
    else:
        in_processor = SpectralOnsetProcessor(**options)

    # output side: either store the activations or detect the onsets
    if args.save:
        out_processor = ActivationsProcessor(mode='w', **options)
    else:
        # ensemble of neural networks loaded from the ONSETS_BRNN_PP models
        ensemble = NeuralNetworkEnsemble.load(ONSETS_BRNN_PP, **options)
        # peak-picking stage which reports the onsets
        peak_picker = OnsetPeakPickingProcessor(**options)
        # Note: np.atleast_2d and np.transpose are needed before the
        #       networks, since they expect the data in 2D (1D means
        #       framewise processing)
        out_processor = [np.atleast_2d, np.transpose, ensemble, peak_picker,
                         write_onsets]

    # combine both sides and run the selected processing function
    args.func(IOProcessor(in_processor, out_processor), **options)
# only run the program when this file is executed as a script, so that
# importing it has no side effects
if __name__ == '__main__':
    main()