import random

import music21
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from keras.utils import np_utils
from keras.callbacks import ModelCheckpoint


class MidiLSTM:
"""
An attempt at using deep LSTM network to learn from midi files and generates music thereafter.
This is based on notes alone and is intended for combination with markov chain models for duration.
"""
EOS = -1 # End of Song symbol
rest = 0 # Rest symbol
last_note = 129 # last note midi value (+1 so rest is 0)
# Composers dictionary (for comparison, generation, etc)
composers = {0: 'bach', 1: 'beethoven', 2: 'essenFolksong', 3: 'monteverdi',
4: 'oneills1850', 5: 'palestrina', 6: 'ryansMammoth', 7: 'trecento'}
vocabulary_size = 129 # 128 midi values + rest (originally 82 if we want to use previous weights)
input_dim = 1 # Only one note per time step

    def __init__(self, composer_index, timesteps=64, hidden_dim=256, batch_size=128):
        """
        Initializes a new agent that produces single notes with an LSTM
        and adds durations via a Markov chain model.
        :param composer_index: Composer index to associate with this agent
        :param timesteps: Number of time steps in the LSTM model (default 64)
        :param hidden_dim: Size of the hidden dimension of the LSTM model (default 256)
        :param batch_size: Number of samples per weight update (default 128)
        """
        if composer_index not in MidiLSTM.composers:
            raise ValueError("Cannot find specified composer in composer data")
        self.composer_idx = composer_index
        self.hidden_dim = hidden_dim  # Hidden dimensionality between time steps
        self.timesteps = timesteps  # How many time steps are used for prediction
        self.batch_size = batch_size  # How many samples are required for an update
        # Create the model (Sequential model)
        self.model = Sequential()
        # LSTM layer, maps the input sequence to a single vector of dimension hidden_dim
        self.model.add(LSTM(self.hidden_dim, input_shape=(self.timesteps, MidiLSTM.input_dim)))
        # Dropout layer (randomly zeroes activations during training to reduce overfitting)
        self.model.add(Dropout(0.15))
        # Final layer, project to the one-hot note vocabulary and softmax over it
        self.model.add(Dense(MidiLSTM.vocabulary_size, activation='softmax'))
        # Compile with categorical cross-entropy loss and the Adam optimizer
        self.model.compile(loss='categorical_crossentropy', optimizer='adam')
        # Checkpoint (for loading/saving weights after training)
        filepath = "weights-composer-" + str(composer_index) + "-{epoch:02d}-{loss:.4f}.hdf5"
        self.callbacks_list = [ModelCheckpoint(filepath, monitor='loss', verbose=1,
                                               save_best_only=True, mode='min')]
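
    # Tensor-shape sketch for the model above (assuming the defaults
    # timesteps=64, hidden_dim=256):
    #   input -> (batch, 64, 1)    one MIDI value per time step
    #   LSTM  -> (batch, 256)      only the last hidden state is returned
    #   Dense -> (batch, 129)      softmax over the note vocabulary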

    def load_weights(self, weight_file):
        """
        Loads the given weight file into the LSTM.
        """
        self.model.load_weights(weight_file)

    def train(self, epochs=100, n=20):
        """
        Trains the LSTM on n randomly chosen samples from the instantiated composer for the given number of epochs.
        """
        x_raw, _ = MidiLSTM._get_notes_from_composer(self.composer_idx, n)  # Get data (sample count unused here)
        x_train = list()
        y_train = list()
        for sample in x_raw:  # Refactor the training data into sequences of #timesteps
            x, y = MidiLSTM._sample2sequences(sample, self.timesteps)
            x_train += x
            y_train += y
        x, y = self._reshape_inputs(x_train, y_train)
        self.model.fit(x, y, epochs=epochs, batch_size=self.batch_size, callbacks=self.callbacks_list)

    def train_single(self, stream, epochs=2):
        """
        Trains the LSTM on the given stream for the given number of epochs.
        :param stream: music21 stream object
        :param epochs: Number of epochs to train (default 2)
        """
        sample = MidiLSTM.stream2inputs(stream)  # Convert to valid inputs
        x, y = MidiLSTM._sample2sequences(sample, self.timesteps)  # Convert to sequences
        if len(x) > 0 and len(y) > 0:  # Sanity check (need some input left after conversion)
            x, y = self._reshape_inputs(x, y)  # Reshape for the LSTM inputs
            self.model.fit(x, y, epochs=epochs, verbose=0)  # Fit the data
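
    # Online-training sketch: 'bach/bwv66.6' is a piece bundled with the
    # music21 corpus, so this should work out of the box:
    #   agent.train_single(music21.corpus.parse('bach/bwv66.6'), epochs=1)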

    def generate(self, sequence_length=None, iterations=200):
        """
        Generates a music sequence of the given length (if None is given (default), creates a sequence of
        length 2*timesteps). The number of iterations is how far to roll the information through the LSTM.
        :return: List of notes (MIDI values)
        """
        if sequence_length is None:  # Use 2*timesteps if no sequence length is given
            sequence_length = 2 * self.timesteps
        if iterations < sequence_length:
            iterations = sequence_length  # Ensure the random initialization is not included in the output
        # Generate a random starting pattern
        pattern = np.random.randint(MidiLSTM.rest, MidiLSTM.last_note, size=self.timesteps).tolist()
        result = list()  # Generated notes are collected here
        for _ in range(iterations):  # Iterate enough times
            x = np.array(pattern, dtype='float32')  # Reshape the data
            x = np.reshape(x, (1, self.timesteps, MidiLSTM.input_dim))  # One sample x timesteps x input_dim
            prediction = int(np.argmax(self.model.predict(x, verbose=0)))  # Most likely MIDI value
            pattern = pattern[1:] + [prediction]  # Slide the window: drop the oldest note, append the prediction
            result.append(prediction)  # Append to the result
        return result[-sequence_length:]  # Keep only the last sequence_length notes
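
    # One generation step, schematically (hypothetical values, timesteps=4):
    #   pattern [61, 63, 65, 66] --predict--> 68
    #   pattern becomes [63, 65, 66, 68]; 68 is appended to result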

    def _reshape_inputs(self, x, y):
        true_x = np.array(x, dtype='float32')  # Create an array of floats from the data
        true_x = np.reshape(true_x, (len(x), self.timesteps, MidiLSTM.input_dim))  # Reshape to match the LSTM input
        true_y = np_utils.to_categorical(y, num_classes=MidiLSTM.vocabulary_size)  # One-hot encode the targets
        return true_x, true_y
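
    # For example (hypothetical sizes): 100 windows of 64 notes give
    # true_x of shape (100, 64, 1) and true_y of shape (100, 129).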

    @staticmethod
    def _sample2sequences(sample, timesteps):
        """
        Converts a given sample to a list of inputs and expected outputs.
        Divides the sample into windows of size timesteps; the expected output
        for each window is the note that immediately follows it.
        :returns: A list of inputs and a list of expected outputs
        """
        x = list()
        y = list()
        sample_len = len(sample)
        for i in range(sample_len - timesteps):
            seq_in = sample[i:i + timesteps]
            seq_out = sample[i + timesteps]
            x.append(seq_in)
            y.append(seq_out)
        return x, y
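
    # Windowing example (hypothetical values): with timesteps=3,
    # sample [61, 63, 65, 66, 68] yields
    #   x = [[61, 63, 65], [63, 65, 66]]
    #   y = [66, 68]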

    @staticmethod
    def _to_midi_values(m21obj):
        """
        Converts a music21 object to its MIDI value + 1 (so that 0 is a rest).
        Only considers the first note of a chord.
        """
        if m21obj.isNote:
            return m21obj.pitch.midi + 1  # +1 so that 0 is a rest
        elif m21obj.isChord:  # Only consider the first note in a chord
            if len(m21obj.pitches) > 0:
                return m21obj.pitches[0].midi + 1  # +1 so that 0 is a rest
            else:  # Invalid
                return MidiLSTM.EOS
        elif m21obj.isRest:
            return MidiLSTM.rest
        return MidiLSTM.EOS
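
    # Mapping example: a Note at C4 (MIDI 60) -> 61, a Rest -> 0,
    # and an empty Chord -> -1 (EOS).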

    @staticmethod
    def stream2inputs(stream):
        """
        Converts a music21 stream to a list of MIDI values that matches the LSTM input.
        """
        inputs = list(stream.sorted.flat.getElementsByClass(["Note", "Chord", "Rest"]))
        return [MidiLSTM._to_midi_values(x) for x in inputs]
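
    # For example (the output values here are illustrative only):
    #   MidiLSTM.stream2inputs(music21.corpus.parse('bach/bwv66.6'))
    #   -> [67, 72, 71, ...]   one value per note/chord/rest, 0 for rests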

    @staticmethod
    def _get_notes_from_composer(composer_index, n=20):
        """
        Gets lists of notes for the given composer index.
        Randomly samples n pieces and extracts the MIDI values from them.
        If n is None, uses all the data.
        :param composer_index: Matches the composers dictionary in this class
        :param n: Number of samples
        :return: List of lists of notes, and how many samples there are in total
        """
        if composer_index not in MidiLSTM.composers:  # Sanity check
            raise ValueError("Composer index not found")
        files = music21.corpus.getComposer(MidiLSTM.composers[composer_index])  # File list
        if n is not None:
            files = random.sample(files, n)  # Randomly sample n files
        data_raw = list()  # Data will be kept here
        for f in files:
            try:
                mstream = music21.corpus.parse(f)  # Attempt to parse
                data_raw.append(MidiLSTM.stream2inputs(mstream))  # Convert to a MIDI list and save
            except Exception:
                continue  # Skip files that fail to parse
        return data_raw, len(data_raw)

    @staticmethod
    def to_midi_stream(notes, durations=None):
        """
        Converts a list of notes (and optional durations) to a playable MIDI track.
        If no durations are given, every note lasts one quarter note.
        :param notes: List of notes (MIDI values)
        :param durations: List of durations (quarter lengths), where len(durations) == len(notes)
        :return: music21 Stream for the track
        """
        stream = music21.stream.Stream()
        piano = music21.stream.Part()
        piano.insert(music21.instrument.Piano())
        if durations is None:
            durations = [1.0] * len(notes)
        for m, d in zip(notes, durations):
            note = music21.note.Note(m - 1) if m > 0 else music21.note.Rest()
            note.duration.quarterLength = d
            piano.append(note)
        stream.append(piano)
        # stream.show('midi')
        return stream
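
    # Usage sketch (hypothetical values): a C#4 (MIDI 61, encoded as 62 here),
    # a rest, and an F4 (MIDI 65, encoded as 66), each half a quarter note long:
    #   MidiLSTM.to_midi_stream([62, 0, 66], durations=[0.5, 0.5, 0.5])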


# Uncomment to see it in action (results may be poor :))
'''
agent = MidiLSTM(7)  # 7 -> 'trecento'
agent.load_weights("weights/weights-composer-7-43-2.4315.hdf5")
agent.train(epochs=100, n=4)
result = agent.generate()
print(result)
MidiLSTM.to_midi_stream(result)
'''
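
# A minimal end-to-end sketch; the composer index, epoch count, and the output
# path "generated.mid" are arbitrary choices, and a near-untrained model will
# produce mostly noise:
'''
agent = MidiLSTM(0)  # 0 -> 'bach'
agent.train(epochs=1, n=2)  # Tiny smoke-test run
notes = agent.generate(sequence_length=32)
MidiLSTM.to_midi_stream(notes).write('midi', fp='generated.mid')  # music21 Stream.write
'''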
# EOF