-
Notifications
You must be signed in to change notification settings - Fork 1
/
lamb_explorer_and_tutorial.py
246 lines (178 loc) · 7.68 KB
/
lamb_explorer_and_tutorial.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
# -*- coding: utf-8 -*-
"""LAMB_Explorer_and_Tutorial.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1sXM5OWZXkslOocmF4PzZxZsiPYV8g00Z
# Los Angeles MIDI MetaBase (LAMB) Explorer and Tutorial Notebook (ver. 1.0)
***
## Kilo-Scale MIDI MIR Meta Database
## https://github.com/asigalov61/Los-Angeles-MIDI-Metabase
***
### Project Los Angeles
### Tegridy Code 2021
***
# Setup Environment
"""
# Install tegridy-tools
!git clone https://github.com/asigalov61/tegridy-tools
# Import all needed modules
os.chdir('/content/')
print('Loading needed modules. Please wait...')
import sys
import os
import copy
import secrets
import random
import difflib
import json
from pathlib import Path
import pickle
os.chdir('/content/')
if not os.path.exists('/content/Dataset'):
os.makedirs('/content/Dataset')
if not os.path.exists('/content/Output'):
os.makedirs('/content/Output')
os.chdir('/content/tegridy-tools/tegridy-tools')
import TMIDI
import TMIDIX
import MIDI
import HaystackSearch
import tqdm
os.chdir('/content/')
print('Loading complete. Enjoy! :)')
"""# Download LAKH MIDI Dataset"""
# Commented out IPython magic to ensure Python compatibility.
# Download and untar LAKH MIDI Dataset
# Both shell commands run in the current working directory. Later code reads
# the extracted data from /content/lmd_full, so this is expected to run with
# /content/ as the working directory (the commented-out %cd lines below).
# NOTE(review): the archive is presumably laid out as a top-level lmd_full/
# folder - confirm after the first extraction.
# %cd /content/
# Fetch the full LAKH MIDI archive.
!wget http://hog.ee.columbia.edu/craffel/lmd/lmd_full.tar.gz
# Unpack it; -v prints the name of every extracted file.
!tar -xvf lmd_full.tar.gz
# %cd /content/
# Commented out IPython magic to ensure Python compatibility.
# Walk the extracted dataset, collect the full path of every file in it,
# report how many were found and save the list with TMIDIX.
print('Loading MIDI files...')
print('This may take a while on a large dataset in particular.')

# %cd /content/
dataset_addr = "/content/lmd_full"
os.chdir(dataset_addr)

# One flat list with the joined path of every file below dataset_addr.
filez = [
    os.path.join(folder, name)
    for folder, _subfolders, names in os.walk(dataset_addr)
    for name in names
]

print('=' * 70)
filez_l = len(filez)
print(filez_l)

TMIDIX.Tegridy_Any_Pickle_File_Writer(filez, '/content/filez.pickle')
"""# LAMB Metabase"""
# Commented out IPython magic to ensure Python compatibility.
# Download and unzip Los Angeles MIDI Metabase (LAMB)
# %cd /content/
print('=' * 70)
print('Downloading pre-trained dataset-model...Please wait...')
print('=' * 70)
!wget 'https://github.com/asigalov61/Los-Angeles-MIDI-Metabase/raw/main/Metabase/Los-Angeles-MIDI-Metabase-210000.zip.001'
!wget 'https://github.com/asigalov61/Los-Angeles-MIDI-Metabase/raw/main/Metabase/Los-Angeles-MIDI-Metabase-210000.zip.002'
!wget 'https://github.com/asigalov61/Los-Angeles-MIDI-Metabase/raw/main/Metabase/Los-Angeles-MIDI-Metabase-210000.zip.003'
!wget 'https://github.com/asigalov61/Los-Angeles-MIDI-Metabase/raw/main/Metabase/Los-Angeles-MIDI-Metabase-210000.zip.004'
!wget 'https://github.com/asigalov61/Los-Angeles-MIDI-Metabase/raw/main/Metabase/Los-Angeles-MIDI-Metabase-210000.zip.005'
!wget 'https://github.com/asigalov61/Los-Angeles-MIDI-Metabase/raw/main/Metabase/Los-Angeles-MIDI-Metabase-210000.zip.006'
!wget 'https://github.com/asigalov61/Los-Angeles-MIDI-Metabase/raw/main/Metabase/Los-Angeles-MIDI-Metabase-210000.zip.007'
!wget 'https://github.com/asigalov61/Los-Angeles-MIDI-Metabase/raw/main/Metabase/Los-Angeles-MIDI-Metabase-210000.zip.008'
!cat Los-Angeles-MIDI-Metabase-210000.zip* > Los-Angeles-MIDI-Metabase-210000.zip
print('=' * 70)
!unzip -j Los-Angeles-MIDI-Metabase-210000.zip
print('=' * 70)
print('Done! Enjoy! :)')
print('=' * 70)
# %cd /content/
# Load the LAMB pickle file
LAMB = TMIDIX.Tegridy_Any_Pickle_File_Reader('/content/Los-Angeles-MIDI-Metabase-210000')
print('Done! :)')
"""# LAMB use example code/tutorial"""
# Now we are going to have some fun... :)
# In the following exercise we are going to use LAMB to classify any source MIDI.
# We are going to make a simple, yet powerful MIDI classifier (80%-90% accuracy)
# Let's begin...

# Load some MIDI file (absolutely any will do)
song = TMIDIX.Optimus_MIDI_TXT_Processor('/content/tegridy-tools/tegridy-tools/seed-lyrics.mid',
                                         MIDI_channel=-1,
                                         MIDI_patch=range(0, 127))

# Compute some basic stats about it...
# song[2] is used as the list of events; fields 1..5 of every event are
# averaged below.
# NOTE(review): the field meanings (start time, duration, channel, pitch,
# velocity) are taken from the variable names - confirm against TMIDIX.
song_events = song[2]
song_len = len(song_events)

if song_len == 0:
    # Every average divides by song_len; stop with a clear message instead of
    # an unexplained ZeroDivisionError.
    raise ValueError('Source MIDI contains no events; cannot compute its signature.')

# One truncated integer average per event field, in field order 1..5.
(song_start_times_avg,
 song_durations_avg,
 song_channels_avg,
 song_pitches_avg,
 song_velocities_avg) = [int(sum(event[field] for event in song_events) / song_len)
                         for field in range(1, 6)]

# ...and create a matching MIDI signature to compare against the dataset
Source_MIDI_Signature = [song_len, song_start_times_avg,
                         song_durations_avg, song_channels_avg,
                         song_pitches_avg,
                         song_velocities_avg]

# Check the results.
# There should be 6 entries (the event count plus five averages) if you did
# everything right
print('Source MIDI Signature:', Source_MIDI_Signature)
# Here is a simple MIDI classifier/code...
# Each dataset entry is reduced to a six-number signature, the signatures are
# rendered as space-separated text, and difflib scores how similar the text
# of each entry is to the text of the source signature. The entry with the
# highest score is reported.

sigs_match_ratings = []
sig_match_rating2 = []  # not used below; kept so the name stays defined

# The source side of the comparison never changes, so render it once rather
# than once per dataset entry. Only fields 2 onward take part in the match
# (the first two signature fields are left out on both sides).
source_sig_text = ' '.join(map(str, Source_MIDI_Signature[2:]))

# NOTE(review): LAMB[2][1] is read as per-file stats (a count followed by
# five totals) and LAMB[2][0] as per-file info - inferred from how they are
# indexed here; confirm against the metabase layout.
lamb_stats = LAMB[2][1]

# Entries are scanned starting at index 1 (index 0 is skipped).
for i in tqdm.tqdm(range(1, len(lamb_stats))):

    # Computing signatures for each dataset file...
    entry = lamb_stats[i]
    divisor = entry[0] + 1  # count + 1, so an entry with a zero count still divides
    Dataset_MIDI_Signature = [int(entry[0])] + [int(entry[field] / divisor)
                                                for field in range(1, 6)]

    # Computing ratings
    dataset_sig_text = ' '.join(map(str, Dataset_MIDI_Signature[2:]))
    sig_match_rating = difflib.SequenceMatcher(None, source_sig_text, dataset_sig_text).ratio()
    sigs_match_ratings.append(sig_match_rating)

# Computing some info
# The best rating and its position are looked up once. Ratings start at
# dataset index 1, hence the + 1. On ties the first best entry wins.
best_rating = max(sigs_match_ratings)
best_index = sigs_match_ratings.index(best_rating) + 1

FNAME = LAMB[2][1][best_index]
INFO = LAMB[2][0][best_index][1:3]

# Info printout...
print(chr(10))
print(FNAME)
print(INFO[0])
print(INFO[1][0])
print(INFO[1][1])
print(best_index)
print(best_rating)
# Simple code to search LAKH MIDI Dataset directory for MIDI matches
# If nothing found, MIDI is most likely NOT from LAKH MIDI Dataset
from pathlib import Path

print('Searching LAKH for:', INFO[0])

# Collect every entry below the dataset folder that matches the best hit.
lakh_hits = list(Path('/content/lmd_full').rglob(INFO[0]))

# `path` ends up as the last hit, or stays '' when there were none.
path = ''
for path in lakh_hits:
    print('File found!!!')
    print('The file below is the best possible match with the source MIDI:')
    print(path)

if not lakh_hits:
    print('Could not locate the file. Check the code and the dataset.')
"""# Plot and Play your MIDI here"""
# stuff for listening and plotting the results
# System package: the fluidsynth synthesizer.
# NOTE(review): the playback code below loads
# /usr/share/sounds/sf2/FluidR3_GM.sf2 - presumably installed together with
# this package; confirm on the target image.
!apt install fluidsynth #Pip does not work for some reason. Only apt works
# Python packages imported by the playback code below (midi2audio for
# rendering MIDI to audio, pretty_midi for the piano roll).
!pip install midi2audio
!pip install pretty_midi
# Plot and Play your MIDI file here...
# Draws the piano roll of one MIDI file, renders the file to a WAV with
# FluidSynth and hands the WAV to an IPython audio player.

# Full path to any MIDI file...
MIDI_FILE_TO_PLAY = '/content/tegridy-tools/tegridy-tools/seed3.mid'

# Code...
from midi2audio import FluidSynth
from IPython.display import display, Javascript, HTML, Audio
import pretty_midi
import librosa.display
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d
import numpy as np

os.chdir('/content/')

# Settings shared by the plot, the synthesizer and the player.
sample_rate = 16000
soundfont = "/usr/share/sounds/sf2/FluidR3_GM.sf2"
wav_path = '/content/composition' + '.wav'

print('Synthesizing the last output MIDI... ')
fname = str(MIDI_FILE_TO_PLAY)
fn = os.path.basename(MIDI_FILE_TO_PLAY)
# Plot title: the file name up to its first dot.
fn1 = fn.partition('.')[0]

print('Plotting the composition. Please wait...')
pm = pretty_midi.PrettyMIDI(fname)

# Retrieve piano roll of the MIDI file
piano_roll = pm.get_piano_roll()

plt.figure(figsize=(14, 5))
librosa.display.specshow(piano_roll,
                         x_axis='time',
                         y_axis='cqt_note',
                         fmin=1,
                         hop_length=160,
                         sr=sample_rate,
                         cmap=plt.cm.hot)
plt.title(fn1)

# Render the MIDI to a WAV file, then expose it as the cell's audio widget.
synth = FluidSynth(soundfont, sample_rate)
synth.midi_to_audio(fname, wav_path)
Audio(wav_path, rate=sample_rate)