-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
75 lines (52 loc) · 2.96 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import librosa
from src import spectrogram_operations, error_metrics, config_env, pca_recomposition, pca_decomposition
import soundfile as sf
import os
# STFT and PCA-basis parameters, read from environmentVars.ini next to this file.
repo_root = os.path.dirname(os.path.abspath(__file__))
env = os.path.join(repo_root, "environmentVars.ini")
# NOTE(review): presumably this exports the ini entries into os.environ, since
# the settings are read from there below -- confirm in src/config_env.
config_env.load_ini_env(env)


def _int_setting(name):
    """Return the environment setting *name* converted to an int.

    Raises:
        KeyError: if the setting is not present in the environment. The
            message names the missing key and the ini file that was loaded,
            instead of the opaque ``TypeError`` that ``int(None)`` produces.
        ValueError: if the value is not a valid integer literal.
    """
    raw_value = os.environ.get(name)
    if raw_value is None:
        raise KeyError(
            f"required setting {name!r} is not set (expected it from {env})"
        )
    return int(raw_value)


FRAME_SIZE = _int_setting("frame_size")
SAMPLE_RATE = _int_setting("sample_rate")
NUM_COMPONENTS = _int_setting("num_pca_components")
FRAMES_PER_SEGMENT = _int_setting("frames_per_segment")
# The hop is half a frame; integer division keeps it an int without a float round-trip.
HOP_SIZE = FRAME_SIZE // 2
# Things you might want to do
# 1. Perform PCA (bases are neither plotted nor saved as audio here).
pca_decomposition.generate_pca_bases(
    plot_bases=False,
    save_bases_as_audio=False,
)
# 2. Visualize the bases (optional; uncomment to enable).
# spectrogram_operations.plot_pca_bases()
# Load the input song. The path is built with os.path.join so that a separator
# always sits between the repository directory and the file name (plain string
# concatenation produced ".../<repo>songname.wav").
file = os.path.join(repo_root, "songname.wav")
# Base name without directory or extension; used to label the output file.
songname = os.path.splitext(os.path.basename(file))[0]
# Decode at the configured sample rate: librosa.load otherwise resamples to its
# own default, while this script plots and writes the signal with SAMPLE_RATE.
song, _ = librosa.load(file, sr=SAMPLE_RATE)
def _to_reconstruction_layout(spectrogram):
    """Truncate *spectrogram*, then pass it through array_to_spectrogram_shape."""
    truncated = spectrogram_operations.truncate_spectrogram(spectrogram)
    return spectrogram_operations.array_to_spectrogram_shape(truncated)


# 3. Obtain the log power spectrum; the phase is kept for time-domain recovery.
Log_power, phase_spectrogram = spectrogram_operations.log_power_spectrogram(song)
spectrogram_operations.plot_spectrogram(
    Log_power,
    SAMPLE_RATE,
    HOP_SIZE,
    "Log Power Spectrogram",
)

# 4. Project the song onto the bases and recompose it to the spectrogram shape.
reconstructed_spectrogram = pca_recomposition.recompose_spectrogram(
    song,
    plot_spectrogram=True,
)

# 5. Reshape the log power spectrum so it can be compared numerically against
#    the reconstruction.
Log_power = _to_reconstruction_layout(Log_power)

# Report the difference between the reconstruction and the original.
rmse_value = error_metrics.rmse(reconstructed_spectrogram, Log_power)
print("RMSE: " + str(rmse_value))
mae_value = error_metrics.mae(reconstructed_spectrogram, Log_power)
print("MAE: " + str(mae_value))

# 6. Convert the spectrogram back to sound: first give the phase spectrogram
#    the same treatment as the log power spectrum above.
phase_spectrogram = _to_reconstruction_layout(phase_spectrogram)
# Optional reference output: round-trip the original song through STFT/ISTFT and save it for comparison (uncomment to enable)
# spectrogram = librosa.stft(song, n_fft=FRAME_SIZE, hop_length=HOP_SIZE)
# original_audio = librosa.istft(spectrogram, n_fft=FRAME_SIZE, hop_length=HOP_SIZE)
# sf.write(repo_root + "/Reconstructions/" + #encode environment configuration info in file name
# songname + "_nc" + str(NUM_COMPONENTS)
# + "_fs" + str(FRAME_SIZE)
# + "_FPS" + str(FRAMES_PER_SEGMENT)
# + "_istft.wav", original_audio, samplerate=SAMPLE_RATE)
# Reconstructed audio: combine the recomposed log power spectrogram with the
# phase kept from the original, then invert the STFT.
reconstructed_spectrogram = spectrogram_operations.LogPowerPhase_to_ComplexSpectrogram(
    reconstructed_spectrogram, phase_spectrogram
)
reconstructed_audio = librosa.istft(
    reconstructed_spectrogram, n_fft=FRAME_SIZE, hop_length=HOP_SIZE
)

# Make sure the output directory exists, so the write cannot fail on a missing
# folder after all the computation above has already been done.
output_dir = os.path.join(repo_root, "Reconstructions")
os.makedirs(output_dir, exist_ok=True)

# Encode the environment configuration in the file name.
output_name = (
    f"{songname}_nc{NUM_COMPONENTS}"
    f"_fs{FRAME_SIZE}"
    f"_FPS{FRAMES_PER_SEGMENT}"
    "_reconstructed.wav"
)
sf.write(
    os.path.join(output_dir, output_name),
    reconstructed_audio,
    samplerate=SAMPLE_RATE,
)