soundListener.py
"""
A helper module to visualize the image.pkl s generated by the DeepSearch algorithm.
The .jpg images in the results is not generated by this code.
"""
import os
import pickle
import matplotlib.pyplot as plt
import librosa
import scipy.io.wavfile
from scipy import signal
import numpy as np
import soundfile as sf
def select_directory(view_DSbatched):
    # Let the user pick one run directory from DSBatched/ or Results/.
    if view_DSbatched:
        out_most = "./DSBatched/"
        cut = -7
    else:
        out_most = "./Results/"
        cut = -1
    directory_list = os.listdir(out_most)
    if len(directory_list) == 0:
        print("No directory found")
        return
    for dir_number in range(len(directory_list)):
        # Display names with "_" converted back to ":" and the trailing suffix trimmed.
        print("{0:02d}\t{1}".format(dir_number, directory_list[dir_number][:cut].replace("_", ":")))
    selection = int(input("Type in the index of a directory\n>>> "))
    return out_most + directory_list[selection]
def load_pkl(path):
    # List the .pkl files in the chosen directory (skipping names that start with "da")
    # and return the full paths of the ones the user selects.
    pkl_list = [file for file in os.listdir(path) if file[-3:] == "pkl" and file[0:2] != "da"]
    if len(pkl_list) == 0:
        print("No .pkl file found")
        return
    for pkl_number in range(len(pkl_list)):
        print("{0:02d}\t{1}".format(pkl_number, pkl_list[pkl_number][:-4]))
    selections = [int(x) for x in input("Type in the indices of the files, separated by spaces\n>>> ").split()]
    return [path + "/" + pkl_list[selection] for selection in selections]
def read_wave_amplitude(wav, label):
    # Compute a magnitude spectrogram of the original recording with scipy,
    # averaging the stereo channels down to mono first.
    path = "audios/" + label + "/" + str(wav) + ".wav"
    sr, audio = scipy.io.wavfile.read(path)
    f, t, Sxx = signal.spectrogram(np.mean(audio, axis=1), sr, mode='magnitude')
    return Sxx, f, t
def read_wave(wav, label):
    # Load the original recording and recover the STFT phase, which is needed
    # to turn the magnitude stored in the .pkl back into a waveform.
    path = "audios/" + label + "/" + str(wav) + ".wav"
    audio, sr = librosa.load(path, sr=None)
    n = len(audio)
    n_fft = 204
    # Pad so the inverse STFT can later be trimmed back to exactly n samples.
    audio_pad = librosa.util.fix_length(audio, size=n + n_fft // 2)
    stft = librosa.stft(audio_pad, n_fft=n_fft)
    _, phase = librosa.magphase(stft)
    return n, phase, sr
def spec2sig(spec, name, sr, n):
    # Invert the complex spectrogram back to a time-domain signal and write it out as a .wav file.
    audio = librosa.istft(spec, length=n)
    newDir = './Audio_Results/audio_from_spec'
    if not os.path.exists(newDir):
        os.makedirs(newDir)
    sf.write(f"{newDir}/{name}.wav", audio, sr)
if __name__ == "__main__":
    view_DSbatched = bool(int(input("0: View Organized Results\n1: View DSBatched\n>>> ")))
    directory = select_directory(view_DSbatched)
    files = load_pkl(directory)
    imgs = []
    phases = []
    labels = []
    inds = []
    ns = []
    srs = []
    for file_path in files:
        # Each .pkl holds one spectrogram image with a leading batch dimension.
        with open(file_path, 'rb') as file:
            temp = pickle.load(file)
            size = temp.shape[1:]
            imgs.append(temp.reshape(size))
        # File names follow "<label>_#####.pkl"; recover the label and wav index.
        name = file_path.split("/")[-1]
        label, wav = name[0:-10], int(name[-9:-4])
        n, phase, sr = read_wave(wav, label)
        ns.append(n)
        phases.append(phase)
        labels.append(label)
        inds.append(wav)
        srs.append(sr)
    for i, image in enumerate(imgs):
        # The stored image is assumed to hold dB values normalized to [0, 1];
        # map them back to [-60, 30] dB, convert to amplitude, and reattach
        # the phase of the original recording before resynthesis.
        amp = librosa.db_to_amplitude(image * 90 - 60)
        spec = amp * phases[i]
        spec2sig(spec, f"{labels[i]}_{inds[i]}", srs[i], ns[i])