-
Notifications
You must be signed in to change notification settings - Fork 0
/
stft.py
76 lines (62 loc) · 2.5 KB
/
stft.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import librosa
import tensorflow as tf
import scipy
import scipy.io.wavfile as wavf
import numpy as np
def stft_tf(wav, win_length, hop_length, n_fft, window='hann', mode='REFLECT'):
    """Build a TensorFlow op computing a centered short-time Fourier transform.

    Port of ``librosa.stft(..., center=True)`` to TensorFlow 1.x. According to
    the original author the result agrees with librosa to about 1e-6, with the
    output laid out frame-major (i.e. transposed relative to librosa).
    Reference: https://github.com/zhang-wy15/stft_from_librosa_to_tensorflow

    Args:
        wav: Signal to transform. It is padded with a single
            ``[before, after]`` pair, so it must be rank 1.
        win_length: Length of the analysis window in samples; ``None`` means
            ``n_fft``.
        hop_length: Step between successive frames in samples; ``None`` means
            ``win_length // 4``.
        n_fft: FFT size. Each frame is ``n_fft`` samples long.
        window: Window specification forwarded to ``scipy.signal.get_window``.
        mode: Padding mode forwarded to ``tf.pad`` when centering the frames.

    Returns:
        A complex64 tensor with one row per frame and ``1 + n_fft // 2``
        columns (the non-negative frequency bins).

    Raises:
        ValueError: If ``win_length`` is larger than ``n_fft``.
    """
    # Imported here because a bare ``import scipy`` does not guarantee that
    # the ``scipy.signal`` submodule has been loaded.
    from scipy.signal import get_window

    # By default, use the entire frame.
    if win_length is None:
        win_length = n_fft

    # Set the default hop if it is not already specified.
    if hop_length is None:
        hop_length = int(win_length // 4)

    if win_length > n_fft:
        raise ValueError(
            "win_length ({}) must not exceed n_fft ({})".format(win_length, n_fft)
        )

    fft_window = get_window(window, win_length, fftbins=True)

    # Zero-pad the window out to exactly n_fft samples. The right side takes
    # the remainder so that an odd (n_fft - win_length) still yields n_fft
    # samples instead of n_fft - 1, which could not broadcast over the frames.
    pad_total = n_fft - win_length
    pad_left = pad_total // 2
    pad_right = pad_total - pad_left
    fft_window = np.pad(
        fft_window,
        (pad_left, pad_right),
        mode='constant',
        constant_values=(0, 0),
    )

    # Pad the time series so that frames are centered (always on, to match
    # librosa's center=True).
    wav = tf.pad(wav, [[n_fft // 2, n_fft // 2]], mode=mode)

    # Slice the padded signal into overlapping frames of n_fft samples.
    frames = tf.contrib.signal.frame(wav, n_fft, hop_length, pad_end=False)

    # Transform all frames in a single FFT call. A block-by-block FFT gives
    # the same result but is slower, so it is not used.
    spectrum = tf.spectral.fft(tf.to_complex64(frames * fft_window))

    # Keep only the non-negative frequency bins.
    return spectrum[:, :int(1 + n_fft // 2)]
if __name__ == "__main__":
    # Sanity check: compare the TensorFlow STFT against librosa on one file.
    _, wav = wavf.read('./00001.wav')
    # Scale by 2**15; presumably the file holds 16-bit PCM samples, which
    # this maps to roughly [-1, 1) — TODO confirm against the input file.
    wav = np.asarray(wav / 2**15)

    win_length = 400
    hop_length = 160
    n_fft = 512

    linear_tf = stft_tf(wav, win_length=win_length, hop_length=hop_length, n_fft=n_fft)
    # librosa returns (frequency, frame); transpose to match stft_tf's layout.
    linear_lb = librosa.core.stft(wav, win_length=win_length, hop_length=hop_length, n_fft=n_fft).T

    with tf.Session() as sess:
        # Evaluate the graph once and reuse the result for both the printout
        # and the error metric instead of running the session twice.
        linear_tf_value = sess.run(linear_tf)

    print(linear_tf_value)
    print(linear_lb)
    # Mean absolute difference between the two implementations.
    print(np.mean(np.abs(linear_tf_value - linear_lb)))