forked from litagin02/Style-Bert-VITS2
-
Notifications
You must be signed in to change notification settings - Fork 0
/
spec_gen.py
87 lines (80 loc) · 2.68 KB
/
spec_gen.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import torch
from tqdm import tqdm
from multiprocessing import Pool
from mel_processing import spectrogram_torch, mel_spectrogram_torch
from utils import load_wav_to_torch
class AudioProcessor:
    """Compute (or load from cache) the spectrogram that belongs to a wav file.

    The spectrogram is cached on disk next to the audio: the ``.wav`` part of
    the path is replaced by ``.spec.pt`` (linear spectrogram) or ``.mel.pt``
    (mel spectrogram, when ``use_mel_spec_posterior`` is true).

    Args:
        max_wav_value: Divisor applied to the raw samples to normalise them.
        use_mel_spec_posterior: Selects ``mel_spectrogram_torch`` instead of
            ``spectrogram_torch`` and the ``.mel.pt`` cache suffix.
        filter_length: Forwarded to the spectrogram function.
        n_mel_channels: Forwarded to ``mel_spectrogram_torch`` only.
        sampling_rate: Forwarded to the spectrogram function.
        hop_length: Forwarded to the spectrogram function.
        win_length: Forwarded to the spectrogram function.
        mel_fmin: Forwarded to ``mel_spectrogram_torch`` only.
        mel_fmax: Forwarded to ``mel_spectrogram_torch`` only.
    """

    def __init__(
        self,
        max_wav_value,
        use_mel_spec_posterior,
        filter_length,
        n_mel_channels,
        sampling_rate,
        hop_length,
        win_length,
        mel_fmin,
        mel_fmax,
    ):
        self.max_wav_value = max_wav_value
        self.use_mel_spec_posterior = use_mel_spec_posterior
        self.filter_length = filter_length
        self.n_mel_channels = n_mel_channels
        self.sampling_rate = sampling_rate
        self.hop_length = hop_length
        self.win_length = win_length
        self.mel_fmin = mel_fmin
        self.mel_fmax = mel_fmax

    def _spec_path(self, filename):
        """Return the cache path that corresponds to the audio file *filename*."""
        # NOTE: str.replace rewrites every ".wav" occurrence in the path, not
        # only the extension. This is kept on purpose so the generated names
        # stay identical to the ones produced before this change.
        spec_filename = filename.replace(".wav", ".spec.pt")
        if self.use_mel_spec_posterior:
            spec_filename = spec_filename.replace(".spec.pt", ".mel.pt")
        return spec_filename

    def _compute_spec(self, audio_norm):
        """Run the configured spectrogram function on normalised audio."""
        if self.use_mel_spec_posterior:
            return mel_spectrogram_torch(
                audio_norm,
                self.filter_length,
                self.n_mel_channels,
                self.sampling_rate,
                self.hop_length,
                self.win_length,
                self.mel_fmin,
                self.mel_fmax,
                center=False,
            )
        return spectrogram_torch(
            audio_norm,
            self.filter_length,
            self.sampling_rate,
            self.hop_length,
            self.win_length,
            center=False,
        )

    def process_audio(self, filename):
        """Load *filename* and return its spectrogram, creating the cache if needed.

        Args:
            filename: Path of the wav file to process.

        Returns:
            A ``(spec, audio_norm)`` tuple: the spectrogram (loaded from the
            cache file, or freshly computed and saved) and the audio divided
            by ``max_wav_value`` with a leading dimension added.
        """
        # NOTE(review): the sample rate reported for the file is not compared
        # with self.sampling_rate - confirm the inputs are already resampled.
        audio, _file_sampling_rate = load_wav_to_torch(filename)
        audio_norm = (audio / self.max_wav_value).unsqueeze(0)

        spec_filename = self._spec_path(filename)
        try:
            spec = torch.load(spec_filename)
        except Exception:
            # Missing or unreadable cache: recompute and overwrite it.
            # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt and
            # SystemExit propagate, so Ctrl-C is not mistaken for a cache miss.
            spec = torch.squeeze(self._compute_spec(audio_norm), 0)
            torch.save(spec, spec_filename)
        return spec, audio_norm
# Usage example
def main():
    """Pre-compute the spectrogram cache for every entry of the training list.

    Reads ``filelists/train.list``, takes the first ``|``-separated field of
    each line as the audio path, and runs ``AudioProcessor.process_audio`` on
    all of them in a process pool while showing a progress bar.
    """
    processor = AudioProcessor(
        max_wav_value=32768.0,
        use_mel_spec_posterior=False,
        filter_length=2048,
        n_mel_channels=128,
        sampling_rate=44100,
        hop_length=512,
        win_length=2048,
        mel_fmin=0.0,
        # Was the string "null" (a JSON null copied verbatim); None is the
        # Python equivalent. Unused while use_mel_spec_posterior is False.
        mel_fmax=None,
    )

    # Explicit encoding so the list is read the same way on every platform.
    # NOTE(review): assumes the file list is UTF-8 - confirm against its writer.
    with open("filelists/train.list", "r", encoding="utf-8") as f:
        # The first field of each line is the audio path; blank lines are
        # skipped and surrounding whitespace/newlines are dropped.
        filepaths = [line.split("|")[0].strip() for line in f if line.strip()]

    # Process the files in parallel with 32 worker processes.
    with Pool(processes=32) as pool:
        with tqdm(total=len(filepaths)) as pbar:
            # Results are discarded; only the cache files written by the
            # workers matter here.
            for _ in pool.imap_unordered(processor.process_audio, filepaths):
                pbar.update()


# The guard is required for multiprocessing: with the "spawn" start method the
# workers re-import this module and must not start a pool of their own.
if __name__ == "__main__":
    main()