-
Notifications
You must be signed in to change notification settings - Fork 1
/
preprocess.py
174 lines (147 loc) · 7.48 KB
/
preprocess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
import argparse
import os
from tqdm import tqdm
from datasets import libri_tts, selvas_multi_lbl,selvas_multispeaker_pron, public_korean_pron, check_file_integrity, generate_mel_f0, f0_mean, emotion_grapheme
from configs.korean_200113 import create_hparams
hparams = create_hparams()
# WARN: Do not use this without adding trim
# def preprocess_libri_tts(args):
# libri_tts.build_from_path(args.num_workers, tqdm=tqdm)
# WARN: Do not use this without adding trim and supporting lbl phoneme sets
# def preprocess_selvas_multi(args):
# in_dir = '/past_projects/DB/selvasai/selvasai_organized'
# out_dir = 'filelists'
# selvas_multi_lbl.build_from_path(in_dir, out_dir, args.num_workers, tqdm=tqdm)
def preprocess_selvas_emotion_grapheme(args):
    """Build grapheme-level train/valid/test filelists for the Selvas emotion corpus.

    Delegates to ``emotion_grapheme.build_from_path``; only the source root,
    output directory and output filenames are configured here.
    """
    source_root = '/mnt/sdd1/selvas_new_emotion'
    target_dir = 'filelists'
    # Output filelists, in train / valid / test order.
    output_names = [
        'emotion_train_grapheme.txt',
        'emotion_valid_grapheme.txt',
        'emotion_test_grapheme.txt',
    ]
    emotion_grapheme.build_from_path(
        source_root, target_dir, output_names, 4, args.num_workers, tqdm=tqdm)
def preprocess_selvas_multispeaker_pron(args):
    """Build pronunciation-based filelists for the Selvas multi-speaker corpus.

    Delegates to ``selvas_multispeaker_pron.build_from_path``.
    """
    # Earlier runs pointed at '/past_projects/DB/selvasai/selvasai_organized'
    # and '/mnt/sdd1/selvas_emotion'; the current source root is:
    source_root = '/mnt/sdd1/leftout_males'
    target_dir = 'filelists'
    # Output filelists, in train / valid / test order.
    output_names = [
        'train_file_list_pron_sub.txt',
        'valid_file_list_pron_sub.txt',
        'test_file_list_pron_sub.txt',
    ]
    selvas_multispeaker_pron.build_from_path(
        source_root, target_dir, output_names, 4, args.num_workers, tqdm=tqdm)
# TODO: lang code is written in this procedure. Langcode==1 for korean-only case is hard-coded for now.
# TODO: This must be fixed to support english and other languages as well.
def _integrate(train_file_lists, target_train_file_list):
sources = [[] for i in range(len(train_file_lists))]
i = 0
for file_list in train_file_lists:
with open(file_list, 'r', encoding='utf-8-sig') as f:
sources[i] = f.readlines()
i += 1
# integrate meta file
lang_code = 1
with open(target_train_file_list, 'w', encoding='utf-8-sig') as f:
for i in range(len(sources)):
for j in range(len(sources[i])):
sources[i][j] = sources[i][j].rstrip() + '|{}\n'.format(str(lang_code)) # add language code
for i in range(1, len(sources)):
sources[0] += sources[i]
# shuffle or not
f.writelines(sources[0])
def preprocess_public_korean_pron(args):
    """Build pronunciation-based filelists for the public Korean corpus.

    Delegates to ``public_korean_pron.build_from_path``.
    """
    # Earlier runs used '/mnt/sdd1/korean_public' as the source root.
    source_root = '/mnt/sdd1/leftout_korean_old_male'
    target_dir = 'filelists'
    # Output filelists, in train / valid / test order.
    output_names = [
        'train_korean_pron.txt',
        'valid_korean_pron.txt',
        'test_korean_pron.txt',
    ]
    public_korean_pron.build_from_path(
        source_root, target_dir, output_names, args.num_workers, tqdm=tqdm)
# This better not be done multithreaded: the meta file would be locked and it
# would be inefficient.
def integrate_dataset(args):
    """Merge per-dataset train/valid/test filelists into combined filelists.

    For each split, the Selvas pron filelist and the public-Korean pron
    filelist are concatenated (with a language code appended per line by
    ``_integrate``) into a single merged filelist under ``filelists/``.
    """
    src_dir = '/home/administrator/projects/mellotron/filelists'
    # (source filelists, merged target) per split, in train/valid/test order.
    splits = [
        (['{}/train_file_list_pron.txt'.format(src_dir),
          '{}/public_korean_train_file_list_pron.txt'.format(src_dir)],
         'filelists/merge_korean_pron_train.txt'),
        (['{}/valid_file_list_pron.txt'.format(src_dir),
          '{}/public_korean_valid_file_list_pron.txt'.format(src_dir)],
         'filelists/merge_korean_pron_valid.txt'),
        (['{}/test_file_list_pron.txt'.format(src_dir),
          '{}/public_korean_test_file_list_pron.txt'.format(src_dir)],
         'filelists/merge_korean_pron_test.txt'),
    ]
    for source_lists, target in splits:
        _integrate(source_lists, target)
    print('Dataset integration has been completed')
def check_for_file_integrity(args):
    """Try opening every file referenced by the merged filelists and record
    the ones that raise I/O errors (delegates to
    ``check_file_integrity.check_paths``)."""
    merged_filelists = [
        'filelists/merge_korean_pron_train.txt',
        'filelists/merge_korean_pron_valid.txt',
        'filelists/merge_korean_pron_test.txt',
    ]
    check_file_integrity.check_paths(merged_filelists, tqdm=tqdm)
def gen_mel_f0(args):
    """Generate mel-spectrogram / F0 features for every entry in the merged
    filelists, using the module-level ``hparams`` (delegates to
    ``generate_mel_f0.build_from_path``)."""
    merged_filelists = [
        'filelists/merge_korean_pron_train.txt',
        'filelists/merge_korean_pron_valid.txt',
        'filelists/merge_korean_pron_test.txt',
    ]
    generate_mel_f0.build_from_path(merged_filelists, hparams, tqdm=tqdm)
def preprocess_cal_f0_scale_per_training_speaker(args):
    """Compute F0 statistics per training speaker over a wav corpus
    (delegates to ``f0_mean.build_from_path`` with the module-level hparams)."""
    # Previously used roots: '/mnt/sdd1/selvas_emotion',
    # '/mnt/sdd1/leftout_males', '/mnt/sdd1/leftout_korean_old_male/wav_22050'.
    wav_root = '/mnt/sdd1/korean_public/wav_22050'
    f0_mean.build_from_path(wav_root, hparams, tqdm=tqdm)
def main():
    """Parse CLI arguments and dispatch to the selected preprocessing routine.

    Also rebuilds the module-level ``hparams`` from ``--hparams`` so the
    overrides actually reach the routines that read the global (previously a
    local variable shadowed it and the overrides were silently ignored).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', required=True,
                        choices=['preprocess_selvas_emotion_grapheme', 'blizzard', 'ljspeech', 'sitec', 'sitec_short', 'selvas_multi', 'libri_tts', 'selvas_multispeaker_pron',
                                 'integrate_dataset', 'public_korean_pron', 'check_file_integrity', 'generate_mel_f0', 'cal_f0_scale_per_training_speaker'])
    parser.add_argument('--hparams', default='',
                        help='Hyperparameter overrides as a comma-separated list of name=value pairs')
    parser.add_argument('--num_workers', type=int, default=12)
    args = parser.parse_args()

    # Replace the module-level hparams so gen_mel_f0 /
    # preprocess_cal_f0_scale_per_training_speaker see the overrides.
    global hparams
    hparams = create_hparams(args.hparams)

    # Dataset name -> preprocessing routine.
    dispatch = {
        'preprocess_selvas_emotion_grapheme': preprocess_selvas_emotion_grapheme,
        'integrate_dataset': integrate_dataset,
        'selvas_multispeaker_pron': preprocess_selvas_multispeaker_pron,
        'public_korean_pron': preprocess_public_korean_pron,
        'check_file_integrity': check_for_file_integrity,
        'generate_mel_f0': gen_mel_f0,
        'cal_f0_scale_per_training_speaker': preprocess_cal_f0_scale_per_training_speaker,
    }
    handler = dispatch.get(args.dataset)
    if handler is not None:
        handler(args)
    else:
        # libri_tts, selvas_multi and the remaining legacy choices have no
        # implemented preprocessing path (previously they fell through
        # silently or hit a no-op assert).
        print("Not implemented")
# Script entry point.
if __name__ == "__main__":
    main()