-
Notifications
You must be signed in to change notification settings - Fork 79
/
svc_gui.py
250 lines (203 loc) · 9.7 KB
/
svc_gui.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
#!/usr/bin/env python3
"""
This module provides a graphical user interface for the SVC process.
The SVC process is a voice conversion system that uses a generative adversarial network (GAN) to convert the voice of a speaker to another speaker.
The module requires the following files and directories:
- whisper/inference.py: a script that extracts the content encoding from a wav file
- pitch/inference.py: a script that extracts the F0 parameter from a wav file
- svc_inference.py: a script that performs the voice conversion using a GAN model
- configs/maxgan.yaml: a configuration file for the GAN model
- maxgan_g.pth: a pretrained GAN model file
- lora-svc/data_svc/singer: a directory that contains the spk files for different singers
The module allows the user to select an audio file (converted to wav with ffmpeg when it is not already a wav file) and a spk file, run the SVC process to generate an output wav file with the converted voice, and play the result.
"""
# import tkinter module and other modules
import tkinter as tk
from tkinter import filedialog
import os
import subprocess
# import sys and platform modules
import sys
import platform
# import logging module and configure logging level and format
import logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
# import playsound module
from playsound import playsound
# Directory the GUI was launched from. Converted wav files and the renamed
# output file are written here, and the spk file dialog starts below it.
WORKING_DIR = os.getcwd()
# Library search path handed to the inference subprocesses: the WSL library
# directory first, followed by whatever the caller already had configured.
# Empty entries are dropped on purpose: a zero-length entry (for example the
# trailing ":" produced when the variable was unset) makes the dynamic loader
# search the current working directory.
LD_LIBRARY_PATH = ":".join(
    entry
    for entry in ("/usr/lib/wsl/lib", *os.environ.get("LD_LIBRARY_PATH", "").split(":"))
    if entry
)
# Generator weights passed to svc_inference.py via --model.
MODEL_FILE = "maxgan_g.pth"
# Placeholder for a checkpoint path; empty by default.
CHECKPOINT_FILE = ""
def convert_to_wav(file):
    """
    Converts a file to wav format using ffmpeg.

    A file that already has a .wav extension (in any letter case) is returned
    unchanged. Any other file is transcoded by ffmpeg to 16-bit PCM, mono,
    16 kHz and written to WORKING_DIR under the same base name, overwriting
    an existing file of that name (ffmpeg -y).

    Parameters:
    file (str): the path of the input file
    Returns:
    str: the path of the wav file to use (the input path itself when no
         conversion was needed)
    Raises:
    FileNotFoundError: if the ffmpeg executable cannot be found
    subprocess.CalledProcessError: if ffmpeg exits with a non-zero status
    """
    stem, ext = os.path.splitext(os.path.basename(file))
    if ext.lower() == ".wav":
        # Return the path exactly as given rather than just its base name:
        # the file may live outside the working directory, and later steps
        # open it by this path.
        return file
    new_file_path = os.path.join(WORKING_DIR, stem + ".wav")
    # check=True so a failed conversion is reported here instead of handing
    # back the path of a file that was never written.
    subprocess.run(
        ["ffmpeg", "-y", "-i", file, "-acodec", "pcm_s16le", "-ac", "1", "-ar", "16000", new_file_path],
        check=True,
    )
    return new_file_path
def find_highest_checkpoint(dir_name):
    """
    Finds the highest numbered checkpoint file in a given directory.

    Only files with a .pt extension whose name ends in "_<number>" (for
    example "model_200.pt") are considered. Other .pt files are skipped.

    Parameters:
    dir_name (str): the path of the directory
    Returns:
    str: the path of the highest checkpoint file, or an empty string when the
         directory holds no numbered .pt file
    """
    # Start below zero so that a checkpoint numbered 0 can still be selected.
    highest_number = -1
    highest_file = ""
    for file in os.listdir(dir_name):
        stem, ext = os.path.splitext(file)
        if ext != ".pt":
            continue
        try:
            number = int(stem.split("_")[-1])
        except ValueError:
            # A .pt file without a numeric suffix (e.g. "best.pt") is not a
            # numbered checkpoint; ignore it instead of aborting the scan.
            continue
        if number > highest_number:
            highest_number = number
            highest_file = os.path.join(dir_name, file)
    return highest_file
def run_all(wav_file, spk_file):
    """
    Takes a wav file and a spk file as arguments and runs all the commands for the SVC process.

    The steps are run as separate subprocesses, in order:
    1. whisper/inference.py writes the content encoding to <wav_file>.ppg.npy
    2. pitch/inference.py writes the F0 data to <wav_file>.csv
    3. svc_inference.py performs the conversion with MODEL_FILE
    The resulting svc_out.wav in WORKING_DIR is then renamed to
    svc_<input name>_out_<spk name>.wav, the module-level out_file is updated
    and the play button is enabled.

    Any failure (including a step exiting with a non-zero status) is logged
    and reported in an error dialog; no exception propagates to the caller.

    Parameters:
    wav_file (str): the path of the input wav file
    spk_file (str): the path of the input spk file
    Returns:
    None
    """
    global out_file
    # Imported here because `import tkinter as tk` does not by itself load the
    # messagebox submodule, so `tk.messagebox` is not guaranteed to exist.
    from tkinter import messagebox

    if not wav_file or not spk_file:
        # Nothing sensible can run without both inputs; say so up front
        # instead of launching the pipeline with empty arguments.
        messagebox.showerror("SVC Error", "Please select an input file and a spk file before running the SVC process.")
        return

    ppg_file = wav_file + ".ppg.npy"
    pitch_file = wav_file + ".csv"
    try:
        # The child processes inherit these from os.environ.
        os.environ["PYTHONPATH"] = WORKING_DIR
        os.environ["LD_LIBRARY_PATH"] = LD_LIBRARY_PATH
        # check=True on every step: a failed step must stop the pipeline here
        # rather than surface later as a confusing missing-file error.
        # Content encoding is extracted as its own step to keep GPU memory usage down.
        subprocess.run(["python3", "whisper/inference.py", "-w", wav_file, "-p", ppg_file], check=True)
        # F0 is extracted to CSV text.
        subprocess.run(["python3", "pitch/inference.py", "-w", wav_file, "-p", pitch_file], check=True)
        subprocess.run(
            ["python3", "svc_inference.py", "--config", "configs/maxgan.yaml", "--model", MODEL_FILE, "--spk", spk_file, "--wave", wav_file, "--ppg", ppg_file, "--pit", pitch_file],
            check=True,
        )
        # Rename the fixed output name so results for different inputs and
        # speakers do not overwrite each other.
        input_file_name = os.path.splitext(os.path.basename(wav_file))[0]
        spk_file_name = os.path.splitext(os.path.basename(spk_file))[0]
        output_file = f"svc_{input_file_name}_out_{spk_file_name}.wav"
        output_path = os.path.join(WORKING_DIR, output_file)
        os.rename(os.path.join(WORKING_DIR, "svc_out.wav"), output_path)
        logging.info(f"SVC process completed for {wav_file} and {spk_file}. Output file: {output_path}")
        out_file = output_path
        play_button.config(state="normal")
    except Exception as e:
        # GUI boundary: log with traceback and tell the user, but keep the window alive.
        logging.exception(f"SVC process failed for {wav_file} and {spk_file}. Exception: {e}")
        messagebox.showerror("SVC Error", "An error occurred while running the SVC process. Please check the log for details.")
def play_output():
    """
    Plays the output file using playsound.

    Reads the module-level out_file. When it is empty an error dialog is
    shown instead. Playback failures are logged and reported in an error
    dialog; no exception propagates to the caller.

    Parameters:
    None
    Returns:
    None
    """
    # Imported here because `import tkinter as tk` does not by itself load the
    # messagebox submodule, so `tk.messagebox` is not guaranteed to exist.
    from tkinter import messagebox

    if not out_file:
        messagebox.showerror("SVC Error", "No output file to play.")
        return
    try:
        playsound(out_file)
        logging.info(f"Played output file: {out_file}")
    except Exception as e:
        # GUI boundary: log and tell the user, but keep the window alive.
        logging.error(f"Failed to play output file: {out_file}. Exception: {e}")
        messagebox.showerror("SVC Error", "An error occurred while playing the output file. Please check the log for details.")
# Selection state shared by the callbacks: the chosen input wav, the chosen
# spk file, and the path of the most recent conversion result.
wav_file = ""
spk_file = ""
out_file = ""

# Main application window.
window = tk.Tk()
window.title("SVC GUI")
window.geometry("600x400")

# All buttons and the status line share one font.
_BUTTON_FONT = ("Arial", 14)

# Widgets are packed in the order they should appear, top to bottom.
label = tk.Label(
    window,
    text="Select any file and a spk file to run the SVC process",
    font=("Arial", 16),
)
label.pack()

# The lambdas look up select_file / wav_file / spk_file when clicked, so the
# callbacks always see the current selection.
wav_button = tk.Button(
    window,
    text="Select any file",
    font=_BUTTON_FONT,
    command=lambda: select_file("wav"),
)
wav_button.pack()

spk_button = tk.Button(
    window,
    text="Select spk file",
    font=_BUTTON_FONT,
    command=lambda: select_file("spk"),
)
spk_button.pack()

run_button = tk.Button(
    window,
    text="Run SVC",
    font=_BUTTON_FONT,
    command=lambda: run_all(wav_file, spk_file),
)
run_button.pack()

# Playback starts out disabled; run_all enables it once an output file exists.
play_button = tk.Button(
    window,
    text="Play output file",
    font=_BUTTON_FONT,
    command=play_output,
    state="disabled",
)
play_button.pack()

# Status line updated by select_file.
status_label = tk.Label(window, text="", font=_BUTTON_FONT)
status_label.pack()
def select_file(file_type):
    """
    Opens a file dialog and records the chosen file for the given role.

    For "wav" the chosen file is passed through convert_to_wav and the result
    is stored in the module-level wav_file. For "spk" the chosen path is
    stored in the module-level spk_file. The status label is updated in every
    case, including a cancelled dialog and an unknown file type.

    Parameters:
    file_type (str): the type of the file to select ("wav" or "spk")
    Returns:
    None
    """
    global wav_file, spk_file

    # Only the spk dialog starts somewhere other than the working directory.
    start_dirs = {"spk": os.path.join(WORKING_DIR, "data_svc", "singer")}
    chosen = filedialog.askopenfilename(
        initialdir=start_dirs.get(file_type, WORKING_DIR),
        title=f"Select {file_type} file",
    )

    # An empty result means the dialog was dismissed without a choice.
    if not chosen:
        status_label.config(text=f"No {file_type} file selected")
        return

    if file_type == "wav":
        wav_file = convert_to_wav(chosen)
        message = f"Wav file selected: {wav_file}"
    elif file_type == "spk":
        spk_file = chosen
        message = f"Spk file selected: {spk_file}"
    else:
        message = f"Invalid file type: {file_type}"
    status_label.config(text=message)
# Hand control to Tk's event loop so the button callbacks defined above can fire.
window.mainloop()