mirror of
https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI.git
synced 2025-01-08 12:08:16 +08:00
chore(format): run black on main
This commit is contained in:
parent
8790ab69e0
commit
005f097fec
156
infer-web.py
156
infer-web.py
@ -92,7 +92,9 @@ if torch.cuda.is_available() or ngpu != 0:
|
||||
"90",
|
||||
"M4",
|
||||
"T4",
|
||||
"TITAN","4060", "L",
|
||||
"TITAN",
|
||||
"4060",
|
||||
"L",
|
||||
"6000",
|
||||
]
|
||||
):
|
||||
@ -405,12 +407,16 @@ def get_pretrained_models(path_str, f0_str, sr2):
|
||||
sr2,
|
||||
)
|
||||
return (
|
||||
"assets/pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2)
|
||||
if if_pretrained_generator_exist
|
||||
else "",
|
||||
"assets/pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2)
|
||||
if if_pretrained_discriminator_exist
|
||||
else "",
|
||||
(
|
||||
"assets/pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2)
|
||||
if if_pretrained_generator_exist
|
||||
else ""
|
||||
),
|
||||
(
|
||||
"assets/pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2)
|
||||
if if_pretrained_discriminator_exist
|
||||
else ""
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@ -735,7 +741,9 @@ def train1key(
|
||||
if_save_every_weights18,
|
||||
version19,
|
||||
)
|
||||
yield get_info_str(i18n("训练结束, 您可查看控制台训练日志或实验文件夹下的train.log"))
|
||||
yield get_info_str(
|
||||
i18n("训练结束, 您可查看控制台训练日志或实验文件夹下的train.log")
|
||||
)
|
||||
|
||||
# step3b:训练索引
|
||||
[get_info_str(_) for _ in train_index(exp_dir1, version19)]
|
||||
@ -782,7 +790,9 @@ with gr.Blocks(title="RVC WebUI") as app:
|
||||
with gr.Row():
|
||||
sid0 = gr.Dropdown(label=i18n("推理音色"), choices=sorted(names))
|
||||
with gr.Column():
|
||||
refresh_button = gr.Button(i18n("刷新音色列表和索引路径"), variant="primary")
|
||||
refresh_button = gr.Button(
|
||||
i18n("刷新音色列表和索引路径"), variant="primary"
|
||||
)
|
||||
clean_button = gr.Button(i18n("卸载音色省显存"), variant="primary")
|
||||
spk_item = gr.Slider(
|
||||
minimum=0,
|
||||
@ -801,14 +811,19 @@ with gr.Blocks(title="RVC WebUI") as app:
|
||||
with gr.Row():
|
||||
with gr.Column():
|
||||
vc_transform0 = gr.Number(
|
||||
label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"), value=0
|
||||
label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"),
|
||||
value=0,
|
||||
)
|
||||
input_audio0 = gr.Textbox(
|
||||
label=i18n("输入待处理音频文件路径(默认是正确格式示例)"),
|
||||
label=i18n(
|
||||
"输入待处理音频文件路径(默认是正确格式示例)"
|
||||
),
|
||||
placeholder="C:\\Users\\Desktop\\audio_example.wav",
|
||||
)
|
||||
file_index1 = gr.Textbox(
|
||||
label=i18n("特征检索库文件路径,为空则使用下拉的选择结果"),
|
||||
label=i18n(
|
||||
"特征检索库文件路径,为空则使用下拉的选择结果"
|
||||
),
|
||||
placeholder="C:\\Users\\Desktop\\model_example.index",
|
||||
interactive=True,
|
||||
)
|
||||
@ -821,9 +836,11 @@ with gr.Blocks(title="RVC WebUI") as app:
|
||||
label=i18n(
|
||||
"选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU,rmvpe效果最好且微吃GPU"
|
||||
),
|
||||
choices=["pm", "harvest", "crepe", "rmvpe"]
|
||||
if config.dml == False
|
||||
else ["pm", "harvest", "rmvpe"],
|
||||
choices=(
|
||||
["pm", "harvest", "crepe", "rmvpe"]
|
||||
if config.dml == False
|
||||
else ["pm", "harvest", "rmvpe"]
|
||||
),
|
||||
value="rmvpe",
|
||||
interactive=True,
|
||||
)
|
||||
@ -840,7 +857,9 @@ with gr.Blocks(title="RVC WebUI") as app:
|
||||
rms_mix_rate0 = gr.Slider(
|
||||
minimum=0,
|
||||
maximum=1,
|
||||
label=i18n("输入源音量包络替换输出音量包络融合比例,越靠近1越使用输出包络"),
|
||||
label=i18n(
|
||||
"输入源音量包络替换输出音量包络融合比例,越靠近1越使用输出包络"
|
||||
),
|
||||
value=0.25,
|
||||
interactive=True,
|
||||
)
|
||||
@ -872,7 +891,9 @@ with gr.Blocks(title="RVC WebUI") as app:
|
||||
interactive=True,
|
||||
)
|
||||
f0_file = gr.File(
|
||||
label=i18n("F0曲线文件, 可选, 一行一个音高, 代替默认F0及升降调"),
|
||||
label=i18n(
|
||||
"F0曲线文件, 可选, 一行一个音高, 代替默认F0及升降调"
|
||||
),
|
||||
visible=False,
|
||||
)
|
||||
|
||||
@ -892,7 +913,9 @@ with gr.Blocks(title="RVC WebUI") as app:
|
||||
but0 = gr.Button(i18n("转换"), variant="primary")
|
||||
with gr.Row():
|
||||
vc_output1 = gr.Textbox(label=i18n("输出信息"))
|
||||
vc_output2 = gr.Audio(label=i18n("输出音频(右下角三个点,点了可以下载)"))
|
||||
vc_output2 = gr.Audio(
|
||||
label=i18n("输出音频(右下角三个点,点了可以下载)")
|
||||
)
|
||||
|
||||
but0.click(
|
||||
vc.vc_single,
|
||||
@ -916,14 +939,19 @@ with gr.Blocks(title="RVC WebUI") as app:
|
||||
)
|
||||
with gr.TabItem(i18n("批量推理")):
|
||||
gr.Markdown(
|
||||
value=i18n("批量转换, 输入待转换音频文件夹, 或上传多个音频文件, 在指定文件夹(默认opt)下输出转换的音频. ")
|
||||
value=i18n(
|
||||
"批量转换, 输入待转换音频文件夹, 或上传多个音频文件, 在指定文件夹(默认opt)下输出转换的音频. "
|
||||
)
|
||||
)
|
||||
with gr.Row():
|
||||
with gr.Column():
|
||||
vc_transform1 = gr.Number(
|
||||
label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"), value=0
|
||||
label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"),
|
||||
value=0,
|
||||
)
|
||||
opt_input = gr.Textbox(
|
||||
label=i18n("指定输出文件夹"), value="opt"
|
||||
)
|
||||
opt_input = gr.Textbox(label=i18n("指定输出文件夹"), value="opt")
|
||||
file_index3 = gr.Textbox(
|
||||
label=i18n("特征检索库文件路径,为空则使用下拉的选择结果"),
|
||||
value="",
|
||||
@ -938,9 +966,11 @@ with gr.Blocks(title="RVC WebUI") as app:
|
||||
label=i18n(
|
||||
"选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU,rmvpe效果最好且微吃GPU"
|
||||
),
|
||||
choices=["pm", "harvest", "crepe", "rmvpe"]
|
||||
if config.dml == False
|
||||
else ["pm", "harvest", "rmvpe"],
|
||||
choices=(
|
||||
["pm", "harvest", "crepe", "rmvpe"]
|
||||
if config.dml == False
|
||||
else ["pm", "harvest", "rmvpe"]
|
||||
),
|
||||
value="rmvpe",
|
||||
interactive=True,
|
||||
)
|
||||
@ -975,7 +1005,9 @@ with gr.Blocks(title="RVC WebUI") as app:
|
||||
rms_mix_rate1 = gr.Slider(
|
||||
minimum=0,
|
||||
maximum=1,
|
||||
label=i18n("输入源音量包络替换输出音量包络融合比例,越靠近1越使用输出包络"),
|
||||
label=i18n(
|
||||
"输入源音量包络替换输出音量包络融合比例,越靠近1越使用输出包络"
|
||||
),
|
||||
value=1,
|
||||
interactive=True,
|
||||
)
|
||||
@ -992,7 +1024,9 @@ with gr.Blocks(title="RVC WebUI") as app:
|
||||
filter_radius1 = gr.Slider(
|
||||
minimum=0,
|
||||
maximum=7,
|
||||
label=i18n(">=3则使用对harvest音高识别的结果使用中值滤波,数值为滤波半径,使用可以削弱哑音"),
|
||||
label=i18n(
|
||||
">=3则使用对harvest音高识别的结果使用中值滤波,数值为滤波半径,使用可以削弱哑音"
|
||||
),
|
||||
value=3,
|
||||
step=1,
|
||||
interactive=True,
|
||||
@ -1006,11 +1040,14 @@ with gr.Blocks(title="RVC WebUI") as app:
|
||||
)
|
||||
with gr.Row():
|
||||
dir_input = gr.Textbox(
|
||||
label=i18n("输入待处理音频文件夹路径(去文件管理器地址栏拷就行了)"),
|
||||
label=i18n(
|
||||
"输入待处理音频文件夹路径(去文件管理器地址栏拷就行了)"
|
||||
),
|
||||
placeholder="C:\\Users\\Desktop\\input_vocal_dir",
|
||||
)
|
||||
inputs = gr.File(
|
||||
file_count="multiple", label=i18n("也可批量输入音频文件, 二选一, 优先读文件夹")
|
||||
file_count="multiple",
|
||||
label=i18n("也可批量输入音频文件, 二选一, 优先读文件夹"),
|
||||
)
|
||||
|
||||
with gr.Row():
|
||||
@ -1059,10 +1096,13 @@ with gr.Blocks(title="RVC WebUI") as app:
|
||||
placeholder="C:\\Users\\Desktop\\todo-songs",
|
||||
)
|
||||
wav_inputs = gr.File(
|
||||
file_count="multiple", label=i18n("也可批量输入音频文件, 二选一, 优先读文件夹")
|
||||
file_count="multiple",
|
||||
label=i18n("也可批量输入音频文件, 二选一, 优先读文件夹"),
|
||||
)
|
||||
with gr.Column():
|
||||
model_choose = gr.Dropdown(label=i18n("模型"), choices=uvr5_names)
|
||||
model_choose = gr.Dropdown(
|
||||
label=i18n("模型"), choices=uvr5_names
|
||||
)
|
||||
agg = gr.Slider(
|
||||
minimum=0,
|
||||
maximum=20,
|
||||
@ -1143,7 +1183,8 @@ with gr.Blocks(title="RVC WebUI") as app:
|
||||
)
|
||||
with gr.Row():
|
||||
trainset_dir4 = gr.Textbox(
|
||||
label=i18n("输入训练文件夹路径"), value=i18n("E:\\语音音频+标注\\米津玄师\\src")
|
||||
label=i18n("输入训练文件夹路径"),
|
||||
value=i18n("E:\\语音音频+标注\\米津玄师\\src"),
|
||||
)
|
||||
spk_id5 = gr.Slider(
|
||||
minimum=0,
|
||||
@ -1162,11 +1203,17 @@ with gr.Blocks(title="RVC WebUI") as app:
|
||||
api_name="train_preprocess",
|
||||
)
|
||||
with gr.Group():
|
||||
gr.Markdown(value=i18n("step2b: 使用CPU提取音高(如果模型带音高), 使用GPU提取特征(选择卡号)"))
|
||||
gr.Markdown(
|
||||
value=i18n(
|
||||
"step2b: 使用CPU提取音高(如果模型带音高), 使用GPU提取特征(选择卡号)"
|
||||
)
|
||||
)
|
||||
with gr.Row():
|
||||
with gr.Column():
|
||||
gpus6 = gr.Textbox(
|
||||
label=i18n("以-分隔输入使用的卡号, 例如 0-1-2 使用卡0和卡1和卡2"),
|
||||
label=i18n(
|
||||
"以-分隔输入使用的卡号, 例如 0-1-2 使用卡0和卡1和卡2"
|
||||
),
|
||||
value=gpus,
|
||||
interactive=True,
|
||||
visible=F0GPUVisible,
|
||||
@ -1254,7 +1301,9 @@ with gr.Blocks(title="RVC WebUI") as app:
|
||||
interactive=True,
|
||||
)
|
||||
if_save_every_weights18 = gr.Radio(
|
||||
label=i18n("是否在每次保存时间点将最终小模型保存至weights文件夹"),
|
||||
label=i18n(
|
||||
"是否在每次保存时间点将最终小模型保存至weights文件夹"
|
||||
),
|
||||
choices=[i18n("是"), i18n("否")],
|
||||
value=i18n("否"),
|
||||
interactive=True,
|
||||
@ -1286,7 +1335,9 @@ with gr.Blocks(title="RVC WebUI") as app:
|
||||
[f0method8, gpus_rmvpe, pretrained_G14, pretrained_D15],
|
||||
)
|
||||
gpus16 = gr.Textbox(
|
||||
label=i18n("以-分隔输入使用的卡号, 例如 0-1-2 使用卡0和卡1和卡2"),
|
||||
label=i18n(
|
||||
"以-分隔输入使用的卡号, 例如 0-1-2 使用卡0和卡1和卡2"
|
||||
),
|
||||
value=gpus,
|
||||
interactive=True,
|
||||
)
|
||||
@ -1346,8 +1397,12 @@ with gr.Blocks(title="RVC WebUI") as app:
|
||||
with gr.Group():
|
||||
gr.Markdown(value=i18n("模型融合, 可用于测试音色融合"))
|
||||
with gr.Row():
|
||||
ckpt_a = gr.Textbox(label=i18n("A模型路径"), value="", interactive=True)
|
||||
ckpt_b = gr.Textbox(label=i18n("B模型路径"), value="", interactive=True)
|
||||
ckpt_a = gr.Textbox(
|
||||
label=i18n("A模型路径"), value="", interactive=True
|
||||
)
|
||||
ckpt_b = gr.Textbox(
|
||||
label=i18n("B模型路径"), value="", interactive=True
|
||||
)
|
||||
alpha_a = gr.Slider(
|
||||
minimum=0,
|
||||
maximum=1,
|
||||
@ -1369,7 +1424,10 @@ with gr.Blocks(title="RVC WebUI") as app:
|
||||
interactive=True,
|
||||
)
|
||||
info__ = gr.Textbox(
|
||||
label=i18n("要置入的模型信息"), value="", max_lines=8, interactive=True
|
||||
label=i18n("要置入的模型信息"),
|
||||
value="",
|
||||
max_lines=8,
|
||||
interactive=True,
|
||||
)
|
||||
name_to_save0 = gr.Textbox(
|
||||
label=i18n("保存的模型名不带后缀"),
|
||||
@ -1402,13 +1460,18 @@ with gr.Blocks(title="RVC WebUI") as app:
|
||||
api_name="ckpt_merge",
|
||||
) # def merge(path1,path2,alpha1,sr,f0,info):
|
||||
with gr.Group():
|
||||
gr.Markdown(value=i18n("修改模型信息(仅支持weights文件夹下提取的小模型文件)"))
|
||||
gr.Markdown(
|
||||
value=i18n("修改模型信息(仅支持weights文件夹下提取的小模型文件)")
|
||||
)
|
||||
with gr.Row():
|
||||
ckpt_path0 = gr.Textbox(
|
||||
label=i18n("模型路径"), value="", interactive=True
|
||||
)
|
||||
info_ = gr.Textbox(
|
||||
label=i18n("要改的模型信息"), value="", max_lines=8, interactive=True
|
||||
label=i18n("要改的模型信息"),
|
||||
value="",
|
||||
max_lines=8,
|
||||
interactive=True,
|
||||
)
|
||||
name_to_save1 = gr.Textbox(
|
||||
label=i18n("保存的文件名, 默认空为和源文件同名"),
|
||||
@ -1426,7 +1489,9 @@ with gr.Blocks(title="RVC WebUI") as app:
|
||||
api_name="ckpt_modify",
|
||||
)
|
||||
with gr.Group():
|
||||
gr.Markdown(value=i18n("查看模型信息(仅支持weights文件夹下提取的小模型文件)"))
|
||||
gr.Markdown(
|
||||
value=i18n("查看模型信息(仅支持weights文件夹下提取的小模型文件)")
|
||||
)
|
||||
with gr.Row():
|
||||
ckpt_path1 = gr.Textbox(
|
||||
label=i18n("模型路径"), value="", interactive=True
|
||||
@ -1468,7 +1533,10 @@ with gr.Blocks(title="RVC WebUI") as app:
|
||||
interactive=True,
|
||||
)
|
||||
info___ = gr.Textbox(
|
||||
label=i18n("要置入的模型信息"), value="", max_lines=8, interactive=True
|
||||
label=i18n("要置入的模型信息"),
|
||||
value="",
|
||||
max_lines=8,
|
||||
interactive=True,
|
||||
)
|
||||
but9 = gr.Button(i18n("提取"), variant="primary")
|
||||
info7 = gr.Textbox(label=i18n("输出信息"), value="", max_lines=8)
|
||||
@ -1484,7 +1552,9 @@ with gr.Blocks(title="RVC WebUI") as app:
|
||||
|
||||
with gr.TabItem(i18n("Onnx导出")):
|
||||
with gr.Row():
|
||||
ckpt_dir = gr.Textbox(label=i18n("RVC模型路径"), value="", interactive=True)
|
||||
ckpt_dir = gr.Textbox(
|
||||
label=i18n("RVC模型路径"), value="", interactive=True
|
||||
)
|
||||
with gr.Row():
|
||||
onnx_dir = gr.Textbox(
|
||||
label=i18n("Onnx输出路径"), value="", interactive=True
|
||||
|
@ -1,4 +1,4 @@
|
||||
import platform,os
|
||||
import platform, os
|
||||
import ffmpeg
|
||||
import numpy as np
|
||||
import av
|
||||
@ -46,6 +46,6 @@ def load_audio(file, sr):
|
||||
|
||||
|
||||
def clean_path(path_str):
|
||||
if platform.system() == 'Windows':
|
||||
path_str = path_str.replace('/', '\\')
|
||||
if platform.system() == "Windows":
|
||||
path_str = path_str.replace("/", "\\")
|
||||
return path_str.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
|
||||
|
@ -400,13 +400,17 @@ class SineGen(torch.nn.Module):
|
||||
f0_buf[:, :, idx + 1] = f0_buf[:, :, 0] * (
|
||||
idx + 2
|
||||
) # idx + 2: the (idx+1)-th overtone, (idx+2)-th harmonic
|
||||
rad_values = (f0_buf / self.sampling_rate) % 1 ###%1意味着n_har的乘积无法后处理优化
|
||||
rad_values = (
|
||||
f0_buf / self.sampling_rate
|
||||
) % 1 ###%1意味着n_har的乘积无法后处理优化
|
||||
rand_ini = torch.rand(
|
||||
f0_buf.shape[0], f0_buf.shape[2], device=f0_buf.device
|
||||
)
|
||||
rand_ini[:, 0] = 0
|
||||
rad_values[:, 0, :] = rad_values[:, 0, :] + rand_ini
|
||||
tmp_over_one = torch.cumsum(rad_values, 1) # % 1 #####%1意味着后面的cumsum无法再优化
|
||||
tmp_over_one = torch.cumsum(
|
||||
rad_values, 1
|
||||
) # % 1 #####%1意味着后面的cumsum无法再优化
|
||||
tmp_over_one *= upp
|
||||
tmp_over_one = F.interpolate(
|
||||
tmp_over_one.transpose(2, 1),
|
||||
|
@ -333,13 +333,17 @@ class SineGen(torch.nn.Module):
|
||||
f0_buf[:, :, idx + 1] = f0_buf[:, :, 0] * (
|
||||
idx + 2
|
||||
) # idx + 2: the (idx+1)-th overtone, (idx+2)-th harmonic
|
||||
rad_values = (f0_buf / self.sampling_rate) % 1 ###%1意味着n_har的乘积无法后处理优化
|
||||
rad_values = (
|
||||
f0_buf / self.sampling_rate
|
||||
) % 1 ###%1意味着n_har的乘积无法后处理优化
|
||||
rand_ini = torch.rand(
|
||||
f0_buf.shape[0], f0_buf.shape[2], device=f0_buf.device
|
||||
)
|
||||
rand_ini[:, 0] = 0
|
||||
rad_values[:, 0, :] = rad_values[:, 0, :] + rand_ini
|
||||
tmp_over_one = torch.cumsum(rad_values, 1) # % 1 #####%1意味着后面的cumsum无法再优化
|
||||
tmp_over_one = torch.cumsum(
|
||||
rad_values, 1
|
||||
) # % 1 #####%1意味着后面的cumsum无法再优化
|
||||
tmp_over_one *= upp
|
||||
tmp_over_one = F.interpolate(
|
||||
tmp_over_one.transpose(2, 1),
|
||||
|
@ -62,12 +62,12 @@ def torch_bmm(input, mat2, *, out=None):
|
||||
): # pylint: disable=invalid-name
|
||||
start_idx_2 = i2 * split_2_slice_size
|
||||
end_idx_2 = (i2 + 1) * split_2_slice_size
|
||||
hidden_states[
|
||||
start_idx:end_idx, start_idx_2:end_idx_2
|
||||
] = original_torch_bmm(
|
||||
input[start_idx:end_idx, start_idx_2:end_idx_2],
|
||||
mat2[start_idx:end_idx, start_idx_2:end_idx_2],
|
||||
out=out,
|
||||
hidden_states[start_idx:end_idx, start_idx_2:end_idx_2] = (
|
||||
original_torch_bmm(
|
||||
input[start_idx:end_idx, start_idx_2:end_idx_2],
|
||||
mat2[start_idx:end_idx, start_idx_2:end_idx_2],
|
||||
out=out,
|
||||
)
|
||||
)
|
||||
else:
|
||||
hidden_states[start_idx:end_idx] = original_torch_bmm(
|
||||
@ -138,61 +138,67 @@ def scaled_dot_product_attention(
|
||||
start_idx_2 = i2 * split_2_slice_size
|
||||
end_idx_2 = (i2 + 1) * split_2_slice_size
|
||||
if no_shape_one:
|
||||
hidden_states[
|
||||
start_idx:end_idx, start_idx_2:end_idx_2
|
||||
] = original_scaled_dot_product_attention(
|
||||
query[start_idx:end_idx, start_idx_2:end_idx_2],
|
||||
key[start_idx:end_idx, start_idx_2:end_idx_2],
|
||||
value[start_idx:end_idx, start_idx_2:end_idx_2],
|
||||
attn_mask=attn_mask[
|
||||
start_idx:end_idx, start_idx_2:end_idx_2
|
||||
]
|
||||
if attn_mask is not None
|
||||
else attn_mask,
|
||||
dropout_p=dropout_p,
|
||||
is_causal=is_causal,
|
||||
hidden_states[start_idx:end_idx, start_idx_2:end_idx_2] = (
|
||||
original_scaled_dot_product_attention(
|
||||
query[start_idx:end_idx, start_idx_2:end_idx_2],
|
||||
key[start_idx:end_idx, start_idx_2:end_idx_2],
|
||||
value[start_idx:end_idx, start_idx_2:end_idx_2],
|
||||
attn_mask=(
|
||||
attn_mask[start_idx:end_idx, start_idx_2:end_idx_2]
|
||||
if attn_mask is not None
|
||||
else attn_mask
|
||||
),
|
||||
dropout_p=dropout_p,
|
||||
is_causal=is_causal,
|
||||
)
|
||||
)
|
||||
else:
|
||||
hidden_states[
|
||||
:, start_idx:end_idx, start_idx_2:end_idx_2
|
||||
] = original_scaled_dot_product_attention(
|
||||
query[:, start_idx:end_idx, start_idx_2:end_idx_2],
|
||||
key[:, start_idx:end_idx, start_idx_2:end_idx_2],
|
||||
value[:, start_idx:end_idx, start_idx_2:end_idx_2],
|
||||
attn_mask=attn_mask[
|
||||
:, start_idx:end_idx, start_idx_2:end_idx_2
|
||||
]
|
||||
if attn_mask is not None
|
||||
else attn_mask,
|
||||
dropout_p=dropout_p,
|
||||
is_causal=is_causal,
|
||||
hidden_states[:, start_idx:end_idx, start_idx_2:end_idx_2] = (
|
||||
original_scaled_dot_product_attention(
|
||||
query[:, start_idx:end_idx, start_idx_2:end_idx_2],
|
||||
key[:, start_idx:end_idx, start_idx_2:end_idx_2],
|
||||
value[:, start_idx:end_idx, start_idx_2:end_idx_2],
|
||||
attn_mask=(
|
||||
attn_mask[
|
||||
:, start_idx:end_idx, start_idx_2:end_idx_2
|
||||
]
|
||||
if attn_mask is not None
|
||||
else attn_mask
|
||||
),
|
||||
dropout_p=dropout_p,
|
||||
is_causal=is_causal,
|
||||
)
|
||||
)
|
||||
else:
|
||||
if no_shape_one:
|
||||
hidden_states[
|
||||
start_idx:end_idx
|
||||
] = original_scaled_dot_product_attention(
|
||||
query[start_idx:end_idx],
|
||||
key[start_idx:end_idx],
|
||||
value[start_idx:end_idx],
|
||||
attn_mask=attn_mask[start_idx:end_idx]
|
||||
if attn_mask is not None
|
||||
else attn_mask,
|
||||
dropout_p=dropout_p,
|
||||
is_causal=is_causal,
|
||||
hidden_states[start_idx:end_idx] = (
|
||||
original_scaled_dot_product_attention(
|
||||
query[start_idx:end_idx],
|
||||
key[start_idx:end_idx],
|
||||
value[start_idx:end_idx],
|
||||
attn_mask=(
|
||||
attn_mask[start_idx:end_idx]
|
||||
if attn_mask is not None
|
||||
else attn_mask
|
||||
),
|
||||
dropout_p=dropout_p,
|
||||
is_causal=is_causal,
|
||||
)
|
||||
)
|
||||
else:
|
||||
hidden_states[
|
||||
:, start_idx:end_idx
|
||||
] = original_scaled_dot_product_attention(
|
||||
query[:, start_idx:end_idx],
|
||||
key[:, start_idx:end_idx],
|
||||
value[:, start_idx:end_idx],
|
||||
attn_mask=attn_mask[:, start_idx:end_idx]
|
||||
if attn_mask is not None
|
||||
else attn_mask,
|
||||
dropout_p=dropout_p,
|
||||
is_causal=is_causal,
|
||||
hidden_states[:, start_idx:end_idx] = (
|
||||
original_scaled_dot_product_attention(
|
||||
query[:, start_idx:end_idx],
|
||||
key[:, start_idx:end_idx],
|
||||
value[:, start_idx:end_idx],
|
||||
attn_mask=(
|
||||
attn_mask[:, start_idx:end_idx]
|
||||
if attn_mask is not None
|
||||
else attn_mask
|
||||
),
|
||||
dropout_p=dropout_p,
|
||||
is_causal=is_causal,
|
||||
)
|
||||
)
|
||||
else:
|
||||
return original_scaled_dot_product_attention(
|
||||
|
@ -104,11 +104,11 @@ def return_xpu(device):
|
||||
return (
|
||||
f"xpu:{device[-1]}"
|
||||
if isinstance(device, str) and ":" in device
|
||||
else f"xpu:{device}"
|
||||
if isinstance(device, int)
|
||||
else torch.device("xpu")
|
||||
if isinstance(device, torch.device)
|
||||
else "xpu"
|
||||
else (
|
||||
f"xpu:{device}"
|
||||
if isinstance(device, int)
|
||||
else torch.device("xpu") if isinstance(device, torch.device) else "xpu"
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
@ -271,12 +271,16 @@ def ipex_hijacks():
|
||||
"torch.batch_norm",
|
||||
lambda orig_func, input, weight, bias, *args, **kwargs: orig_func(
|
||||
input,
|
||||
weight
|
||||
if weight is not None
|
||||
else torch.ones(input.size()[1], device=input.device),
|
||||
bias
|
||||
if bias is not None
|
||||
else torch.zeros(input.size()[1], device=input.device),
|
||||
(
|
||||
weight
|
||||
if weight is not None
|
||||
else torch.ones(input.size()[1], device=input.device)
|
||||
),
|
||||
(
|
||||
bias
|
||||
if bias is not None
|
||||
else torch.zeros(input.size()[1], device=input.device)
|
||||
),
|
||||
*args,
|
||||
**kwargs,
|
||||
),
|
||||
@ -286,12 +290,16 @@ def ipex_hijacks():
|
||||
"torch.instance_norm",
|
||||
lambda orig_func, input, weight, bias, *args, **kwargs: orig_func(
|
||||
input,
|
||||
weight
|
||||
if weight is not None
|
||||
else torch.ones(input.size()[1], device=input.device),
|
||||
bias
|
||||
if bias is not None
|
||||
else torch.zeros(input.size()[1], device=input.device),
|
||||
(
|
||||
weight
|
||||
if weight is not None
|
||||
else torch.ones(input.size()[1], device=input.device)
|
||||
),
|
||||
(
|
||||
bias
|
||||
if bias is not None
|
||||
else torch.zeros(input.size()[1], device=input.device)
|
||||
),
|
||||
*args,
|
||||
**kwargs,
|
||||
),
|
||||
|
@ -113,9 +113,11 @@ else:
|
||||
feats = readwave(wav_path, normalize=saved_cfg.task.normalize)
|
||||
padding_mask = torch.BoolTensor(feats.shape).fill_(False)
|
||||
inputs = {
|
||||
"source": feats.half().to(device)
|
||||
if device not in ["mps", "cpu"]
|
||||
else feats.to(device),
|
||||
"source": (
|
||||
feats.half().to(device)
|
||||
if device not in ["mps", "cpu"]
|
||||
else feats.to(device)
|
||||
),
|
||||
"padding_mask": padding_mask.to(device),
|
||||
"output_layer": 9 if version == "v1" else 12, # layer 9
|
||||
}
|
||||
|
@ -38,26 +38,28 @@ class VC:
|
||||
|
||||
to_return_protect0 = {
|
||||
"visible": self.if_f0 != 0,
|
||||
"value": to_return_protect[0]
|
||||
if self.if_f0 != 0 and to_return_protect
|
||||
else 0.5,
|
||||
"value": (
|
||||
to_return_protect[0] if self.if_f0 != 0 and to_return_protect else 0.5
|
||||
),
|
||||
"__type__": "update",
|
||||
}
|
||||
to_return_protect1 = {
|
||||
"visible": self.if_f0 != 0,
|
||||
"value": to_return_protect[1]
|
||||
if self.if_f0 != 0 and to_return_protect
|
||||
else 0.33,
|
||||
"value": (
|
||||
to_return_protect[1] if self.if_f0 != 0 and to_return_protect else 0.33
|
||||
),
|
||||
"__type__": "update",
|
||||
}
|
||||
|
||||
if sid == "" or sid == []:
|
||||
if self.hubert_model is not None: # 考虑到轮询, 需要加个判断看是否 sid 是由有模型切换到无模型的
|
||||
if (
|
||||
self.hubert_model is not None
|
||||
): # 考虑到轮询, 需要加个判断看是否 sid 是由有模型切换到无模型的
|
||||
logger.info("Clean model cache")
|
||||
del (self.net_g, self.n_spk, self.hubert_model, self.tgt_sr) # ,cpt
|
||||
self.hubert_model = (
|
||||
self.net_g
|
||||
) = self.n_spk = self.hubert_model = self.tgt_sr = None
|
||||
self.hubert_model = self.net_g = self.n_spk = self.hubert_model = (
|
||||
self.tgt_sr
|
||||
) = None
|
||||
if torch.cuda.is_available():
|
||||
torch.cuda.empty_cache()
|
||||
###楼下不这么折腾清理不干净
|
||||
|
28
tools/app.py
28
tools/app.py
@ -59,12 +59,18 @@ with app:
|
||||
)
|
||||
sid.change(fn=vc.get_vc, inputs=[sid], outputs=[spk_item])
|
||||
gr.Markdown(
|
||||
value=i18n("男转女推荐+12key, 女转男推荐-12key, 如果音域爆炸导致音色失真也可以自己调整到合适音域. ")
|
||||
value=i18n(
|
||||
"男转女推荐+12key, 女转男推荐-12key, 如果音域爆炸导致音色失真也可以自己调整到合适音域. "
|
||||
)
|
||||
)
|
||||
vc_input3 = gr.Audio(label="上传音频(长度小于90秒)")
|
||||
vc_transform0 = gr.Number(label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"), value=0)
|
||||
vc_transform0 = gr.Number(
|
||||
label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"), value=0
|
||||
)
|
||||
f0method0 = gr.Radio(
|
||||
label=i18n("选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU"),
|
||||
label=i18n(
|
||||
"选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU"
|
||||
),
|
||||
choices=["pm", "harvest", "crepe", "rmvpe"],
|
||||
value="pm",
|
||||
interactive=True,
|
||||
@ -72,7 +78,9 @@ with app:
|
||||
filter_radius0 = gr.Slider(
|
||||
minimum=0,
|
||||
maximum=7,
|
||||
label=i18n(">=3则使用对harvest音高识别的结果使用中值滤波,数值为滤波半径,使用可以削弱哑音"),
|
||||
label=i18n(
|
||||
">=3则使用对harvest音高识别的结果使用中值滤波,数值为滤波半径,使用可以削弱哑音"
|
||||
),
|
||||
value=3,
|
||||
step=1,
|
||||
interactive=True,
|
||||
@ -107,19 +115,25 @@ with app:
|
||||
rms_mix_rate0 = gr.Slider(
|
||||
minimum=0,
|
||||
maximum=1,
|
||||
label=i18n("输入源音量包络替换输出音量包络融合比例,越靠近1越使用输出包络"),
|
||||
label=i18n(
|
||||
"输入源音量包络替换输出音量包络融合比例,越靠近1越使用输出包络"
|
||||
),
|
||||
value=1,
|
||||
interactive=True,
|
||||
)
|
||||
protect0 = gr.Slider(
|
||||
minimum=0,
|
||||
maximum=0.5,
|
||||
label=i18n("保护清辅音和呼吸声,防止电音撕裂等artifact,拉满0.5不开启,调低加大保护力度但可能降低索引效果"),
|
||||
label=i18n(
|
||||
"保护清辅音和呼吸声,防止电音撕裂等artifact,拉满0.5不开启,调低加大保护力度但可能降低索引效果"
|
||||
),
|
||||
value=0.33,
|
||||
step=0.01,
|
||||
interactive=True,
|
||||
)
|
||||
f0_file = gr.File(label=i18n("F0曲线文件, 可选, 一行一个音高, 代替默认F0及升降调"))
|
||||
f0_file = gr.File(
|
||||
label=i18n("F0曲线文件, 可选, 一行一个音高, 代替默认F0及升降调")
|
||||
)
|
||||
but0 = gr.Button(i18n("转换"), variant="primary")
|
||||
vc_output1 = gr.Textbox(label=i18n("输出信息"))
|
||||
vc_output2 = gr.Audio(label=i18n("输出音频(右下角三个点,点了可以下载)"))
|
||||
|
@ -2,6 +2,7 @@
|
||||
|
||||
对源特征进行检索
|
||||
"""
|
||||
|
||||
import os
|
||||
import logging
|
||||
|
||||
|
@ -1,6 +1,7 @@
|
||||
"""
|
||||
格式:直接cid为自带的index位;aid放不下了,通过字典来查,反正就5w个
|
||||
"""
|
||||
|
||||
import os
|
||||
import traceback
|
||||
import logging
|
||||
|
@ -1,6 +1,7 @@
|
||||
"""
|
||||
格式:直接cid为自带的index位;aid放不下了,通过字典来查,反正就5w个
|
||||
"""
|
||||
|
||||
import os
|
||||
import logging
|
||||
|
||||
|
@ -8,7 +8,9 @@ f0_up_key = 0 # 升降调
|
||||
sid = 0 # 角色ID
|
||||
f0_method = "dio" # F0提取算法
|
||||
model_path = "ShirohaRVC.onnx" # 模型的完整路径
|
||||
vec_name = "vec-256-layer-9" # 内部自动补齐为 f"pretrained/{vec_name}.onnx" 需要onnx的vec模型
|
||||
vec_name = (
|
||||
"vec-256-layer-9" # 内部自动补齐为 f"pretrained/{vec_name}.onnx" 需要onnx的vec模型
|
||||
)
|
||||
wav_path = "123.wav" # 输入路径或ByteIO实例
|
||||
out_path = "out.wav" # 输出路径或ByteIO实例
|
||||
|
||||
|
@ -273,15 +273,17 @@ class RVC:
|
||||
f0 = f0[2:-3]
|
||||
else:
|
||||
f0 = f0[2:]
|
||||
f0bak[
|
||||
part_length * idx // 160 : part_length * idx // 160 + f0.shape[0]
|
||||
] = f0
|
||||
f0bak[part_length * idx // 160 : part_length * idx // 160 + f0.shape[0]] = (
|
||||
f0
|
||||
)
|
||||
f0bak = signal.medfilt(f0bak, 3)
|
||||
f0bak *= pow(2, f0_up_key / 12)
|
||||
return self.get_f0_post(f0bak)
|
||||
|
||||
def get_f0_crepe(self, x, f0_up_key):
|
||||
if "privateuseone" in str(self.device): ###不支持dml,cpu又太慢用不成,拿fcpe顶替
|
||||
if "privateuseone" in str(
|
||||
self.device
|
||||
): ###不支持dml,cpu又太慢用不成,拿fcpe顶替
|
||||
return self.get_f0(x, f0_up_key, 1, "fcpe")
|
||||
# printt("using crepe,device:%s"%self.device)
|
||||
f0, pd = torchcrepe.predict(
|
||||
|
@ -9,4 +9,5 @@ torchgate imports all the functions from PyTorch, and in addition provides:
|
||||
TorchGating --- A PyTorch module that applies a spectral gate to an input signal
|
||||
|
||||
"""
|
||||
|
||||
from .torchgate import TorchGate
|
||||
|
Loading…
Reference in New Issue
Block a user