mirror of
https://github.com/svc-develop-team/so-vits-svc.git
synced 2025-01-08 11:57:43 +08:00
chore: code cleanup by ruff fix
This commit is contained in:
parent
88be2098fd
commit
a5f0e911ed
@ -1 +1,4 @@
|
|||||||
|
select = ["E", "F", "I"]
|
||||||
|
|
||||||
|
# Never enforce `E501` (line length violations).
|
||||||
|
ignore = ["E501"]
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
import torch
|
import torch
|
||||||
from sklearn.cluster import KMeans
|
from sklearn.cluster import KMeans
|
||||||
|
|
||||||
|
|
||||||
def get_cluster_model(ckpt_path):
|
def get_cluster_model(ckpt_path):
|
||||||
checkpoint = torch.load(ckpt_path)
|
checkpoint = torch.load(ckpt_path)
|
||||||
kmeans_dict = {}
|
kmeans_dict = {}
|
||||||
|
@ -1,7 +1,11 @@
|
|||||||
import torch,pynvml
|
|
||||||
from torch.nn.functional import normalize
|
|
||||||
from time import time
|
from time import time
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import pynvml
|
||||||
|
import torch
|
||||||
|
from torch.nn.functional import normalize
|
||||||
|
|
||||||
|
|
||||||
# device=torch.device("cuda:0")
|
# device=torch.device("cuda:0")
|
||||||
def _kpp(data: torch.Tensor, k: int, sample_size: int = -1):
|
def _kpp(data: torch.Tensor, k: int, sample_size: int = -1):
|
||||||
""" Picks k points in the data based on the kmeans++ method.
|
""" Picks k points in the data based on the kmeans++ method.
|
||||||
|
@ -1,17 +1,17 @@
|
|||||||
import time
|
|
||||||
import tqdm
|
|
||||||
import os
|
|
||||||
from pathlib import Path
|
|
||||||
import logging
|
|
||||||
import argparse
|
import argparse
|
||||||
from kmeans import KMeansGPU
|
import logging
|
||||||
import torch
|
import os
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from sklearn.cluster import KMeans,MiniBatchKMeans
|
import torch
|
||||||
|
import tqdm
|
||||||
|
from kmeans import KMeansGPU
|
||||||
|
from sklearn.cluster import KMeans, MiniBatchKMeans
|
||||||
|
|
||||||
logging.basicConfig(level=logging.INFO)
|
logging.basicConfig(level=logging.INFO)
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
import torch
|
|
||||||
|
|
||||||
def train_cluster(in_dir, n_clusters, use_minibatch=True, verbose=False,use_gpu=False):#gpu_minibatch真拉,虽然库支持但是也不考虑
|
def train_cluster(in_dir, n_clusters, use_minibatch=True, verbose=False,use_gpu=False):#gpu_minibatch真拉,虽然库支持但是也不考虑
|
||||||
logger.info(f"Loading features from {in_dir}")
|
logger.info(f"Loading features from {in_dir}")
|
||||||
|
@ -1,12 +1,13 @@
|
|||||||
import os
|
import os
|
||||||
import random
|
import random
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
import torch.utils.data
|
import torch.utils.data
|
||||||
|
|
||||||
import utils
|
import utils
|
||||||
from modules.mel_processing import spectrogram_torch, spectrogram_torch
|
from modules.mel_processing import spectrogram_torch
|
||||||
from utils import load_wav_to_torch, load_filepaths_and_text
|
from utils import load_filepaths_and_text, load_wav_to_torch
|
||||||
|
|
||||||
# import h5py
|
# import h5py
|
||||||
|
|
||||||
|
@ -1,12 +1,14 @@
|
|||||||
import os
|
import os
|
||||||
import random
|
import random
|
||||||
import numpy as np
|
|
||||||
import librosa
|
import librosa
|
||||||
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
import random
|
|
||||||
from utils import repeat_expand_2d
|
|
||||||
from tqdm import tqdm
|
|
||||||
from torch.utils.data import Dataset
|
from torch.utils.data import Dataset
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
from utils import repeat_expand_2d
|
||||||
|
|
||||||
|
|
||||||
def traverse_dir(
|
def traverse_dir(
|
||||||
root_dir,
|
root_dir,
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
from collections import deque
|
from collections import deque
|
||||||
from functools import partial
|
from functools import partial
|
||||||
from inspect import isfunction
|
from inspect import isfunction
|
||||||
import torch.nn.functional as F
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
|
import torch.nn.functional as F
|
||||||
from torch import nn
|
from torch import nn
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
@ -254,7 +255,11 @@ class GaussianDiffusion(nn.Module):
|
|||||||
|
|
||||||
if method is not None and infer_speedup > 1:
|
if method is not None and infer_speedup > 1:
|
||||||
if method == 'dpm-solver' or method == 'dpm-solver++':
|
if method == 'dpm-solver' or method == 'dpm-solver++':
|
||||||
from .dpm_solver_pytorch import NoiseScheduleVP, model_wrapper, DPM_Solver
|
from .dpm_solver_pytorch import (
|
||||||
|
DPM_Solver,
|
||||||
|
NoiseScheduleVP,
|
||||||
|
model_wrapper,
|
||||||
|
)
|
||||||
# 1. Define the noise schedule.
|
# 1. Define the noise schedule.
|
||||||
noise_schedule = NoiseScheduleVP(schedule='discrete', betas=self.betas[:t])
|
noise_schedule = NoiseScheduleVP(schedule='discrete', betas=self.betas[:t])
|
||||||
|
|
||||||
@ -332,7 +337,7 @@ class GaussianDiffusion(nn.Module):
|
|||||||
infer_speedup, cond=cond
|
infer_speedup, cond=cond
|
||||||
)
|
)
|
||||||
elif method == 'unipc':
|
elif method == 'unipc':
|
||||||
from .uni_pc import NoiseScheduleVP, model_wrapper, UniPC
|
from .uni_pc import NoiseScheduleVP, UniPC, model_wrapper
|
||||||
# 1. Define the noise schedule.
|
# 1. Define the noise schedule.
|
||||||
noise_schedule = NoiseScheduleVP(schedule='discrete', betas=self.betas[:t])
|
noise_schedule = NoiseScheduleVP(schedule='discrete', betas=self.betas[:t])
|
||||||
|
|
||||||
|
@ -1,14 +1,14 @@
|
|||||||
|
import math
|
||||||
from collections import deque
|
from collections import deque
|
||||||
from functools import partial
|
from functools import partial
|
||||||
from inspect import isfunction
|
from inspect import isfunction
|
||||||
import torch.nn.functional as F
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from torch.nn import Conv1d
|
|
||||||
from torch.nn import Mish
|
|
||||||
import torch
|
import torch
|
||||||
|
import torch.nn.functional as F
|
||||||
from torch import nn
|
from torch import nn
|
||||||
|
from torch.nn import Conv1d, Mish
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
import math
|
|
||||||
|
|
||||||
|
|
||||||
def exists(x):
|
def exists(x):
|
||||||
@ -390,7 +390,11 @@ class GaussianDiffusion(nn.Module):
|
|||||||
|
|
||||||
if method is not None and infer_speedup > 1:
|
if method is not None and infer_speedup > 1:
|
||||||
if method == 'dpm-solver':
|
if method == 'dpm-solver':
|
||||||
from .dpm_solver_pytorch import NoiseScheduleVP, model_wrapper, DPM_Solver
|
from .dpm_solver_pytorch import (
|
||||||
|
DPM_Solver,
|
||||||
|
NoiseScheduleVP,
|
||||||
|
model_wrapper,
|
||||||
|
)
|
||||||
# 1. Define the noise schedule.
|
# 1. Define the noise schedule.
|
||||||
noise_schedule = NoiseScheduleVP(schedule='discrete', betas=self.betas[:t])
|
noise_schedule = NoiseScheduleVP(schedule='discrete', betas=self.betas[:t])
|
||||||
|
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
import torch
|
import torch
|
||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
|
|
||||||
from diffusion.unit2mel import load_model_vocoder
|
from diffusion.unit2mel import load_model_vocoder
|
||||||
|
|
||||||
|
|
||||||
|
@ -2,14 +2,16 @@
|
|||||||
author: wayn391@mastertones
|
author: wayn391@mastertones
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
import datetime
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
import yaml
|
|
||||||
import datetime
|
|
||||||
import torch
|
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
|
import torch
|
||||||
|
import yaml
|
||||||
from torch.utils.tensorboard import SummaryWriter
|
from torch.utils.tensorboard import SummaryWriter
|
||||||
|
|
||||||
|
|
||||||
class Saver(object):
|
class Saver(object):
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
|
@ -1,7 +1,9 @@
|
|||||||
import os
|
|
||||||
import yaml
|
|
||||||
import json
|
import json
|
||||||
|
import os
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
|
||||||
def traverse_dir(
|
def traverse_dir(
|
||||||
root_dir,
|
root_dir,
|
||||||
|
@ -1,10 +1,12 @@
|
|||||||
from diffusion_onnx import GaussianDiffusion
|
|
||||||
import os
|
import os
|
||||||
import yaml
|
|
||||||
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
import torch.nn as nn
|
import torch.nn as nn
|
||||||
import numpy as np
|
|
||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
|
import yaml
|
||||||
|
from diffusion_onnx import GaussianDiffusion
|
||||||
|
|
||||||
|
|
||||||
class DotDict(dict):
|
class DotDict(dict):
|
||||||
def __getattr__(*args):
|
def __getattr__(*args):
|
||||||
|
@ -1,12 +1,15 @@
|
|||||||
import time
|
import time
|
||||||
|
|
||||||
|
import librosa
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
import librosa
|
|
||||||
from diffusion.logger.saver import Saver
|
|
||||||
from diffusion.logger import utils
|
|
||||||
from torch import autocast
|
from torch import autocast
|
||||||
from torch.cuda.amp import GradScaler
|
from torch.cuda.amp import GradScaler
|
||||||
|
|
||||||
|
from diffusion.logger import utils
|
||||||
|
from diffusion.logger.saver import Saver
|
||||||
|
|
||||||
|
|
||||||
def test(args, model, vocoder, loader_test, saver):
|
def test(args, model, vocoder, loader_test, saver):
|
||||||
print(' [*] testing...')
|
print(' [*] testing...')
|
||||||
model.eval()
|
model.eval()
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
import torch
|
|
||||||
import math
|
import math
|
||||||
|
|
||||||
|
import torch
|
||||||
|
|
||||||
|
|
||||||
class NoiseScheduleVP:
|
class NoiseScheduleVP:
|
||||||
def __init__(
|
def __init__(
|
||||||
|
@ -1,11 +1,14 @@
|
|||||||
import os
|
import os
|
||||||
import yaml
|
|
||||||
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
import torch.nn as nn
|
import torch.nn as nn
|
||||||
import numpy as np
|
import yaml
|
||||||
|
|
||||||
from .diffusion import GaussianDiffusion
|
from .diffusion import GaussianDiffusion
|
||||||
from .wavenet import WaveNet
|
|
||||||
from .vocoder import Vocoder
|
from .vocoder import Vocoder
|
||||||
|
from .wavenet import WaveNet
|
||||||
|
|
||||||
|
|
||||||
class DotDict(dict):
|
class DotDict(dict):
|
||||||
def __getattr__(*args):
|
def __getattr__(*args):
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
import torch
|
import torch
|
||||||
from vdecoder.nsf_hifigan.nvSTFT import STFT
|
|
||||||
from vdecoder.nsf_hifigan.models import load_model,load_config
|
|
||||||
from torchaudio.transforms import Resample
|
from torchaudio.transforms import Resample
|
||||||
|
|
||||||
|
from vdecoder.nsf_hifigan.models import load_config, load_model
|
||||||
|
from vdecoder.nsf_hifigan.nvSTFT import STFT
|
||||||
|
|
||||||
|
|
||||||
class Vocoder:
|
class Vocoder:
|
||||||
def __init__(self, vocoder_type, vocoder_ckpt, device = None):
|
def __init__(self, vocoder_type, vocoder_ckpt, device = None):
|
||||||
if device is None:
|
if device is None:
|
||||||
|
@ -7,7 +7,7 @@ import torchaudio
|
|||||||
from flask import Flask, request, send_file
|
from flask import Flask, request, send_file
|
||||||
from flask_cors import CORS
|
from flask_cors import CORS
|
||||||
|
|
||||||
from inference.infer_tool import Svc, RealTimeVC
|
from inference.infer_tool import RealTimeVC, Svc
|
||||||
|
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
|
|
||||||
|
@ -1,10 +1,10 @@
|
|||||||
import io
|
import io
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import soundfile
|
import soundfile
|
||||||
from flask import Flask, request, send_file
|
from flask import Flask, request, send_file
|
||||||
|
|
||||||
from inference import infer_tool
|
from inference import infer_tool, slicer
|
||||||
from inference import slicer
|
|
||||||
|
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
|
|
||||||
|
@ -1,15 +1,16 @@
|
|||||||
|
import gc
|
||||||
import hashlib
|
import hashlib
|
||||||
import io
|
import io
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
import pickle
|
||||||
import time
|
import time
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from inference import slicer
|
|
||||||
import gc
|
|
||||||
|
|
||||||
import librosa
|
import librosa
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
# import onnxruntime
|
# import onnxruntime
|
||||||
import soundfile
|
import soundfile
|
||||||
import torch
|
import torch
|
||||||
@ -17,10 +18,9 @@ import torchaudio
|
|||||||
|
|
||||||
import cluster
|
import cluster
|
||||||
import utils
|
import utils
|
||||||
from models import SynthesizerTrn
|
|
||||||
import pickle
|
|
||||||
|
|
||||||
from diffusion.unit2mel import load_model_vocoder
|
from diffusion.unit2mel import load_model_vocoder
|
||||||
|
from inference import slicer
|
||||||
|
from models import SynthesizerTrn
|
||||||
|
|
||||||
logging.getLogger('matplotlib').setLevel(logging.WARNING)
|
logging.getLogger('matplotlib').setLevel(logging.WARNING)
|
||||||
|
|
||||||
|
@ -1,16 +1,18 @@
|
|||||||
|
import io
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import io
|
|
||||||
import librosa
|
import librosa
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from inference import slicer
|
|
||||||
import parselmouth
|
import parselmouth
|
||||||
import soundfile
|
import soundfile
|
||||||
import torch
|
import torch
|
||||||
import torchaudio
|
import torchaudio
|
||||||
|
|
||||||
import utils
|
import utils
|
||||||
|
from inference import slicer
|
||||||
from models import SynthesizerTrn
|
from models import SynthesizerTrn
|
||||||
|
|
||||||
logging.getLogger('numba').setLevel(logging.WARNING)
|
logging.getLogger('numba').setLevel(logging.WARNING)
|
||||||
logging.getLogger('matplotlib').setLevel(logging.WARNING)
|
logging.getLogger('matplotlib').setLevel(logging.WARNING)
|
||||||
|
|
||||||
|
@ -1,8 +1,10 @@
|
|||||||
import logging
|
import logging
|
||||||
from spkmix import spk_mix_map
|
|
||||||
import soundfile
|
import soundfile
|
||||||
|
|
||||||
from inference import infer_tool
|
from inference import infer_tool
|
||||||
from inference.infer_tool import Svc
|
from inference.infer_tool import Svc
|
||||||
|
from spkmix import spk_mix_map
|
||||||
|
|
||||||
logging.getLogger('numba').setLevel(logging.WARNING)
|
logging.getLogger('numba').setLevel(logging.WARNING)
|
||||||
chunks_dict = infer_tool.read_temp("inference/chunks_temp.json")
|
chunks_dict = infer_tool.read_temp("inference/chunks_temp.json")
|
||||||
|
@ -1,18 +1,17 @@
|
|||||||
import torch
|
import torch
|
||||||
from torch import nn
|
from torch import nn
|
||||||
|
from torch.nn import Conv1d, Conv2d
|
||||||
from torch.nn import functional as F
|
from torch.nn import functional as F
|
||||||
|
from torch.nn.utils import spectral_norm, weight_norm
|
||||||
|
|
||||||
import modules.attentions as attentions
|
import modules.attentions as attentions
|
||||||
import modules.commons as commons
|
import modules.commons as commons
|
||||||
import modules.modules as modules
|
import modules.modules as modules
|
||||||
|
|
||||||
from torch.nn import Conv1d, Conv2d
|
|
||||||
from torch.nn.utils import weight_norm, spectral_norm
|
|
||||||
|
|
||||||
import utils
|
import utils
|
||||||
from modules.commons import get_padding
|
from modules.commons import get_padding
|
||||||
from utils import f0_to_coarse
|
from utils import f0_to_coarse
|
||||||
|
|
||||||
|
|
||||||
class ResidualCouplingBlock(nn.Module):
|
class ResidualCouplingBlock(nn.Module):
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
channels,
|
channels,
|
||||||
|
@ -1,7 +1,9 @@
|
|||||||
from modules.F0Predictor.F0Predictor import F0Predictor
|
|
||||||
from modules.F0Predictor.crepe import CrepePitchExtractor
|
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
|
from modules.F0Predictor.crepe import CrepePitchExtractor
|
||||||
|
from modules.F0Predictor.F0Predictor import F0Predictor
|
||||||
|
|
||||||
|
|
||||||
class CrepeF0Predictor(F0Predictor):
|
class CrepeF0Predictor(F0Predictor):
|
||||||
def __init__(self,hop_length=512,f0_min=50,f0_max=1100,device=None,sampling_rate=44100,threshold=0.05,model="full"):
|
def __init__(self,hop_length=512,f0_min=50,f0_max=1100,device=None,sampling_rate=44100,threshold=0.05,model="full"):
|
||||||
self.F0Creper = CrepePitchExtractor(hop_length=hop_length,f0_min=f0_min,f0_max=f0_max,device=device,threshold=threshold,model=model)
|
self.F0Creper = CrepePitchExtractor(hop_length=hop_length,f0_min=f0_min,f0_max=f0_max,device=device,threshold=threshold,model=model)
|
||||||
|
@ -1,6 +1,8 @@
|
|||||||
from modules.F0Predictor.F0Predictor import F0Predictor
|
|
||||||
import pyworld
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import pyworld
|
||||||
|
|
||||||
|
from modules.F0Predictor.F0Predictor import F0Predictor
|
||||||
|
|
||||||
|
|
||||||
class DioF0Predictor(F0Predictor):
|
class DioF0Predictor(F0Predictor):
|
||||||
def __init__(self,hop_length=512,f0_min=50,f0_max=1100,sampling_rate=44100):
|
def __init__(self,hop_length=512,f0_min=50,f0_max=1100,sampling_rate=44100):
|
||||||
|
@ -1,6 +1,8 @@
|
|||||||
from modules.F0Predictor.F0Predictor import F0Predictor
|
|
||||||
import pyworld
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import pyworld
|
||||||
|
|
||||||
|
from modules.F0Predictor.F0Predictor import F0Predictor
|
||||||
|
|
||||||
|
|
||||||
class HarvestF0Predictor(F0Predictor):
|
class HarvestF0Predictor(F0Predictor):
|
||||||
def __init__(self,hop_length=512,f0_min=50,f0_max=1100,sampling_rate=44100):
|
def __init__(self,hop_length=512,f0_min=50,f0_max=1100,sampling_rate=44100):
|
||||||
|
@ -1,6 +1,8 @@
|
|||||||
from modules.F0Predictor.F0Predictor import F0Predictor
|
|
||||||
import parselmouth
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import parselmouth
|
||||||
|
|
||||||
|
from modules.F0Predictor.F0Predictor import F0Predictor
|
||||||
|
|
||||||
|
|
||||||
class PMF0Predictor(F0Predictor):
|
class PMF0Predictor(F0Predictor):
|
||||||
def __init__(self,hop_length=512,f0_min=50,f0_max=1100,sampling_rate=44100):
|
def __init__(self,hop_length=512,f0_min=50,f0_max=1100,sampling_rate=44100):
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
from typing import Optional,Union
|
from typing import Optional, Union
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from typing import Literal
|
from typing import Literal
|
||||||
except Exception:
|
except Exception:
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
import math
|
import math
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
from torch import nn
|
from torch import nn
|
||||||
from torch.nn import functional as F
|
from torch.nn import functional as F
|
||||||
|
@ -1,7 +1,9 @@
|
|||||||
import math
|
import math
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
from torch.nn import functional as F
|
from torch.nn import functional as F
|
||||||
|
|
||||||
|
|
||||||
def slice_pitch_segments(x, ids_str, segment_size=4):
|
def slice_pitch_segments(x, ids_str, segment_size=4):
|
||||||
ret = torch.zeros_like(x[:, :segment_size])
|
ret = torch.zeros_like(x[:, :segment_size])
|
||||||
for i in range(x.size(0)):
|
for i in range(x.size(0)):
|
||||||
|
@ -1,10 +1,12 @@
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
from vdecoder.nsf_hifigan.nvSTFT import STFT
|
|
||||||
from vdecoder.nsf_hifigan.models import load_model
|
|
||||||
from torchaudio.transforms import Resample
|
from torchaudio.transforms import Resample
|
||||||
|
|
||||||
|
from vdecoder.nsf_hifigan.models import load_model
|
||||||
|
from vdecoder.nsf_hifigan.nvSTFT import STFT
|
||||||
|
|
||||||
|
|
||||||
class Enhancer:
|
class Enhancer:
|
||||||
def __init__(self, enhancer_type, enhancer_ckpt, device=None):
|
def __init__(self, enhancer_type, enhancer_ckpt, device=None):
|
||||||
if device is None:
|
if device is None:
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
import torch
|
import torch
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def feature_loss(fmap_r, fmap_g):
|
def feature_loss(fmap_r, fmap_g):
|
||||||
|
@ -1,13 +1,11 @@
|
|||||||
import torch
|
import torch
|
||||||
from torch import nn
|
from torch import nn
|
||||||
from torch.nn import functional as F
|
|
||||||
|
|
||||||
from torch.nn import Conv1d
|
from torch.nn import Conv1d
|
||||||
from torch.nn.utils import weight_norm, remove_weight_norm
|
from torch.nn import functional as F
|
||||||
|
from torch.nn.utils import remove_weight_norm, weight_norm
|
||||||
|
|
||||||
import modules.commons as commons
|
import modules.commons as commons
|
||||||
from modules.commons import init_weights, get_padding
|
from modules.commons import get_padding, init_weights
|
||||||
|
|
||||||
|
|
||||||
LRELU_SLOPE = 0.1
|
LRELU_SLOPE = 0.1
|
||||||
|
|
||||||
|
@ -1,6 +1,8 @@
|
|||||||
import torch
|
import torch
|
||||||
from onnxexport.model_onnx import SynthesizerTrn
|
|
||||||
import utils
|
import utils
|
||||||
|
from onnxexport.model_onnx import SynthesizerTrn
|
||||||
|
|
||||||
|
|
||||||
def main(NetExport):
|
def main(NetExport):
|
||||||
path = "SoVits4.0"
|
path = "SoVits4.0"
|
||||||
|
@ -1,8 +1,11 @@
|
|||||||
import torch
|
|
||||||
from onnxexport.model_onnx_speaker_mix import SynthesizerTrn
|
|
||||||
import utils
|
|
||||||
import json
|
import json
|
||||||
|
|
||||||
|
import torch
|
||||||
|
|
||||||
|
import utils
|
||||||
|
from onnxexport.model_onnx_speaker_mix import SynthesizerTrn
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
path = "crs"
|
path = "crs"
|
||||||
|
|
||||||
|
@ -1,18 +1,16 @@
|
|||||||
import torch
|
import torch
|
||||||
from torch import nn
|
from torch import nn
|
||||||
|
from torch.nn import Conv1d, Conv2d
|
||||||
from torch.nn import functional as F
|
from torch.nn import functional as F
|
||||||
|
from torch.nn.utils import spectral_norm, weight_norm
|
||||||
|
|
||||||
import modules.attentions as attentions
|
import modules.attentions as attentions
|
||||||
import modules.commons as commons
|
import modules.commons as commons
|
||||||
import modules.modules as modules
|
import modules.modules as modules
|
||||||
|
|
||||||
from torch.nn import Conv1d, Conv2d
|
|
||||||
from torch.nn.utils import weight_norm, spectral_norm
|
|
||||||
|
|
||||||
import utils
|
import utils
|
||||||
from modules.commons import get_padding
|
from modules.commons import get_padding
|
||||||
from vdecoder.hifigan.models import Generator
|
|
||||||
from utils import f0_to_coarse
|
from utils import f0_to_coarse
|
||||||
|
from vdecoder.hifigan.models import Generator
|
||||||
|
|
||||||
|
|
||||||
class ResidualCouplingBlock(nn.Module):
|
class ResidualCouplingBlock(nn.Module):
|
||||||
|
@ -4,10 +4,9 @@ from torch.nn import functional as F
|
|||||||
|
|
||||||
import modules.attentions as attentions
|
import modules.attentions as attentions
|
||||||
import modules.modules as modules
|
import modules.modules as modules
|
||||||
|
|
||||||
|
|
||||||
from utils import f0_to_coarse
|
from utils import f0_to_coarse
|
||||||
|
|
||||||
|
|
||||||
class ResidualCouplingBlock(nn.Module):
|
class ResidualCouplingBlock(nn.Module):
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
channels,
|
channels,
|
||||||
|
@ -1,11 +1,11 @@
|
|||||||
import os
|
|
||||||
import argparse
|
import argparse
|
||||||
|
import json
|
||||||
|
import os
|
||||||
import re
|
import re
|
||||||
|
import wave
|
||||||
|
from random import shuffle
|
||||||
|
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
from random import shuffle
|
|
||||||
import json
|
|
||||||
import wave
|
|
||||||
|
|
||||||
import diffusion.logger.utils as du
|
import diffusion.logger.utils as du
|
||||||
|
|
||||||
|
@ -1,19 +1,20 @@
|
|||||||
import os
|
|
||||||
import utils
|
|
||||||
import torch
|
|
||||||
import random
|
|
||||||
import librosa
|
|
||||||
import logging
|
|
||||||
import argparse
|
import argparse
|
||||||
|
import logging
|
||||||
import multiprocessing
|
import multiprocessing
|
||||||
import numpy as np
|
import os
|
||||||
import diffusion.logger.utils as du
|
import random
|
||||||
|
|
||||||
from glob import glob
|
|
||||||
from tqdm import tqdm
|
|
||||||
from random import shuffle
|
|
||||||
from diffusion.vocoder import Vocoder
|
|
||||||
from concurrent.futures import ProcessPoolExecutor
|
from concurrent.futures import ProcessPoolExecutor
|
||||||
|
from glob import glob
|
||||||
|
from random import shuffle
|
||||||
|
|
||||||
|
import librosa
|
||||||
|
import numpy as np
|
||||||
|
import torch
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
import diffusion.logger.utils as du
|
||||||
|
import utils
|
||||||
|
from diffusion.vocoder import Vocoder
|
||||||
from modules.mel_processing import spectrogram_torch
|
from modules.mel_processing import spectrogram_torch
|
||||||
|
|
||||||
logging.getLogger("numba").setLevel(logging.WARNING)
|
logging.getLogger("numba").setLevel(logging.WARNING)
|
||||||
|
@ -1,10 +1,11 @@
|
|||||||
import os
|
|
||||||
import argparse
|
import argparse
|
||||||
import librosa
|
|
||||||
import numpy as np
|
|
||||||
import concurrent.futures
|
import concurrent.futures
|
||||||
|
import os
|
||||||
from concurrent.futures import ProcessPoolExecutor
|
from concurrent.futures import ProcessPoolExecutor
|
||||||
from multiprocessing import cpu_count
|
from multiprocessing import cpu_count
|
||||||
|
|
||||||
|
import librosa
|
||||||
|
import numpy as np
|
||||||
from scipy.io import wavfile
|
from scipy.io import wavfile
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
19
train.py
19
train.py
@ -6,27 +6,24 @@ logging.getLogger('matplotlib').setLevel(logging.WARNING)
|
|||||||
logging.getLogger('numba').setLevel(logging.WARNING)
|
logging.getLogger('numba').setLevel(logging.WARNING)
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
|
import torch.distributed as dist
|
||||||
|
import torch.multiprocessing as mp
|
||||||
|
from torch.cuda.amp import GradScaler, autocast
|
||||||
from torch.nn import functional as F
|
from torch.nn import functional as F
|
||||||
|
from torch.nn.parallel import DistributedDataParallel as DDP
|
||||||
from torch.utils.data import DataLoader
|
from torch.utils.data import DataLoader
|
||||||
from torch.utils.tensorboard import SummaryWriter
|
from torch.utils.tensorboard import SummaryWriter
|
||||||
import torch.multiprocessing as mp
|
|
||||||
import torch.distributed as dist
|
|
||||||
from torch.nn.parallel import DistributedDataParallel as DDP
|
|
||||||
from torch.cuda.amp import autocast, GradScaler
|
|
||||||
|
|
||||||
import modules.commons as commons
|
import modules.commons as commons
|
||||||
import utils
|
import utils
|
||||||
from data_utils import TextAudioSpeakerLoader, TextAudioCollate
|
from data_utils import TextAudioCollate, TextAudioSpeakerLoader
|
||||||
from models import (
|
from models import (
|
||||||
SynthesizerTrn,
|
|
||||||
MultiPeriodDiscriminator,
|
MultiPeriodDiscriminator,
|
||||||
|
SynthesizerTrn,
|
||||||
)
|
)
|
||||||
from modules.losses import (
|
from modules.losses import discriminator_loss, feature_loss, generator_loss, kl_loss
|
||||||
kl_loss,
|
|
||||||
generator_loss, discriminator_loss, feature_loss
|
|
||||||
)
|
|
||||||
|
|
||||||
from modules.mel_processing import mel_spectrogram_torch, spec_to_mel_torch
|
from modules.mel_processing import mel_spectrogram_torch, spec_to_mel_torch
|
||||||
|
|
||||||
torch.backends.cudnn.benchmark = True
|
torch.backends.cudnn.benchmark = True
|
||||||
|
@ -1,8 +1,10 @@
|
|||||||
import argparse
|
import argparse
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
from torch.optim import lr_scheduler
|
from torch.optim import lr_scheduler
|
||||||
from diffusion.logger import utils
|
|
||||||
from diffusion.data_loaders import get_data_loaders
|
from diffusion.data_loaders import get_data_loaders
|
||||||
|
from diffusion.logger import utils
|
||||||
from diffusion.solver import train
|
from diffusion.solver import train
|
||||||
from diffusion.unit2mel import Unit2Mel
|
from diffusion.unit2mel import Unit2Mel
|
||||||
from diffusion.vocoder import Vocoder
|
from diffusion.vocoder import Vocoder
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
import utils
|
|
||||||
import pickle
|
|
||||||
import os
|
|
||||||
import argparse
|
import argparse
|
||||||
|
import os
|
||||||
|
import pickle
|
||||||
|
|
||||||
|
import utils
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
|
15
utils.py
15
utils.py
@ -1,17 +1,18 @@
|
|||||||
import os
|
|
||||||
import glob
|
|
||||||
import re
|
|
||||||
import sys
|
|
||||||
import argparse
|
import argparse
|
||||||
import logging
|
import glob
|
||||||
import json
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import re
|
||||||
import subprocess
|
import subprocess
|
||||||
|
import sys
|
||||||
|
|
||||||
|
import faiss
|
||||||
import librosa
|
import librosa
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from scipy.io.wavfile import read
|
|
||||||
import torch
|
import torch
|
||||||
|
from scipy.io.wavfile import read
|
||||||
from torch.nn import functional as F
|
from torch.nn import functional as F
|
||||||
import faiss
|
|
||||||
|
|
||||||
MATPLOTLIB_FLAG = False
|
MATPLOTLIB_FLAG = False
|
||||||
|
|
||||||
|
@ -1,13 +1,15 @@
|
|||||||
import os
|
|
||||||
import json
|
import json
|
||||||
from .env import AttrDict
|
import os
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
import torch.nn.functional as F
|
|
||||||
import torch.nn as nn
|
import torch.nn as nn
|
||||||
from torch.nn import Conv1d, ConvTranspose1d, AvgPool1d, Conv2d
|
import torch.nn.functional as F
|
||||||
from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm
|
from torch.nn import AvgPool1d, Conv1d, Conv2d, ConvTranspose1d
|
||||||
from .utils import init_weights, get_padding
|
from torch.nn.utils import remove_weight_norm, spectral_norm, weight_norm
|
||||||
|
|
||||||
|
from .env import AttrDict
|
||||||
|
from .utils import get_padding, init_weights
|
||||||
|
|
||||||
LRELU_SLOPE = 0.1
|
LRELU_SLOPE = 0.1
|
||||||
|
|
||||||
|
@ -1,11 +1,13 @@
|
|||||||
import os
|
import os
|
||||||
os.environ["LRU_CACHE_CAPACITY"] = "3"
|
|
||||||
|
import librosa
|
||||||
|
import numpy as np
|
||||||
|
import soundfile as sf
|
||||||
import torch
|
import torch
|
||||||
import torch.utils.data
|
import torch.utils.data
|
||||||
import numpy as np
|
|
||||||
import librosa
|
|
||||||
from librosa.filters import mel as librosa_mel_fn
|
from librosa.filters import mel as librosa_mel_fn
|
||||||
import soundfile as sf
|
|
||||||
|
os.environ["LRU_CACHE_CAPACITY"] = "3"
|
||||||
|
|
||||||
def load_wav_to_torch(full_path, target_sr=None, return_empty_on_exception=False):
|
def load_wav_to_torch(full_path, target_sr=None, return_empty_on_exception=False):
|
||||||
sampling_rate = None
|
sampling_rate = None
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
import glob
|
import glob
|
||||||
import os
|
import os
|
||||||
import torch
|
|
||||||
from torch.nn.utils import weight_norm
|
|
||||||
# matplotlib.use("Agg")
|
# matplotlib.use("Agg")
|
||||||
import matplotlib.pylab as plt
|
import matplotlib.pylab as plt
|
||||||
|
import torch
|
||||||
|
from torch.nn.utils import weight_norm
|
||||||
|
|
||||||
|
|
||||||
def plot_spectrogram(spectrogram):
|
def plot_spectrogram(spectrogram):
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
# Adapted from https://github.com/junjun3518/alias-free-torch under the Apache License 2.0
|
# Adapted from https://github.com/junjun3518/alias-free-torch under the Apache License 2.0
|
||||||
# LICENSE is in incl_licenses directory.
|
# LICENSE is in incl_licenses directory.
|
||||||
|
|
||||||
|
from .act import *
|
||||||
from .filter import *
|
from .filter import *
|
||||||
from .resample import *
|
from .resample import *
|
||||||
from .act import *
|
|
@ -4,10 +4,10 @@
|
|||||||
import torch
|
import torch
|
||||||
import torch.nn as nn
|
import torch.nn as nn
|
||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
|
from torch import pow, sin
|
||||||
from torch import sin, pow
|
|
||||||
from torch.nn import Parameter
|
from torch.nn import Parameter
|
||||||
from .resample import UpSample1d, DownSample1d
|
|
||||||
|
from .resample import DownSample1d, UpSample1d
|
||||||
|
|
||||||
|
|
||||||
class Activation1d(nn.Module):
|
class Activation1d(nn.Module):
|
||||||
|
@ -1,10 +1,11 @@
|
|||||||
# Adapted from https://github.com/junjun3518/alias-free-torch under the Apache License 2.0
|
# Adapted from https://github.com/junjun3518/alias-free-torch under the Apache License 2.0
|
||||||
# LICENSE is in incl_licenses directory.
|
# LICENSE is in incl_licenses directory.
|
||||||
|
|
||||||
|
import math
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
import torch.nn as nn
|
import torch.nn as nn
|
||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
import math
|
|
||||||
|
|
||||||
if 'sinc' in dir(torch):
|
if 'sinc' in dir(torch):
|
||||||
sinc = torch.sinc
|
sinc = torch.sinc
|
||||||
|
@ -3,8 +3,8 @@
|
|||||||
|
|
||||||
import torch.nn as nn
|
import torch.nn as nn
|
||||||
from torch.nn import functional as F
|
from torch.nn import functional as F
|
||||||
from .filter import LowPassFilter1d
|
|
||||||
from .filter import kaiser_sinc_filter1d
|
from .filter import LowPassFilter1d, kaiser_sinc_filter1d
|
||||||
|
|
||||||
|
|
||||||
class UpSample1d(nn.Module):
|
class UpSample1d(nn.Module):
|
||||||
|
@ -1,15 +1,18 @@
|
|||||||
import os
|
|
||||||
import json
|
import json
|
||||||
from .env import AttrDict
|
import os
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
import torch.nn.functional as F
|
|
||||||
import torch.nn as nn
|
import torch.nn as nn
|
||||||
from torch.nn import Conv1d, ConvTranspose1d, AvgPool1d, Conv2d
|
import torch.nn.functional as F
|
||||||
from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm
|
from torch.nn import AvgPool1d, Conv1d, Conv2d, ConvTranspose1d
|
||||||
from .utils import init_weights, get_padding
|
from torch.nn.utils import remove_weight_norm, spectral_norm, weight_norm
|
||||||
|
|
||||||
from vdecoder.hifiganwithsnake.alias.act import SnakeAlias
|
from vdecoder.hifiganwithsnake.alias.act import SnakeAlias
|
||||||
|
|
||||||
|
from .env import AttrDict
|
||||||
|
from .utils import get_padding, init_weights
|
||||||
|
|
||||||
LRELU_SLOPE = 0.1
|
LRELU_SLOPE = 0.1
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,11 +1,13 @@
|
|||||||
import os
|
import os
|
||||||
os.environ["LRU_CACHE_CAPACITY"] = "3"
|
|
||||||
|
import librosa
|
||||||
|
import numpy as np
|
||||||
|
import soundfile as sf
|
||||||
import torch
|
import torch
|
||||||
import torch.utils.data
|
import torch.utils.data
|
||||||
import numpy as np
|
|
||||||
import librosa
|
|
||||||
from librosa.filters import mel as librosa_mel_fn
|
from librosa.filters import mel as librosa_mel_fn
|
||||||
import soundfile as sf
|
|
||||||
|
os.environ["LRU_CACHE_CAPACITY"] = "3"
|
||||||
|
|
||||||
def load_wav_to_torch(full_path, target_sr=None, return_empty_on_exception=False):
|
def load_wav_to_torch(full_path, target_sr=None, return_empty_on_exception=False):
|
||||||
sampling_rate = None
|
sampling_rate = None
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
import glob
|
import glob
|
||||||
import os
|
import os
|
||||||
import torch
|
|
||||||
from torch.nn.utils import weight_norm
|
|
||||||
# matplotlib.use("Agg")
|
# matplotlib.use("Agg")
|
||||||
import matplotlib.pylab as plt
|
import matplotlib.pylab as plt
|
||||||
|
import torch
|
||||||
|
from torch.nn.utils import weight_norm
|
||||||
|
|
||||||
|
|
||||||
def plot_spectrogram(spectrogram):
|
def plot_spectrogram(spectrogram):
|
||||||
|
@ -1,13 +1,15 @@
|
|||||||
import os
|
|
||||||
import json
|
import json
|
||||||
from .env import AttrDict
|
import os
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
import torch.nn.functional as F
|
|
||||||
import torch.nn as nn
|
import torch.nn as nn
|
||||||
from torch.nn import Conv1d, ConvTranspose1d, AvgPool1d, Conv2d
|
import torch.nn.functional as F
|
||||||
from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm
|
from torch.nn import AvgPool1d, Conv1d, Conv2d, ConvTranspose1d
|
||||||
from .utils import init_weights, get_padding
|
from torch.nn.utils import remove_weight_norm, spectral_norm, weight_norm
|
||||||
|
|
||||||
|
from .env import AttrDict
|
||||||
|
from .utils import get_padding, init_weights
|
||||||
|
|
||||||
LRELU_SLOPE = 0.1
|
LRELU_SLOPE = 0.1
|
||||||
|
|
||||||
|
@ -1,12 +1,14 @@
|
|||||||
import os
|
import os
|
||||||
os.environ["LRU_CACHE_CAPACITY"] = "3"
|
|
||||||
import torch
|
|
||||||
import torch.utils.data
|
|
||||||
import numpy as np
|
|
||||||
import librosa
|
import librosa
|
||||||
from librosa.filters import mel as librosa_mel_fn
|
import numpy as np
|
||||||
import soundfile as sf
|
import soundfile as sf
|
||||||
|
import torch
|
||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
|
import torch.utils.data
|
||||||
|
from librosa.filters import mel as librosa_mel_fn
|
||||||
|
|
||||||
|
os.environ["LRU_CACHE_CAPACITY"] = "3"
|
||||||
|
|
||||||
def load_wav_to_torch(full_path, target_sr=None, return_empty_on_exception=False):
|
def load_wav_to_torch(full_path, target_sr=None, return_empty_on_exception=False):
|
||||||
sampling_rate = None
|
sampling_rate = None
|
||||||
|
@ -1,10 +1,12 @@
|
|||||||
import glob
|
import glob
|
||||||
import os
|
import os
|
||||||
|
|
||||||
import matplotlib
|
import matplotlib
|
||||||
|
import matplotlib.pylab as plt
|
||||||
import torch
|
import torch
|
||||||
from torch.nn.utils import weight_norm
|
from torch.nn.utils import weight_norm
|
||||||
|
|
||||||
matplotlib.use("Agg")
|
matplotlib.use("Agg")
|
||||||
import matplotlib.pylab as plt
|
|
||||||
|
|
||||||
|
|
||||||
def plot_spectrogram(spectrogram):
|
def plot_spectrogram(spectrogram):
|
||||||
|
@ -1,7 +1,8 @@
|
|||||||
from vencoder.encoder import SpeechEncoder
|
|
||||||
import torch
|
import torch
|
||||||
from fairseq import checkpoint_utils
|
from fairseq import checkpoint_utils
|
||||||
|
|
||||||
|
from vencoder.encoder import SpeechEncoder
|
||||||
|
|
||||||
|
|
||||||
class CNHubertLarge(SpeechEncoder):
|
class CNHubertLarge(SpeechEncoder):
|
||||||
def __init__(self, vec_path="pretrain/chinese-hubert-large-fairseq-ckpt.pt", device=None):
|
def __init__(self, vec_path="pretrain/chinese-hubert-large-fairseq-ckpt.pt", device=None):
|
||||||
|
@ -1,7 +1,8 @@
|
|||||||
from vencoder.encoder import SpeechEncoder
|
|
||||||
import onnxruntime
|
import onnxruntime
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
|
from vencoder.encoder import SpeechEncoder
|
||||||
|
|
||||||
|
|
||||||
class ContentVec256L12_Onnx(SpeechEncoder):
|
class ContentVec256L12_Onnx(SpeechEncoder):
|
||||||
def __init__(self, vec_path="pretrain/vec-256-layer-12.onnx", device=None):
|
def __init__(self, vec_path="pretrain/vec-256-layer-12.onnx", device=None):
|
||||||
|
@ -1,7 +1,8 @@
|
|||||||
from vencoder.encoder import SpeechEncoder
|
|
||||||
import torch
|
import torch
|
||||||
from fairseq import checkpoint_utils
|
from fairseq import checkpoint_utils
|
||||||
|
|
||||||
|
from vencoder.encoder import SpeechEncoder
|
||||||
|
|
||||||
|
|
||||||
class ContentVec256L9(SpeechEncoder):
|
class ContentVec256L9(SpeechEncoder):
|
||||||
def __init__(self, vec_path="pretrain/checkpoint_best_legacy_500.pt", device=None):
|
def __init__(self, vec_path="pretrain/checkpoint_best_legacy_500.pt", device=None):
|
||||||
|
@ -1,7 +1,9 @@
|
|||||||
from vencoder.encoder import SpeechEncoder
|
|
||||||
import onnxruntime
|
import onnxruntime
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
|
from vencoder.encoder import SpeechEncoder
|
||||||
|
|
||||||
|
|
||||||
class ContentVec256L9_Onnx(SpeechEncoder):
|
class ContentVec256L9_Onnx(SpeechEncoder):
|
||||||
def __init__(self, vec_path="pretrain/vec-256-layer-9.onnx", device=None):
|
def __init__(self, vec_path="pretrain/vec-256-layer-9.onnx", device=None):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
|
@ -1,7 +1,8 @@
|
|||||||
from vencoder.encoder import SpeechEncoder
|
|
||||||
import torch
|
import torch
|
||||||
from fairseq import checkpoint_utils
|
from fairseq import checkpoint_utils
|
||||||
|
|
||||||
|
from vencoder.encoder import SpeechEncoder
|
||||||
|
|
||||||
|
|
||||||
class ContentVec768L12(SpeechEncoder):
|
class ContentVec768L12(SpeechEncoder):
|
||||||
def __init__(self, vec_path="pretrain/checkpoint_best_legacy_500.pt", device=None):
|
def __init__(self, vec_path="pretrain/checkpoint_best_legacy_500.pt", device=None):
|
||||||
|
@ -1,7 +1,8 @@
|
|||||||
from vencoder.encoder import SpeechEncoder
|
|
||||||
import onnxruntime
|
import onnxruntime
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
|
from vencoder.encoder import SpeechEncoder
|
||||||
|
|
||||||
|
|
||||||
class ContentVec768L12_Onnx(SpeechEncoder):
|
class ContentVec768L12_Onnx(SpeechEncoder):
|
||||||
def __init__(self, vec_path="pretrain/vec-768-layer-12.onnx", device=None):
|
def __init__(self, vec_path="pretrain/vec-768-layer-12.onnx", device=None):
|
||||||
|
@ -1,7 +1,8 @@
|
|||||||
from vencoder.encoder import SpeechEncoder
|
|
||||||
import onnxruntime
|
import onnxruntime
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
|
from vencoder.encoder import SpeechEncoder
|
||||||
|
|
||||||
|
|
||||||
class ContentVec768L9_Onnx(SpeechEncoder):
|
class ContentVec768L9_Onnx(SpeechEncoder):
|
||||||
def __init__(self,vec_path = "pretrain/vec-768-layer-9.onnx",device=None):
|
def __init__(self,vec_path = "pretrain/vec-768-layer-9.onnx",device=None):
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
from vencoder.encoder import SpeechEncoder
|
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
from vencoder.dphubert.model import wav2vec2_model
|
from vencoder.dphubert.model import wav2vec2_model
|
||||||
|
from vencoder.encoder import SpeechEncoder
|
||||||
|
|
||||||
|
|
||||||
class DPHubert(SpeechEncoder):
|
class DPHubert(SpeechEncoder):
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
from vencoder.encoder import SpeechEncoder
|
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
|
from vencoder.encoder import SpeechEncoder
|
||||||
from vencoder.hubert import hubert_model
|
from vencoder.hubert import hubert_model
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,7 +1,8 @@
|
|||||||
from vencoder.encoder import SpeechEncoder
|
|
||||||
import onnxruntime
|
import onnxruntime
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
|
from vencoder.encoder import SpeechEncoder
|
||||||
|
|
||||||
|
|
||||||
class HubertSoft_Onnx(SpeechEncoder):
|
class HubertSoft_Onnx(SpeechEncoder):
|
||||||
def __init__(self, vec_path="pretrain/hubert-soft.onnx", device=None):
|
def __init__(self, vec_path="pretrain/hubert-soft.onnx", device=None):
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
from vencoder.encoder import SpeechEncoder
|
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
|
from vencoder.encoder import SpeechEncoder
|
||||||
from vencoder.wavlm.WavLM import WavLM, WavLMConfig
|
from vencoder.wavlm.WavLM import WavLM, WavLMConfig
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
from vencoder.encoder import SpeechEncoder
|
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
from vencoder.whisper.model import Whisper, ModelDimensions
|
from vencoder.encoder import SpeechEncoder
|
||||||
from vencoder.whisper.audio import pad_or_trim, log_mel_spectrogram
|
from vencoder.whisper.audio import log_mel_spectrogram, pad_or_trim
|
||||||
|
from vencoder.whisper.model import ModelDimensions, Whisper
|
||||||
|
|
||||||
|
|
||||||
class WhisperPPG(SpeechEncoder):
|
class WhisperPPG(SpeechEncoder):
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
from vencoder.encoder import SpeechEncoder
|
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
from vencoder.whisper.model import Whisper, ModelDimensions
|
from vencoder.encoder import SpeechEncoder
|
||||||
from vencoder.whisper.audio import pad_or_trim, log_mel_spectrogram
|
from vencoder.whisper.audio import log_mel_spectrogram, pad_or_trim
|
||||||
|
from vencoder.whisper.model import ModelDimensions, Whisper
|
||||||
|
|
||||||
|
|
||||||
class WhisperPPGLarge(SpeechEncoder):
|
class WhisperPPGLarge(SpeechEncoder):
|
||||||
|
@ -5,19 +5,19 @@ https://github.com/pytorch/audio/blob/main/torchaudio/models/wav2vec2/components
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import math
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from typing import List, Optional, Tuple
|
from typing import List, Optional, Tuple
|
||||||
import math
|
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
from torch import nn, Tensor
|
from torch import Tensor, nn
|
||||||
from torch.nn import Module
|
from torch.nn import Module
|
||||||
|
|
||||||
from .hardconcrete import HardConcrete
|
from .hardconcrete import HardConcrete
|
||||||
from .pruning_utils import (
|
from .pruning_utils import (
|
||||||
prune_linear_layer,
|
|
||||||
prune_conv1d_layer,
|
prune_conv1d_layer,
|
||||||
prune_layer_norm,
|
prune_layer_norm,
|
||||||
|
prune_linear_layer,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -10,7 +10,7 @@ from typing import Any, Dict
|
|||||||
|
|
||||||
from torch.nn import Module
|
from torch.nn import Module
|
||||||
|
|
||||||
from ..model import wav2vec2_model, Wav2Vec2Model, wavlm_model
|
from ..model import Wav2Vec2Model, wav2vec2_model, wavlm_model
|
||||||
|
|
||||||
_LG = logging.getLogger(__name__)
|
_LG = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
@ -7,26 +7,26 @@
|
|||||||
# https://github.com/pytorch/fairseq
|
# https://github.com/pytorch/fairseq
|
||||||
# --------------------------------------------------------
|
# --------------------------------------------------------
|
||||||
|
|
||||||
import math
|
|
||||||
import logging
|
import logging
|
||||||
|
import math
|
||||||
from typing import List, Optional, Tuple
|
from typing import List, Optional, Tuple
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
import torch.nn as nn
|
import torch.nn as nn
|
||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
from torch.nn import LayerNorm
|
from torch.nn import LayerNorm
|
||||||
|
|
||||||
from vencoder.wavlm.modules import (
|
from vencoder.wavlm.modules import (
|
||||||
Fp32GroupNorm,
|
Fp32GroupNorm,
|
||||||
Fp32LayerNorm,
|
Fp32LayerNorm,
|
||||||
|
GLU_Linear,
|
||||||
GradMultiply,
|
GradMultiply,
|
||||||
MultiheadAttention,
|
MultiheadAttention,
|
||||||
SamePad,
|
SamePad,
|
||||||
init_bert_params,
|
|
||||||
get_activation_fn,
|
|
||||||
TransposeLast,
|
TransposeLast,
|
||||||
GLU_Linear,
|
get_activation_fn,
|
||||||
|
init_bert_params,
|
||||||
)
|
)
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
@ -10,10 +10,11 @@
|
|||||||
import math
|
import math
|
||||||
import warnings
|
import warnings
|
||||||
from typing import Dict, Optional, Tuple
|
from typing import Dict, Optional, Tuple
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
|
import torch.nn.functional as F
|
||||||
from torch import Tensor, nn
|
from torch import Tensor, nn
|
||||||
from torch.nn import Parameter
|
from torch.nn import Parameter
|
||||||
import torch.nn.functional as F
|
|
||||||
|
|
||||||
|
|
||||||
class TransposeLast(nn.Module):
|
class TransposeLast(nn.Module):
|
||||||
|
@ -5,11 +5,10 @@ import ffmpeg
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
|
from librosa.filters import mel as librosa_mel_fn
|
||||||
|
|
||||||
from .utils import exact_div
|
from .utils import exact_div
|
||||||
|
|
||||||
from librosa.filters import mel as librosa_mel_fn
|
|
||||||
|
|
||||||
# hard-coded audio hyperparameters
|
# hard-coded audio hyperparameters
|
||||||
SAMPLE_RATE = 16000
|
SAMPLE_RATE = 16000
|
||||||
N_FFT = 400
|
N_FFT = 400
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from typing import Dict, List, Tuple, Iterable, Optional, Sequence, Union, TYPE_CHECKING
|
from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Sequence, Tuple, Union
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
|
@ -1,14 +1,13 @@
|
|||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import Dict
|
from typing import Dict, Iterable, Optional
|
||||||
from typing import Iterable, Optional
|
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
from torch import Tensor
|
from torch import Tensor, nn
|
||||||
from torch import nn
|
|
||||||
|
|
||||||
from .decoding import detect_language as detect_language_function, decode as decode_function
|
from .decoding import decode as decode_function
|
||||||
|
from .decoding import detect_language as detect_language_function
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
@ -1,7 +1,9 @@
|
|||||||
from google.colab import files
|
|
||||||
import shutil
|
|
||||||
import os
|
|
||||||
import argparse
|
import argparse
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
|
||||||
|
from google.colab import files
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_argument("--type", type=str, required=True, help="type of file to upload")
|
parser.add_argument("--type", type=str, required=True, help="type of file to upload")
|
||||||
|
23
webUI.py
23
webUI.py
@ -1,4 +1,11 @@
|
|||||||
|
import json
|
||||||
|
import logging
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
|
import subprocess
|
||||||
|
import time
|
||||||
|
import traceback
|
||||||
|
from itertools import chain
|
||||||
|
|
||||||
# os.system("wget -P cvec/ https://huggingface.co/spaces/innnky/nanami/resolve/main/checkpoint_best_legacy_500.pt")
|
# os.system("wget -P cvec/ https://huggingface.co/spaces/innnky/nanami/resolve/main/checkpoint_best_legacy_500.pt")
|
||||||
import gradio as gr
|
import gradio as gr
|
||||||
@ -6,20 +13,12 @@ import gradio.processing_utils as gr_pu
|
|||||||
import librosa
|
import librosa
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import soundfile
|
import soundfile
|
||||||
from inference.infer_tool import Svc
|
|
||||||
import logging
|
|
||||||
import re
|
|
||||||
import json
|
|
||||||
|
|
||||||
import subprocess
|
|
||||||
from scipy.io import wavfile
|
|
||||||
import librosa
|
|
||||||
import torch
|
import torch
|
||||||
import time
|
from scipy.io import wavfile
|
||||||
import traceback
|
|
||||||
from itertools import chain
|
|
||||||
from utils import mix_model
|
|
||||||
from compress_model import removeOptimizer
|
from compress_model import removeOptimizer
|
||||||
|
from inference.infer_tool import Svc
|
||||||
|
from utils import mix_model
|
||||||
|
|
||||||
logging.getLogger('numba').setLevel(logging.WARNING)
|
logging.getLogger('numba').setLevel(logging.WARNING)
|
||||||
logging.getLogger('markdown_it').setLevel(logging.WARNING)
|
logging.getLogger('markdown_it').setLevel(logging.WARNING)
|
||||||
|
Loading…
Reference in New Issue
Block a user