mirror of
https://github.com/svc-develop-team/so-vits-svc.git
synced 2025-01-08 11:57:43 +08:00
chore: code cleanup by ruff fix
This commit is contained in:
parent
88be2098fd
commit
a5f0e911ed
@ -1 +1,4 @@
|
||||
select = ["E", "F", "I"]
|
||||
|
||||
# Never enforce `E501` (line length violations).
|
||||
ignore = ["E501"]
|
||||
|
@ -1,6 +1,7 @@
|
||||
import torch
|
||||
from sklearn.cluster import KMeans
|
||||
|
||||
|
||||
def get_cluster_model(ckpt_path):
|
||||
checkpoint = torch.load(ckpt_path)
|
||||
kmeans_dict = {}
|
||||
|
@ -1,7 +1,11 @@
|
||||
import torch,pynvml
|
||||
from torch.nn.functional import normalize
|
||||
from time import time
|
||||
|
||||
import numpy as np
|
||||
import pynvml
|
||||
import torch
|
||||
from torch.nn.functional import normalize
|
||||
|
||||
|
||||
# device=torch.device("cuda:0")
|
||||
def _kpp(data: torch.Tensor, k: int, sample_size: int = -1):
|
||||
""" Picks k points in the data based on the kmeans++ method.
|
||||
|
@ -1,17 +1,17 @@
|
||||
import time
|
||||
import tqdm
|
||||
import os
|
||||
from pathlib import Path
|
||||
import logging
|
||||
import argparse
|
||||
from kmeans import KMeansGPU
|
||||
import torch
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
from sklearn.cluster import KMeans,MiniBatchKMeans
|
||||
import torch
|
||||
import tqdm
|
||||
from kmeans import KMeansGPU
|
||||
from sklearn.cluster import KMeans, MiniBatchKMeans
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
import torch
|
||||
|
||||
def train_cluster(in_dir, n_clusters, use_minibatch=True, verbose=False,use_gpu=False):#gpu_minibatch真拉,虽然库支持但是也不考虑
|
||||
logger.info(f"Loading features from {in_dir}")
|
||||
|
@ -1,12 +1,13 @@
|
||||
import os
|
||||
import random
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.utils.data
|
||||
|
||||
import utils
|
||||
from modules.mel_processing import spectrogram_torch, spectrogram_torch
|
||||
from utils import load_wav_to_torch, load_filepaths_and_text
|
||||
from modules.mel_processing import spectrogram_torch
|
||||
from utils import load_filepaths_and_text, load_wav_to_torch
|
||||
|
||||
# import h5py
|
||||
|
||||
|
@ -1,12 +1,14 @@
|
||||
import os
|
||||
import random
|
||||
import numpy as np
|
||||
|
||||
import librosa
|
||||
import numpy as np
|
||||
import torch
|
||||
import random
|
||||
from utils import repeat_expand_2d
|
||||
from tqdm import tqdm
|
||||
from torch.utils.data import Dataset
|
||||
from tqdm import tqdm
|
||||
|
||||
from utils import repeat_expand_2d
|
||||
|
||||
|
||||
def traverse_dir(
|
||||
root_dir,
|
||||
|
@ -1,9 +1,10 @@
|
||||
from collections import deque
|
||||
from functools import partial
|
||||
from inspect import isfunction
|
||||
import torch.nn.functional as F
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from torch import nn
|
||||
from tqdm import tqdm
|
||||
|
||||
@ -254,7 +255,11 @@ class GaussianDiffusion(nn.Module):
|
||||
|
||||
if method is not None and infer_speedup > 1:
|
||||
if method == 'dpm-solver' or method == 'dpm-solver++':
|
||||
from .dpm_solver_pytorch import NoiseScheduleVP, model_wrapper, DPM_Solver
|
||||
from .dpm_solver_pytorch import (
|
||||
DPM_Solver,
|
||||
NoiseScheduleVP,
|
||||
model_wrapper,
|
||||
)
|
||||
# 1. Define the noise schedule.
|
||||
noise_schedule = NoiseScheduleVP(schedule='discrete', betas=self.betas[:t])
|
||||
|
||||
@ -332,7 +337,7 @@ class GaussianDiffusion(nn.Module):
|
||||
infer_speedup, cond=cond
|
||||
)
|
||||
elif method == 'unipc':
|
||||
from .uni_pc import NoiseScheduleVP, model_wrapper, UniPC
|
||||
from .uni_pc import NoiseScheduleVP, UniPC, model_wrapper
|
||||
# 1. Define the noise schedule.
|
||||
noise_schedule = NoiseScheduleVP(schedule='discrete', betas=self.betas[:t])
|
||||
|
||||
|
@ -1,14 +1,14 @@
|
||||
import math
|
||||
from collections import deque
|
||||
from functools import partial
|
||||
from inspect import isfunction
|
||||
import torch.nn.functional as F
|
||||
|
||||
import numpy as np
|
||||
from torch.nn import Conv1d
|
||||
from torch.nn import Mish
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from torch import nn
|
||||
from torch.nn import Conv1d, Mish
|
||||
from tqdm import tqdm
|
||||
import math
|
||||
|
||||
|
||||
def exists(x):
|
||||
@ -390,7 +390,11 @@ class GaussianDiffusion(nn.Module):
|
||||
|
||||
if method is not None and infer_speedup > 1:
|
||||
if method == 'dpm-solver':
|
||||
from .dpm_solver_pytorch import NoiseScheduleVP, model_wrapper, DPM_Solver
|
||||
from .dpm_solver_pytorch import (
|
||||
DPM_Solver,
|
||||
NoiseScheduleVP,
|
||||
model_wrapper,
|
||||
)
|
||||
# 1. Define the noise schedule.
|
||||
noise_schedule = NoiseScheduleVP(schedule='discrete', betas=self.betas[:t])
|
||||
|
||||
|
@ -1,5 +1,6 @@
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
|
||||
from diffusion.unit2mel import load_model_vocoder
|
||||
|
||||
|
||||
|
@ -2,14 +2,16 @@
|
||||
author: wayn391@mastertones
|
||||
'''
|
||||
|
||||
import datetime
|
||||
import os
|
||||
import time
|
||||
import yaml
|
||||
import datetime
|
||||
import torch
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import torch
|
||||
import yaml
|
||||
from torch.utils.tensorboard import SummaryWriter
|
||||
|
||||
|
||||
class Saver(object):
|
||||
def __init__(
|
||||
self,
|
||||
|
@ -1,7 +1,9 @@
|
||||
import os
|
||||
import yaml
|
||||
import json
|
||||
import os
|
||||
|
||||
import torch
|
||||
import yaml
|
||||
|
||||
|
||||
def traverse_dir(
|
||||
root_dir,
|
||||
|
@ -1,10 +1,12 @@
|
||||
from diffusion_onnx import GaussianDiffusion
|
||||
import os
|
||||
import yaml
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import numpy as np
|
||||
import torch.nn.functional as F
|
||||
import yaml
|
||||
from diffusion_onnx import GaussianDiffusion
|
||||
|
||||
|
||||
class DotDict(dict):
|
||||
def __getattr__(*args):
|
||||
|
@ -1,12 +1,15 @@
|
||||
import time
|
||||
|
||||
import librosa
|
||||
import numpy as np
|
||||
import torch
|
||||
import librosa
|
||||
from diffusion.logger.saver import Saver
|
||||
from diffusion.logger import utils
|
||||
from torch import autocast
|
||||
from torch.cuda.amp import GradScaler
|
||||
|
||||
from diffusion.logger import utils
|
||||
from diffusion.logger.saver import Saver
|
||||
|
||||
|
||||
def test(args, model, vocoder, loader_test, saver):
|
||||
print(' [*] testing...')
|
||||
model.eval()
|
||||
|
@ -1,6 +1,7 @@
|
||||
import torch
|
||||
import math
|
||||
|
||||
import torch
|
||||
|
||||
|
||||
class NoiseScheduleVP:
|
||||
def __init__(
|
||||
|
@ -1,11 +1,14 @@
|
||||
import os
|
||||
import yaml
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import numpy as np
|
||||
import yaml
|
||||
|
||||
from .diffusion import GaussianDiffusion
|
||||
from .wavenet import WaveNet
|
||||
from .vocoder import Vocoder
|
||||
from .wavenet import WaveNet
|
||||
|
||||
|
||||
class DotDict(dict):
|
||||
def __getattr__(*args):
|
||||
|
@ -1,9 +1,10 @@
|
||||
import torch
|
||||
from vdecoder.nsf_hifigan.nvSTFT import STFT
|
||||
from vdecoder.nsf_hifigan.models import load_model,load_config
|
||||
from torchaudio.transforms import Resample
|
||||
|
||||
|
||||
from vdecoder.nsf_hifigan.models import load_config, load_model
|
||||
from vdecoder.nsf_hifigan.nvSTFT import STFT
|
||||
|
||||
|
||||
class Vocoder:
|
||||
def __init__(self, vocoder_type, vocoder_ckpt, device = None):
|
||||
if device is None:
|
||||
|
@ -7,7 +7,7 @@ import torchaudio
|
||||
from flask import Flask, request, send_file
|
||||
from flask_cors import CORS
|
||||
|
||||
from inference.infer_tool import Svc, RealTimeVC
|
||||
from inference.infer_tool import RealTimeVC, Svc
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
|
@ -1,10 +1,10 @@
|
||||
import io
|
||||
|
||||
import numpy as np
|
||||
import soundfile
|
||||
from flask import Flask, request, send_file
|
||||
|
||||
from inference import infer_tool
|
||||
from inference import slicer
|
||||
from inference import infer_tool, slicer
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
|
@ -1,15 +1,16 @@
|
||||
import gc
|
||||
import hashlib
|
||||
import io
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import pickle
|
||||
import time
|
||||
from pathlib import Path
|
||||
from inference import slicer
|
||||
import gc
|
||||
|
||||
import librosa
|
||||
import numpy as np
|
||||
|
||||
# import onnxruntime
|
||||
import soundfile
|
||||
import torch
|
||||
@ -17,10 +18,9 @@ import torchaudio
|
||||
|
||||
import cluster
|
||||
import utils
|
||||
from models import SynthesizerTrn
|
||||
import pickle
|
||||
|
||||
from diffusion.unit2mel import load_model_vocoder
|
||||
from inference import slicer
|
||||
from models import SynthesizerTrn
|
||||
|
||||
logging.getLogger('matplotlib').setLevel(logging.WARNING)
|
||||
|
||||
|
@ -1,16 +1,18 @@
|
||||
import io
|
||||
import logging
|
||||
import os
|
||||
import io
|
||||
|
||||
import librosa
|
||||
import numpy as np
|
||||
from inference import slicer
|
||||
import parselmouth
|
||||
import soundfile
|
||||
import torch
|
||||
import torchaudio
|
||||
|
||||
import utils
|
||||
from inference import slicer
|
||||
from models import SynthesizerTrn
|
||||
|
||||
logging.getLogger('numba').setLevel(logging.WARNING)
|
||||
logging.getLogger('matplotlib').setLevel(logging.WARNING)
|
||||
|
||||
|
@ -1,8 +1,10 @@
|
||||
import logging
|
||||
from spkmix import spk_mix_map
|
||||
|
||||
import soundfile
|
||||
|
||||
from inference import infer_tool
|
||||
from inference.infer_tool import Svc
|
||||
from spkmix import spk_mix_map
|
||||
|
||||
logging.getLogger('numba').setLevel(logging.WARNING)
|
||||
chunks_dict = infer_tool.read_temp("inference/chunks_temp.json")
|
||||
|
@ -1,18 +1,17 @@
|
||||
import torch
|
||||
from torch import nn
|
||||
from torch.nn import Conv1d, Conv2d
|
||||
from torch.nn import functional as F
|
||||
from torch.nn.utils import spectral_norm, weight_norm
|
||||
|
||||
import modules.attentions as attentions
|
||||
import modules.commons as commons
|
||||
import modules.modules as modules
|
||||
|
||||
from torch.nn import Conv1d, Conv2d
|
||||
from torch.nn.utils import weight_norm, spectral_norm
|
||||
|
||||
import utils
|
||||
from modules.commons import get_padding
|
||||
from utils import f0_to_coarse
|
||||
|
||||
|
||||
class ResidualCouplingBlock(nn.Module):
|
||||
def __init__(self,
|
||||
channels,
|
||||
|
@ -1,7 +1,9 @@
|
||||
from modules.F0Predictor.F0Predictor import F0Predictor
|
||||
from modules.F0Predictor.crepe import CrepePitchExtractor
|
||||
import torch
|
||||
|
||||
from modules.F0Predictor.crepe import CrepePitchExtractor
|
||||
from modules.F0Predictor.F0Predictor import F0Predictor
|
||||
|
||||
|
||||
class CrepeF0Predictor(F0Predictor):
|
||||
def __init__(self,hop_length=512,f0_min=50,f0_max=1100,device=None,sampling_rate=44100,threshold=0.05,model="full"):
|
||||
self.F0Creper = CrepePitchExtractor(hop_length=hop_length,f0_min=f0_min,f0_max=f0_max,device=device,threshold=threshold,model=model)
|
||||
|
@ -1,6 +1,8 @@
|
||||
from modules.F0Predictor.F0Predictor import F0Predictor
|
||||
import pyworld
|
||||
import numpy as np
|
||||
import pyworld
|
||||
|
||||
from modules.F0Predictor.F0Predictor import F0Predictor
|
||||
|
||||
|
||||
class DioF0Predictor(F0Predictor):
|
||||
def __init__(self,hop_length=512,f0_min=50,f0_max=1100,sampling_rate=44100):
|
||||
|
@ -1,6 +1,8 @@
|
||||
from modules.F0Predictor.F0Predictor import F0Predictor
|
||||
import pyworld
|
||||
import numpy as np
|
||||
import pyworld
|
||||
|
||||
from modules.F0Predictor.F0Predictor import F0Predictor
|
||||
|
||||
|
||||
class HarvestF0Predictor(F0Predictor):
|
||||
def __init__(self,hop_length=512,f0_min=50,f0_max=1100,sampling_rate=44100):
|
||||
|
@ -1,6 +1,8 @@
|
||||
from modules.F0Predictor.F0Predictor import F0Predictor
|
||||
import parselmouth
|
||||
import numpy as np
|
||||
import parselmouth
|
||||
|
||||
from modules.F0Predictor.F0Predictor import F0Predictor
|
||||
|
||||
|
||||
class PMF0Predictor(F0Predictor):
|
||||
def __init__(self,hop_length=512,f0_min=50,f0_max=1100,sampling_rate=44100):
|
||||
|
@ -1,4 +1,5 @@
|
||||
from typing import Optional,Union
|
||||
from typing import Optional, Union
|
||||
|
||||
try:
|
||||
from typing import Literal
|
||||
except Exception:
|
||||
|
@ -1,4 +1,5 @@
|
||||
import math
|
||||
|
||||
import torch
|
||||
from torch import nn
|
||||
from torch.nn import functional as F
|
||||
|
@ -1,7 +1,9 @@
|
||||
import math
|
||||
|
||||
import torch
|
||||
from torch.nn import functional as F
|
||||
|
||||
|
||||
def slice_pitch_segments(x, ids_str, segment_size=4):
|
||||
ret = torch.zeros_like(x[:, :segment_size])
|
||||
for i in range(x.size(0)):
|
||||
|
@ -1,10 +1,12 @@
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from vdecoder.nsf_hifigan.nvSTFT import STFT
|
||||
from vdecoder.nsf_hifigan.models import load_model
|
||||
from torchaudio.transforms import Resample
|
||||
|
||||
from vdecoder.nsf_hifigan.models import load_model
|
||||
from vdecoder.nsf_hifigan.nvSTFT import STFT
|
||||
|
||||
|
||||
class Enhancer:
|
||||
def __init__(self, enhancer_type, enhancer_ckpt, device=None):
|
||||
if device is None:
|
||||
|
@ -1,5 +1,4 @@
|
||||
import torch
|
||||
|
||||
import torch
|
||||
|
||||
|
||||
def feature_loss(fmap_r, fmap_g):
|
||||
|
@ -1,13 +1,11 @@
|
||||
import torch
|
||||
from torch import nn
|
||||
from torch.nn import functional as F
|
||||
|
||||
from torch.nn import Conv1d
|
||||
from torch.nn.utils import weight_norm, remove_weight_norm
|
||||
from torch.nn import functional as F
|
||||
from torch.nn.utils import remove_weight_norm, weight_norm
|
||||
|
||||
import modules.commons as commons
|
||||
from modules.commons import init_weights, get_padding
|
||||
|
||||
from modules.commons import get_padding, init_weights
|
||||
|
||||
LRELU_SLOPE = 0.1
|
||||
|
||||
|
@ -1,6 +1,8 @@
|
||||
import torch
|
||||
from onnxexport.model_onnx import SynthesizerTrn
|
||||
|
||||
import utils
|
||||
from onnxexport.model_onnx import SynthesizerTrn
|
||||
|
||||
|
||||
def main(NetExport):
|
||||
path = "SoVits4.0"
|
||||
|
@ -1,8 +1,11 @@
|
||||
import torch
|
||||
from onnxexport.model_onnx_speaker_mix import SynthesizerTrn
|
||||
import utils
|
||||
import json
|
||||
|
||||
import torch
|
||||
|
||||
import utils
|
||||
from onnxexport.model_onnx_speaker_mix import SynthesizerTrn
|
||||
|
||||
|
||||
def main():
|
||||
path = "crs"
|
||||
|
||||
|
@ -1,18 +1,16 @@
|
||||
import torch
|
||||
from torch import nn
|
||||
from torch.nn import Conv1d, Conv2d
|
||||
from torch.nn import functional as F
|
||||
from torch.nn.utils import spectral_norm, weight_norm
|
||||
|
||||
import modules.attentions as attentions
|
||||
import modules.commons as commons
|
||||
import modules.modules as modules
|
||||
|
||||
from torch.nn import Conv1d, Conv2d
|
||||
from torch.nn.utils import weight_norm, spectral_norm
|
||||
|
||||
import utils
|
||||
from modules.commons import get_padding
|
||||
from vdecoder.hifigan.models import Generator
|
||||
from utils import f0_to_coarse
|
||||
from vdecoder.hifigan.models import Generator
|
||||
|
||||
|
||||
class ResidualCouplingBlock(nn.Module):
|
||||
|
@ -4,10 +4,9 @@ from torch.nn import functional as F
|
||||
|
||||
import modules.attentions as attentions
|
||||
import modules.modules as modules
|
||||
|
||||
|
||||
from utils import f0_to_coarse
|
||||
|
||||
|
||||
class ResidualCouplingBlock(nn.Module):
|
||||
def __init__(self,
|
||||
channels,
|
||||
|
@ -1,11 +1,11 @@
|
||||
import os
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import wave
|
||||
from random import shuffle
|
||||
|
||||
from tqdm import tqdm
|
||||
from random import shuffle
|
||||
import json
|
||||
import wave
|
||||
|
||||
import diffusion.logger.utils as du
|
||||
|
||||
|
@ -1,19 +1,20 @@
|
||||
import os
|
||||
import utils
|
||||
import torch
|
||||
import random
|
||||
import librosa
|
||||
import logging
|
||||
import argparse
|
||||
import logging
|
||||
import multiprocessing
|
||||
import numpy as np
|
||||
import diffusion.logger.utils as du
|
||||
|
||||
from glob import glob
|
||||
from tqdm import tqdm
|
||||
from random import shuffle
|
||||
from diffusion.vocoder import Vocoder
|
||||
import os
|
||||
import random
|
||||
from concurrent.futures import ProcessPoolExecutor
|
||||
from glob import glob
|
||||
from random import shuffle
|
||||
|
||||
import librosa
|
||||
import numpy as np
|
||||
import torch
|
||||
from tqdm import tqdm
|
||||
|
||||
import diffusion.logger.utils as du
|
||||
import utils
|
||||
from diffusion.vocoder import Vocoder
|
||||
from modules.mel_processing import spectrogram_torch
|
||||
|
||||
logging.getLogger("numba").setLevel(logging.WARNING)
|
||||
|
@ -1,10 +1,11 @@
|
||||
import os
|
||||
import argparse
|
||||
import librosa
|
||||
import numpy as np
|
||||
import concurrent.futures
|
||||
import os
|
||||
from concurrent.futures import ProcessPoolExecutor
|
||||
from multiprocessing import cpu_count
|
||||
|
||||
import librosa
|
||||
import numpy as np
|
||||
from scipy.io import wavfile
|
||||
from tqdm import tqdm
|
||||
|
||||
|
19
train.py
19
train.py
@ -6,27 +6,24 @@ logging.getLogger('matplotlib').setLevel(logging.WARNING)
|
||||
logging.getLogger('numba').setLevel(logging.WARNING)
|
||||
|
||||
import os
|
||||
|
||||
import torch
|
||||
import torch.distributed as dist
|
||||
import torch.multiprocessing as mp
|
||||
from torch.cuda.amp import GradScaler, autocast
|
||||
from torch.nn import functional as F
|
||||
from torch.nn.parallel import DistributedDataParallel as DDP
|
||||
from torch.utils.data import DataLoader
|
||||
from torch.utils.tensorboard import SummaryWriter
|
||||
import torch.multiprocessing as mp
|
||||
import torch.distributed as dist
|
||||
from torch.nn.parallel import DistributedDataParallel as DDP
|
||||
from torch.cuda.amp import autocast, GradScaler
|
||||
|
||||
import modules.commons as commons
|
||||
import utils
|
||||
from data_utils import TextAudioSpeakerLoader, TextAudioCollate
|
||||
from data_utils import TextAudioCollate, TextAudioSpeakerLoader
|
||||
from models import (
|
||||
SynthesizerTrn,
|
||||
MultiPeriodDiscriminator,
|
||||
SynthesizerTrn,
|
||||
)
|
||||
from modules.losses import (
|
||||
kl_loss,
|
||||
generator_loss, discriminator_loss, feature_loss
|
||||
)
|
||||
|
||||
from modules.losses import discriminator_loss, feature_loss, generator_loss, kl_loss
|
||||
from modules.mel_processing import mel_spectrogram_torch, spec_to_mel_torch
|
||||
|
||||
torch.backends.cudnn.benchmark = True
|
||||
|
@ -1,8 +1,10 @@
|
||||
import argparse
|
||||
|
||||
import torch
|
||||
from torch.optim import lr_scheduler
|
||||
from diffusion.logger import utils
|
||||
|
||||
from diffusion.data_loaders import get_data_loaders
|
||||
from diffusion.logger import utils
|
||||
from diffusion.solver import train
|
||||
from diffusion.unit2mel import Unit2Mel
|
||||
from diffusion.vocoder import Vocoder
|
||||
|
@ -1,8 +1,8 @@
|
||||
import utils
|
||||
import pickle
|
||||
import os
|
||||
import argparse
|
||||
import os
|
||||
import pickle
|
||||
|
||||
import utils
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
|
15
utils.py
15
utils.py
@ -1,17 +1,18 @@
|
||||
import os
|
||||
import glob
|
||||
import re
|
||||
import sys
|
||||
import argparse
|
||||
import logging
|
||||
import glob
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
import faiss
|
||||
import librosa
|
||||
import numpy as np
|
||||
from scipy.io.wavfile import read
|
||||
import torch
|
||||
from scipy.io.wavfile import read
|
||||
from torch.nn import functional as F
|
||||
import faiss
|
||||
|
||||
MATPLOTLIB_FLAG = False
|
||||
|
||||
|
@ -1,13 +1,15 @@
|
||||
import os
|
||||
import json
|
||||
from .env import AttrDict
|
||||
import os
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
import torch.nn as nn
|
||||
from torch.nn import Conv1d, ConvTranspose1d, AvgPool1d, Conv2d
|
||||
from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm
|
||||
from .utils import init_weights, get_padding
|
||||
import torch.nn.functional as F
|
||||
from torch.nn import AvgPool1d, Conv1d, Conv2d, ConvTranspose1d
|
||||
from torch.nn.utils import remove_weight_norm, spectral_norm, weight_norm
|
||||
|
||||
from .env import AttrDict
|
||||
from .utils import get_padding, init_weights
|
||||
|
||||
LRELU_SLOPE = 0.1
|
||||
|
||||
|
@ -1,11 +1,13 @@
|
||||
import os
|
||||
os.environ["LRU_CACHE_CAPACITY"] = "3"
|
||||
|
||||
import librosa
|
||||
import numpy as np
|
||||
import soundfile as sf
|
||||
import torch
|
||||
import torch.utils.data
|
||||
import numpy as np
|
||||
import librosa
|
||||
from librosa.filters import mel as librosa_mel_fn
|
||||
import soundfile as sf
|
||||
|
||||
os.environ["LRU_CACHE_CAPACITY"] = "3"
|
||||
|
||||
def load_wav_to_torch(full_path, target_sr=None, return_empty_on_exception=False):
|
||||
sampling_rate = None
|
||||
|
@ -1,9 +1,10 @@
|
||||
import glob
|
||||
import os
|
||||
import torch
|
||||
from torch.nn.utils import weight_norm
|
||||
|
||||
# matplotlib.use("Agg")
|
||||
import matplotlib.pylab as plt
|
||||
import torch
|
||||
from torch.nn.utils import weight_norm
|
||||
|
||||
|
||||
def plot_spectrogram(spectrogram):
|
||||
|
@ -1,6 +1,6 @@
|
||||
# Adapted from https://github.com/junjun3518/alias-free-torch under the Apache License 2.0
|
||||
# LICENSE is in incl_licenses directory.
|
||||
|
||||
from .act import *
|
||||
from .filter import *
|
||||
from .resample import *
|
||||
from .act import *
|
@ -4,10 +4,10 @@
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
|
||||
from torch import sin, pow
|
||||
from torch import pow, sin
|
||||
from torch.nn import Parameter
|
||||
from .resample import UpSample1d, DownSample1d
|
||||
|
||||
from .resample import DownSample1d, UpSample1d
|
||||
|
||||
|
||||
class Activation1d(nn.Module):
|
||||
|
@ -1,10 +1,11 @@
|
||||
# Adapted from https://github.com/junjun3518/alias-free-torch under the Apache License 2.0
|
||||
# LICENSE is in incl_licenses directory.
|
||||
|
||||
import math
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
import math
|
||||
|
||||
if 'sinc' in dir(torch):
|
||||
sinc = torch.sinc
|
||||
|
@ -3,8 +3,8 @@
|
||||
|
||||
import torch.nn as nn
|
||||
from torch.nn import functional as F
|
||||
from .filter import LowPassFilter1d
|
||||
from .filter import kaiser_sinc_filter1d
|
||||
|
||||
from .filter import LowPassFilter1d, kaiser_sinc_filter1d
|
||||
|
||||
|
||||
class UpSample1d(nn.Module):
|
||||
|
@ -1,15 +1,18 @@
|
||||
import os
|
||||
import json
|
||||
from .env import AttrDict
|
||||
import os
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
import torch.nn as nn
|
||||
from torch.nn import Conv1d, ConvTranspose1d, AvgPool1d, Conv2d
|
||||
from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm
|
||||
from .utils import init_weights, get_padding
|
||||
import torch.nn.functional as F
|
||||
from torch.nn import AvgPool1d, Conv1d, Conv2d, ConvTranspose1d
|
||||
from torch.nn.utils import remove_weight_norm, spectral_norm, weight_norm
|
||||
|
||||
from vdecoder.hifiganwithsnake.alias.act import SnakeAlias
|
||||
|
||||
from .env import AttrDict
|
||||
from .utils import get_padding, init_weights
|
||||
|
||||
LRELU_SLOPE = 0.1
|
||||
|
||||
|
||||
|
@ -1,11 +1,13 @@
|
||||
import os
|
||||
os.environ["LRU_CACHE_CAPACITY"] = "3"
|
||||
|
||||
import librosa
|
||||
import numpy as np
|
||||
import soundfile as sf
|
||||
import torch
|
||||
import torch.utils.data
|
||||
import numpy as np
|
||||
import librosa
|
||||
from librosa.filters import mel as librosa_mel_fn
|
||||
import soundfile as sf
|
||||
|
||||
os.environ["LRU_CACHE_CAPACITY"] = "3"
|
||||
|
||||
def load_wav_to_torch(full_path, target_sr=None, return_empty_on_exception=False):
|
||||
sampling_rate = None
|
||||
|
@ -1,9 +1,10 @@
|
||||
import glob
|
||||
import os
|
||||
import torch
|
||||
from torch.nn.utils import weight_norm
|
||||
|
||||
# matplotlib.use("Agg")
|
||||
import matplotlib.pylab as plt
|
||||
import torch
|
||||
from torch.nn.utils import weight_norm
|
||||
|
||||
|
||||
def plot_spectrogram(spectrogram):
|
||||
|
@ -1,13 +1,15 @@
|
||||
import os
|
||||
import json
|
||||
from .env import AttrDict
|
||||
import os
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
import torch.nn as nn
|
||||
from torch.nn import Conv1d, ConvTranspose1d, AvgPool1d, Conv2d
|
||||
from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm
|
||||
from .utils import init_weights, get_padding
|
||||
import torch.nn.functional as F
|
||||
from torch.nn import AvgPool1d, Conv1d, Conv2d, ConvTranspose1d
|
||||
from torch.nn.utils import remove_weight_norm, spectral_norm, weight_norm
|
||||
|
||||
from .env import AttrDict
|
||||
from .utils import get_padding, init_weights
|
||||
|
||||
LRELU_SLOPE = 0.1
|
||||
|
||||
|
@ -1,12 +1,14 @@
|
||||
import os
|
||||
os.environ["LRU_CACHE_CAPACITY"] = "3"
|
||||
import torch
|
||||
import torch.utils.data
|
||||
import numpy as np
|
||||
|
||||
import librosa
|
||||
from librosa.filters import mel as librosa_mel_fn
|
||||
import numpy as np
|
||||
import soundfile as sf
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
import torch.utils.data
|
||||
from librosa.filters import mel as librosa_mel_fn
|
||||
|
||||
os.environ["LRU_CACHE_CAPACITY"] = "3"
|
||||
|
||||
def load_wav_to_torch(full_path, target_sr=None, return_empty_on_exception=False):
|
||||
sampling_rate = None
|
||||
|
@ -1,10 +1,12 @@
|
||||
import glob
|
||||
import os
|
||||
|
||||
import matplotlib
|
||||
import matplotlib.pylab as plt
|
||||
import torch
|
||||
from torch.nn.utils import weight_norm
|
||||
|
||||
matplotlib.use("Agg")
|
||||
import matplotlib.pylab as plt
|
||||
|
||||
|
||||
def plot_spectrogram(spectrogram):
|
||||
|
@ -1,7 +1,8 @@
|
||||
from vencoder.encoder import SpeechEncoder
|
||||
import torch
|
||||
from fairseq import checkpoint_utils
|
||||
|
||||
from vencoder.encoder import SpeechEncoder
|
||||
|
||||
|
||||
class CNHubertLarge(SpeechEncoder):
|
||||
def __init__(self, vec_path="pretrain/chinese-hubert-large-fairseq-ckpt.pt", device=None):
|
||||
|
@ -1,7 +1,8 @@
|
||||
from vencoder.encoder import SpeechEncoder
|
||||
import onnxruntime
|
||||
import torch
|
||||
|
||||
from vencoder.encoder import SpeechEncoder
|
||||
|
||||
|
||||
class ContentVec256L12_Onnx(SpeechEncoder):
|
||||
def __init__(self, vec_path="pretrain/vec-256-layer-12.onnx", device=None):
|
||||
|
@ -1,7 +1,8 @@
|
||||
from vencoder.encoder import SpeechEncoder
|
||||
import torch
|
||||
from fairseq import checkpoint_utils
|
||||
|
||||
from vencoder.encoder import SpeechEncoder
|
||||
|
||||
|
||||
class ContentVec256L9(SpeechEncoder):
|
||||
def __init__(self, vec_path="pretrain/checkpoint_best_legacy_500.pt", device=None):
|
||||
|
@ -1,7 +1,9 @@
|
||||
from vencoder.encoder import SpeechEncoder
|
||||
import onnxruntime
|
||||
import torch
|
||||
|
||||
from vencoder.encoder import SpeechEncoder
|
||||
|
||||
|
||||
class ContentVec256L9_Onnx(SpeechEncoder):
|
||||
def __init__(self, vec_path="pretrain/vec-256-layer-9.onnx", device=None):
|
||||
super().__init__()
|
||||
|
@ -1,7 +1,8 @@
|
||||
from vencoder.encoder import SpeechEncoder
|
||||
import torch
|
||||
from fairseq import checkpoint_utils
|
||||
|
||||
from vencoder.encoder import SpeechEncoder
|
||||
|
||||
|
||||
class ContentVec768L12(SpeechEncoder):
|
||||
def __init__(self, vec_path="pretrain/checkpoint_best_legacy_500.pt", device=None):
|
||||
|
@ -1,7 +1,8 @@
|
||||
from vencoder.encoder import SpeechEncoder
|
||||
import onnxruntime
|
||||
import torch
|
||||
|
||||
from vencoder.encoder import SpeechEncoder
|
||||
|
||||
|
||||
class ContentVec768L12_Onnx(SpeechEncoder):
|
||||
def __init__(self, vec_path="pretrain/vec-768-layer-12.onnx", device=None):
|
||||
|
@ -1,7 +1,8 @@
|
||||
from vencoder.encoder import SpeechEncoder
|
||||
import onnxruntime
|
||||
import torch
|
||||
|
||||
from vencoder.encoder import SpeechEncoder
|
||||
|
||||
|
||||
class ContentVec768L9_Onnx(SpeechEncoder):
|
||||
def __init__(self,vec_path = "pretrain/vec-768-layer-9.onnx",device=None):
|
||||
|
@ -1,6 +1,7 @@
|
||||
from vencoder.encoder import SpeechEncoder
|
||||
import torch
|
||||
|
||||
from vencoder.dphubert.model import wav2vec2_model
|
||||
from vencoder.encoder import SpeechEncoder
|
||||
|
||||
|
||||
class DPHubert(SpeechEncoder):
|
||||
|
@ -1,5 +1,6 @@
|
||||
from vencoder.encoder import SpeechEncoder
|
||||
import torch
|
||||
|
||||
from vencoder.encoder import SpeechEncoder
|
||||
from vencoder.hubert import hubert_model
|
||||
|
||||
|
||||
|
@ -1,7 +1,8 @@
|
||||
from vencoder.encoder import SpeechEncoder
|
||||
import onnxruntime
|
||||
import torch
|
||||
|
||||
from vencoder.encoder import SpeechEncoder
|
||||
|
||||
|
||||
class HubertSoft_Onnx(SpeechEncoder):
|
||||
def __init__(self, vec_path="pretrain/hubert-soft.onnx", device=None):
|
||||
|
@ -1,5 +1,6 @@
|
||||
from vencoder.encoder import SpeechEncoder
|
||||
import torch
|
||||
|
||||
from vencoder.encoder import SpeechEncoder
|
||||
from vencoder.wavlm.WavLM import WavLM, WavLMConfig
|
||||
|
||||
|
||||
|
@ -1,8 +1,8 @@
|
||||
from vencoder.encoder import SpeechEncoder
|
||||
import torch
|
||||
|
||||
from vencoder.whisper.model import Whisper, ModelDimensions
|
||||
from vencoder.whisper.audio import pad_or_trim, log_mel_spectrogram
|
||||
from vencoder.encoder import SpeechEncoder
|
||||
from vencoder.whisper.audio import log_mel_spectrogram, pad_or_trim
|
||||
from vencoder.whisper.model import ModelDimensions, Whisper
|
||||
|
||||
|
||||
class WhisperPPG(SpeechEncoder):
|
||||
|
@ -1,8 +1,8 @@
|
||||
from vencoder.encoder import SpeechEncoder
|
||||
import torch
|
||||
|
||||
from vencoder.whisper.model import Whisper, ModelDimensions
|
||||
from vencoder.whisper.audio import pad_or_trim, log_mel_spectrogram
|
||||
from vencoder.encoder import SpeechEncoder
|
||||
from vencoder.whisper.audio import log_mel_spectrogram, pad_or_trim
|
||||
from vencoder.whisper.model import ModelDimensions, Whisper
|
||||
|
||||
|
||||
class WhisperPPGLarge(SpeechEncoder):
|
||||
|
@ -5,19 +5,19 @@ https://github.com/pytorch/audio/blob/main/torchaudio/models/wav2vec2/components
|
||||
|
||||
"""
|
||||
|
||||
import math
|
||||
from collections import defaultdict
|
||||
from typing import List, Optional, Tuple
|
||||
import math
|
||||
|
||||
import torch
|
||||
from torch import nn, Tensor
|
||||
from torch import Tensor, nn
|
||||
from torch.nn import Module
|
||||
|
||||
from .hardconcrete import HardConcrete
|
||||
from .pruning_utils import (
|
||||
prune_linear_layer,
|
||||
prune_conv1d_layer,
|
||||
prune_layer_norm,
|
||||
prune_linear_layer,
|
||||
)
|
||||
|
||||
|
||||
|
@ -10,7 +10,7 @@ from typing import Any, Dict
|
||||
|
||||
from torch.nn import Module
|
||||
|
||||
from ..model import wav2vec2_model, Wav2Vec2Model, wavlm_model
|
||||
from ..model import Wav2Vec2Model, wav2vec2_model, wavlm_model
|
||||
|
||||
_LG = logging.getLogger(__name__)
|
||||
|
||||
|
@ -7,26 +7,26 @@
|
||||
# https://github.com/pytorch/fairseq
|
||||
# --------------------------------------------------------
|
||||
|
||||
import math
|
||||
import logging
|
||||
import math
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
import numpy as np
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
from torch.nn import LayerNorm
|
||||
|
||||
from vencoder.wavlm.modules import (
|
||||
Fp32GroupNorm,
|
||||
Fp32LayerNorm,
|
||||
GLU_Linear,
|
||||
GradMultiply,
|
||||
MultiheadAttention,
|
||||
SamePad,
|
||||
init_bert_params,
|
||||
get_activation_fn,
|
||||
TransposeLast,
|
||||
GLU_Linear,
|
||||
get_activation_fn,
|
||||
init_bert_params,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
@ -10,10 +10,11 @@
|
||||
import math
|
||||
import warnings
|
||||
from typing import Dict, Optional, Tuple
|
||||
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from torch import Tensor, nn
|
||||
from torch.nn import Parameter
|
||||
import torch.nn.functional as F
|
||||
|
||||
|
||||
class TransposeLast(nn.Module):
|
||||
|
@ -5,11 +5,10 @@ import ffmpeg
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from librosa.filters import mel as librosa_mel_fn
|
||||
|
||||
from .utils import exact_div
|
||||
|
||||
from librosa.filters import mel as librosa_mel_fn
|
||||
|
||||
# hard-coded audio hyperparameters
|
||||
SAMPLE_RATE = 16000
|
||||
N_FFT = 400
|
||||
|
@ -1,5 +1,5 @@
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Dict, List, Tuple, Iterable, Optional, Sequence, Union, TYPE_CHECKING
|
||||
from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Sequence, Tuple, Union
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
|
@ -1,14 +1,13 @@
|
||||
from dataclasses import dataclass
|
||||
from typing import Dict
|
||||
from typing import Iterable, Optional
|
||||
from typing import Dict, Iterable, Optional
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from torch import Tensor
|
||||
from torch import nn
|
||||
from torch import Tensor, nn
|
||||
|
||||
from .decoding import detect_language as detect_language_function, decode as decode_function
|
||||
from .decoding import decode as decode_function
|
||||
from .decoding import detect_language as detect_language_function
|
||||
|
||||
|
||||
@dataclass
|
||||
|
@ -1,7 +1,9 @@
|
||||
from google.colab import files
|
||||
import shutil
|
||||
import os
|
||||
import argparse
|
||||
import os
|
||||
import shutil
|
||||
|
||||
from google.colab import files
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--type", type=str, required=True, help="type of file to upload")
|
||||
|
23
webUI.py
23
webUI.py
@ -1,4 +1,11 @@
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import time
|
||||
import traceback
|
||||
from itertools import chain
|
||||
|
||||
# os.system("wget -P cvec/ https://huggingface.co/spaces/innnky/nanami/resolve/main/checkpoint_best_legacy_500.pt")
|
||||
import gradio as gr
|
||||
@ -6,20 +13,12 @@ import gradio.processing_utils as gr_pu
|
||||
import librosa
|
||||
import numpy as np
|
||||
import soundfile
|
||||
from inference.infer_tool import Svc
|
||||
import logging
|
||||
import re
|
||||
import json
|
||||
|
||||
import subprocess
|
||||
from scipy.io import wavfile
|
||||
import librosa
|
||||
import torch
|
||||
import time
|
||||
import traceback
|
||||
from itertools import chain
|
||||
from utils import mix_model
|
||||
from scipy.io import wavfile
|
||||
|
||||
from compress_model import removeOptimizer
|
||||
from inference.infer_tool import Svc
|
||||
from utils import mix_model
|
||||
|
||||
logging.getLogger('numba').setLevel(logging.WARNING)
|
||||
logging.getLogger('markdown_it').setLevel(logging.WARNING)
|
||||
|
Loading…
Reference in New Issue
Block a user