chore: code cleanup by ruff fix

magic-akari 2023-06-25 23:46:26 +08:00
parent 30975cd468
commit 88be2098fd
No known key found for this signature in database
GPG Key ID: EC005B1159285BDD
48 changed files with 101 additions and 175 deletions

.ruff.toml Normal file
View File

@@ -0,0 +1 @@

.vscode/extensions.json vendored Normal file
View File

@@ -0,0 +1,6 @@
{
"recommendations": [
"charliermarsh.ruff",
"ms-python.python"
]
}
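
The two new files above set up the tooling: `.ruff.toml` configures the linter (its single added line is not captured in this view), and the VS Code `extensions.json` recommends the ruff extension (`charliermarsh.ruff`) alongside `ms-python.python`, so the same diagnostics appear in the editor. Going by the commit title, the edits below are ruff's automatic fixes, i.e. something along the lines of `ruff check --fix .` (modern syntax; the exact invocation is not recorded in the commit).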

View File

@@ -1,4 +1,3 @@
import numpy as np
import torch
from sklearn.cluster import KMeans
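
Most of the deletions in this commit are this pattern: imports that a module never references are removed, which is pyflakes rule F401 ("imported but unused") as implemented by ruff. A minimal sketch of the rule on a hypothetical module (not one of the files in this diff):

import json  # F401: never referenced below; `ruff --fix` deletes this line
import os

def model_dir(path: str) -> str:
    # only `os` is used, so the `json` import above is dead weight
    return os.path.dirname(path)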

View File

@@ -1,4 +1,3 @@
import math,pdb
import torch,pynvml
from torch.nn.functional import normalize
from time import time

View File

@@ -1,6 +1,5 @@
import time,pdb
import time
import tqdm
from time import time as ttime
import os
from pathlib import Path
import logging
@@ -12,8 +11,7 @@ from sklearn.cluster import KMeans,MiniBatchKMeans
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
from time import time as ttime
import pynvml,torch
import torch
def train_cluster(in_dir, n_clusters, use_minibatch=True, verbose=False,use_gpu=False):  # gpu_minibatch is really bad; the library supports it, but it is still not considered
logger.info(f"Loading features from {in_dir}")
@@ -29,7 +27,7 @@ def train_cluster(in_dir, n_clusters, use_minibatch=True, verbose=False,use_gpu=
features = features.astype(np.float32)
logger.info(f"Clustering features of shape: {features.shape}")
t = time.time()
if(use_gpu==False):
if(use_gpu is False):
if use_minibatch:
kmeans = MiniBatchKMeans(n_clusters=n_clusters,verbose=verbose, batch_size=4096, max_iter=80).fit(features)
else:
@@ -37,14 +35,14 @@ def train_cluster(in_dir, n_clusters, use_minibatch=True, verbose=False,use_gpu=
else:
kmeans = KMeansGPU(n_clusters=n_clusters, mode='euclidean', verbose=2 if verbose else 0,max_iter=500,tol=1e-2)#
features=torch.from_numpy(features)#.to(device)
labels = kmeans.fit_predict(features)#
kmeans.fit_predict(features)#
print(time.time()-t, "s")
x = {
"n_features_in_": kmeans.n_features_in_ if use_gpu==False else features.shape[1],
"_n_threads": kmeans._n_threads if use_gpu==False else 4,
"cluster_centers_": kmeans.cluster_centers_ if use_gpu==False else kmeans.centroids.cpu().numpy(),
"n_features_in_": kmeans.n_features_in_ if use_gpu is False else features.shape[1],
"_n_threads": kmeans._n_threads if use_gpu is False else 4,
"cluster_centers_": kmeans.cluster_centers_ if use_gpu is False else kmeans.centroids.cpu().numpy(),
}
print("end")

View File

@@ -1,13 +1,11 @@
import time
import os
import random
import numpy as np
import torch
import torch.utils.data
import modules.commons as commons
import utils
from modules.mel_processing import spectrogram_torch, spec_to_mel_torch, spectrogram_torch
from modules.mel_processing import spectrogram_torch, spectrogram_torch
from utils import load_wav_to_torch, load_filepaths_and_text
# import h5py
@@ -87,7 +85,7 @@ class TextAudioSpeakerLoader(torch.utils.data.Dataset):
assert abs(audio_norm.shape[1]-lmin * self.hop_length) < 3 * self.hop_length
spec, c, f0, uv = spec[:, :lmin], c[:, :lmin], f0[:lmin], uv[:lmin]
audio_norm = audio_norm[:, :lmin * self.hop_length]
if volume!= None:
if volume is not None:
volume = volume[:lmin]
return c, f0, spec, audio_norm, spk, uv, volume
@@ -96,7 +94,7 @@ class TextAudioSpeakerLoader(torch.utils.data.Dataset):
# print("skip too short audio:", filename)
# return None
if random.choice([True, False]) and self.vol_aug and volume!=None:
if random.choice([True, False]) and self.vol_aug and volume is not None:
max_amp = float(torch.max(torch.abs(audio_norm))) + 1e-5
max_shift = min(1, np.log10(1/max_amp))
log10_vol_shift = random.uniform(-1, max_shift)
@@ -114,7 +112,7 @@ class TextAudioSpeakerLoader(torch.utils.data.Dataset):
end = start + 790
spec, c, f0, uv = spec[:, start:end], c[:, start:end], f0[start:end], uv[start:end]
audio_norm = audio_norm[:, start * self.hop_length : end * self.hop_length]
if volume !=None:
if volume is not None:
volume = volume[start:end]
return c, f0, spec, audio_norm, spk, uv,volume
@@ -178,7 +176,7 @@ class TextAudioCollate:
uv = row[5]
uv_padded[i, :uv.size(0)] = uv
volume = row[6]
if volume != None:
if volume is not None:
volume_padded[i, :volume.size(0)] = volume
else :
volume_padded = None

View File

@@ -1,6 +1,5 @@
import os
import random
import re
import numpy as np
import librosa
import torch
@@ -130,7 +129,7 @@ class AudioDataset(Dataset):
with open(filelists,"r") as f:
self.paths = f.read().splitlines()
for name_ext in tqdm(self.paths, total=len(self.paths)):
name = os.path.splitext(name_ext)[0]
os.path.splitext(name_ext)[0]
path_audio = name_ext
duration = librosa.get_duration(filename = path_audio, sr = self.sample_rate)
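
Here ruff's F841 fix ("local variable assigned but never used") removed the `name =` binding but kept the call itself, because deleting an expression outright could discard a side effect. The same pattern recurs throughout the diff (`dims = x.dim()`, `mel2ph = torch.arange(...)`, `MoeVSConfJson = json.dumps(...)`, the unused `as e` in an `except` clause). For a pure function like `os.path.splitext` the surviving bare expression is dead code that could then be deleted by hand. A hypothetical sketch:

import os

name_ext = "speaker1/clip_001.wav"  # made-up value for illustration
# Before (F841): `name = os.path.splitext(name_ext)[0]` with `name` never read.
# After `ruff --fix`: the binding is gone, the call remains in case of side effects.
os.path.splitext(name_ext)[0]  # pure function, so this line is now a no-op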

View File

@@ -2,7 +2,6 @@ from collections import deque
from functools import partial
from inspect import isfunction
import torch.nn.functional as F
import librosa.sequence
import numpy as np
import torch
from torch import nn
@@ -26,8 +25,10 @@ def extract(a, t, x_shape):
def noise_like(shape, device, repeat=False):
repeat_noise = lambda: torch.randn((1, *shape[1:]), device=device).repeat(shape[0], *((1,) * (len(shape) - 1)))
noise = lambda: torch.randn(shape, device=device)
def repeat_noise():
return torch.randn((1, *shape[1:]), device=device).repeat(shape[0], *((1,) * (len(shape) - 1)))
def noise():
return torch.randn(shape, device=device)
return repeat_noise() if repeat else noise()
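
The `repeat_noise`/`noise` rewrite above is the fix for E731 ("do not assign a lambda expression, use a def"): a named function behaves identically but gets a real `__name__`, shows up properly in tracebacks, and can carry a docstring. A condensed, runnable version of this helper as it stands after the fix (assumes torch):

import torch

def noise_like(shape, device, repeat=False):
    # Before (E731), both helpers were lambdas assigned to names.
    def repeat_noise():
        # draw one sample and repeat it across the batch dimension
        return torch.randn((1, *shape[1:]), device=device).repeat(shape[0], *((1,) * (len(shape) - 1)))
    def noise():
        return torch.randn(shape, device=device)
    return repeat_noise() if repeat else noise()

x = noise_like((2, 3), "cpu", repeat=True)  # rows are identical by construction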

View File

@@ -2,7 +2,6 @@ from collections import deque
from functools import partial
from inspect import isfunction
import torch.nn.functional as F
import librosa.sequence
import numpy as np
from torch.nn import Conv1d
from torch.nn import Mish
@@ -27,8 +26,10 @@ def extract(a, t):
def noise_like(shape, device, repeat=False):
repeat_noise = lambda: torch.randn((1, *shape[1:]), device=device).repeat(shape[0], *((1,) * (len(shape) - 1)))
noise = lambda: torch.randn(shape, device=device)
def repeat_noise():
return torch.randn((1, *shape[1:]), device=device).repeat(shape[0], *((1,) * (len(shape) - 1)))
def noise():
return torch.randn(shape, device=device)
return repeat_noise() if repeat else noise()
@@ -577,7 +578,7 @@ class GaussianDiffusion(nn.Module):
noise_list = torch.zeros((0, 1, 1, self.mel_bins, n_frames), device=device)
ot = step_range[0]
ot_1 = torch.full((1,), ot, device=device, dtype=torch.long)
torch.full((1,), ot, device=device, dtype=torch.long)
for t in step_range:
t_1 = torch.full((1,), t, device=device, dtype=torch.long)

View File

@@ -1,6 +1,4 @@
import torch
import torch.nn.functional as F
import math
class NoiseScheduleVP:
@@ -559,7 +557,7 @@ class DPM_Solver:
x_t: A pytorch tensor. The approximated solution at time `t`.
"""
ns = self.noise_schedule
dims = x.dim()
x.dim()
lambda_s, lambda_t = ns.marginal_lambda(s), ns.marginal_lambda(t)
h = lambda_t - lambda_s
log_alpha_s, log_alpha_t = ns.marginal_log_mean_coeff(s), ns.marginal_log_mean_coeff(t)
@@ -984,12 +982,16 @@ class DPM_Solver:
nfe = 0
if order == 2:
r1 = 0.5
lower_update = lambda x, s, t: self.dpm_solver_first_update(x, s, t, return_intermediate=True)
higher_update = lambda x, s, t, **kwargs: self.singlestep_dpm_solver_second_update(x, s, t, r1=r1, solver_type=solver_type, **kwargs)
def lower_update(x, s, t):
return self.dpm_solver_first_update(x, s, t, return_intermediate=True)
def higher_update(x, s, t, **kwargs):
return self.singlestep_dpm_solver_second_update(x, s, t, r1=r1, solver_type=solver_type, **kwargs)
elif order == 3:
r1, r2 = 1. / 3., 2. / 3.
lower_update = lambda x, s, t: self.singlestep_dpm_solver_second_update(x, s, t, r1=r1, return_intermediate=True, solver_type=solver_type)
higher_update = lambda x, s, t, **kwargs: self.singlestep_dpm_solver_third_update(x, s, t, r1=r1, r2=r2, solver_type=solver_type, **kwargs)
def lower_update(x, s, t):
return self.singlestep_dpm_solver_second_update(x, s, t, r1=r1, return_intermediate=True, solver_type=solver_type)
def higher_update(x, s, t, **kwargs):
return self.singlestep_dpm_solver_third_update(x, s, t, r1=r1, r2=r2, solver_type=solver_type, **kwargs)
else:
raise ValueError("For adaptive step size solver, order must be 2 or 3, got {}".format(order))
while torch.abs((s - t_0)).mean() > t_err:
@@ -997,7 +999,8 @@ class DPM_Solver:
x_lower, lower_noise_kwargs = lower_update(x, s, t)
x_higher = higher_update(x, s, t, **lower_noise_kwargs)
delta = torch.max(torch.ones_like(x).to(x) * atol, rtol * torch.max(torch.abs(x_lower), torch.abs(x_prev)))
norm_fn = lambda v: torch.sqrt(torch.square(v.reshape((v.shape[0], -1))).mean(dim=-1, keepdim=True))
def norm_fn(v):
return torch.sqrt(torch.square(v.reshape((v.shape[0], -1))).mean(dim=-1, keepdim=True))
E = norm_fn((x_higher - x_lower) / delta).max()
if torch.all(E <= 1.):
x = x_higher

View File

@@ -1,4 +1,3 @@
import numpy as np
import torch
import torch.nn.functional as F
from diffusion.unit2mel import load_model_vocoder

View File

@@ -3,13 +3,11 @@ author: wayn391@mastertones
'''
import os
import json
import time
import yaml
import datetime
import torch
import matplotlib.pyplot as plt
from . import utils
from torch.utils.tensorboard import SummaryWriter
class Saver(object):

View File

@@ -1,7 +1,6 @@
import os
import yaml
import json
import pickle
import torch
def traverse_dir(
@@ -121,6 +120,6 @@ def load_model(
ckpt = torch.load(path_pt, map_location=torch.device(device))
global_step = ckpt['global_step']
model.load_state_dict(ckpt['model'], strict=False)
if ckpt.get('optimizer') != None:
if ckpt.get("optimizer") is not None:
optimizer.load_state_dict(ckpt['optimizer'])
return global_step, model, optimizer

View File

@@ -4,9 +4,7 @@ import yaml
import torch
import torch.nn as nn
import numpy as np
from wavenet import WaveNet
import torch.nn.functional as F
import diffusion
class DotDict(dict):
def __getattr__(*args):
@@ -147,8 +145,8 @@ class Unit2Mel(nn.Module):
spks.update({i:1.0/float(self.n_spk)})
spk_mix = torch.tensor(spk_mix)
spk_mix = spk_mix.repeat(n_frames, 1)
orgouttt = self.init_spkembed(hubert, f0.unsqueeze(-1), volume.unsqueeze(-1), spk_mix_dict=spks)
outtt = self.forward(hubert, mel2ph, f0, volume, spk_mix)
self.init_spkembed(hubert, f0.unsqueeze(-1), volume.unsqueeze(-1), spk_mix_dict=spks)
self.forward(hubert, mel2ph, f0, volume, spk_mix)
if export_encoder:
torch.onnx.export(
self,
@@ -182,8 +180,8 @@ class Unit2Mel(nn.Module):
spk_mix.append(1.0/float(self.n_spk))
spks.update({i:1.0/float(self.n_spk)})
spk_mix = torch.tensor(spk_mix)
orgouttt = self.orgforward(hubert, f0.unsqueeze(-1), volume.unsqueeze(-1), spk_mix_dict=spks)
outtt = self.forward(hubert, mel2ph, f0, volume, spk_mix)
self.orgforward(hubert, f0.unsqueeze(-1), volume.unsqueeze(-1), spk_mix_dict=spks)
self.forward(hubert, mel2ph, f0, volume, spk_mix)
torch.onnx.export(
self,

View File

@@ -1,4 +1,3 @@
import os
import time
import numpy as np
import torch

View File

@@ -1,5 +1,4 @@
import torch
import torch.nn.functional as F
import math
@@ -109,7 +108,8 @@ class NoiseScheduleVP:
elif self.schedule == 'linear':
return -0.25 * t ** 2 * (self.beta_1 - self.beta_0) - 0.5 * t * self.beta_0
elif self.schedule == 'cosine':
log_alpha_fn = lambda s: torch.log(torch.cos((s + self.cosine_s) / (1. + self.cosine_s) * math.pi / 2.))
def log_alpha_fn(s):
return torch.log(torch.cos((s + self.cosine_s) / (1.0 + self.cosine_s) * math.pi / 2.0))
log_alpha_t = log_alpha_fn(t) - self.cosine_log_alpha_0
return log_alpha_t
@@ -147,7 +147,8 @@ class NoiseScheduleVP:
return t.reshape((-1,))
else:
log_alpha = -0.5 * torch.logaddexp(-2. * lamb, torch.zeros((1,)).to(lamb))
t_fn = lambda log_alpha_t: torch.arccos(torch.exp(log_alpha_t + self.cosine_log_alpha_0)) * 2. * (1. + self.cosine_s) / math.pi - self.cosine_s
def t_fn(log_alpha_t):
return torch.arccos(torch.exp(log_alpha_t + self.cosine_log_alpha_0)) * 2.0 * (1.0 + self.cosine_s) / math.pi - self.cosine_s
t = t_fn(log_alpha)
return t

View File

@@ -116,13 +116,13 @@ class Unit2Mel(nn.Module):
hubert_hidden_size = self.input_channel
n_frames = 10
hubert = torch.randn((1, n_frames, hubert_hidden_size))
mel2ph = torch.arange(end=n_frames).unsqueeze(0).long()
torch.arange(end=n_frames).unsqueeze(0).long()
f0 = torch.randn((1, n_frames))
volume = torch.randn((1, n_frames))
spks = {}
for i in range(n_spk):
spks.update({i:1.0/float(self.n_spk)})
orgouttt = self.init_spkembed(hubert, f0.unsqueeze(-1), volume.unsqueeze(-1), spk_mix_dict=spks)
self.init_spkembed(hubert, f0.unsqueeze(-1), volume.unsqueeze(-1), spk_mix_dict=spks)
def forward(self, units, f0, volume, spk_id = None, spk_mix_dict = None, aug_shift = None,
gt_spec=None, infer=True, infer_speedup=10, method='dpm-solver', k_step=300, use_tqdm=True):

View File

@@ -21,7 +21,6 @@ from models import SynthesizerTrn
import pickle
from diffusion.unit2mel import load_model_vocoder
import yaml
logging.getLogger('matplotlib').setLevel(logging.WARNING)
@@ -153,7 +152,7 @@ class Svc(object):
self.hop_size = self.diffusion_args.data.block_size
self.spk2id = self.diffusion_args.spk
self.speech_encoder = self.diffusion_args.data.encoder
self.unit_interpolate_mode = self.diffusion_args.data.unit_interpolate_mode if self.diffusion_args.data.unit_interpolate_mode!=None else 'left'
self.unit_interpolate_mode = self.diffusion_args.data.unit_interpolate_mode if self.diffusion_args.data.unit_interpolate_mode is not None else 'left'
if spk_mix_enable:
self.diffusion_model.init_spkmix(len(self.spk2id))
else:
@@ -290,7 +289,7 @@ class Svc(object):
audio = torch.FloatTensor(wav).to(self.dev)
audio_mel = None
if self.only_diffusion or self.shallow_diffusion:
vol = self.volume_extractor.extract(audio[None,:])[None,:,None].to(self.dev) if vol==None else vol[:,:,None]
vol = self.volume_extractor.extract(audio[None,:])[None,:,None].to(self.dev) if vol is None else vol[:,:,None]
if self.shallow_diffusion and second_encoding:
audio16k = librosa.resample(audio.detach().cpu().numpy(), orig_sr=self.target_sample, target_sr=16000)
audio16k = torch.from_numpy(audio16k).to(self.dev)

View File

@@ -1,12 +1,7 @@
import hashlib
import json
import logging
import os
import time
from pathlib import Path
import io
import librosa
import maad
import numpy as np
from inference import slicer
import parselmouth
@@ -14,7 +9,6 @@ import soundfile
import torch
import torchaudio
from hubert import hubert_model
import utils
from models import SynthesizerTrn
logging.getLogger('numba').setLevel(logging.WARNING)
@@ -93,7 +87,7 @@ class VitsSvc(object):
def set_device(self, device):
self.device = torch.device(device)
self.hubert_soft.to(self.device)
if self.SVCVITS != None:
if self.SVCVITS is not None:
self.SVCVITS.to(self.device)
def loadCheckpoint(self, path):

View File

@@ -1,14 +1,7 @@
import io
import logging
import time
from pathlib import Path
from spkmix import spk_mix_map
import librosa
import matplotlib.pyplot as plt
import numpy as np
import soundfile
from inference import infer_tool
from inference import slicer
from inference.infer_tool import Svc
logging.getLogger('numba').setLevel(logging.WARNING)

View File

@@ -1,5 +1,3 @@
import copy
import math
import torch
from torch import nn
from torch.nn import functional as F
@@ -8,11 +6,11 @@ import modules.attentions as attentions
import modules.commons as commons
import modules.modules as modules
from torch.nn import Conv1d, ConvTranspose1d, AvgPool1d, Conv2d
from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm
from torch.nn import Conv1d, Conv2d
from torch.nn.utils import weight_norm, spectral_norm
import utils
from modules.commons import init_weights, get_padding
from modules.commons import get_padding
from utils import f0_to_coarse
class ResidualCouplingBlock(nn.Module):
@@ -125,7 +123,7 @@ class DiscriminatorP(torch.nn.Module):
super(DiscriminatorP, self).__init__()
self.period = period
self.use_spectral_norm = use_spectral_norm
norm_f = weight_norm if use_spectral_norm == False else spectral_norm
norm_f = weight_norm if use_spectral_norm is False else spectral_norm
self.convs = nn.ModuleList([
norm_f(Conv2d(1, 32, (kernel_size, 1), (stride, 1), padding=(get_padding(kernel_size, 1), 0))),
norm_f(Conv2d(32, 128, (kernel_size, 1), (stride, 1), padding=(get_padding(kernel_size, 1), 0))),
@@ -160,7 +158,7 @@ class DiscriminatorP(torch.nn.Module):
class DiscriminatorS(torch.nn.Module):
def __init__(self, use_spectral_norm=False):
super(DiscriminatorS, self).__init__()
norm_f = weight_norm if use_spectral_norm == False else spectral_norm
norm_f = weight_norm if use_spectral_norm is False else spectral_norm
self.convs = nn.ModuleList([
norm_f(Conv1d(1, 16, 15, 1, padding=7)),
norm_f(Conv1d(16, 64, 41, 4, groups=4, padding=20)),
@@ -407,7 +405,7 @@ class SynthesizerTrn(nn.Module):
g = self.emb_g(g).transpose(1,2)
# vol proj
vol = self.emb_vol(vol[:,:,None]).transpose(1,2) if vol!=None and self.vol_embedding else 0
vol = self.emb_vol(vol[:,:,None]).transpose(1,2) if vol is not None and self.vol_embedding else 0
# ssl prenet
x_mask = torch.unsqueeze(commons.sequence_mask(c_lengths, c.size(2)), 1).to(c.dtype)
@@ -452,7 +450,7 @@ class SynthesizerTrn(nn.Module):
x_mask = torch.unsqueeze(commons.sequence_mask(c_lengths, c.size(2)), 1).to(c.dtype)
# vol proj
vol = self.emb_vol(vol[:,:,None]).transpose(1,2) if vol!=None and self.vol_embedding else 0
vol = self.emb_vol(vol[:,:,None]).transpose(1,2) if vol is not None and self.vol_embedding else 0
x = self.pre(c) * x_mask + self.emb_uv(uv.long()).transpose(1, 2) + vol

View File

@@ -1,14 +1,13 @@
from typing import Optional,Union
try:
from typing import Literal
except Exception as e:
except Exception:
from typing_extensions import Literal
import numpy as np
import torch
import torchcrepe
from torch import nn
from torch.nn import functional as F
import scipy
#from:https://github.com/fishaudio/fish-diffusion
@@ -334,7 +333,7 @@ class CrepePitchExtractor(BasePitchExtractor):
f0 = torch.where(torch.isnan(f0), torch.full_like(f0, 0), f0)[0]
if torch.all(f0 == 0):
rtn = f0.cpu().numpy() if pad_to==None else np.zeros(pad_to)
rtn = f0.cpu().numpy() if pad_to is None else np.zeros(pad_to)
return rtn,rtn
return self.post_process(x, sampling_rate, f0, pad_to)

View File

@@ -1,12 +1,9 @@
import copy
import math
import numpy as np
import torch
from torch import nn
from torch.nn import functional as F
import modules.commons as commons
import modules.modules as modules
from modules.modules import LayerNorm
@@ -243,7 +240,7 @@ class MultiHeadAttention(nn.Module):
return ret
def _get_relative_embeddings(self, relative_embeddings, length):
max_relative_position = 2 * self.window_size + 1
2 * self.window_size + 1
# Pad first before slice to avoid using cond ops.
pad_length = max(length - (self.window_size + 1), 0)
slice_start_position = max((self.window_size + 1) - length, 0)

View File

@@ -1,7 +1,5 @@
import math
import numpy as np
import torch
from torch import nn
from torch.nn import functional as F
def slice_pitch_segments(x, ids_str, segment_size=4):
@@ -157,7 +155,6 @@ def generate_path(duration, mask):
duration: [b, 1, t_x]
mask: [b, 1, t_y, t_x]
"""
device = duration.device
b, _, t_y, t_x = mask.shape
cum_duration = torch.cumsum(duration, -1)

View File

@@ -1,7 +1,5 @@
import torch
from torch.nn import functional as F
import modules.commons as commons
def feature_loss(fmap_r, fmap_g):

View File

@@ -1,16 +1,5 @@
import math
import os
import random
import torch
from torch import nn
import torch.nn.functional as F
import torch.utils.data
import numpy as np
import librosa
import librosa.util as librosa_util
from librosa.util import normalize, pad_center, tiny
from scipy.signal import get_window
from scipy.io.wavfile import read
from librosa.filters import mel as librosa_mel_fn
MAX_WAV_VALUE = 32768.0

View File

@@ -1,12 +1,8 @@
import copy
import math
import numpy as np
import scipy
import torch
from torch import nn
from torch.nn import functional as F
from torch.nn import Conv1d, ConvTranspose1d, AvgPool1d, Conv2d
from torch.nn import Conv1d
from torch.nn.utils import weight_norm, remove_weight_norm
import modules.commons as commons

View File

@@ -127,7 +127,7 @@ def main():
"Characters": spklist
}
MoeVSConfJson = json.dumps(MoeVSConf)
json.dumps(MoeVSConf)
with open(f"checkpoints/{path}.json", 'w') as MoeVsConfFile:
json.dump(MoeVSConf, MoeVsConfFile, indent = 4)

View File

@@ -6,11 +6,11 @@ import modules.attentions as attentions
import modules.commons as commons
import modules.modules as modules
from torch.nn import Conv1d, ConvTranspose1d, AvgPool1d, Conv2d
from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm
from torch.nn import Conv1d, Conv2d
from torch.nn.utils import weight_norm, spectral_norm
import utils
from modules.commons import init_weights, get_padding
from modules.commons import get_padding
from vdecoder.hifigan.models import Generator
from utils import f0_to_coarse
@@ -124,7 +124,7 @@ class DiscriminatorP(torch.nn.Module):
super(DiscriminatorP, self).__init__()
self.period = period
self.use_spectral_norm = use_spectral_norm
norm_f = weight_norm if use_spectral_norm == False else spectral_norm
norm_f = weight_norm if use_spectral_norm is False else spectral_norm
self.convs = nn.ModuleList([
norm_f(Conv2d(1, 32, (kernel_size, 1), (stride, 1), padding=(get_padding(kernel_size, 1), 0))),
norm_f(Conv2d(32, 128, (kernel_size, 1), (stride, 1), padding=(get_padding(kernel_size, 1), 0))),
@@ -159,7 +159,7 @@ class DiscriminatorP(torch.nn.Module):
class DiscriminatorS(torch.nn.Module):
def __init__(self, use_spectral_norm=False):
super(DiscriminatorS, self).__init__()
norm_f = weight_norm if use_spectral_norm == False else spectral_norm
norm_f = weight_norm if use_spectral_norm is False else spectral_norm
self.convs = nn.ModuleList([
norm_f(Conv1d(1, 16, 15, 1, padding=7)),
norm_f(Conv1d(16, 64, 41, 4, groups=4, padding=20)),

View File

@@ -1,18 +1,11 @@
import copy
import math
import torch
from torch import nn
from torch.nn import functional as F
import modules.attentions as attentions
import modules.commons as commons
import modules.modules as modules
from torch.nn import Conv1d, ConvTranspose1d, AvgPool1d, Conv2d
from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm
import utils
from modules.commons import init_weights, get_padding
from utils import f0_to_coarse
class ResidualCouplingBlock(nn.Module):
@@ -259,7 +252,7 @@ class SynthesizerTrn(nn.Module):
x_mask = torch.unsqueeze(torch.ones_like(f0), 1).to(c.dtype)
# vol proj
vol = self.emb_vol(vol[:,:,None]).transpose(1,2) if vol!=None and self.vol_embedding else 0
vol = self.emb_vol(vol[:,:,None]).transpose(1,2) if vol is not None and self.vol_embedding else 0
x = self.pre(c) * x_mask + self.emb_uv(uv.long()).transpose(1, 2) + vol

View File

@@ -3,8 +3,8 @@ import argparse
import librosa
import numpy as np
import concurrent.futures
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
from multiprocessing import Pool, cpu_count
from concurrent.futures import ProcessPoolExecutor
from multiprocessing import cpu_count
from scipy.io import wavfile
from tqdm import tqdm

View File

@@ -6,12 +6,7 @@ logging.getLogger('matplotlib').setLevel(logging.WARNING)
logging.getLogger('numba').setLevel(logging.WARNING)
import os
import json
import argparse
import itertools
import math
import torch
from torch import nn, optim
from torch.nn import functional as F
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
@@ -287,7 +282,7 @@ def evaluate(hps, generator, eval_loader, writer_eval):
c = c[:1].cuda(0)
f0 = f0[:1].cuda(0)
uv= uv[:1].cuda(0)
if volume!=None:
if volume is not None:
volume = volume[:1].cuda(0)
mel = spec_to_mel_torch(
spec,
@@ -314,7 +309,7 @@ def evaluate(hps, generator, eval_loader, writer_eval):
f"gt/audio_{batch_idx}": y[0]
})
image_dict.update({
f"gen/mel": utils.plot_spectrogram_to_numpy(y_hat_mel[0].cpu().numpy()),
"gen/mel": utils.plot_spectrogram_to_numpy(y_hat_mel[0].cpu().numpy()),
"gt/mel": utils.plot_spectrogram_to_numpy(mel[0].cpu().numpy())
})
utils.summarize(

View File

@@ -1,4 +1,3 @@
import os
import argparse
import torch
from torch.optim import lr_scheduler

View File

@@ -6,17 +6,12 @@ import argparse
import logging
import json
import subprocess
import warnings
import random
import functools
import librosa
import numpy as np
from scipy.io.wavfile import read
import torch
from torch.nn import functional as F
from modules.commons import sequence_mask
import faiss
import tqdm
MATPLOTLIB_FLAG = False
@@ -201,15 +196,20 @@ def clean_checkpoints(path_to_models='logs/44k/', n_ckpts_to_keep=2, sort_by_tim
False -> lexicographically delete ckpts
"""
ckpts_files = [f for f in os.listdir(path_to_models) if os.path.isfile(os.path.join(path_to_models, f))]
name_key = (lambda _f: int(re.compile('._(\d+)\.pth').match(_f).group(1)))
time_key = (lambda _f: os.path.getmtime(os.path.join(path_to_models, _f)))
def name_key(_f):
return int(re.compile("._(\\d+)\\.pth").match(_f).group(1))
def time_key(_f):
return os.path.getmtime(os.path.join(path_to_models, _f))
sort_key = time_key if sort_by_time else name_key
x_sorted = lambda _x: sorted([f for f in ckpts_files if f.startswith(_x) and not f.endswith('_0.pth')], key=sort_key)
def x_sorted(_x):
return sorted([f for f in ckpts_files if f.startswith(_x) and not f.endswith("_0.pth")], key=sort_key)
to_del = [os.path.join(path_to_models, fn) for fn in
(x_sorted('G')[:-n_ckpts_to_keep] + x_sorted('D')[:-n_ckpts_to_keep])]
del_info = lambda fn: logger.info(f".. Free up space by deleting ckpt {fn}")
del_routine = lambda x: [os.remove(x), del_info(x)]
rs = [del_routine(fn) for fn in to_del]
def del_info(fn):
return logger.info(f".. Free up space by deleting ckpt {fn}")
def del_routine(x):
return [os.remove(x), del_info(x)]
[del_routine(fn) for fn in to_del]
def summarize(writer, global_step, scalars={}, histograms={}, images={}, audios={}, audio_sampling_rate=22050):
for k, v in scalars.items():
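
Besides the E731 and F841 fixes, this `clean_checkpoints` hunk fixes W605 ("invalid escape sequence"): in the old `'._(\d+)\.pth'` literal, `\d` and `\.` are not valid string escapes, so the autofix doubled the backslashes; a raw string is the other common repair. Note also that the dropped `rs =` binding leaves a list comprehension executed purely for its `os.remove` side effects. A sketch of the escape-sequence point:

import re

# W605: "\d" in a plain string is an invalid escape (a warning today, slated to
# become an error in future Python). Double the backslash or use a raw string.
pat_escaped = re.compile("._(\\d+)\\.pth")  # what the autofix produced
pat_raw = re.compile(r"._(\d+)\.pth")       # equivalent and usually preferred
assert pat_escaped.match("G_1000.pth").group(1) == "1000"
assert pat_raw.match("G_1000.pth").group(1) == "1000"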

View File

@@ -199,7 +199,7 @@ class SineGen(torch.nn.Module):
output uv: tensor(batchsize=1, length, 1)
"""
with torch.no_grad():
f0_buf = torch.zeros(f0.shape[0], f0.shape[1], self.dim,
torch.zeros(f0.shape[0], f0.shape[1], self.dim,
device=f0.device)
# fundamental component
fn = torch.multiply(f0, torch.FloatTensor([[range(1, self.harmonic_num + 2)]]).to(f0.device))
@@ -353,7 +353,7 @@ class DiscriminatorP(torch.nn.Module):
def __init__(self, period, kernel_size=5, stride=3, use_spectral_norm=False):
super(DiscriminatorP, self).__init__()
self.period = period
norm_f = weight_norm if use_spectral_norm == False else spectral_norm
norm_f = weight_norm if use_spectral_norm is False else spectral_norm
self.convs = nn.ModuleList([
norm_f(Conv2d(1, 32, (kernel_size, 1), (stride, 1), padding=(get_padding(5, 1), 0))),
norm_f(Conv2d(32, 128, (kernel_size, 1), (stride, 1), padding=(get_padding(5, 1), 0))),
@@ -412,7 +412,7 @@ class MultiPeriodDiscriminator(torch.nn.Module):
class DiscriminatorS(torch.nn.Module):
def __init__(self, use_spectral_norm=False):
super(DiscriminatorS, self).__init__()
norm_f = weight_norm if use_spectral_norm == False else spectral_norm
norm_f = weight_norm if use_spectral_norm is False else spectral_norm
self.convs = nn.ModuleList([
norm_f(Conv1d(1, 128, 15, 1, padding=7)),
norm_f(Conv1d(128, 128, 41, 2, groups=4, padding=20)),

View File

@@ -1,14 +1,10 @@
import math
import os
os.environ["LRU_CACHE_CAPACITY"] = "3"
import random
import torch
import torch.utils.data
import numpy as np
import librosa
from librosa.util import normalize
from librosa.filters import mel as librosa_mel_fn
from scipy.io.wavfile import read
import soundfile as sf
def load_wav_to_torch(full_path, target_sr=None, return_empty_on_exception=False):

View File

@@ -1,6 +1,5 @@
import glob
import os
import matplotlib
import torch
from torch.nn.utils import weight_norm
# matplotlib.use("Agg")

View File

@@ -211,7 +211,7 @@ class SineGen(torch.nn.Module):
output uv: tensor(batchsize=1, length, 1)
"""
with torch.no_grad():
f0_buf = torch.zeros(f0.shape[0], f0.shape[1], self.dim,
torch.zeros(f0.shape[0], f0.shape[1], self.dim,
device=f0.device)
# fundamental component
fn = torch.multiply(f0, torch.FloatTensor([[range(1, self.harmonic_num + 2)]]).to(f0.device))
@@ -370,7 +370,7 @@ class DiscriminatorP(torch.nn.Module):
def __init__(self, period, kernel_size=5, stride=3, use_spectral_norm=False):
super(DiscriminatorP, self).__init__()
self.period = period
norm_f = weight_norm if use_spectral_norm == False else spectral_norm
norm_f = weight_norm if use_spectral_norm is False else spectral_norm
self.convs = nn.ModuleList([
norm_f(Conv2d(1, 32, (kernel_size, 1), (stride, 1), padding=(get_padding(5, 1), 0))),
norm_f(Conv2d(32, 128, (kernel_size, 1), (stride, 1), padding=(get_padding(5, 1), 0))),
@@ -429,7 +429,7 @@ class MultiPeriodDiscriminator(torch.nn.Module):
class DiscriminatorS(torch.nn.Module):
def __init__(self, use_spectral_norm=False):
super(DiscriminatorS, self).__init__()
norm_f = weight_norm if use_spectral_norm == False else spectral_norm
norm_f = weight_norm if use_spectral_norm is False else spectral_norm
self.convs = nn.ModuleList([
norm_f(Conv1d(1, 128, 15, 1, padding=7)),
norm_f(Conv1d(128, 128, 41, 2, groups=4, padding=20)),

View File

@@ -1,14 +1,10 @@
import math
import os
os.environ["LRU_CACHE_CAPACITY"] = "3"
import random
import torch
import torch.utils.data
import numpy as np
import librosa
from librosa.util import normalize
from librosa.filters import mel as librosa_mel_fn
from scipy.io.wavfile import read
import soundfile as sf
def load_wav_to_torch(full_path, target_sr=None, return_empty_on_exception=False):

View File

@@ -1,6 +1,5 @@
import glob
import os
import matplotlib
import torch
from torch.nn.utils import weight_norm
# matplotlib.use("Agg")

View File

@@ -289,7 +289,7 @@ class DiscriminatorP(torch.nn.Module):
def __init__(self, period, kernel_size=5, stride=3, use_spectral_norm=False):
super(DiscriminatorP, self).__init__()
self.period = period
norm_f = weight_norm if use_spectral_norm == False else spectral_norm
norm_f = weight_norm if use_spectral_norm is False else spectral_norm
self.convs = nn.ModuleList([
norm_f(Conv2d(1, 32, (kernel_size, 1), (stride, 1), padding=(get_padding(5, 1), 0))),
norm_f(Conv2d(32, 128, (kernel_size, 1), (stride, 1), padding=(get_padding(5, 1), 0))),
@@ -348,7 +348,7 @@ class MultiPeriodDiscriminator(torch.nn.Module):
class DiscriminatorS(torch.nn.Module):
def __init__(self, use_spectral_norm=False):
super(DiscriminatorS, self).__init__()
norm_f = weight_norm if use_spectral_norm == False else spectral_norm
norm_f = weight_norm if use_spectral_norm is False else spectral_norm
self.convs = nn.ModuleList([
norm_f(Conv1d(1, 128, 15, 1, padding=7)),
norm_f(Conv1d(128, 128, 41, 2, groups=4, padding=20)),

View File

@@ -1,14 +1,10 @@
import math
import os
os.environ["LRU_CACHE_CAPACITY"] = "3"
import random
import torch
import torch.utils.data
import numpy as np
import librosa
from librosa.util import normalize
from librosa.filters import mel as librosa_mel_fn
from scipy.io.wavfile import read
import soundfile as sf
import torch.nn.functional as F

View File

@@ -11,7 +11,7 @@ import math
import torch
from torch import nn, Tensor
from torch.nn import Module, Parameter
from torch.nn import Module
from .hardconcrete import HardConcrete
from .pruning_utils import (

View File

@@ -402,9 +402,7 @@ class ConvFeatureExtractionModel(nn.Module):
nn.init.kaiming_normal_(conv.weight)
return conv
assert (
is_layer_norm and is_group_norm
) == False, "layer norm and group norm are exclusive"
assert (is_layer_norm and is_group_norm) is False, "layer norm and group norm are exclusive"
if is_layer_norm:
return nn.Sequential(

View File

@@ -1,4 +1,3 @@
import os
from functools import lru_cache
from typing import Union

View File

@@ -32,7 +32,7 @@ def detect_language(model: "Whisper", mel: Tensor, tokenizer: Tokenizer = None)
if tokenizer is None:
tokenizer = get_tokenizer(model.is_multilingual)
if tokenizer.language is None or tokenizer.language_token not in tokenizer.sot_sequence:
raise ValueError(f"This model doesn't have language tokens so it can't perform lang id")
raise ValueError("This model doesn't have language tokens so it can't perform lang id")
single = mel.ndim == 2
if single:

View File

@@ -196,7 +196,7 @@ class Tokenizer:
def language_token(self) -> int:
"""Returns the token id corresponding to the value of the `language` field"""
if self.language is None:
raise ValueError(f"This tokenizer does not have language token configured")
raise ValueError("This tokenizer does not have language token configured")
additional_tokens = dict(
zip(
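
The `ValueError` message edits in the two whisper hunks above, like the `f"gen/mel"` key earlier in train.py, are F541 ("f-string without any placeholders"): an `f` prefix on a string with no `{...}` fields is almost always a leftover, and dropping it stops implying an interpolation that never happens. Sketch:

lang = "en"  # hypothetical value
plain = "This model doesn't have language tokens so it can't perform lang id"  # no placeholders, no f prefix
formatted = f"Unsupported language: {lang}"  # the prefix is only needed here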

View File

@@ -1,4 +1,3 @@
import io
import os
# os.system("wget -P cvec/ https://huggingface.co/spaces/innnky/nanami/resolve/main/checkpoint_best_legacy_500.pt")
@@ -13,8 +12,6 @@ import re
import json
import subprocess
import edge_tts
import asyncio
from scipy.io import wavfile
import librosa
import torch
@@ -42,7 +39,7 @@ if torch.cuda.is_available():
def upload_mix_append_file(files,sfiles):
try:
if(sfiles == None):
if(sfiles is None):
file_paths = [file.name for file in files]
else:
file_paths = [file.name for file in chain(files,sfiles)]
@@ -68,7 +65,7 @@ def mix_submit_click(js,mode):
def updata_mix_info(files):
try:
if files == None : return mix_model_output1.update(value="")
if files is None : return mix_model_output1.update(value="")
p = {file.name:100 for file in files}
return mix_model_output1.update(value=json.dumps(p,indent=2))
except Exception as e: