WIP: Beatrice

This commit is contained in:
w-okada 2023-11-04 03:29:54 +09:00
parent e62a140698
commit 56e26d438e
12 changed files with 297 additions and 57 deletions

3
.gitignore vendored
View File

@ -66,4 +66,5 @@ start_trainer.sh
venv/
beatrice_internal_api.cp310-win_amd64.pyd
beatrice_internal_api.cp310-win_amd64.pyd
108_average_110b_10.bin

View File

@ -5,3 +5,9 @@
Diffusion SVC and DDSP SVC uses DiffSinger Community Vocoders. Please check the license from the following link.
Please place it on pretrain\\nsf_hifigan if you are using a different model.
https://openvpi.github.io/vocoders/
2. Beatrice JVS Corpus Edition のライセンスについてはこちらを確認してください。
[readme](https://github.com/w-okada/voice-changer/blob/master/server/voice_changer/Beatrice/)
Please check here for the license of the Beatrice JVS Corpus Edition.
[readme](https://github.com/w-okada/voice-changer/blob/master/server/voice_changer/Beatrice/)

View File

@ -28,8 +28,9 @@
- [so-vits-svc](https://github.com/svc-develop-team/so-vits-svc)
- [RVC(Retrieval-based-Voice-Conversion)](https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI)
- [DDSP-SVC](https://github.com/yxlllc/DDSP-SVC)
- [Beatrice JVS Corpus Edition](https://prj-beatrice.com/) * experimental, (***NOT MIT License*** see [readme](https://github.com/w-okada/voice-changer/blob/master/server/voice_changer/Beatrice/))
2. 本ソフトウェアは、ネットワークを介した利用も可能であり、ゲームなどの高負荷なアプリケーションと同時に使用する場合などに音声変換処理の負荷を外部にオフロードすることができます。
1. 本ソフトウェアは、ネットワークを介した利用も可能であり、ゲームなどの高負荷なアプリケーションと同時に使用する場合などに音声変換処理の負荷を外部にオフロードすることができます。
![image](https://user-images.githubusercontent.com/48346627/206640768-53f6052d-0a96-403b-a06c-6714a0b7471d.png)

View File

@ -26,8 +26,9 @@
- [so-vits-svc](https://github.com/svc-develop-team/so-vits-svc)
- [RVC(Retrieval-based-Voice-Conversion)](https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI)
- [DDSP-SVC](https://github.com/yxlllc/DDSP-SVC)
- [Beatrice JVS Corpus Edition](https://prj-beatrice.com/) * experimental, (***NOT MIT License*** see [readme](https://github.com/w-okada/voice-changer/blob/master/server/voice_changer/Beatrice/))
2. Distribute the load by running Voice Changer on a different PC
1. Distribute the load by running Voice Changer on a different PC
The real-time voice changer of this application works on a server-client configuration. By running the MMVC server on a separate PC, you can run it while minimizing the impact on other resource-intensive processes such as gaming commentary.
![image](https://user-images.githubusercontent.com/48346627/206640768-53f6052d-0a96-403b-a06c-6714a0b7471d.png)

View File

@ -1,17 +1,13 @@
{
"[python]": {
"editor.defaultFormatter": "ms-python.black-formatter"
},
"flake8.args": ["--max-line-length=1024", "--ignore=E402,E203,E722"],
"workbench.colorCustomizations": {
"tab.activeBackground": "#65952acc"
},
"python.formatting.provider": "black",
"python.linting.mypyEnabled": false,
"[python]": {
"editor.defaultFormatter": null, // do not use Prettier for Python files
"editor.formatOnSave": true // format automatically when the file is saved
},
"python.formatting.blackArgs": ["--line-length", "550"],
"python.linting.flake8Enabled": true,
"python.linting.flake8Args": [
"--max-line-length=99999"
],
"python.linting.enabled": true
"black-formatter.args": ["--line-length", "550"],
"python.testing.pytestArgs": ["test"],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true
}

View File

@ -15,6 +15,8 @@ VoiceChangerType: TypeAlias = Literal[
"Beatrice",
]
StaticSlot: TypeAlias = Literal["Beatrice-JVS",]
STORED_SETTING_FILE = "stored_setting.json"
SERVER_DEVICE_SAMPLE_RATES = [16000, 32000, 44100, 48000, 96000, 192000]
@ -23,6 +25,8 @@ tmpdir = tempfile.TemporaryDirectory()
SSL_KEY_DIR = os.path.join(tmpdir.name, "keys") if hasattr(sys, "_MEIPASS") else "keys"
MODEL_DIR = os.path.join(tmpdir.name, "logs") if hasattr(sys, "_MEIPASS") else "logs"
UPLOAD_DIR = os.path.join(tmpdir.name, "upload_dir") if hasattr(sys, "_MEIPASS") else "upload_dir"
UPLOAD_DIR = os.path.join(tmpdir.name, "upload_dir") if hasattr(sys, "_MEIPASS") else "upload_dir"
NATIVE_CLIENT_FILE_WIN = os.path.join(sys._MEIPASS, "voice-changer-native-client.exe") if hasattr(sys, "_MEIPASS") else "voice-changer-native-client" # type: ignore
NATIVE_CLIENT_FILE_MAC = (
os.path.join(
@ -36,6 +40,9 @@ NATIVE_CLIENT_FILE_MAC = (
else "voice-changer-native-client"
)
MODEL_DIR_STATIC = os.path.join(sys._MEIPASS, "model_dir_static") if hasattr(sys, "_MEIPASS") else "model_dir_static"
HUBERT_ONNX_MODEL_PATH = os.path.join(sys._MEIPASS, "model_hubert/hubert_simple.onnx") if hasattr(sys, "_MEIPASS") else "model_hubert/hubert_simple.onnx" # type: ignore
@ -48,11 +55,7 @@ def getFrontendPath():
return frontend_path
EmbedderType: TypeAlias = Literal[
"hubert_base",
"contentvec",
"hubert-base-japanese"
]
EmbedderType: TypeAlias = Literal["hubert_base", "contentvec", "hubert-base-japanese"]
class EnumInferenceTypes(Enum):
@ -67,9 +70,7 @@ class EnumInferenceTypes(Enum):
onnxRVCNono = "onnxRVCNono"
DiffusionSVCInferenceType: TypeAlias = Literal[
"combo",
]
DiffusionSVCInferenceType: TypeAlias = Literal["combo",]
PitchExtractorType: TypeAlias = Literal[
@ -82,10 +83,7 @@ PitchExtractorType: TypeAlias = Literal[
"rmvpe_onnx",
]
ServerAudioDeviceType: TypeAlias = Literal[
"audioinput",
"audiooutput"
]
ServerAudioDeviceType: TypeAlias = Literal["audioinput", "audiooutput"]
RVCSampleMode: TypeAlias = Literal[
"production",
@ -147,7 +145,6 @@ def getSampleJsonAndModelIds(mode: RVCSampleMode):
("test-ddpn-v2-nof0-40k-l12-hubert_o_full", {"useIndex": False}),
("test-ddpn-v2-f0-40k-l12-hubert_jp_o_full", {"useIndex": False}),
("test-ddpn-v2-nof0-40k-l12-hubert_jp_o_full", {"useIndex": False}),
]
elif mode == "testOfficial":
return [

View File

@ -1,5 +1,5 @@
from typing import TypeAlias, Union
from const import MAX_SLOT_NUM, DiffusionSVCInferenceType, EnumInferenceTypes, EmbedderType, VoiceChangerType
from const import MAX_SLOT_NUM, MODEL_DIR_STATIC, DiffusionSVCInferenceType, EnumInferenceTypes, EmbedderType, StaticSlot, VoiceChangerType
from dataclasses import dataclass, asdict, field
@ -9,7 +9,7 @@ import json
@dataclass
class ModelSlot:
slotIndex: int = -1
slotIndex: int | StaticSlot = -1
voiceChangerType: VoiceChangerType | None = None
name: str = ""
description: str = ""
@ -40,7 +40,7 @@ class RVCModelSlot(ModelSlot):
sampleId: str = ""
speakers: dict = field(default_factory=lambda: {0: "target"})
version:str = "v2"
version: str = "v2"
@dataclass
@ -137,7 +137,7 @@ class BeatriceModelSlot(ModelSlot):
ModelSlots: TypeAlias = Union[ModelSlot, RVCModelSlot, MMVCv13ModelSlot, MMVCv15ModelSlot, SoVitsSvc40ModelSlot, DDSPSVCModelSlot, DiffusionSVCModelSlot, BeatriceModelSlot]
def loadSlotInfo(model_dir: str, slotIndex: int) -> ModelSlots:
def loadSlotInfo(model_dir: str, slotIndex: int | StaticSlot) -> ModelSlots:
slotDir = os.path.join(model_dir, str(slotIndex))
jsonFile = os.path.join(slotDir, "params.json")
if not os.path.exists(jsonFile):
@ -165,6 +165,9 @@ def loadSlotInfo(model_dir: str, slotIndex: int) -> ModelSlots:
return DiffusionSVCModelSlot(**{k: v for k, v in jsonDict.items() if k in slotInfoKey})
elif slotInfo.voiceChangerType == "Beatrice":
slotInfoKey.extend(list(BeatriceModelSlot.__annotations__.keys()))
if slotIndex == "Beatrice-JVS":
return BeatriceModelSlot(**{k: v for k, v in jsonDict.items() if k in slotInfoKey})
return BeatriceModelSlot(**{k: v for k, v in jsonDict.items() if k in slotInfoKey})
else:
return ModelSlot()
@ -176,6 +179,9 @@ def loadAllSlotInfo(model_dir: str):
slotInfo = loadSlotInfo(model_dir, slotIndex)
slotInfo.slotIndex = slotIndex # スロットインデックスは動的に注入
slotInfos.append(slotInfo)
slotInfo = loadSlotInfo(MODEL_DIR_STATIC, "Beatrice-JVS")
slotInfos.append(slotInfo)
return slotInfos

View File

@ -0,0 +1,15 @@
{
"slotIndex": "Beatrice-JVS",
"voiceChangerType": "Beatrice",
"name": "108_average_110b_10",
"description": "",
"credit": "",
"termsOfUseUrl": "",
"iconFile": "",
"speakers": {
"1": "user1",
"2": "user2"
},
"modelFile": "108_average_110b_10.bin",
"dstId": 1
}

View File

@ -1,31 +1,120 @@
"""
"""
from dataclasses import asdict
from typing import Union
import os
import numpy as np
from const import MODEL_DIR_STATIC
from data.ModelSlot import BeatriceModelSlot
from mods.log_control import VoiceChangaerLogger
from voice_changer.Beatrice.BeatriceSettings import BeatriceSettings
from voice_changer.utils.VoiceChangerModel import AudioInOut, VoiceChangerModel
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
from beatrice_internal_api import BeatriceInternalAPI
logger = VoiceChangaerLogger.get_instance().getLogger()
class BeatriceAPI(BeatriceInternalAPI):
    """Argument-checking facade over the ``BeatriceInternalAPI`` binding.

    Each public method validates its input in Python and then delegates to
    the parent class implementation of the same name.
    """

    def __init__(self, sample_rate: float = 48000.0):
        """Create the converter; a rate below 1000 raises ``ValueError``."""
        if sample_rate < 1000.0:
            raise ValueError(sample_rate)
        rate = float(sample_rate)
        super().__init__(rate)

    def get_n_speakers(self):
        """Return the number of selectable target speakers (fixed at 500)."""
        return 500

    def get_target_speaker_names(self):
        """Return the display names of all target speakers.

        Five consecutive entries are produced for each of ``jvs001`` ..
        ``jvs100``: the unsuffixed name followed by the `` -1``, `` -2``,
        `` +1`` and `` +2`` variants, in that order.
        """
        suffixes = ("", " -1", " -2", " +1", " +2")
        return [f"[商用不可] jvs{i:03d}{suffix}" for i in range(1, 101) for suffix in suffixes]

    def set_sample_rate(self, sample_rate: float):
        """Forward a new sample rate; a rate below 1000 raises ``ValueError``."""
        if sample_rate < 1000.0:
            raise ValueError(sample_rate)
        rate = float(sample_rate)
        super().set_sample_rate(rate)

    def set_target_speaker_id(self, target_speaker_id: int):
        """Select the target speaker.

        Raises ``ValueError`` unless ``0 <= target_speaker_id < get_n_speakers()``.
        """
        in_range = 0 <= target_speaker_id < self.get_n_speakers()
        if not in_range:
            raise ValueError(target_speaker_id)
        super().set_target_speaker_id(int(target_speaker_id))

    def read_parameters(self, filename: Union[str, bytes, os.PathLike]):
        """Hand ``filename`` unchanged to the parent ``read_parameters``."""
        super().read_parameters(filename)

    def convert(self, in_wav: np.ndarray) -> np.ndarray:
        """Run the parent ``convert`` on a 1-D ``float32`` array.

        Raises ``ValueError`` when ``in_wav`` is not one-dimensional or is not
        ``float32``. The result is asserted to have the same shape as the input.
        """
        if in_wav.ndim != 1:
            raise ValueError(in_wav.ndim)
        if in_wav.dtype != np.float32:
            raise ValueError(in_wav.dtype)
        converted = super().convert(in_wav)
        assert in_wav.shape == converted.shape
        return converted
class Beatrice(VoiceChangerModel):
def __init__(self, params: VoiceChangerParams, slotInfo: BeatriceModelSlot):
raise RuntimeError("not implemented")
def __init__(self, params: VoiceChangerParams, slotInfo: BeatriceModelSlot, static: bool = False):
logger.info("[Voice Changer] [Beatrice] Creating instance ")
self.settings = BeatriceSettings()
self.params = params
self.prevVol = 0.0
self.slotInfo = slotInfo
self.audio_buffer: AudioInOut | None = None
self.static = static
def initialize(self):
raise RuntimeError("not implemented")
logger.info("[Voice Changer] [Beatrice] Initializing... ")
self.beatrice_api = BeatriceAPI()
if self.static:
modelPath = os.path.join(MODEL_DIR_STATIC, str(self.slotInfo.slotIndex), os.path.basename(self.slotInfo.modelFile))
else:
modelPath = os.path.join(self.params.model_dir, str(self.slotInfo.slotIndex), os.path.basename(self.slotInfo.modelFile))
self.beatrice_api.read_parameters(modelPath)
self.beatrice_api.set_sample_rate(self.inputSampleRate)
# その他の設定
self.settings.dstId = self.slotInfo.dstId
logger.info("[Voice Changer] [Beatrice] Initializing... done")
def setSamplingRate(self, inputSampleRate, outputSampleRate):
raise RuntimeError("not implemented")
if inputSampleRate == outputSampleRate:
self.inputSampleRate = inputSampleRate
self.outputSampleRate = outputSampleRate
self.initialize()
else:
print("inputSampleRate, outputSampleRate", inputSampleRate, outputSampleRate)
def update_settings(self, key: str, val: int | float | str):
raise RuntimeError("not implemented")
logger.info(f"[Voice Changer][Beatrice]: update_settings {key}:{val}")
if key in self.settings.intData:
setattr(self.settings, key, int(val))
elif key in self.settings.floatData:
setattr(self.settings, key, float(val))
elif key in self.settings.strData:
setattr(self.settings, key, str(val))
else:
return False
return True
def get_info(self):
raise RuntimeError("not implemented")
data = asdict(self.settings)
return data
def get_processing_sampling_rate(self):
raise RuntimeError("not implemented")
return self.inputSampleRate
def generate_input(
self,
@ -33,14 +122,61 @@ class Beatrice(VoiceChangerModel):
crossfadeSize: int,
solaSearchFrame: int = 0,
):
raise RuntimeError("not implemented")
newData = newData.astype(np.float32) / 32768.0
# 過去のデータに連結
if self.audio_buffer is not None:
self.audio_buffer = np.concatenate([self.audio_buffer, newData], 0)
else:
self.audio_buffer = newData
convertSize = newData.shape[0] + crossfadeSize + solaSearchFrame
# バッファがたまっていない場合はzeroで補う
if self.audio_buffer.shape[0] < convertSize:
self.audio_buffer = np.concatenate([np.zeros([convertSize]), self.audio_buffer])
# 変換対象の部分だけ抽出
convertOffset = -1 * convertSize
self.audio_buffer = self.audio_buffer[convertOffset:]
return (self.audio_buffer,)
def inference(self, receivedData: AudioInOut, crossfade_frame: int, sola_search_frame: int):
raise RuntimeError("not implemented")
data = self.generate_input(receivedData, crossfade_frame, sola_search_frame)
audio = (data[0]).astype(np.float32)
self.beatrice_api.set_target_speaker_id(self.settings.dstId)
block_size = 500
out_wav_blocks = []
head = 0
while head < len(audio):
in_wav_block = audio[head : head + block_size]
out_wav_block = self.beatrice_api.convert(in_wav_block)
out_wav_blocks.append(out_wav_block)
head += block_size
out_wav = np.concatenate(out_wav_blocks)
assert audio.shape == out_wav.shape
return (out_wav * 32767.0).astype(np.int16)
def __del__(self):
    """Release the ``pipeline`` attribute, if the instance has one, on finalization."""
    # Nothing in the visible part of this class assigns ``self.pipeline``, so an
    # unconditional ``del`` raises AttributeError inside the finalizer. Tolerate
    # the missing attribute instead of failing during garbage collection.
    try:
        del self.pipeline
    except AttributeError:
        pass
# def export2onnx(self):
# modelSlot = self.slotInfo
# if modelSlot.isONNX:
# print("[Voice Changer] export2onnx, No pyTorch filepath.")
# return {"status": "ng", "path": ""}
# output_file_simple = export2onnx(self.settings.gpu, modelSlot)
# return {
# "status": "ok",
# "path": f"/tmp/{output_file_simple}",
# "filename": output_file_simple,
# }
def get_model_current(self):
return [
{

View File

@ -0,0 +1,63 @@
# Beatrice API
Beatrice API (1.1.0 JVS Corpus Edition)
Copyright 2023 Project Beatrice
https://prj-beatrice.com
営利目的の使用を禁ずる。(COMMERCIAL USE IS PROHIBITED.)
営利目的の基準は JVS コーパスに準ずる。
Beatrice API を、再配布等、著作権法で定められた範囲を超えて利用する場合は、事前に Project Beatrice の許諾を得る必要がある。
Beatrice API は以下の OSS の一部または全部を含む。
* PocketFFT: https://gitlab.mpcdf.mpg.de/mtr/pocketfft/-/tree/cpp
* Copyright 2010-2018 Max-Planck-Society
* BSD-3-Clause license
* https://opensource.org/license/bsd-3-clause/
* fmath: https://github.com/herumi/fmath
* Copyright 2009 MITSUNARI Shigeo
* BSD-3-Clause license
* https://opensource.org/license/bsd-3-clause/
* NumPy: https://github.com/numpy/numpy
* Copyright 2005-2023 NumPy Developers
* BSD-3-Clause license
* https://opensource.org/license/bsd-3-clause/
* Python: https://github.com/python/cpython
* Copyright 2001-2023 Python Software Foundation
* Copyright 2000 BeOpen.com
* Copyright 1995-2000 Corporation for National Research Initiatives
* Copyright 1991-1995 Stichting Mathematisch Centrum
* PSF License
* https://docs.python.org/3.10/license.html#psf-license
モデルの学習に使用したデータセットは以下の一部または全部を含む。
* JVS Corpus: https://sites.google.com/site/shinnosuketakamichi/research-topics/jvs_corpus
* プロプライエタリライセンス
* LibriTTS-R: https://www.openslr.org/141/
* CC BY 4.0
* https://creativecommons.org/licenses/by/4.0/
* このデータセットは以下のデータセットを元に作成されている。
* LibriTTS: https://www.openslr.org/60/
* CC BY 4.0
* https://creativecommons.org/licenses/by/4.0/
* LibriSpeech: https://www.openslr.org/12/
* CC BY 4.0
* https://creativecommons.org/licenses/by/4.0/
* DNS Challenge: https://github.com/microsoft/DNS-Challenge
* CC BY 4.0
* このデータセットのうち、以下のデータセットを含む部分を使用している。
* Audioset: https://research.google.com/audioset/index.html
* CC BY 4.0
* https://creativecommons.org/licenses/by/4.0/
* Freesound: https://freesound.org/
* このうち CC0 でライセンスされたファイルのみを使用している。
* https://creativecommons.org/publicdomain/zero/1.0/
* OpenSLR26: https://www.openslr.org/26/
* Apache 2.0
* https://opensource.org/license/apache-2-0/
* OpenSLR28: https://www.openslr.org/28/
* Apache 2.0
* https://opensource.org/license/apache-2-0/

View File

@ -1,4 +1,4 @@
from const import UPLOAD_DIR
from const import UPLOAD_DIR, StaticSlot
from data.ModelSlot import ModelSlots, loadAllSlotInfo, saveSlotInfo
import json
import os
@ -30,13 +30,23 @@ class ModelSlotManager:
def _load_model_slot(self, slotIndex: int):
    """Return the entry of ``self.modelSlots`` stored at position ``slotIndex``."""
    slots = self.modelSlots
    return slots[slotIndex]
def _search_model_slot(self, slotIndex: StaticSlot):
    """Return the first slot whose ``slotIndex`` equals the given key, or ``None`` if none does."""
    # Materialize every match (as the list-based lookup always did) before picking the first.
    matches = list(filter(lambda slot: slot.slotIndex == slotIndex, self.modelSlots))
    return matches[0] if matches else None
def getAllSlotInfo(self, reload: bool = False):
    """Return ``self.modelSlots``.

    When ``reload`` is true the list is first replaced with the result of
    ``loadAllSlotInfo(self.model_dir)``.
    """
    if not reload:
        return self.modelSlots
    self.modelSlots = loadAllSlotInfo(self.model_dir)
    return self.modelSlots
def get_slot_info(self, slotIndex: int):
return self._load_model_slot(slotIndex)
def get_slot_info(self, slotIndex: int | StaticSlot):
if slotIndex == "Beatrice-JVS":
return self._search_model_slot(slotIndex)
else:
return self._load_model_slot(slotIndex)
def save_model_slot(self, slotIndex: int, slotInfo: ModelSlots):
self._save_model_slot(slotIndex, slotInfo)

View File

@ -10,7 +10,7 @@ from voice_changer.Local.ServerDevice import ServerDevice, ServerDeviceCallbacks
from voice_changer.ModelSlotManager import ModelSlotManager
from voice_changer.RVC.RVCModelMerger import RVCModelMerger
from voice_changer.VoiceChanger import VoiceChanger
from const import STORED_SETTING_FILE, UPLOAD_DIR
from const import STORED_SETTING_FILE, UPLOAD_DIR, StaticSlot
from voice_changer.VoiceChangerV2 import VoiceChangerV2
from voice_changer.utils.LoadModelParams import LoadModelParamFile, LoadModelParams
from voice_changer.utils.ModelMerger import MergeElement, ModelMergerRequest
@ -22,7 +22,7 @@ import torch
# import threading
from typing import Callable
from typing import Any
import re
logger = VoiceChangaerLogger.get_instance().getLogger()
@ -36,15 +36,15 @@ class GPUInfo:
@dataclass()
class VoiceChangerManagerSettings:
modelSlotIndex: int = -1
modelSlotIndex: int | StaticSlot = -1
passThrough: bool = False # 0: off, 1: on
# ↓mutableな物だけ列挙
boolData: list[str] = field(default_factory=lambda: [
"passThrough"
])
intData: list[str] = field(default_factory=lambda: [
"modelSlotIndex",
])
boolData: list[str] = field(default_factory=lambda: ["passThrough"])
intData: list[str] = field(
default_factory=lambda: [
"modelSlotIndex",
]
)
class VoiceChangerManager(ServerDeviceCallbacks):
@ -227,7 +227,7 @@ class VoiceChangerManager(ServerDeviceCallbacks):
else:
return {"status": "ERROR", "msg": "no model loaded"}
def generateVoiceChanger(self, val: int):
def generateVoiceChanger(self, val: int | StaticSlot):
slotInfo = self.modelSlotManager.get_slot_info(val)
if slotInfo is None:
logger.info(f"[Voice Changer] model slot is not found {val}")
@ -285,7 +285,10 @@ class VoiceChangerManager(ServerDeviceCallbacks):
logger.info("................Beatrice")
from voice_changer.Beatrice.Beatrice import Beatrice
self.voiceChangerModel = Beatrice(self.params, slotInfo)
if val == "Beatrice-JVS":
self.voiceChangerModel = Beatrice(self.params, slotInfo, static=True)
else:
self.voiceChangerModel = Beatrice(self.params, slotInfo)
self.voiceChanger = VoiceChangerV2(self.params)
self.voiceChanger.setModel(self.voiceChangerModel)
else:
@ -304,15 +307,20 @@ class VoiceChangerManager(ServerDeviceCallbacks):
newVal = False
setattr(self.settings, key, newVal)
elif key in self.settings.intData:
newVal = int(val)
if key == "modelSlotIndex":
newVal = newVal % 1000
try:
newVal = int(val)
newVal = newVal % 1000
except:
newVal = re.sub("^\d+", "", val) # 先頭の数字を取り除く。
logger.info(f"[Voice Changer] model slot is changed {self.settings.modelSlotIndex} -> {newVal}")
self.generateVoiceChanger(newVal)
# キャッシュ設定の反映
for k, v in self.stored_setting.items():
if k != "modelSlotIndex":
self.update_settings(k, v)
else:
newVal = int(val)
setattr(self.settings, key, newVal)