WIP: add ONNX export/inference support (trial)

This commit is contained in:
wataru 2023-02-21 09:17:59 +09:00
parent 7a1391ce08
commit 341b05dc2f
5 changed files with 30 additions and 9 deletions

View File

@ -23,6 +23,10 @@ RUN pip install h5py==3.8.0
RUN pip install matplotlib==3.6.3
#RUN pip install matplotlib==3.2.2
RUN pip install onnx==1.13.0
RUN pip install onnxruntime==1.14.0
RUN pip install onnxsim==0.4.17
ADD /warmup.py /
RUN python3 warmup.py
@ -30,11 +34,15 @@ ADD dummy /
RUN git clone -b v1.5.0.0_SiFiGAN https://github.com/isletennos/MMVC_Trainer.git
WORKDIR /MMVC_Trainer/
RUN git checkout c242d3d1cf7f768af70d9735082ca2bdd90c45f3
#RUN git checkout c242d3d1cf7f768af70d9735082ca2bdd90c45f3
RUN git checkout 8cca023f5f709c70c2c2fc3e880cb1a119e18f44
RUN git clone https://github.com/isletennos/MMVC_Client.git
WORKDIR /MMVC_Trainer/MMVC_Client
RUN git checkout 3374a1177b73e3f6d600e5dbe93af033c36ee120
#RUN git checkout 3374a1177b73e3f6d600e5dbe93af033c36ee120
RUN git checkout 1424609e53c79e2d629add10ae4bfb16fc0c3c82
WORKDIR /
@ -46,6 +54,3 @@ ADD /model/G_v15_best.pth /MMVC_Trainer/fine_model/
RUN cp -r /MMVC_Trainer/configs /MMVC_Trainer/configs_org
WORKDIR /MMVC_Trainer/

View File

@ -62,4 +62,8 @@ $ python3 train_ms.py -c configs/train_config.json -m 20220306_24000
(x) テスト
```
$ python3 MMVC_Client/python/conver_test.py -m logs/G_40000.pth -c configs/train_config.json -s 0 -t 101 --input dataset/00_myvoice/wav/emotion011.wav --output dataset/test.wav --f0_scale 3
```
(x) onnx
```
python3 onnx_export.py --config_file logs/train_config.json --convert_pth logs/G_220000.pth
```

View File

@ -1,7 +1,7 @@
#!/bin/bash
set -eu
DOCKER_IMAGE=dannadori/trainer:20230210_153105
DOCKER_IMAGE=dannadori/trainer:20230221_085208
# DOCKER_IMAGE=trainer
docker run --gpus all --rm -ti \

View File

@ -29,7 +29,10 @@ class MMVC_Namespace(socketio.AsyncNamespace):
await self.emit('response', [timestamp, 0], to=sid)
else:
unpackedData = np.array(struct.unpack('<%sh' % (len(data) // struct.calcsize('<h')), data))
audio1, perf = self.voiceChangerManager.changeVoice(unpackedData)
# audio1, perf = self.voiceChangerManager.changeVoice(unpackedData)
res = self.voiceChangerManager.changeVoice(unpackedData)
audio1 = res[0]
perf = res[1] if len(res) == 2 else [0, 0, 0]
bin = struct.pack('<%sh' % len(audio1), *audio1)
await self.emit('response', [timestamp, bin, perf], to=sid)

View File

@ -367,7 +367,10 @@ class VoiceChanger():
print("[Voice Changer] No ONNX session.")
return np.zeros(1).astype(np.int16)
x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [x for x in data]
# x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [x for x in data]
# sid_tgt1 = torch.LongTensor([self.settings.dstId])
spec, spec_lengths, sid_src, sin, d = data
sid_tgt1 = torch.LongTensor([self.settings.dstId])
# if spec.size()[2] >= 8:
audio1 = self.onnx_session.run(
@ -375,9 +378,15 @@ class VoiceChanger():
{
"specs": spec.numpy(),
"lengths": spec_lengths.numpy(),
"sin": sin.numpy(),
"d0": d[0][:1].numpy(),
"d1": d[1][:1].numpy(),
"d2": d[2][:1].numpy(),
"d3": d[3][:1].numpy(),
"sid_src": sid_src.numpy(),
"sid_tgt": sid_tgt1.numpy()
})[0][0, 0] * self.hps.data.max_wav_value
if hasattr(self, 'np_prev_audio1') == True:
overlapSize = min(self.settings.crossFadeOverlapSize, inputSize)
prev_overlap = self.np_prev_audio1[-1 * overlapSize:]