WIP: add ONNX export/inference support (trial)

This commit is contained in:
wataru 2023-02-21 09:17:59 +09:00
parent 7a1391ce08
commit 341b05dc2f
5 changed files with 30 additions and 9 deletions

View File

@ -23,6 +23,10 @@ RUN pip install h5py==3.8.0
RUN pip install matplotlib==3.6.3
#RUN pip install matplotlib==3.2.2
RUN pip install onnx==1.13.0
RUN pip install onnxruntime==1.14.0
RUN pip install onnxsim==0.4.17
ADD /warmup.py /
RUN python3 warmup.py
@ -30,11 +34,15 @@ ADD dummy /
RUN git clone -b v1.5.0.0_SiFiGAN https://github.com/isletennos/MMVC_Trainer.git
WORKDIR /MMVC_Trainer/
RUN git checkout c242d3d1cf7f768af70d9735082ca2bdd90c45f3
#RUN git checkout c242d3d1cf7f768af70d9735082ca2bdd90c45f3
RUN git checkout 8cca023f5f709c70c2c2fc3e880cb1a119e18f44
RUN git clone https://github.com/isletennos/MMVC_Client.git
WORKDIR /MMVC_Trainer/MMVC_Client
RUN git checkout 3374a1177b73e3f6d600e5dbe93af033c36ee120
#RUN git checkout 3374a1177b73e3f6d600e5dbe93af033c36ee120
RUN git checkout 1424609e53c79e2d629add10ae4bfb16fc0c3c82
WORKDIR /
@ -46,6 +54,3 @@ ADD /model/G_v15_best.pth /MMVC_Trainer/fine_model/
RUN cp -r /MMVC_Trainer/configs /MMVC_Trainer/configs_org
WORKDIR /MMVC_Trainer/

View File

@ -62,4 +62,8 @@ $ python3 train_ms.py -c configs/train_config.json -m 20220306_24000
(x) テスト
```
$ python3 MMVC_Client/python/conver_test.py -m logs/G_40000.pth -c configs/train_config.json -s 0 -t 101 --input dataset/00_myvoice/wav/emotion011.wav --output dataset/test.wav --f0_scale 3
```
(x) onnx
```
python3 onnx_export.py --config_file logs/train_config.json --convert_pth logs/G_220000.pth
```

View File

@ -1,7 +1,7 @@
#!/bin/bash
set -eu
DOCKER_IMAGE=dannadori/trainer:20230210_153105
DOCKER_IMAGE=dannadori/trainer:20230221_085208
# DOCKER_IMAGE=trainer
docker run --gpus all --rm -ti \

View File

@ -29,7 +29,10 @@ class MMVC_Namespace(socketio.AsyncNamespace):
await self.emit('response', [timestamp, 0], to=sid)
else:
unpackedData = np.array(struct.unpack('<%sh' % (len(data) // struct.calcsize('<h')), data))
audio1, perf = self.voiceChangerManager.changeVoice(unpackedData)
# audio1, perf = self.voiceChangerManager.changeVoice(unpackedData)
res = self.voiceChangerManager.changeVoice(unpackedData)
audio1 = res[0]
perf = res[1] if len(res) == 2 else [0, 0, 0]
bin = struct.pack('<%sh' % len(audio1), *audio1)
await self.emit('response', [timestamp, bin, perf], to=sid)

View File

@ -367,7 +367,10 @@ class VoiceChanger():
print("[Voice Changer] No ONNX session.")
return np.zeros(1).astype(np.int16)
x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [x for x in data]
# x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [x for x in data]
# sid_tgt1 = torch.LongTensor([self.settings.dstId])
spec, spec_lengths, sid_src, sin, d = data
sid_tgt1 = torch.LongTensor([self.settings.dstId])
# if spec.size()[2] >= 8:
audio1 = self.onnx_session.run(
@ -375,9 +378,15 @@ class VoiceChanger():
{
"specs": spec.numpy(),
"lengths": spec_lengths.numpy(),
"sin": sin.numpy(),
"d0": d[0][:1].numpy(),
"d1": d[1][:1].numpy(),
"d2": d[2][:1].numpy(),
"d3": d[3][:1].numpy(),
"sid_src": sid_src.numpy(),
"sid_tgt": sid_tgt1.numpy()
})[0][0, 0] * self.hps.data.max_wav_value
if hasattr(self, 'np_prev_audio1') == True:
overlapSize = min(self.settings.crossFadeOverlapSize, inputSize)
prev_overlap = self.np_prev_audio1[-1 * overlapSize:]