You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

64 lines
2.1 KiB
Python

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import io
import logging
import time
from pathlib import Path
import librosa
import numpy as np
import soundfile
import os
# Set to False to force CPU inference. CUDA_VISIBLE_DEVICES="-1" leaves no
# GPU visible to CUDA; it is set here, before the `inference` imports that
# follow, so that those imports see the restricted environment.
useCUDA = True
if not useCUDA:
    os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
from inference import infer_tool
from inference import slicer
from inference.infer_tool import Svc
# --- User-editable settings -------------------------------------------------
# Checkpoint and config handed to Svc below.
# (Alternative checkpoint location used previously: logs/32k/G_112000.pth)
model_path = "/dev/shm/G_120000.pth"
config_path = "configs/config.json"

# Input clip names; several wav files may be placed in the raw/ folder.
clean_names = ["18"]
# Pitch shift per clip, in semitones; positive and negative values are allowed.
trans = [10]
# Every speaker voice listed here is synthesized in the same run.
spk_list = ['JP_Hoshino']
# Slicing threshold in dB: -40 by default; -30 suits noisy audio, -50 keeps
# breaths in dry vocals.
slice_db = -40
# Output audio format.
wav_format = 'flac'

# --- One-time initialization --------------------------------------------------
# Quieten numba: only warnings and above are logged.
numba_logger = logging.getLogger('numba')
numba_logger.setLevel(logging.WARNING)

chunks_dict = infer_tool.read_temp("inference/chunks_temp.json")
svc_model = Svc(model_path, config_path)
infer_tool.mkdir(["raw", "results"])

# NOTE(review): presumably extends `trans` in place so it has one entry per
# clip in `clean_names` — confirm against infer_tool.fill_a_to_b.
infer_tool.fill_a_to_b(trans, clean_names)
# Convert every input clip with every requested speaker voice and write one
# result file per (clip, pitch shift, speaker) combination into ./results/.
for clean_name, tran in zip(clean_names, trans):
    # Names without an extension are treated as .wav files under raw/.
    raw_audio_path = f"raw/{clean_name}"
    if "." not in raw_audio_path:
        raw_audio_path += ".wav"
    # NOTE(review): presumably converts non-wav input to a sibling .wav file,
    # which is why the .wav suffix is forced on the next line — confirm.
    infer_tool.format_wav(raw_audio_path)
    wav_path = Path(raw_audio_path).with_suffix('.wav')
    chunks = slicer.cut(wav_path, db_thresh=slice_db)
    audio_data, audio_sr = slicer.chunks2audio(wav_path, chunks)

    for spk in spk_list:
        # Collect per-segment arrays and join them once at the end; boxing
        # every sample into a Python list is far slower and heavier.
        segments = []
        for (slice_tag, data) in audio_data:
            print(f'#=====segment start, {round(len(data) / audio_sr, 3)}s======')
            if slice_tag:
                # Tagged segments are not inferred: emit zeros of the same
                # duration at the model's output sample rate.
                print('jump empty segment')
                length = int(np.ceil(len(data) / audio_sr * svc_model.target_sample))
                _audio = np.zeros(length)
            else:
                # Hand the segment to the model as an in-memory WAV file.
                raw_path = io.BytesIO()
                soundfile.write(raw_path, data, audio_sr, format="wav")
                raw_path.seek(0)
                out_audio, _out_sr = svc_model.infer(spk, tran, raw_path)
                _audio = out_audio.cpu().numpy()
            # float64 matches what the previous list-of-floats produced when
            # soundfile converted it to an array.
            segments.append(np.asarray(_audio, dtype=np.float64))

        # np.concatenate rejects an empty sequence, so fall back to an empty
        # signal when the slicer produced no segments.
        audio = np.concatenate(segments) if segments else np.zeros(0)
        res_path = f'./results/{clean_name}_{tran}key_{spk}.{wav_format}'
        soundfile.write(res_path, audio, svc_model.target_sample, format=wav_format)