add HF hub compatibility
This commit is contained in:
@@ -20,7 +20,8 @@ from .download_utils import load_or_download_config, load_or_download_model
|
|||||||
class TTS(nn.Module):
|
class TTS(nn.Module):
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
language,
|
language,
|
||||||
device='auto'):
|
device='auto',
|
||||||
|
use_hf=True):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
if device == 'auto':
|
if device == 'auto':
|
||||||
device = 'cpu'
|
device = 'cpu'
|
||||||
@@ -30,7 +31,7 @@ class TTS(nn.Module):
|
|||||||
assert torch.cuda.is_available()
|
assert torch.cuda.is_available()
|
||||||
|
|
||||||
# config_path =
|
# config_path =
|
||||||
hps = load_or_download_config(language)
|
hps = load_or_download_config(language, use_hf=use_hf)
|
||||||
|
|
||||||
num_languages = hps.num_languages
|
num_languages = hps.num_languages
|
||||||
num_tones = hps.num_tones
|
num_tones = hps.num_tones
|
||||||
@@ -53,7 +54,7 @@ class TTS(nn.Module):
|
|||||||
self.device = device
|
self.device = device
|
||||||
|
|
||||||
# load state_dict
|
# load state_dict
|
||||||
checkpoint_dict = load_or_download_model(language, device)
|
checkpoint_dict = load_or_download_model(language, device, use_hf=use_hf)
|
||||||
self.model.load_state_dict(checkpoint_dict['model'], strict=True)
|
self.model.load_state_dict(checkpoint_dict['model'], strict=True)
|
||||||
|
|
||||||
language = language.split('_')[0]
|
language = language.split('_')[0]
|
||||||
|
|||||||
@@ -2,6 +2,8 @@ import torch
|
|||||||
import os
|
import os
|
||||||
from . import utils
|
from . import utils
|
||||||
from cached_path import cached_path
|
from cached_path import cached_path
|
||||||
|
from huggingface_hub import hf_hub_download
|
||||||
|
|
||||||
DOWNLOAD_CKPT_URLS = {
|
DOWNLOAD_CKPT_URLS = {
|
||||||
'EN': 'https://myshell-public-repo-hosting.s3.amazonaws.com/openvoice/basespeakers/EN/checkpoint.pth',
|
'EN': 'https://myshell-public-repo-hosting.s3.amazonaws.com/openvoice/basespeakers/EN/checkpoint.pth',
|
||||||
'EN_V2': 'https://myshell-public-repo-hosting.s3.amazonaws.com/openvoice/basespeakers/EN_V2/checkpoint.pth',
|
'EN_V2': 'https://myshell-public-repo-hosting.s3.amazonaws.com/openvoice/basespeakers/EN_V2/checkpoint.pth',
|
||||||
@@ -22,14 +24,32 @@ DOWNLOAD_CONFIG_URLS = {
|
|||||||
'KR': 'https://myshell-public-repo-hosting.s3.amazonaws.com/openvoice/basespeakers/KR/config.json',
|
'KR': 'https://myshell-public-repo-hosting.s3.amazonaws.com/openvoice/basespeakers/KR/config.json',
|
||||||
}
|
}
|
||||||
|
|
||||||
# Hugging Face Hub repository id for each supported language code. The loaders
# in this module fetch "config.json" and "checkpoint.pth" from these repos.
LANG_TO_HF_REPO_ID = {
    'EN': 'myshell-ai/MeloTTS-English',
    'EN_V2': 'myshell-ai/MeloTTS-English-v2',
    'FR': 'myshell-ai/MeloTTS-French',
    'JP': 'myshell-ai/MeloTTS-Japanese',
    'ES': 'myshell-ai/MeloTTS-Spanish',
    'ZH': 'myshell-ai/MeloTTS-Chinese',
    'KR': 'myshell-ai/MeloTTS-Korean',
}


def load_or_download_config(locale, use_hf=True):
    """Download the config file for *locale* and parse it into hyper-parameters.

    Args:
        locale: Language identifier. Only the part before the first ``-`` is
            used, upper-cased (``'en-us'`` -> ``'EN'``, ``'EN_V2'`` stays
            ``'EN_V2'``).
        use_hf: When true, fetch ``config.json`` from the Hugging Face repo
            listed in ``LANG_TO_HF_REPO_ID``; otherwise resolve the URL in
            ``DOWNLOAD_CONFIG_URLS`` through ``cached_path``.

    Returns:
        The result of ``utils.get_hparams_from_file`` for the fetched file.

    Raises:
        ValueError: If the normalised language code is not present in the
            table for the selected source.
    """
    language = locale.split('-')[0].upper()

    # Validate with an explicit raise instead of ``assert``: assertions are
    # removed under ``python -O``, which would turn an unsupported language
    # into a bare KeyError with no hint about the valid codes.
    if use_hf:
        if language not in LANG_TO_HF_REPO_ID:
            raise ValueError(
                f"Unsupported language {language!r} for Hugging Face download; "
                f"expected one of {sorted(LANG_TO_HF_REPO_ID)}"
            )
        config_path = hf_hub_download(repo_id=LANG_TO_HF_REPO_ID[language], filename="config.json")
    else:
        if language not in DOWNLOAD_CONFIG_URLS:
            raise ValueError(
                f"Unsupported language {language!r} for URL download; "
                f"expected one of {sorted(DOWNLOAD_CONFIG_URLS)}"
            )
        config_path = cached_path(DOWNLOAD_CONFIG_URLS[language])

    return utils.get_hparams_from_file(config_path)
|
||||||
|
|
||||||
def load_or_download_model(locale, device, use_hf=True):
    """Download the checkpoint for *locale* and load it with ``torch.load``.

    Args:
        locale: Language identifier. Only the part before the first ``-`` is
            used, upper-cased (``'en-us'`` -> ``'EN'``, ``'EN_V2'`` stays
            ``'EN_V2'``).
        device: Passed to ``torch.load`` as ``map_location``.
        use_hf: When true, fetch ``checkpoint.pth`` from the Hugging Face repo
            listed in ``LANG_TO_HF_REPO_ID``; otherwise resolve the URL in
            ``DOWNLOAD_CKPT_URLS`` through ``cached_path``.

    Returns:
        The object produced by ``torch.load`` for the fetched checkpoint.

    Raises:
        ValueError: If the normalised language code is not present in the
            table for the selected source.
    """
    language = locale.split('-')[0].upper()

    # Validate with an explicit raise instead of ``assert``: assertions are
    # removed under ``python -O``, which would turn an unsupported language
    # into a bare KeyError with no hint about the valid codes.
    if use_hf:
        if language not in LANG_TO_HF_REPO_ID:
            raise ValueError(
                f"Unsupported language {language!r} for Hugging Face download; "
                f"expected one of {sorted(LANG_TO_HF_REPO_ID)}"
            )
        ckpt_path = hf_hub_download(repo_id=LANG_TO_HF_REPO_ID[language], filename="checkpoint.pth")
    else:
        if language not in DOWNLOAD_CKPT_URLS:
            raise ValueError(
                f"Unsupported language {language!r} for URL download; "
                f"expected one of {sorted(DOWNLOAD_CKPT_URLS)}"
            )
        ckpt_path = cached_path(DOWNLOAD_CKPT_URLS[language])

    # NOTE(security): torch.load unpickles the file, so the checkpoint must
    # come from a trusted source. Switching to ``weights_only=True`` would be
    # safer but needs confirming against the checkpoint contents first.
    return torch.load(ckpt_path, map_location=device)
|
||||||
|
|||||||
43
test/test_base_model_tts_package_from_S3.py
Normal file
43
test/test_base_model_tts_package_from_S3.py
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
from melo.api import TTS
|
||||||
|
import os
|
||||||
|
import glob
|
||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
|
# Smoke test for the non-Hugging-Face download path: load the model for the
# language given on the command line (use_hf=False) and synthesise every
# example sentence with every speaker into a wav file.
if len(sys.argv) < 2:
    raise SystemExit(f'usage: {sys.argv[0]} <language>')

language = sys.argv[1]
model = TTS(language=language, use_hf=False)

speaker_ids = model.hps.data.spk2id
speakers = list(speaker_ids.keys())

root_folder = language.lower()

# (substring of the lower-cased argument, example-text file, language tag).
# Checked in order and the first match wins, so the order is significant.
text_sources = (
    ('zh', 'basetts_test_resources/zh_mix_en_egs_text.txt', 'ZH_MIX_EN'),
    ('es', 'basetts_test_resources/es_egs_text.txt', 'SP'),
    ('fr', 'basetts_test_resources/fr_egs_text.txt', 'FR'),
    ('en', 'basetts_test_resources/en_egs_text.txt', 'EN'),
    ('jp', 'basetts_test_resources/jp_egs_text.txt', 'JP'),
    ('kr', 'basetts_test_resources/kr_egs_text.txt', 'KR'),
)
for marker, text_file, language_tag in text_sources:
    if marker in root_folder:
        break
else:
    raise NotImplementedError()

language = language_tag

# Close the file deterministically and read it as UTF-8 regardless of the
# platform default encoding (assumes the resource files are UTF-8 - confirm).
with open(text_file, 'r', encoding='utf-8') as text_handle:
    texts = text_handle.readlines()

save_dir = os.path.join('basetts_outputs_package_from_S3', root_folder.split('/')[-1])

for speed in [1.0]:
    for speaker in speakers:
        for sent_id, text in enumerate(texts):
            output_path = f'{save_dir}/{speaker}/speed_{speed}/sent_{sent_id:03d}.wav'
            os.makedirs(os.path.dirname(output_path), exist_ok=True)
            model.tts_to_file(text, speaker_ids[speaker], output_path, speed=speed)
|
||||||
Reference in New Issue
Block a user