diff --git a/README.md b/README.md index baa294a..7aaf2b5 100644 --- a/README.md +++ b/README.md @@ -23,17 +23,29 @@ Some other features include: - The Chinese speaker supports `mixed Chinese and English`. - Fast enough for `CPU real-time inference`. -## Install on Linux +## Install on Linux or macOS + ```bash git clone git@github.com:myshell-ai/MeloTTS.git cd MeloTTS pip install -e . python -m unidic download ``` -We welcome the open-source community to make this repo `Mac` and `Windows` compatible. If you find this repo useful, please consider contributing to the repo. + +We welcome the open-source community to make this repo `Windows` compatible. If you find this repo useful, please consider contributing to the repo. ## Usage +An unofficial [live demo](https://huggingface.co/spaces/mrfakename/MeloTTS) is hosted on Hugging Face Spaces. + +### WebUI + +The WebUI supports multiple languages and voices. First, follow the installation steps. Then, simply run: + +```bash +python app.py +``` + ### English with Multi Accents ```python from melo.api import TTS @@ -42,8 +54,8 @@ from melo.api import TTS speed = 1.0 # CPU is sufficient for real-time inference. -# You can also change to cuda:0 -device = 'cpu' +# You can set it manually to 'cpu' or 'cuda' or 'cuda:0' or 'mps' +device = 'auto' # Will automatically use GPU if available # English text = "Did you ever hear a folk tale about a giant turtle?" @@ -156,7 +168,7 @@ model.tts_to_file(text, speaker_ids['KR'], output_path, speed=speed) ``` ## License -This library is under MIT License. Free for both commercial and non-commercial use. +This library is under MIT License, which means it is free for both commercial and non-commercial use. ## Acknowledgement This implementation is based on several excellent projects, [TTS](https://github.com/coqui-ai/TTS), [VITS](https://github.com/jaywalnut310/vits), [VITS2](https://github.com/daniilrobnikov/vits2) and [Bert-VITS2](https://github.com/fishaudio/Bert-VITS2). 
We appreciate their awesome work! diff --git a/app.py b/app.py index b7d2650..093738e 100644 --- a/app.py +++ b/app.py @@ -7,7 +7,7 @@ print("Make sure you've downloaded unidic (python -m unidic download) for this W from melo.api import TTS speed = 1.0 import tempfile -device = 'cuda' if torch.cuda.is_available() else 'cpu' +device = 'auto' models = { 'EN': TTS(language='EN', device=device), 'ES': TTS(language='ES', device=device), diff --git a/melo/api.py b/melo/api.py index 6e236f9..3727ae2 100644 --- a/melo/api.py +++ b/melo/api.py @@ -8,6 +8,7 @@ import torchaudio import numpy as np import torch.nn as nn from tqdm import tqdm +import torch from . import utils from . import commons @@ -19,8 +20,12 @@ from .download_utils import load_or_download_config, load_or_download_model class TTS(nn.Module): def __init__(self, language, - device='cuda:0'): + device='auto'): super().__init__() + if device == 'auto': + device = 'cpu' + if torch.cuda.is_available(): device = 'cuda' + if torch.backends.mps.is_available(): device = 'mps' if 'cuda' in device: assert torch.cuda.is_available() diff --git a/requirements.txt b/requirements.txt index 4a85b5c..1ec5e6c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -23,6 +23,6 @@ unidecode==1.3.7 pypinyin==0.50.0 cn2an==0.5.22 jieba==0.42.1 -gradio==3.48.0 +gradio langid==1.1.6 tqdm \ No newline at end of file