This commit is contained in:
qinzy
2024-02-20 17:50:56 -05:00
parent 194ffa2f60
commit be5e4cf164

View File

@@ -4,7 +4,7 @@
</div> </div>
## Introduction ## Introduction
MeloTTS is a high-quality multi-lingual text-to-speech library by [MyShell.ai](https://myshell.ai). Supported languages include: MeloTTS is a **high-quality multi-lingual** text-to-speech library by [MyShell.ai](https://myshell.ai). Supported languages include:
| Language | Example | | Language | Example |
| --- | --- | | --- | --- |
@@ -19,15 +19,18 @@ MeloTTS is a high-quality multi-lingual text-to-speech library by [MyShell.ai](h
| Japanese | [Link](https://myshell-public-repo-hosting.s3.amazonaws.com/myshellttsbase/examples/jp/JP/speed_1.0/sent_000.wav) | | Japanese | [Link](https://myshell-public-repo-hosting.s3.amazonaws.com/myshellttsbase/examples/jp/JP/speed_1.0/sent_000.wav) |
| Korean | [Link](https://myshell-public-repo-hosting.s3.amazonaws.com/myshellttsbase/examples/kr/KR/speed_1.0/sent_000.wav) | | Korean | [Link](https://myshell-public-repo-hosting.s3.amazonaws.com/myshellttsbase/examples/kr/KR/speed_1.0/sent_000.wav) |
The Chinese speaker supports `mixed Chinese and English`. Some other features include:
- The Chinese speaker supports `mixed Chinese and English`.
- Fast enough for `CPU real-time inference`.
## Install ## Install on Linux
```bash ```bash
git clone git@github.com:myshell-ai/MeloTTS.git git clone git@github.com:myshell-ai/MeloTTS.git
cd MeloTTS cd MeloTTS
pip install -e . pip install -e .
python -m unidic download python -m unidic download
``` ```
We welcome contributions from the open-source community to make this repo compatible with `Mac` and `Windows`. If you find this repo useful, please consider contributing to it.
## Usage ## Usage
@@ -38,9 +41,13 @@ from melo.api import TTS
# Speed is adjustable # Speed is adjustable
speed = 1.0 speed = 1.0
# CPU is sufficient for real-time inference.
# You can also change the device to 'cuda:0' to run on a GPU.
device = 'cpu'
# English # English
text = "Did you ever hear a folk tale about a giant turtle?" text = "Did you ever hear a folk tale about a giant turtle?"
model = TTS(language='EN') model = TTS(language='EN', device=device)
speaker_ids = model.hps.data.spk2id speaker_ids = model.hps.data.spk2id
# Default accent # Default accent
@@ -71,8 +78,13 @@ from melo.api import TTS
# Speed is adjustable # Speed is adjustable
speed = 1.0 speed = 1.0
# CPU is sufficient for real-time inference.
# You can also change the device to 'cuda:0' to run on a GPU.
device = 'cpu'
text = "El resplandor del sol acaricia las olas, pintando el cielo con una paleta deslumbrante." text = "El resplandor del sol acaricia las olas, pintando el cielo con una paleta deslumbrante."
model = TTS(language='ES') model = TTS(language='ES', device=device)
speaker_ids = model.hps.data.spk2id speaker_ids = model.hps.data.spk2id
output_path = 'es.wav' output_path = 'es.wav'
@@ -85,8 +97,10 @@ from melo.api import TTS
# Speed is adjustable # Speed is adjustable
speed = 1.0 speed = 1.0
device = 'cpu' # or cuda:0
text = "La lueur dorée du soleil caresse les vagues, peignant le ciel d'une palette éblouissante." text = "La lueur dorée du soleil caresse les vagues, peignant le ciel d'une palette éblouissante."
model = TTS(language='FR') model = TTS(language='FR', device=device)
speaker_ids = model.hps.data.spk2id speaker_ids = model.hps.data.spk2id
output_path = 'fr.wav' output_path = 'fr.wav'
@@ -99,8 +113,10 @@ from melo.api import TTS
# Speed is adjustable # Speed is adjustable
speed = 1.0 speed = 1.0
device = 'cpu' # or cuda:0
text = "我最近在学习machine learning希望能够在未来的artificial intelligence领域有所建树。" text = "我最近在学习machine learning希望能够在未来的artificial intelligence领域有所建树。"
model = TTS(language='ZH') model = TTS(language='ZH', device=device)
speaker_ids = model.hps.data.spk2id speaker_ids = model.hps.data.spk2id
output_path = 'zh.wav' output_path = 'zh.wav'
@@ -113,8 +129,10 @@ from melo.api import TTS
# Speed is adjustable # Speed is adjustable
speed = 1.0 speed = 1.0
device = 'cpu' # or cuda:0
text = "彼は毎朝ジョギングをして体を健康に保っています。" text = "彼は毎朝ジョギングをして体を健康に保っています。"
model = TTS(language='JP') model = TTS(language='JP', device=device)
speaker_ids = model.hps.data.spk2id speaker_ids = model.hps.data.spk2id
output_path = 'jp.wav' output_path = 'jp.wav'
@@ -127,8 +145,10 @@ from melo.api import TTS
# Speed is adjustable # Speed is adjustable
speed = 1.0 speed = 1.0
device = 'cpu' # or cuda:0
text = "안녕하세요! 오늘은 날씨가 정말 좋네요." text = "안녕하세요! 오늘은 날씨가 정말 좋네요."
model = TTS(language='KR') model = TTS(language='KR', device=device)
speaker_ids = model.hps.data.spk2id speaker_ids = model.hps.data.spk2id
output_path = 'kr.wav' output_path = 'kr.wav'