update

2024-02-20 17:50:56 -05:00
parent 194ffa2f60
commit be5e4cf164
1 changed file with 29 additions and 9 deletions
@@ -4,7 +4,7 @@
 </div>

 ## Introduction
-MeloTTS is a high-quality multi-lingual text-to-speech library by [MyShell.ai](https://myshell.ai). Supported languages include:
+MeloTTS is a **high-quality multi-lingual** text-to-speech library by [MyShell.ai](https://myshell.ai). Supported languages include:

 | Language | Example |
 | --- | --- |
@@ -19,15 +19,18 @@ MeloTTS is a high-quality multi-lingual text-to-speech library by [MyShell.ai](h
 | Japanese              | [Link](https://myshell-public-repo-hosting.s3.amazonaws.com/myshellttsbase/examples/jp/JP/speed_1.0/sent_000.wav) |
 | Korean                | [Link](https://myshell-public-repo-hosting.s3.amazonaws.com/myshellttsbase/examples/kr/KR/speed_1.0/sent_000.wav) |

-The Chinese speaker supports `mixed Chinese and English`.
+Some other features include:
+- The Chinese speaker supports `mixed Chinese and English`.
+- Fast enough for `CPU real-time inference`.

-## Install
+## Install on Linux
 ```bash
 git clone git@github.com:myshell-ai/MeloTTS.git
 cd MeloTTS
 pip install -e .
 python -m unidic download
 ```
+We welcome contributions from the open-source community to make this repo compatible with `Mac` and `Windows`. If you find this repo useful, please consider contributing.

 ## Usage

@@ -38,9 +41,13 @@ from melo.api import TTS
 # Speed is adjustable
 speed = 1.0

+# CPU is sufficient for real-time inference.
+# You can also set device to 'cuda:0' to run on a GPU
+device = 'cpu'
+
 # English 
 text = "Did you ever hear a folk tale about a giant turtle?"
-model = TTS(language='EN')
+model = TTS(language='EN', device=device)
 speaker_ids = model.hps.data.spk2id

 # Default accent
@@ -71,8 +78,13 @@ from melo.api import TTS

 # Speed is adjustable
 speed = 1.0
+
+# CPU is sufficient for real-time inference.
+# You can also set device to 'cuda:0' to run on a GPU
+device = 'cpu'
+
 text = "El resplandor del sol acaricia las olas, pintando el cielo con una paleta deslumbrante."
-model = TTS(language='ES')
+model = TTS(language='ES', device=device)
 speaker_ids = model.hps.data.spk2id

 output_path = 'es.wav'
@@ -85,8 +97,10 @@ from melo.api import TTS

 # Speed is adjustable
 speed = 1.0
+device = 'cpu' # or cuda:0
+
 text = "La lueur dorée du soleil caresse les vagues, peignant le ciel d'une palette éblouissante."
-model = TTS(language='FR')
+model = TTS(language='FR', device=device)
 speaker_ids = model.hps.data.spk2id

 output_path = 'fr.wav'
@@ -99,8 +113,10 @@ from melo.api import TTS

 # Speed is adjustable
 speed = 1.0
+device = 'cpu' # or cuda:0
+
 text = "我最近在学习machine learning，希望能够在未来的artificial intelligence领域有所建树。"
-model = TTS(language='ZH')
+model = TTS(language='ZH', device=device)
 speaker_ids = model.hps.data.spk2id

 output_path = 'zh.wav'
@@ -113,8 +129,10 @@ from melo.api import TTS

 # Speed is adjustable
 speed = 1.0
+device = 'cpu' # or cuda:0
+
 text = "彼は毎朝ジョギングをして体を健康に保っています。"
-model = TTS(language='JP')
+model = TTS(language='JP', device=device)
 speaker_ids = model.hps.data.spk2id

 output_path = 'jp.wav'
@@ -127,8 +145,10 @@ from melo.api import TTS

 # Speed is adjustable
 speed = 1.0
+device = 'cpu' # or cuda:0
+
 text = "안녕하세요! 오늘은 날씨가 정말 좋네요."
-model = TTS(language='KR')
+model = TTS(language='KR', device=device)
 speaker_ids = model.hps.data.spk2id

 output_path = 'kr.wav'