From 60d18a08fe7cdb9633679a5ef052d4c761da04ff Mon Sep 17 00:00:00 2001 From: mrfakename Date: Mon, 26 Feb 2024 15:50:22 -0800 Subject: [PATCH 01/15] Update requirements.txt --- requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index d661d8f..0fe9d30 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +txtsplit torch<2.0 torchaudio transformers==4.27.4 @@ -22,4 +23,4 @@ pypinyin==0.50.0 cn2an==0.5.22 jieba==0.42.1 gradio==3.48.0 -langid==1.1.6 \ No newline at end of file +langid==1.1.6 From 5daa5c978d8e09cddfc8216867be11a6b166e5f3 Mon Sep 17 00:00:00 2001 From: mrfakename Date: Mon, 26 Feb 2024 15:51:05 -0800 Subject: [PATCH 02/15] Enhance text splitting --- melo/split_utils.py | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/melo/split_utils.py b/melo/split_utils.py index 4bba978..9158e2a 100644 --- a/melo/split_utils.py +++ b/melo/split_utils.py @@ -4,7 +4,7 @@ import glob import numpy as np import soundfile as sf import torchaudio - +from txtsplit import txtsplit def split_sentence(text, min_len=10, language_str='EN'): if language_str in ['EN', 'FR', 'ES', 'SP', 'DE', 'RU']: sentences = split_sentences_latin(text, min_len=min_len) @@ -18,26 +18,27 @@ def split_sentences_latin(text, min_len=10): text = re.sub('[“”]', '"', text) text = re.sub('[‘’]', "'", text) text = re.sub(r"[\<\>\(\)\[\]\"\«\»]+", "", text) + return [item.strip() for item in txtsplit(text, 512, 512) if item.strip()] # 将文本中的换行符、空格和制表符替换为空格 - text = re.sub('[\n\t ]+', ' ', text) - # 在标点符号后添加一个空格 - text = re.sub('([,.!?;])', r'\1 $#!', text) - # 分隔句子并去除前后空格 - sentences = [s.strip() for s in text.split('$#!')] - if len(sentences[-1]) == 0: del sentences[-1] + # text = re.sub('[\n\t ]+', ' ', text) + # # 在标点符号后添加一个空格 + # text = re.sub('([,.!?;])', r'\1 $#!', text) + # # 分隔句子并去除前后空格 + # sentences = [s.strip() for s in text.split('$#!')] + # if 
len(sentences[-1]) == 0: del sentences[-1] - new_sentences = [] - new_sent = [] - count_len = 0 - for ind, sent in enumerate(sentences): - # print(sent) - new_sent.append(sent) - count_len += len(sent.split(" ")) - if count_len > min_len or ind == len(sentences) - 1: - count_len = 0 - new_sentences.append(' '.join(new_sent)) - new_sent = [] - return merge_short_sentences_en(new_sentences) + # new_sentences = [] + # new_sent = [] + # count_len = 0 + # for ind, sent in enumerate(sentences): + # # print(sent) + # new_sent.append(sent) + # count_len += len(sent.split(" ")) + # if count_len > min_len or ind == len(sentences) - 1: + # count_len = 0 + # new_sentences.append(' '.join(new_sent)) + # new_sent = [] + # return merge_short_sentences_en(new_sentences) def split_sentences_zh(text, min_len=10): text = re.sub('[。!?;]', '.', text) @@ -127,4 +128,4 @@ if __name__ == '__main__': print(split_sentence(sp_text, language_str='SP')) print(split_sentence(fr_text, language_str='FR')) print(split_sentence(de_text, language_str='DE')) - print(split_sentence(ru_text, language_str='RU')) \ No newline at end of file + print(split_sentence(ru_text, language_str='RU')) From 6196c161e29580f8cd0ff0e81c47c9d350b1ef01 Mon Sep 17 00:00:00 2001 From: mrfakename Date: Mon, 26 Feb 2024 15:51:39 -0800 Subject: [PATCH 03/15] Update requirements.txt --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 0fe9d30..94648ae 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,7 @@ txtsplit torch<2.0 torchaudio +cached_path transformers==4.27.4 mecab-python3==1.0.5 num2words==0.5.12 From 99f902d86c1b8ddbd1be9a7f090c23175a0b20f0 Mon Sep 17 00:00:00 2001 From: mrfakename Date: Mon, 26 Feb 2024 15:53:10 -0800 Subject: [PATCH 04/15] Use cached_path for better caching --- melo/download_utils.py | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/melo/download_utils.py b/melo/download_utils.py index 
e7f4afe..5d538ef 100644 --- a/melo/download_utils.py +++ b/melo/download_utils.py @@ -1,7 +1,7 @@ import torch import os from . import utils - +from cached_path import cached_path DOWNLOAD_CKPT_URLS = { 'EN': 'https://myshell-public-repo-hosting.s3.amazonaws.com/openvoice/basespeakers/EN/checkpoint.pth', 'EN_V2': 'https://myshell-public-repo-hosting.s3.amazonaws.com/openvoice/basespeakers/EN_V2/checkpoint.pth', @@ -25,23 +25,11 @@ DOWNLOAD_CONFIG_URLS = { def load_or_download_config(locale): language = locale.split('-')[0].upper() assert language in DOWNLOAD_CONFIG_URLS - config_path = os.path.expanduser(f'~/.local/share/openvoice/basespeakers/{language}/config.json') - try: - return utils.get_hparams_from_file(config_path) - except: - # download - os.makedirs(os.path.dirname(config_path), exist_ok=True) - os.system(f'wget {DOWNLOAD_CONFIG_URLS[language]} -O {config_path}') + config_path = cached_path(DOWNLOAD_CONFIG_URLS[language]) return utils.get_hparams_from_file(config_path) def load_or_download_model(locale, device): language = locale.split('-')[0].upper() assert language in DOWNLOAD_CKPT_URLS - ckpt_path = os.path.expanduser(f'~/.local/share/openvoice/basespeakers/{language}/checkpoint.pth') - try: - return torch.load(ckpt_path, map_location=device) - except: - # download - os.makedirs(os.path.dirname(ckpt_path), exist_ok=True) - os.system(f'wget {DOWNLOAD_CKPT_URLS[language]} -O {ckpt_path}') - return torch.load(ckpt_path, map_location=device) \ No newline at end of file + ckpt_path = cached_path(DOWNLOAD_CKPT_URLS[language]) + return torch.load(ckpt_path, map_location=device) From c888d112480c14d40234338cdd311917e14b8776 Mon Sep 17 00:00:00 2001 From: mrfakename Date: Mon, 26 Feb 2024 15:53:45 -0800 Subject: [PATCH 05/15] Splitted -> Split --- melo/api.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/melo/api.py b/melo/api.py index 852b24e..32ea97f 100644 --- a/melo/api.py +++ b/melo/api.py @@ -65,7 +65,7 @@ class TTS(nn.Module): 
@staticmethod def split_sentences_into_pieces(text, language): texts = split_sentence(text, language_str=language) - print(" > Text splitted to sentences.") + print(" > Text split to sentences.") print('\n'.join(texts)) print(" > ===========================") return texts @@ -110,4 +110,4 @@ class TTS(nn.Module): if output_path is None: return audio else: - soundfile.write(output_path, audio, self.hps.data.sampling_rate) \ No newline at end of file + soundfile.write(output_path, audio, self.hps.data.sampling_rate) From e17527623af93d36bbfa2f492672db32f645168e Mon Sep 17 00:00:00 2001 From: mrfakename Date: Mon, 26 Feb 2024 15:53:58 -0800 Subject: [PATCH 06/15] Bump version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index c500bd3..af5ce20 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ class PostDevelopCommand(develop): setup( name='melo', - version='0.1.0', + version='0.1.1', packages=find_packages(), include_package_data=True, install_requires=requirements, From ed9fe64c6beae71f55d06ed47dafffac6ab16359 Mon Sep 17 00:00:00 2001 From: mrfakename Date: Mon, 26 Feb 2024 15:57:18 -0800 Subject: [PATCH 07/15] More API features --- melo/api.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/melo/api.py b/melo/api.py index 32ea97f..203d4a2 100644 --- a/melo/api.py +++ b/melo/api.py @@ -63,16 +63,17 @@ class TTS(nn.Module): return audio_segments @staticmethod - def split_sentences_into_pieces(text, language): + def split_sentences_into_pieces(text, language, quiet=False): texts = split_sentence(text, language_str=language) - print(" > Text split to sentences.") - print('\n'.join(texts)) - print(" > ===========================") + if not quiet: + print(" > Text split to sentences.") + print('\n'.join(texts)) + print(" > ===========================") return texts - def tts_to_file(self, text, speaker_id, output_path=None, sdp_ratio=0.2, noise_scale=0.6, noise_scale_w=0.8, 
speed=1.0): + def tts_to_file(self, text, speaker_id, output_path=None, sdp_ratio=0.2, noise_scale=0.6, noise_scale_w=0.8, speed=1.0, quiet=False, format=None): language = self.language - texts = self.split_sentences_into_pieces(text, language) + texts = self.split_sentences_into_pieces(text, language, quiet) audio_list = [] for t in texts: if language in ['EN', 'ZH_MIX_EN']: @@ -110,4 +111,7 @@ class TTS(nn.Module): if output_path is None: return audio else: - soundfile.write(output_path, audio, self.hps.data.sampling_rate) + if format: + soundfile.write(output_path, audio, self.hps.data.sampling_rate, format=format) + else: + soundfile.write(output_path, audio, self.hps.data.sampling_rate) From ad495c616920f93875ac3088b1b33bd38f1a9325 Mon Sep 17 00:00:00 2001 From: mrfakename Date: Mon, 26 Feb 2024 15:58:53 -0800 Subject: [PATCH 08/15] Add progress bar support --- melo/api.py | 14 ++++++++++++-- requirements.txt | 1 + 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/melo/api.py b/melo/api.py index 203d4a2..6e236f9 100644 --- a/melo/api.py +++ b/melo/api.py @@ -7,6 +7,7 @@ import soundfile import torchaudio import numpy as np import torch.nn as nn +from tqdm import tqdm from . import utils from . 
import commons @@ -71,11 +72,20 @@ class TTS(nn.Module): print(" > ===========================") return texts - def tts_to_file(self, text, speaker_id, output_path=None, sdp_ratio=0.2, noise_scale=0.6, noise_scale_w=0.8, speed=1.0, quiet=False, format=None): + def tts_to_file(self, text, speaker_id, output_path=None, sdp_ratio=0.2, noise_scale=0.6, noise_scale_w=0.8, speed=1.0, pbar=None, format=None, position=None, quiet=False,): language = self.language texts = self.split_sentences_into_pieces(text, language, quiet) audio_list = [] - for t in texts: + if pbar: + tx = pbar(texts) + else: + if position: + tx = tqdm(texts, position=position) + elif quiet: + tx = texts + else: + tx = tqdm(texts) + for t in tx: if language in ['EN', 'ZH_MIX_EN']: t = re.sub(r'([a-z])([A-Z])', r'\1 \2', t) device = self.device diff --git a/requirements.txt b/requirements.txt index 94648ae..4a85b5c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -25,3 +25,4 @@ cn2an==0.5.22 jieba==0.42.1 gradio==3.48.0 langid==1.1.6 +tqdm \ No newline at end of file From 71d1249d167cee0f9677391c12a9b76534f74ba6 Mon Sep 17 00:00:00 2001 From: mrfakename Date: Mon, 26 Feb 2024 16:03:42 -0800 Subject: [PATCH 09/15] Add demo --- app.py | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 app.py diff --git a/app.py b/app.py new file mode 100644 index 0000000..b7d2650 --- /dev/null +++ b/app.py @@ -0,0 +1,38 @@ +# WebUI by mrfakename +# Demo also available on HF Spaces: https://huggingface.co/spaces/mrfakename/MeloTTS +import gradio as gr +import os, torch, io +# os.system('python -m unidic download') +print("Make sure you've downloaded unidic (python -m unidic download) for this WebUI to work.") +from melo.api import TTS +speed = 1.0 +import tempfile +device = 'cuda' if torch.cuda.is_available() else 'cpu' +models = { + 'EN': TTS(language='EN', device=device), + 'ES': TTS(language='ES', device=device), + 'FR': TTS(language='FR', device=device), + 'ZH': 
TTS(language='ZH', device=device), + 'JP': TTS(language='JP', device=device), + 'KR': TTS(language='KR', device=device), +} +speaker_ids = models['EN'].hps.data.spk2id +def synthesize(speaker, text, speed, language, progress=gr.Progress()): + bio = io.BytesIO() + models[language].tts_to_file(text, models[language].hps.data.spk2id[speaker], bio, speed=speed, pbar=progress.tqdm, format='wav') + return bio.getvalue() +def load_speakers(language): + return gr.update(value=list(models[language].hps.data.spk2id.keys())[0], choices=list(models[language].hps.data.spk2id.keys())) +with gr.Blocks() as demo: + gr.Markdown('# MeloTTS WebUI\n\nA WebUI for MeloTTS.') + with gr.Group(): + speaker = gr.Dropdown(speaker_ids.keys(), interactive=True, value='EN-Default', label='Speaker') + language = gr.Radio(['EN', 'ES', 'FR', 'ZH', 'JP', 'KR'], label='Language', value='EN') + language.input(load_speakers, inputs=language, outputs=speaker) + speed = gr.Slider(label='Speed', minimum=0.1, maximum=10.0, value=1.0, interactive=True, step=0.1) + text = gr.Textbox(label="Text to speak", value='The field of text to speech has seen rapid development recently') + btn = gr.Button('Synthesize', variant='primary') + aud = gr.Audio(interactive=False) + btn.click(synthesize, inputs=[speaker, text, speed, language], outputs=[aud]) + gr.Markdown('WebUI by [mrfakename](https://twitter.com/realmrfakename).') +demo.queue(api_open=False, default_concurrency_limit=10).launch(show_api=False) From d0fd6417c33ef002722f0cf00d09ba2e3d4bf081 Mon Sep 17 00:00:00 2001 From: mrfakename Date: Mon, 26 Feb 2024 16:09:58 -0800 Subject: [PATCH 10/15] Add `auto` device to automatically use GPU, add WebUI, update documentation, add link to live demo --- README.md | 22 +++++++++++++++++----- app.py | 2 +- melo/api.py | 7 ++++++- requirements.txt | 2 +- 4 files changed, 25 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index baa294a..7aaf2b5 100644 --- a/README.md +++ b/README.md @@ -23,17 +23,29 @@ 
Some other features include: - The Chinese speaker supports `mixed Chinese and English`. - Fast enough for `CPU real-time inference`. -## Install on Linux +## Install on Linux or macOS + ```bash git clone git@github.com:myshell-ai/MeloTTS.git cd MeloTTS pip install -e . python -m unidic download ``` -We welcome the open-source community to make this repo `Mac` and `Windows` compatible. If you find this repo useful, please consider contributing to the repo. + +We welcome the open-source community to make this repo `Windows` compatible. If you find this repo useful, please consider contributing to the repo. ## Usage +An unofficial [live demo](https://huggingface.co/spaces/mrfakename/MeloTTS) is hosted on Hugging Face Spaces. + +### WebUI + +The WebUI supports muliple languages and voices. First, follow the installation steps. Then, simply run: + +```bash +python app.py +``` + ### English with Multi Accents ```python from melo.api import TTS @@ -42,8 +54,8 @@ from melo.api import TTS speed = 1.0 # CPU is sufficient for real-time inference. -# You can also change to cuda:0 -device = 'cpu' +# You can set it manually to 'cpu' or 'cuda' or 'cuda:0' or 'mps' +device = 'auto' # Will automatically use GPU if available # English text = "Did you ever hear a folk tale about a giant turtle?" @@ -156,7 +168,7 @@ model.tts_to_file(text, speaker_ids['KR'], output_path, speed=speed) ``` ## License -This library is under MIT License. Free for both commercial and non-commercial use. +This library is under MIT License, which means it is free for both commercial and non-commercial use. ## Acknowledgement This implementation is based on several excellent projects, [TTS](https://github.com/coqui-ai/TTS), [VITS](https://github.com/jaywalnut310/vits), [VITS2](https://github.com/daniilrobnikov/vits2) and [Bert-VITS2](https://github.com/fishaudio/Bert-VITS2). We appreciate their awesome work! 
diff --git a/app.py b/app.py index b7d2650..093738e 100644 --- a/app.py +++ b/app.py @@ -7,7 +7,7 @@ print("Make sure you've downloaded unidic (python -m unidic download) for this W from melo.api import TTS speed = 1.0 import tempfile -device = 'cuda' if torch.cuda.is_available() else 'cpu' +device = 'auto' models = { 'EN': TTS(language='EN', device=device), 'ES': TTS(language='ES', device=device), diff --git a/melo/api.py b/melo/api.py index 6e236f9..3727ae2 100644 --- a/melo/api.py +++ b/melo/api.py @@ -8,6 +8,7 @@ import torchaudio import numpy as np import torch.nn as nn from tqdm import tqdm +import torch from . import utils from . import commons @@ -19,8 +20,12 @@ from .download_utils import load_or_download_config, load_or_download_model class TTS(nn.Module): def __init__(self, language, - device='cuda:0'): + device='auto'): super().__init__() + if device == 'auto': + device = 'cpu' + if torch.cuda.is_available(): device = 'cuda' + if torch.backends.mps.is_available(): device = 'mps' if 'cuda' in device: assert torch.cuda.is_available() diff --git a/requirements.txt b/requirements.txt index 4a85b5c..1ec5e6c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -23,6 +23,6 @@ unidecode==1.3.7 pypinyin==0.50.0 cn2an==0.5.22 jieba==0.42.1 -gradio==3.48.0 +gradio langid==1.1.6 tqdm \ No newline at end of file From f0b03eaa9ca3ebf441505019a95fa7e330302ebf Mon Sep 17 00:00:00 2001 From: mrfakename Date: Tue, 27 Feb 2024 00:49:07 +0000 Subject: [PATCH 11/15] Add CLI tool --- README.md | 65 +++++++++++++++++++++++++++++++++++++++++++++++----- melo/main.py | 28 ++++++++++++++++++++++ setup.py | 6 +++++ 3 files changed, 93 insertions(+), 6 deletions(-) create mode 100644 melo/main.py diff --git a/README.md b/README.md index 7aaf2b5..af62279 100644 --- a/README.md +++ b/README.md @@ -46,7 +46,54 @@ The WebUI supports muliple languages and voices. 
First, follow the installation python app.py ``` -### English with Multi Accents +### CLI + +You may use the MeloTTS CLI to interact with MeloTTS. The CLI may be invoked using either `melotts` or `melo`. Here are some examples: + +**Read English text:** + +```bash +melo "Text to read" output.wav +``` + +**Specify a language:** + +```bash +melo "Text to read" output.wav --language EN +``` + +**Specify a speaker:** + +```bash +melo "Text to read" output.wav --language EN --speaker EN-US +melo "Text to read" output.wav --language EN --speaker EN-AU +``` + +The available speakers are: `EN-Default`, `EN-US`, `EN-BR`, `EN-INDIA`, `EN-AU`. + +**Specify a speed:** + +```bash +melo "Text to read" output.wav --language EN --speaker EN-US --speed 1.5 +melo "Text to read" output.wav --speed 1.5 +``` + +**Use a different language:** + +```bash +melo "语音合成领域近年来发展迅速" zh.wav -l ZH +``` + +The full API documentation may be found using: + +```bash +melo --help +``` + +### Python API + +#### English with Multi Accents + ```python from melo.api import TTS @@ -103,7 +150,8 @@ output_path = 'es.wav' model.tts_to_file(text, speaker_ids['ES'], output_path, speed=speed) ``` -### French +#### French + ```python from melo.api import TTS @@ -119,7 +167,8 @@ output_path = 'fr.wav' model.tts_to_file(text, speaker_ids['FR'], output_path, speed=speed) ``` -### Chinese +#### Chinese + ```python from melo.api import TTS @@ -135,7 +184,8 @@ output_path = 'zh.wav' model.tts_to_file(text, speaker_ids['ZH'], output_path, speed=speed) ``` -### Japanese +#### Japanese + ```python from melo.api import TTS @@ -151,7 +201,8 @@ output_path = 'jp.wav' model.tts_to_file(text, speaker_ids['JP'], output_path, speed=speed) ``` -### Korean +#### Korean + ```python from melo.api import TTS @@ -168,7 +219,9 @@ model.tts_to_file(text, speaker_ids['KR'], output_path, speed=speed) ``` ## License + This library is under MIT License, which means it is free for both commercial and non-commercial use.
-## Acknowledgement +## Acknowledgements + This implementation is based on several excellent projects, [TTS](https://github.com/coqui-ai/TTS), [VITS](https://github.com/jaywalnut310/vits), [VITS2](https://github.com/daniilrobnikov/vits2) and [Bert-VITS2](https://github.com/fishaudio/Bert-VITS2). We appreciate their awesome work! diff --git a/melo/main.py b/melo/main.py new file mode 100644 index 0000000..265e088 --- /dev/null +++ b/melo/main.py @@ -0,0 +1,28 @@ +import click +import warnings + +@click.command +@click.argument('text') +@click.argument('output_path') +@click.option('--language', '-l', default='EN', help='Language, defaults to English', type=click.Choice(['EN', 'ES', 'FR', 'ZH', 'JP', 'KR'], case_sensitive=False)) +@click.option('--speaker', '-spk', default='EN-Default', help='Speaker ID, only for English, leave empty for default, ignored if not English. If English, defaults to "EN-Default"', type=click.Choice(['EN-Default', 'EN-US', 'EN-BR', 'EN-INDIA', 'EN-AU'])) +@click.option('--speed', '-s', default=1.0, help='Speed, defaults to 1.0', type=float) +@click.option('--device', '-d', default='auto', help='Device, defaults to auto') +def main(text, output_path, language, speaker, speed, device): + language = language.upper() + if language == '': language = 'EN' + if speaker == '': speaker = None + if (not language == 'EN') and speaker: + warnings.warn('You specified a speaker but the language is English.') + from melo.api import TTS + model = TTS(language=language, device=device) + speaker_ids = model.hps.data.spk2id + if language == 'EN': + if not speaker: speaker = 'EN-Default' + spkr = speaker_ids[speaker] + else: + spkr = speaker_ids[list(speaker_ids.keys())[0]] + model.tts_to_file(text, spkr, output_path, speed=speed) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/setup.py b/setup.py index af5ce20..ad67a65 100644 --- a/setup.py +++ b/setup.py @@ -28,4 +28,10 @@ setup( package_data={ '': ['*.txt', 'cmudict_*'], }, + 
entry_points={ + "console_scripts": [ + "melotts = melo.main:main", + "melo = melo.main:main", + ], + }, ) From 20544c33b9d522fba8d8e2d096f1e0ffc9b2510d Mon Sep 17 00:00:00 2001 From: mrfakename Date: Tue, 27 Feb 2024 01:00:33 +0000 Subject: [PATCH 12/15] Allow loading from file --- app.py => melo/app.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename app.py => melo/app.py (100%) diff --git a/app.py b/melo/app.py similarity index 100% rename from app.py rename to melo/app.py From 28e76e2cd11fb03189d73713bb7be4f352b7d588 Mon Sep 17 00:00:00 2001 From: mrfakename Date: Tue, 27 Feb 2024 01:00:46 +0000 Subject: [PATCH 13/15] Allow loading from file --- README.md | 9 ++++++++- melo/app.py | 11 ++++++++++- melo/main.py | 12 +++++++++++- setup.py | 1 + 4 files changed, 30 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index af62279..ed658ba 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,8 @@ An unofficial [live demo](https://huggingface.co/spaces/mrfakename/MeloTTS) is h The WebUI supports muliple languages and voices. First, follow the installation steps. 
Then, simply run: ```bash -python app.py +melo-ui +# Or: python melo/app.py ``` ### CLI @@ -84,6 +85,12 @@ melo "Text to read" output.wav --speed 1.5 melo "语音合成领域近年来发展迅速" zh.wav -l ZH ``` +**Load from a file:** + +```bash +melo file.txt out.wav --file +``` + The full API documentation may be found using: ```bash diff --git a/melo/app.py b/melo/app.py index 093738e..2fe949b 100644 --- a/melo/app.py +++ b/melo/app.py @@ -7,6 +7,7 @@ print("Make sure you've downloaded unidic (python -m unidic download) for this W from melo.api import TTS speed = 1.0 import tempfile +import click device = 'auto' models = { 'EN': TTS(language='EN', device=device), @@ -35,4 +36,12 @@ with gr.Blocks() as demo: aud = gr.Audio(interactive=False) btn.click(synthesize, inputs=[speaker, text, speed, language], outputs=[aud]) gr.Markdown('WebUI by [mrfakename](https://twitter.com/realmrfakename).') -demo.queue(api_open=False, default_concurrency_limit=10).launch(show_api=False) +@click.command() +@click.option('--share', '-s', is_flag=True, show_default=True, default=False, help="Expose a publicly-accessible shared Gradio link usable by anyone with the link. 
Only share the link with people you trust.") +@click.option('--host', '-h', default=None) +@click.option('--port', '-p', default=None) +def main(share, host, port): + demo.queue(api_open=False, default_concurrency_limit=10).launch(show_api=False, share=share, server_name=host, server_port=port) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/melo/main.py b/melo/main.py index 265e088..58064f1 100644 --- a/melo/main.py +++ b/melo/main.py @@ -1,14 +1,24 @@ import click import warnings +import os @click.command @click.argument('text') @click.argument('output_path') +@click.option("--file", '-f', is_flag=True, show_default=True, default=False, help="Text is a file") @click.option('--language', '-l', default='EN', help='Language, defaults to English', type=click.Choice(['EN', 'ES', 'FR', 'ZH', 'JP', 'KR'], case_sensitive=False)) @click.option('--speaker', '-spk', default='EN-Default', help='Speaker ID, only for English, leave empty for default, ignored if not English. If English, defaults to "EN-Default"', type=click.Choice(['EN-Default', 'EN-US', 'EN-BR', 'EN-INDIA', 'EN-AU'])) @click.option('--speed', '-s', default=1.0, help='Speed, defaults to 1.0', type=float) @click.option('--device', '-d', default='auto', help='Device, defaults to auto') -def main(text, output_path, language, speaker, speed, device): +def main(text, file, output_path, language, speaker, speed, device): + if file: + if not os.path.exists(text): + raise FileNotFoundError(f'Trying to load text from file due to --file/-f flag, but file not found. 
Remove the --file/-f flag to pass a string.') + else: + with open(text) as f: + text = f.read().strip() + if text == '': + raise ValueError('You entered empty text or the file you passed was empty.') language = language.upper() if language == '': language = 'EN' if speaker == '': speaker = None diff --git a/setup.py b/setup.py index ad67a65..5f0990e 100644 --- a/setup.py +++ b/setup.py @@ -32,6 +32,7 @@ setup( "console_scripts": [ "melotts = melo.main:main", "melo = melo.main:main", + "melo-ui = melo.app:main", ], }, ) From eb743f8353fd776854fa41499fdf1bf91d154e58 Mon Sep 17 00:00:00 2001 From: mrfakename Date: Mon, 26 Feb 2024 17:06:50 -0800 Subject: [PATCH 14/15] Install via git+ instead of ssh --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ed658ba..ce95955 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ Some other features include: ## Install on Linux or macOS ```bash -git clone git@github.com:myshell-ai/MeloTTS.git +git clone https://github.com/myshell-ai/MeloTTS.git cd MeloTTS pip install -e . python -m unidic download From 27cf2e3749940b9c36455fad2bee70c8269d5594 Mon Sep 17 00:00:00 2001 From: mrfakename Date: Mon, 26 Feb 2024 20:15:49 -0800 Subject: [PATCH 15/15] Multi Accents -> Multiple Accents --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ce95955 ..3c146b3 100644 --- a/README.md +++ b/README.md @@ -99,7 +99,7 @@ melo --help ### Python API -#### English with Multiple Accents +#### English with Multiple Accents ```python from melo.api import TTS