Files
MeloTTS/MyShellTTSBase/text/es_phonemizer/test.ipynb
2024-02-19 17:49:56 +00:00

125 lines
5.5 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"ename": "ImportError",
"evalue": "attempted relative import with no known parent package",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m/home/xumin/workspace/Bert-VITS2/text/es_phonemizer/test.ipynb Cell 1\u001b[0m line \u001b[0;36m5\n\u001b[1;32m <a href='vscode-notebook-cell://ssh-remote%2Bcatams4/home/xumin/workspace/Bert-VITS2/text/es_phonemizer/test.ipynb#W0sdnNjb2RlLXJlbW90ZQ%3D%3D?line=2'>3</a>\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mos\u001b[39;00m\u001b[39m,\u001b[39m \u001b[39msys\u001b[39;00m\n\u001b[1;32m <a href='vscode-notebook-cell://ssh-remote%2Bcatams4/home/xumin/workspace/Bert-VITS2/text/es_phonemizer/test.ipynb#W0sdnNjb2RlLXJlbW90ZQ%3D%3D?line=3'>4</a>\u001b[0m sys\u001b[39m.\u001b[39mpath\u001b[39m.\u001b[39mappend(\u001b[39m'\u001b[39m\u001b[39m/home/xumin/workspace/MyShell-VC-Training/text/es_phonemizer/\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[0;32m----> <a href='vscode-notebook-cell://ssh-remote%2Bcatams4/home/xumin/workspace/Bert-VITS2/text/es_phonemizer/test.ipynb#W0sdnNjb2RlLXJlbW90ZQ%3D%3D?line=4'>5</a>\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mes_to_ipa\u001b[39;00m \u001b[39mimport\u001b[39;00m es2ipa\n\u001b[1;32m <a href='vscode-notebook-cell://ssh-remote%2Bcatams4/home/xumin/workspace/Bert-VITS2/text/es_phonemizer/test.ipynb#W0sdnNjb2RlLXJlbW90ZQ%3D%3D?line=8'>9</a>\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39msplit_sentences_en\u001b[39m(text, min_len\u001b[39m=\u001b[39m\u001b[39m10\u001b[39m):\n\u001b[1;32m <a href='vscode-notebook-cell://ssh-remote%2Bcatams4/home/xumin/workspace/Bert-VITS2/text/es_phonemizer/test.ipynb#W0sdnNjb2RlLXJlbW90ZQ%3D%3D?line=9'>10</a>\u001b[0m \u001b[39m# 将文本中的换行符、空格和制表符替换为空格\u001b[39;00m\n\u001b[1;32m <a href='vscode-notebook-cell://ssh-remote%2Bcatams4/home/xumin/workspace/Bert-VITS2/text/es_phonemizer/test.ipynb#W0sdnNjb2RlLXJlbW90ZQ%3D%3D?line=10'>11</a>\u001b[0m text \u001b[39m=\u001b[39m re\u001b[39m.\u001b[39msub(\u001b[39m'\u001b[39m\u001b[39m[\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\\t\u001b[39;00m\u001b[39m ]+\u001b[39m\u001b[39m'\u001b[39m, \u001b[39m'\u001b[39m\u001b[39m \u001b[39m\u001b[39m'\u001b[39m, text)\n",
"File \u001b[0;32m/data/workspace/Bert-VITS2/text/es_phonemizer/es_to_ipa.py:1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39mcleaner\u001b[39;00m \u001b[39mimport\u001b[39;00m spanish_cleaners\n\u001b[1;32m 2\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39m.\u001b[39;00m\u001b[39mgruut_wrapper\u001b[39;00m \u001b[39mimport\u001b[39;00m Gruut\n\u001b[1;32m 4\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mes2ipa\u001b[39m(text):\n",
"\u001b[0;31mImportError\u001b[0m: attempted relative import with no known parent package"
]
}
],
"source": [
"import re\n",
"import os\n",
"import os, sys\n",
"sys.path.append('/home/xumin/workspace/MyShell-VC-Training/text/es_phonemizer/')\n",
"from es_to_ipa import es2ipa\n",
"\n",
"\n",
"\n",
"def split_sentences_en(text, min_len=10):\n",
" # 将文本中的换行符、空格和制表符替换为空格\n",
" text = re.sub('[\\n\\t ]+', ' ', text)\n",
" # 在标点符号后添加一个空格\n",
" text = re.sub('([¿—¡])', r'\\1 $#!', text)\n",
" # 分隔句子并去除前后空格\n",
" \n",
" sentences = [s.strip() for s in text.split(' $#!')]\n",
" if len(sentences[-1]) == 0: del sentences[-1]\n",
"\n",
" new_sentences = []\n",
" new_sent = []\n",
" for ind, sent in enumerate(sentences):\n",
" if sent in ['¿', '—', '¡']:\n",
" new_sent.append(sent)\n",
" else:\n",
" new_sent.append(es2ipa(sent))\n",
" \n",
" \n",
" new_sentences = ''.join(new_sent)\n",
"\n",
" return new_sentences"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'—¿aβˈeis estˈaðo kasˈaða alɣˈuna bˈeθ?'"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"split_sentences_en('—¿Habéis estado casada alguna vez?')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'aβˈeis estˈaðo kasˈaða alɣˈuna bˈeθ?'"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"es2ipa('—¿Habéis estado casada alguna vez?')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.18"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}