Files
MeloTTS/MyShellTTSBase/text/symbols.py
2024-02-19 17:49:56 +00:00

291 lines
4.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# Punctuation symbols. The earlier, shorter set was
# ["!", "?", "…", ",", ".", "'", "-"]; the inverted marks "¿" and "¡" were
# appended to it. The third entry must be the ellipsis "…", not an empty
# string: an empty string is not a usable symbol.
punctuation = ["!", "?", "…", ",", ".", "'", "-", "¿", "¡"]
# Punctuation plus the two special tokens "SP" and "UNK"
# (presumably a pause marker and an unknown-symbol marker — confirm with callers).
pu_symbols = punctuation + ["SP", "UNK"]
# Padding symbol.
pad = "_"
# Chinese: symbol inventory, grouped by leading letter for readability.
zh_symbols = [
    "E", "En",
    "a", "ai", "an", "ang", "ao",
    "b", "c", "ch", "d",
    "e", "ei", "en", "eng", "er",
    "f", "g", "h",
    "i", "i0", "ia", "ian", "iang", "iao", "ie", "in", "ing", "iong", "ir", "iu",
    "j", "k", "l", "m", "n",
    "o", "ong", "ou",
    "p", "q", "r", "s", "sh", "t",
    "u", "ua", "uai", "uan", "uang", "ui", "un", "uo",
    "v", "van", "ve", "vn",
    "w", "x", "y", "z", "zh",
    "AA", "EE", "OO",
]
# Size of the Chinese tone range.
num_zh_tones = 6
# Japanese: symbol inventory, grouped by leading letter for readability.
ja_symbols = [
    "N",
    "a", "a:", "b", "by", "ch", "d", "dy",
    "e", "e:", "f", "g", "gy", "h", "hy",
    "i", "i:", "j", "k", "ky", "m", "my", "n", "ny",
    "o", "o:", "p", "py", "q", "r", "ry", "s", "sh", "t", "ts", "ty",
    "u", "u:", "w", "y", "z", "zy",
]
# Size of the Japanese tone range.
num_ja_tones = 1
# English: symbol inventory, grouped by leading letter for readability.
# Note that "V" is upper-case while every other entry is lower-case; it is
# kept exactly as-is because the symbol table is built from these strings.
en_symbols = [
    "aa", "ae", "ah", "ao", "aw", "ay",
    "b", "ch", "d", "dh",
    "eh", "er", "ey",
    "f", "g", "hh",
    "ih", "iy", "jh",
    "k", "l", "m", "n", "ng",
    "ow", "oy",
    "p", "r", "s", "sh", "t", "th",
    "uh", "uw",
    "V", "w", "y", "z", "zh",
]
# Size of the English tone range.
num_en_tones = 4
# Korean
# NOTE(review): almost every entry in this list is an empty string, so once the
# per-language lists are de-duplicated they contribute a single "" symbol plus
# the ASCII marks below. This looks like the original characters were lost
# (the file is flagged as containing ambiguous Unicode characters) —
# TODO restore the intended symbols from a trusted copy before relying on this list.
kr_symbols = ['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '(', '', ')', '', '', '', '', '', '', '', '', '~', '\\', '[', ']', '/', '^', ':', '', '*']
# Size of the Korean tone range.
num_kr_tones = 1
# Spanish: ASCII symbols first, then IPA letters, then stress/length marks.
# "ɡ" (IPA script g) and "ɑ" are distinct code points from ASCII "g" and "a".
es_symbols = [
    "N", "Q",
    "a", "b", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
    "n", "o", "p", "s", "t", "u", "v", "w", "x", "y", "z",
    "ɑ", "æ", "ʃ", "ʑ", "ç", "ɯ", "ɪ", "ɔ", "ɛ", "ɹ",
    "ð", "ə", "ɫ", "ɥ", "ɸ", "ʊ", "ɾ", "ʒ", "θ", "β",
    "ŋ", "ɦ", "ɡ", "r", "ɲ", "ʝ", "ɣ", "ʎ",
    "ˈ", "ˌ", "ː",
]
# Size of the Spanish tone range.
num_es_tones = 1
# French: a short list (presumably only the symbols not already supplied by
# the other language lists — confirm). "\u0303" is the combining tilde,
# written as an escape so it stays visible in the source.
fr_symbols = ["\u0303", "œ", "ø", "ʁ", "ɒ", "ʌ", "ɜ", "ɐ"]
# Size of the French tone range.
num_fr_tones = 1
# German: two entries. The second is a bare combining mark, which renders
# attached to the preceding quote character in most editors.
de_symbols = ["ʏ", "̩"]
# Size of the German tone range.
num_de_tones = 1
# Russian: IPA letters plus the ASCII double quote and caret.
ru_symbols = ["ɭ", "ʲ", "ɕ", '"', "ɵ", "^", "ɬ"]
# Size of the Russian tone range.
num_ru_tones = 1
# Combine all symbols: de-duplicate the per-language lists and sort the result
# so the table order does not depend on list order.
normal_symbols = sorted(
    set().union(
        zh_symbols,
        ja_symbols,
        en_symbols,
        kr_symbols,
        es_symbols,
        fr_symbols,
        de_symbols,
        ru_symbols,
    )
)
# Full table: pad first, then the sorted language symbols, then the
# punctuation/special tokens.
symbols = [pad, *normal_symbols, *pu_symbols]
# Index in `symbols` of each punctuation/special token (first occurrence).
sil_phonemes_ids = [symbols.index(token) for token in pu_symbols]
# Combine all tones: total size of the shared tone range.
num_tones = sum(
    (
        num_zh_tones,
        num_ja_tones,
        num_en_tones,
        num_kr_tones,
        num_es_tones,
        num_fr_tones,
        num_de_tones,
        num_ru_tones,
    )
)
# Language maps: language code -> numeric language ID.
# "SP" shares ID 5 with "ES".
language_id_map = {
    "ZH": 0,
    "JP": 1,
    "EN": 2,
    "ZH_MIX_EN": 3,
    "KR": 4,
    "ES": 5,
    "SP": 5,
    "FR": 6,
}
# Counts the keys, not the distinct IDs, so the "ES"/"SP" pair counts twice.
num_languages = len(language_id_map)
# First tone ID of each language in the shared tone range: each language
# starts where the previous one ends. "ZH_MIX_EN" starts at the same offset as
# "ZH", and "SP" at the same offset as "ES".
_jp_tone_start = num_zh_tones
_en_tone_start = _jp_tone_start + num_ja_tones
_kr_tone_start = _en_tone_start + num_en_tones
_es_tone_start = _kr_tone_start + num_kr_tones
_fr_tone_start = _es_tone_start + num_es_tones
language_tone_start_map = {
    "ZH": 0,
    "ZH_MIX_EN": 0,
    "JP": _jp_tone_start,
    "EN": _en_tone_start,
    "KR": _kr_tone_start,
    "ES": _es_tone_start,
    "SP": _es_tone_start,
    "FR": _fr_tone_start,
}
if __name__ == "__main__":
    # Debug aid: print the symbols that the Chinese and English lists share.
    shared = set(zh_symbols) & set(en_symbols)
    print(sorted(shared))