Files
MeloTTS/MyShellTTSBase/text/english_utils/abbreviations.py
2024-02-19 17:49:56 +00:00

35 lines
948 B
Python

import re
# List of (compiled regular expression, replacement) pairs for English
# abbreviations. Each pattern matches the abbreviation at a word boundary
# followed by a literal period, case-insensitively; the period is part of the
# match, so it is removed when the replacement is substituted in.
abbreviations_en = [
    (re.compile(r"\b%s\." % abbreviation, re.IGNORECASE), expansion)
    for abbreviation, expansion in [
        ("mrs", "missus"),  # previously misspelled as "misess"
        ("mr", "mister"),
        ("dr", "doctor"),
        ("st", "saint"),
        ("co", "company"),
        ("jr", "junior"),
        ("maj", "major"),
        ("gen", "general"),
        ("drs", "doctors"),
        ("rev", "reverend"),
        ("lt", "lieutenant"),
        ("hon", "honorable"),
        ("sgt", "sergeant"),
        ("capt", "captain"),
        ("esq", "esquire"),
        ("ltd", "limited"),
        ("col", "colonel"),
        ("ft", "fort"),
    ]
]
def expand_abbreviations(text, lang="en"):
    """Replace known abbreviations in *text* with their expansions.

    Args:
        text: The input string to process.
        lang: Language code selecting the abbreviation table. Only "en"
            is handled.

    Returns:
        The text after every (pattern, replacement) pair of the selected
        table has been applied in table order.

    Raises:
        NotImplementedError: If *lang* is anything other than "en".
    """
    if lang != "en":
        raise NotImplementedError()

    expanded = text
    # Apply the substitutions sequentially; each one sees the output of
    # the previous substitution.
    for pattern, spoken_form in abbreviations_en:
        expanded = pattern.sub(spoken_form, expanded)
    return expanded