first commit
This commit is contained in:
35
MyShellTTSBase/text/english_utils/abbreviations.py
Normal file
35
MyShellTTSBase/text/english_utils/abbreviations.py
Normal file
@@ -0,0 +1,35 @@
|
||||
import re
|
||||
|
||||
# List of (regular expression, replacement) pairs for abbreviations in english:
|
||||
abbreviations_en = [
|
||||
(re.compile("\\b%s\\." % x[0], re.IGNORECASE), x[1])
|
||||
for x in [
|
||||
("mrs", "misess"),
|
||||
("mr", "mister"),
|
||||
("dr", "doctor"),
|
||||
("st", "saint"),
|
||||
("co", "company"),
|
||||
("jr", "junior"),
|
||||
("maj", "major"),
|
||||
("gen", "general"),
|
||||
("drs", "doctors"),
|
||||
("rev", "reverend"),
|
||||
("lt", "lieutenant"),
|
||||
("hon", "honorable"),
|
||||
("sgt", "sergeant"),
|
||||
("capt", "captain"),
|
||||
("esq", "esquire"),
|
||||
("ltd", "limited"),
|
||||
("col", "colonel"),
|
||||
("ft", "fort"),
|
||||
]
|
||||
]
|
||||
|
||||
def expand_abbreviations(text, lang="en"):
|
||||
if lang == "en":
|
||||
_abbreviations = abbreviations_en
|
||||
else:
|
||||
raise NotImplementedError()
|
||||
for regex, replacement in _abbreviations:
|
||||
text = re.sub(regex, replacement, text)
|
||||
return text
|
||||
Reference in New Issue
Block a user