-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtglSyllabification.py
More file actions
95 lines (78 loc) · 3.42 KB
/
tglSyllabification.py
File metadata and controls
95 lines (78 loc) · 3.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# Every character treated as a vowel: plain A/E/I/O/U in both cases plus
# their grave-, acute- and circumflex-accented forms.
vowelSet = set(
    "AEIOUaeiou"
    "ÀÁÂÈÉÊÌÍÎÒÓÔÙÚÛ"
    "àáâèéêìíîòóôùúû"
)

# Two-consonant pairs that syllabify() keeps together when they follow a
# leading "n" or "m" inside a three-letter consonant group.
letterPairs = {"bl", "br", "dr", "pl", "tr"}
def haveVowel(word):
    """Return True if at least one character of *word* is in vowelSet.

    An empty *word* yields False.
    """
    return any(character in vowelSet for character in word)
def sliceValueInList(listSlice, valueSlice, indexSlice):
    """Return a copy of *listSlice* with one element split into two.

    The element at position *valueSlice* is cut at character offset
    *indexSlice*; the part before the cut stays at that position and the
    remainder is placed immediately after it. The input list is not modified.

    Args:
        listSlice: List of strings (or other sliceable items).
        valueSlice: Position of the element to split. Negative positions
            count from the end, as with normal list indexing.
        indexSlice: Offset inside the element at which to cut.

    Returns:
        A new list that is one element longer than *listSlice*.

    Raises:
        IndexError: If *valueSlice* is outside the list.
    """
    target = listSlice[valueSlice]  # also validates the position
    if valueSlice < 0:
        # Normalise so the slices below address the same element; without
        # this a negative position would place the pieces in the wrong spot.
        valueSlice += len(listSlice)
    return (
        listSlice[:valueSlice]
        + [target[:indexSlice], target[indexSlice:]]
        + listSlice[valueSlice + 1 :]
    )
def mergeValueInList(listMerge, fromMerge, toMerge):
    """Return a copy of *listMerge* with a run of elements joined into one.

    The elements at positions *fromMerge* through *toMerge* (inclusive) are
    concatenated into a single string that takes their place. The input list
    is not modified.

    Args:
        listMerge: List of strings.
        fromMerge: Position of the first element to merge.
        toMerge: Position of the last element to merge (inclusive).
            Negative positions count from the end for both bounds.

    Returns:
        A new list with the selected run replaced by its concatenation.
    """
    size = len(listMerge)
    start = fromMerge + size if fromMerge < 0 else fromMerge
    # Resolve a negative end position BEFORE adding 1: otherwise -1 would
    # become 0 and select an empty run instead of "up to the last element".
    stop = (toMerge + size if toMerge < 0 else toMerge) + 1

    result = listMerge[:]
    result[start:stop] = ["".join(result[start:stop])]
    return result
def syllabify(wordToSyllabify):
    """Split a word into a list of syllables.

    The word is first broken into groups: every vowel (per vowelSet) and every
    hyphen becomes its own group, and each run of consonants between them
    becomes one group, e.g. "maglakad" -> m, a, gl, a, k, a, d. Consonant
    groups in the interior of a word part are then split in two, each vowel
    absorbs the consonant group before it, and finally absorbs a vowel-less
    group after it, giving ['mag', 'la', 'kad'].

    Handling of special characters:
      * The digraph "ng" (in any casing) is treated as a single consonant.
      * Apostrophes are removed.
      * Hyphens act as hard boundaries between word parts and are not
        included in the result, e.g. "mag-aral" -> ['mag', 'a', 'ral'].

    Args:
        wordToSyllabify: The word to split, as a string.

    Returns:
        A list of syllable strings; an empty list for an empty word.
    """
    # One placeholder character per casing of "ng", so the digraph counts as
    # a single letter during splitting and is restored exactly afterwards.
    # NOTE: an input that already contains "ŋ"/"Ŋ" comes back as "ng"/"NG".
    ngPlaceholders = {"ng": "ŋ", "NG": "Ŋ", "Ng": "\ue000", "nG": "\ue001"}

    # Isolate vowels and hyphens with spaces so split() yields the groups.
    spaced = "".join(
        f" {letter} " if letter in vowelSet or letter == "-" else letter
        for letter in wordToSyllabify
    )
    for digraph, placeholder in ngPlaceholders.items():
        spaced = spaced.replace(digraph, placeholder)
    word = spaced.replace("'", "").split()

    # Split interior consonant clusters. `offset` counts the splits made so
    # far, mapping a position in the snapshot to its position in `word`.
    offset = 0
    for position, group in enumerate(word[:]):
        index = position + offset
        if (
            len(group) < 2  # single letters (and vowels/hyphens) stay whole
            or index == 0  # cluster that opens the word
            or index == len(word) - 1  # cluster that closes the word
            or word[index - 1] == "-"  # cluster that opens a hyphenated part
            or word[index + 1] == "-"  # cluster that closes a hyphenated part
        ):
            continue
        if len(group) == 2:
            cut = 1  # two letters: one goes to each side
        elif (
            len(group) == 3
            and group[0].lower() in ("n", "m")
            and group[1:].lower() in letterPairs
        ):
            cut = 1  # n/m + bl/br/dr/pl/tr: keep the pair together
        else:
            cut = 2  # otherwise the first two letters are detached
        word = sliceValueInList(word, index, cut)
        offset += 1

    # Join each vowel with the consonant group that precedes it.
    snapshot = word[:]
    merged = 0
    for index, group in enumerate(snapshot):
        if index == 0 or group[-1] not in vowelSet:
            continue
        previous = snapshot[index - 1]
        if previous != "-" and previous not in vowelSet:
            word = mergeValueInList(word, index - 1 - merged, index - merged)
            merged += 1

    # Join each vowel-final group with a vowel-less group that follows it.
    snapshot = word[:]
    merged = 0
    for index, group in enumerate(snapshot[:-1]):
        following = snapshot[index + 1]
        if (
            group[-1] in vowelSet
            and following != "-"
            and not haveVowel(following)
        ):
            word = mergeValueInList(word, index - merged, index + 1 - merged)
            merged += 1

    # Drop the hyphen markers and turn the placeholders back into "ng".
    syllables = []
    for syllable in word:
        if syllable == "-":
            continue
        for digraph, placeholder in ngPlaceholders.items():
            syllable = syllable.replace(placeholder, digraph)
        syllables.append(syllable)
    return syllables