-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtrainPredict.py
More file actions
133 lines (118 loc) · 3.96 KB
/
trainPredict.py
File metadata and controls
133 lines (118 loc) · 3.96 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
from HMM import unsupervised_HMM
from HMM_helper import sample_sentence, parse_observations
from preprocessing import read_files, featurize, block_text, read_text
import numpy as np
import random
def _sample_word(POSlookup, obs):
    """Draw one word for part-of-speech ``obs``, weighted by its frequency."""
    entries = POSlookup[obs]
    words = [row[0] for row in entries]
    weights = np.array([row[1] for row in entries], dtype=float)
    # Normalise the raw frequencies into a probability distribution.
    index = np.random.choice(len(words), p=weights / weights.sum())
    return words[index]


def _syllable_count(word, syllables, default=2):
    """Return the syllable count of ``word``; fall back to ``default``.

    Unknown words are printed so they can be spotted in the output, and are
    counted as ``default`` syllables instead of aborting the generation.
    """
    try:
        return syllables[word]
    except KeyError:
        print(word)
        return default


def generate_words(emission, POSlookup, syllables, reverse=False, lastWord=None):
    '''
    Generate one line of exactly 10 syllables from a sequence of emissions.

    For every emission (a part of speech) a word is sampled from POSlookup
    with probability proportional to its frequency. Sampling stops as soon as
    the running syllable total is exactly 10. If the whole emission sequence
    is consumed without hitting 10, the attempt is discarded and restarted.
    The function therefore only returns once a 10-syllable line has been
    found, and does not terminate if no such line is reachable.

    Words missing from `syllables` are counted as 2 syllables (and printed)
    in both directions.

    Input:
        emission:   The list of emissions, each representing the POS of a word
        POSlookup:  Indexable by POS; each entry is a list of
                    [word, frequency] rows for that POS
        syllables:  The dictionary of words and number of syllables each
                    word has
        reverse:    If True, build the line backwards: start from lastWord
                    and prepend every sampled word
        lastWord:   The word the line must end with; required when reverse
                    is True, ignored otherwise
    Output:
        emStr:      The sentence generated. In forward mode every word is
                    followed by a space (so the line ends with one); in
                    reverse mode the line ends with lastWord and has no
                    trailing space.
    '''
    if reverse:
        assert lastWord is not None, "lastWord is required when reverse=True"
        # The seed word never changes between attempts, so look it up once.
        startCount = _syllable_count(lastWord, syllables)
        startStr = lastWord
    else:
        startCount = 0
        startStr = ''

    while True:
        emStr = startStr
        syllableCount = startCount
        for obs in emission:
            newWord = _sample_word(POSlookup, obs)
            syllableCount += _syllable_count(newWord, syllables)
            if reverse:
                emStr = newWord + ' ' + emStr
            else:
                emStr = emStr + newWord + ' '
            if syllableCount == 10:
                return emStr
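# Optional experiment: if it's your heart's desire, train on the Bee Movie
# script instead. NOTE: the final call below is stale -- generate_words()
# also requires a `syllables` dictionary as its third argument.
# beeMovie = read_text("beeMovie")
# beeList, beeLookup, beeFeat = featurize(beeMovie)
# HMM = unsupervised_HMM(beeFeat, 10, 10)
# beeEm, beeState = HMM.generate_emission(20)
# print(generate_words(beeEm, beeLookup))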
def generate_sonnet(poems, lines, syllables, rhymes=None):
    """Train an HMM on `poems`, then generate, print and return a sonnet.

    A single emission sequence is drawn from the model and reused for all
    14 lines; every line is produced by generate_words().

    Input:
        poems:      Text passed to featurize() to build the training features
        lines:      Accepted for the caller's convenience; not used here
        syllables:  Dictionary of words and their syllable counts
        rhymes:     Optional dictionary mapping a word to a collection of
                    words that rhyme with it. When given, the lines are built
                    backwards from rhyming end words in an
                    abab cdcd efef gg scheme. When None, the lines are
                    generated forwards without rhyme.
    Output:
        sonnet:     The sonnet as one string. The unrhymed form ends every
                    line (including the last) with a newline; the rhymed form
                    separates lines with newlines.
    """
    # NOTE(review): 25 and 100 are presumably the number of hidden states and
    # training iterations, and 10 the emission length -- confirm in HMM.py.
    _, POSlookup, features = featurize(poems)
    model = unsupervised_HMM(features, 25, 100)
    emission, _ = model.generate_emission(10)

    if rhymes is not None:
        # abab cdcd efef gg
        endings = [""] * 14
        for first in (0, 1, 4, 5, 8, 9, 12):
            # choose a random word in the dictionary ...
            word, partners = random.choice(list(rhymes.items()))
            # ... and a random word that rhymes with it
            partner = np.random.choice(partners)
            # Quatrain lines rhyme two lines apart; the final couplet rhymes
            # with the very next line.
            second = first + 2 if first < 12 else first + 1
            endings[first] += str(word)
            endings[second] += str(partner)
        sonnet = "\n".join(
            generate_words(emission, POSlookup, syllables, True, ending)
            for ending in endings
        )
    else:
        sonnet = "".join(
            generate_words(emission, POSlookup, syllables) + "\n"
            for _ in range(14)
        )

    print(sonnet)
    return sonnet
def main():
    """Load the corpus twice (per poem and per line) and print a rhymed sonnet."""
    # Only the poem texts are needed from the per-poem read; the syllable
    # dictionary and the rhymes come from the per-line read.
    poems, _, _ = read_files(sep='poem')
    lines, syllables, rhymes = read_files(sep='line')
    # Passing rhymes=None instead would produce an unrhymed sonnet.
    generate_sonnet(poems, lines, syllables, rhymes)
# Run the generator only when this file is executed as a script, so that
# importing the module does not trigger training.
if __name__ == '__main__':
    main()
# our poem
# in rhyme of that fair love that love forbear
# with heavy joy against speak counterfeit
# wouldst out vow of die new thee out doth there
# out slight delight for possessing unset
# in store that than strange therefore by cloud plight
# case through face from with great age of time's winds
# without if summer's woman in canst night
# audit that that equal cheek of home minds
# time for like first i of treasure buried
# song towards by beauty state than beauty shame
# in false determination on dost spread
# that wouldst by of holy thy in hast name
# thou in in thou succession with heart knit
# for hymns since than draw report by eye wit