-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlambda_function.py
More file actions
84 lines (75 loc) · 3.33 KB
/
lambda_function.py
File metadata and controls
84 lines (75 loc) · 3.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import json
import urllib.parse
import boto3
import random
# Emitted once, when this module is imported.
print('Loading function')
# Module-level S3 resource handle; get_speeches() reads transcripts through it.
s3 = boto3.resource('s3')
def clean_text(text):
    """Normalise raw transcript text into a lower-case token stream.

    Processing steps:
      * newlines are removed (nothing is inserted in their place);
      * each literal ``<APPLAUSE>`` tag becomes `` *APPLAUSE*``, which keeps
        its upper-case spelling so callers can match it exactly;
      * all other text is lower-cased;
      * double quotes and colons are dropped;
      * any remaining ``<...>`` span is discarded, brackets included;
      * ``?``, ``.``, ``!`` and ``,`` get a space inserted in front of them
        so they split off as separate words.

    Args:
        text: Raw transcript contents.

    Returns:
        The cleaned text as a single string.
    """
    applause_tag = "<APPLAUSE>"
    applause_token = " *APPLAUSE*"
    # NOTE: characters are tested one at a time, so a two-character "--"
    # cannot be matched here; dashes pass through unchanged.
    punctuation = {'?', '.', '!', ','}

    flattened = text.replace("\n", "")
    # Lower-case only the text around the applause tags: lower-casing the
    # whole string after the substitution would turn the marker into
    # "*applause*" and it would no longer match "*APPLAUSE*".
    lowered = applause_token.join(
        piece.lower() for piece in flattened.split(applause_tag)
    )
    lowered = lowered.replace('"', '').replace(':', '')

    kept = []
    inside_tag = False
    for char in lowered:
        if char == '<':
            inside_tag = True
        elif char == '>':
            inside_tag = False
        elif inside_tag:
            # Everything between '<' and '>' is markup, not speech.
            continue
        elif char in punctuation:
            kept.append(" " + char)
        else:
            kept.append(char)
    return ''.join(kept)
def get_likely_next_words(previous_word, current_word, transcript):
    """Count the words that follow ``current_word`` in ``transcript``.

    Args:
        previous_word: The word that came before ``current_word`` in the
            sentence being built.
        current_word: The word whose successors are counted.
        transcript: Sequence of word tokens to scan.

    Returns:
        A ``(followers, contextual_followers)`` tuple of dicts mapping a
        next word to a count. ``followers`` counts every occurrence of
        ``current_word`` that has a successor. ``contextual_followers``
        counts only those occurrences that are also immediately preceded
        by ``previous_word``.
    """
    followers = {}
    contextual_followers = {}
    last_index = len(transcript) - 1
    for index, word in enumerate(transcript):
        # The final token has no successor, so it contributes nothing.
        if word != current_word or index >= last_index:
            continue
        following = transcript[index + 1]
        followers[following] = followers.get(following, 0) + 1
        # "index > 0" prevents transcript[-1] from wrapping to the last token.
        if index > 0 and transcript[index - 1] == previous_word:
            # Keep a separate tally here: reusing the running count from
            # ``followers`` would overstate the contextual matches.
            contextual_followers[following] = (
                contextual_followers.get(following, 0) + 1
            )
    return followers, contextual_followers
def get_speeches(speaker):
    """Fetch and concatenate a subset of a speaker's transcripts from S3.

    Args:
        speaker: Key prefix used to filter objects in the
            ``user-transcripts`` bucket.

    Returns:
        The UTF-8 decoded bodies of the selected objects joined into one
        string, with no separator between them.
    """
    bucket = s3.Bucket('user-transcripts')
    matching_objects = list(bucket.objects.filter(Prefix=speaker))
    # Rather than read in all speeches, an arbitrary subset (positions 5-24
    # of the listing) is used for performance reasons.
    selected = matching_objects[5:25]
    return ''.join(
        entry.get()['Body'].read().decode('utf-8') for entry in selected
    )
def assess_next_words(possible_next_words, probable_next_words, trump_factor):
    """Convert follower counts into integer selection weights.

    Each candidate's weight is its count in ``possible_next_words`` plus its
    count in ``probable_next_words`` (0 when absent), raised to the power
    ``trump_factor`` and truncated to an int.

    Args:
        possible_next_words: Dict of candidate word -> count.
        probable_next_words: Dict of candidate word -> extra count.
        trump_factor: Exponent applied to the combined count.

    Returns:
        A ``(weights, total)`` tuple: the dict of per-word weights and the
        sum of all weights.
    """
    weights = {
        candidate: int(
            (count + (probable_next_words.get(candidate) or 0)) ** trump_factor
        )
        for candidate, count in possible_next_words.items()
    }
    return weights, sum(weights.values())
def lambda_handler(event, context):
    """Generate one sentence by walking a word chain built from transcripts.

    The transcripts returned by ``get_speeches`` are cleaned and tokenised,
    a random starting word is chosen, and each subsequent word is drawn at
    random from the words that follow the current one, weighted by
    ``assess_next_words``. The walk stops at a sentence terminator
    (``.``, ``!``, ``?`` or the applause marker) or when the current word
    has no recorded successor.

    Args:
        event: Mapping with the keys ``'speaker'`` (passed to
            ``get_speeches``) and ``'trump_factor'`` (exponent passed to
            ``assess_next_words``).
        context: Unused.

    Returns:
        The generated words joined by single spaces; an empty string when
        the transcripts contain no words.
    """
    # The applause marker is accepted in either case so that the walk stops
    # on it whether or not the cleaned text has been lower-cased.
    terminators = {'.', '!', '?', '*APPLAUSE*', '*applause*'}

    transcript = get_speeches(event['speaker'])
    trump_factor = event['trump_factor']
    # split() with no argument collapses runs of whitespace, so consecutive
    # spaces in the cleaned text do not produce empty-string "words".
    words = clean_text(transcript).split()
    if not words:
        print("dead end")
        return ''

    current_word = random.choice(words)
    previous_word = ''
    sentence = [current_word]
    while current_word not in terminators:
        possible_next_words, probable_next_words = get_likely_next_words(
            previous_word, current_word, words
        )
        weights, total_weight = assess_next_words(
            possible_next_words, probable_next_words, trump_factor
        )
        if not weights:
            print("dead end")
            break
        candidates = list(weights)
        if total_weight > 0:
            # Draw proportionally to the weights. Every candidate, the
            # first one included, can be chosen, and exactly one word is
            # chosen on each pass.
            next_word = random.choices(
                candidates, weights=list(weights.values())
            )[0]
        else:
            # All weights were truncated to zero; choose uniformly rather
            # than dividing by zero.
            next_word = random.choice(candidates)
        sentence.append(next_word)
        previous_word, current_word = current_word, next_word
    return " ".join(sentence)