-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathobject_lifecycle.py
More file actions
79 lines (61 loc) · 2.1 KB
/
object_lifecycle.py
File metadata and controls
79 lines (61 loc) · 2.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import collections
import re
def nested_defaultdict():
    """Build a mapping whose missing keys auto-create further nested mappings.

    The returned ``defaultdict`` uses this same function as its default
    factory, so ``d[a][b][c] = x`` works at any depth without creating the
    intermediate levels by hand.
    """
    tree = collections.defaultdict()
    # Every missing key is filled with another tree built the same way.
    tree.default_factory = nested_defaultdict
    return tree
class WordSequence(object):
    """Hashable pair of two words.

    The pair is stored as the tuple ``self.value``. Equality and hashing are
    both derived from that tuple, so separately created instances holding the
    same words act as the same dict/Counter key.
    """

    def __init__(self, word0, word1):
        """Store ``word0`` and ``word1`` as the tuple ``self.value``."""
        self.value = (word0, word1)

    def __eq__(self, other):
        """Return True when ``other`` carries an equal ``value``.

        Any object exposing a ``value`` attribute is compared by that
        attribute. Objects without one yield ``NotImplemented`` so that
        ``==`` evaluates to False (and ``!=`` to True) instead of raising
        ``AttributeError``.
        """
        if self is other:
            return True
        try:
            other_value = other.value
        except AttributeError:
            # Not pair-like: let Python fall back to its default comparison.
            return NotImplemented
        return self.value is other_value or self.value == other_value

    def __hash__(self):
        """Hash of the underlying tuple, consistent with ``__eq__``."""
        return hash(self.value)
class FlyweightWordSequence(object):
    """Pair of two words with one shared instance per distinct pair.

    Instances are kept in the class-level ``cache``, indexed first by
    ``word0`` and then by ``word1``. Constructing the class with a pair that
    is already cached returns the existing object instead of a new one.
    ``cache`` is looked up through ``cls``, so subclasses use the same
    mapping unless they define their own ``cache`` attribute.
    """

    # Two-level mapping: cache[word0][word1] -> the shared instance.
    cache = nested_defaultdict()

    def __new__(cls, word0, word1):
        """Return the cached instance for ``(word0, word1)``, creating it once."""
        by_second_word = cls.cache[word0]
        # ``get`` is used instead of indexing because indexing a missing key
        # on a defaultdict would insert a new nested mapping for it.
        word_sequence = by_second_word.get(word1)
        if word_sequence is None:
            word_sequence = super().__new__(cls)
            # Fully initialise the object before it becomes visible in the cache.
            word_sequence.value = (word0, word1)
            by_second_word[word1] = word_sequence
        return word_sequence

    def __eq__(self, other):
        """Return True when ``other`` carries an equal ``value``.

        Any object exposing a ``value`` attribute is compared by that
        attribute. Objects without one yield ``NotImplemented`` so that
        ``==`` evaluates to False (and ``!=`` to True) instead of raising
        ``AttributeError``.
        """
        if self is other:
            return True
        try:
            other_value = other.value
        except AttributeError:
            # Not pair-like: let Python fall back to its default comparison.
            return NotImplemented
        return self.value is other_value or self.value == other_value

    def __hash__(self):
        """Hash of the underlying tuple, consistent with ``__eq__``."""
        return hash(self.value)
def count_word_sequences(path, class_):
    """Count every pair of adjacent words in a text file.

    A word is a maximal run of word characters and apostrophes. Pairs are
    formed across line boundaries: the last word seen so far is paired with
    the first word of the next line that contains any.

    Args:
        path: Path of the text file to read. It is opened in text mode
            without an explicit encoding.
        class_: Callable invoked as ``class_(word0, word1)`` for each pair.
            The result must be hashable and expose the pair as ``.value``.

    Returns:
        A dict with two keys: ``'len'`` is the number of distinct pairs and
        ``'top_3'`` maps the ``.value`` of up to three most common pairs to
        their counts. The same dict is also printed.
    """
    counter = collections.Counter()
    word_pattern = re.compile(r"[\w']+")
    # Last word of the text read so far; it still needs a right-hand partner.
    carry = []
    with open(path) as f:
        for line in f:
            words = carry + word_pattern.findall(line)
            # Pair each word with its successor without repeatedly popping
            # from the front of the list.
            for word0, word1 in zip(words, words[1:]):
                counter[class_(word0, word1)] += 1
            carry = words[-1:]
    output = {
        'len': len(counter),
        'top_3': {
            word_sequence.value: count
            for word_sequence, count in counter.most_common(3)
        },
    }
    print(output)
    return output
def main():
    """Time one counting run per word-sequence class and print each result.

    Each run is executed once through ``timeit``; the printed line is the
    class label followed by the elapsed time it reports.
    """
    import timeit

    # (label, statement to time, setup code) for each benchmark, in run order.
    benchmarks = (
        (
            'WordSequence: ',
            "count_word_sequences('./war_and_peace.txt', WordSequence)",
            'from __main__ import count_word_sequences, WordSequence',
        ),
        (
            'FlyweightWordSequence: ',
            "count_word_sequences('./war_and_peace.txt', FlyweightWordSequence)",
            'from __main__ import count_word_sequences, FlyweightWordSequence',
        ),
    )
    for label, statement, setup_code in benchmarks:
        elapsed = timeit.timeit(stmt=statement, setup=setup_code, number=1)
        print(label + str(elapsed))


if __name__ == '__main__':
    main()