-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathn_gram.py
More file actions
31 lines (24 loc) · 860 Bytes
/
n_gram.py
File metadata and controls
31 lines (24 loc) · 860 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import math
from collections import defaultdict
from utils import run_model
class Ngram:
    """Adaptive order-``n`` bit predictor keyed on the last ``n`` bits seen.

    For every context (a string of the most recent ``n`` bit characters) the
    model keeps a two-element count list ``[zeros, ones]``.  Counts start at
    ``[1, 1]`` so a freshly seen context predicts 0.5 and no probability is
    ever exactly 0 or 1.

    Attributes:
        n: Context length in bits.
        freqs: Mapping from context string to its ``[zeros, ones]`` counts.
        context: The current context string (initially ``'0' * n``).
    """

    def __init__(self, n):
        """Create a model with a context of ``n`` bits and empty statistics."""
        self.n = n
        # Initial state is exactly the reset state; keep it in one place.
        self.reset()

    def get_prob(self):
        """Return the estimated probability that the next bit is ``'0'``.

        Looking up an unseen context inserts its default ``[1, 1]`` entry
        into ``freqs``.
        """
        zeros, ones = self.freqs[self.context]
        return zeros / (zeros + ones)

    def update(self, bit):
        """Record an observed bit and slide the context window forward.

        Args:
            bit: The observed bit as a one-character string; ``'1'``
                increments the ones count, anything else the zeros count.
        """
        update_idx = 1 if bit == '1' else 0
        self.freqs[self.context][update_idx] += 1
        if self.n > 0:
            self.context = (self.context + bit)[-self.n:]
        else:
            # ``s[-0:]`` is the whole string, so slicing would let the
            # context grow without bound; an order-0 model has no context.
            self.context = ''

    def reset(self):
        """Discard all learned counts and restore the initial context."""
        self.freqs = defaultdict(lambda: [1, 1])
        self.context = '0' * self.n
if __name__ == '__main__':
    # Read the whole sample file as raw bytes; the context manager closes
    # the handle instead of leaving it to the garbage collector.
    with open('files/enwik3', 'rb') as f:
        data = f.read()

    n = 16  # context length in bits
    ngram_model = Ngram(n)
    # run_model comes from the project's utils module; it returns the two
    # values printed below.
    compressed_size, theoretical_compression = run_model(ngram_model, data)
    print(compressed_size, theoretical_compression)