-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathchallenge06.py
More file actions
164 lines (152 loc) · 6.03 KB
/
challenge06.py
File metadata and controls
164 lines (152 loc) · 6.03 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
#!/usr/bin/env python3
# Break repeating-key XOR
from base64 import b64decode
from utils import xor
import os
# --------------------------------------------------------
# ---------------------- functions -----------------------
# --------------------------------------------------------
def hamming_distance(x: bytes, y: bytes) -> int:
    '''
    Return the Hamming distance (number of differing bits) between
    x and y.

    Each argument may be a bytes-like object or a string; a string is
    encoded as UTF-8 before it is compared.

    Bytes are compared position by position with XOR, so leading zero
    bits are accounted for. (Comparing the text returned by bin()
    character by character does not work: bin() drops leading zeros,
    which misaligns the bits of values of different magnitude.)

    If the inputs differ in length, every bit of each unmatched
    trailing byte of the longer input counts as differing (8 per
    byte), regardless of which argument is the longer one.
    '''
    x_bytes = x.encode() if isinstance(x, str) else x
    y_bytes = y.encode() if isinstance(y, str) else y
    # XOR leaves a 1 bit exactly where the two bytes disagree
    differences = sum(bin(a ^ b).count('1') for a, b in zip(x_bytes, y_bytes))
    # bytes that have no counterpart in the other input
    differences += 8 * abs(len(x_bytes) - len(y_bytes))
    return differences
def str2bitArray(string: str) -> list:
    '''
    Convert a string into a list that holds, for each character in
    order, the bin() text of that character's code point
    (for example 'a' becomes '0b1100001').
    The entries keep the '0b' prefix and are not zero padded.
    '''
    return [bin(ord(character)) for character in string]
def byte2bitArray(byte_array: bytes) -> list:
    '''
    Convert bytes into a list that holds, for each byte in order,
    the bin() text of its integer value (for example 255 becomes
    '0b11111111' and 1 becomes '0b1').
    The entries keep the '0b' prefix and are not zero padded.
    '''
    return list(map(bin, byte_array))
def smallest_hamming_distance(cipher_bytes: bytes) -> list:
    '''
    Rank candidate KEYSIZE values (2 to 39) for a ciphertext by
    normalized hamming distance, using hamming_distance().

    For every KEYSIZE the ciphertext is cut into complete chunks of
    KEYSIZE bytes and each chunk is compared with the chunk that
    follows it. Every distance is divided by KEYSIZE, and the total
    is divided by the number of chunk pairs that were compared, so
    that no KEYSIZE gains an advantage from its size or from the
    number of comparisons it allows.

    Only complete chunks are compared: a trailing partial chunk is
    ignored instead of being measured against missing data.
    A KEYSIZE for which the ciphertext does not hold at least two
    complete chunks is left out of the result.

    Returns a list of [KEYSIZE, normalized distance] entries sorted
    by ascending distance, and prints the best entry. The list is
    empty (and nothing is printed) when the ciphertext is too short
    for any KEYSIZE.
    '''
    distances = []
    for KEYSIZE in range(2, 40):
        # a pair needs two complete chunks; n chunks give n - 1 adjacent pairs
        pair_count = len(cipher_bytes) // KEYSIZE - 1
        if pair_count < 1:
            continue
        distance = 0
        for i in range(0, pair_count * KEYSIZE, KEYSIZE):
            x_byte = cipher_bytes[i:i + KEYSIZE]
            y_byte = cipher_bytes[i + KEYSIZE:i + KEYSIZE * 2]
            distance += hamming_distance(x_byte, y_byte) / KEYSIZE
        distances.append([KEYSIZE, distance / pair_count])
    # sorts the distances by the normalized distance value ascendingly
    sorted_distances = sorted(distances, key=lambda entry: entry[1])
    if sorted_distances:
        print('Smallest normalized distance is {:.3f} '.format(sorted_distances[0][1]), end='')
        print('At KEYSIZE =', sorted_distances[0][0])
    return sorted_distances
def split_bytes(cipher_bytes: bytes, KEYSIZE: int) -> list:
    '''
    Distribute the bytes over KEYSIZE lists: list number i receives
    the bytes found at positions i, i + KEYSIZE, i + 2 * KEYSIZE, ...
    The lists are returned together inside one outer list.
    This equals cutting the input into chunks of KEYSIZE bytes and
    transposing the result.
    '''
    # a slice with a step of KEYSIZE selects every KEYSIZE-th byte
    return [list(cipher_bytes[offset::KEYSIZE]) for offset in range(KEYSIZE)]
# Frequency weights used by get_score() for the lowercase letters
# and the space character. Built once instead of on every call.
_LETTER_FREQUENCIES = {
    'a': 0.0812, 'b': 0.0149, 'c': 0.0271, 'd': 0.0432,
    'e': 0.1202, 'f': 0.0230, 'g': 0.0202, 'h': 0.0592, 'i': 0.0731,
    'j': 0.001, 'k': 0.0069, 'l': 0.0398, 'm': 0.0261, 'n': 0.0695,
    'o': 0.0768, 'p': 0.0182, 'q': 0.0011, 'r': 0.0602, 's': 0.0628,
    't': 0.091, 'u': 0.0288, 'v': 0.0111, 'w': 0.0209, 'x': 0.0017,
    'y': 0.0211, 'z': 0.0007, ' ': .13,
}


def get_score(message: str) -> float:
    '''
    Score a string using letter frequency. The higher the score,
    the better.

    Every character adds its weight from the frequency table; a
    character that is not in the table (anything other than a
    lowercase letter or a space) adds 0. The sum is divided by the
    length of the message so that messages of different lengths can
    be compared.

    An empty message scores 0.0.
    '''
    if not message:
        # nothing to score, and dividing by the length would fail
        return 0.0
    total = sum(_LETTER_FREQUENCIES.get(c, 0) for c in message)
    return total / len(message)  # normalize the score
def get_key(c_xored_lists: list) -> str:
    '''
    Takes input list of lists of xored byte values that need cracking
    and returns the key string, found by trying a single character
    XOR on every list.

    For each list, every candidate value from 0 to 127 is XORed
    against the list with xor(), the result is scored with
    get_score(), and the candidate with the highest score becomes
    the next character of the key.

    When no candidate scores above zero (this includes an empty
    list), the NUL character is used for that position, so the
    result never depends on a candidate chosen for a previous list.
    '''
    key_chars = []
    for c_xored_list in c_xored_lists:
        column = bytes(c_xored_list)
        highestscore = 0
        c_candidate = '\x00'
        if column:
            # check for all characters with ascii 0 to 127
            for c in range(0, 128):
                result = xor(column, bytes([c]))
                # latin-1 maps every byte value to exactly one character,
                # so bytes >= 0x80 cannot raise a decoding error; for
                # pure ASCII data the text is the same as with UTF-8
                score = get_score(result.decode('latin-1'))
                if score > highestscore:
                    highestscore = score
                    c_candidate = chr(c)
        key_chars.append(c_candidate)
    return ''.join(key_chars)
# --------------------------------------------------------
# ------------------- Problem Solution -------------------
# --------------------------------------------------------
# opening the file and reading the ciphertext
def main():
    '''
    Run the whole attack on 'challenge06-text.txt', which is looked
    up in the directory of this script: base64 decode the file, pick
    the best ranked KEYSIZE, recover the key, print it, and write the
    decrypted text to 'challenge06-key.txt' in the same directory.
    '''
    here = os.path.dirname(__file__)

    # read the base64 encoded ciphertext and decode it
    with open(os.path.join(here, 'challenge06-text.txt'), 'rb') as source:
        encoded = source.read()
    raw = b64decode(encoded)

    # the first entry of the ranking holds the most likely KEYSIZE
    ranking = smallest_hamming_distance(raw)
    best_keysize = ranking[0][0]

    # one list per key position, each cracked as a single character XOR
    columns = split_bytes(raw, best_keysize)
    recovered_key = get_key(columns)
    print('XOR Key:', recovered_key)

    # decrypt the whole ciphertext with the recovered key
    decrypted = xor(raw, recovered_key.encode()).decode()

    # save it to a file since it's a large wall of text
    with open(os.path.join(here, 'challenge06-key.txt'), 'w') as sink:
        sink.write(decrypted)


if __name__ == "__main__":
    main()