# Imports and Utility Functions

In [168]:
import codecs
import unittest
import string

def decode_str(S):
    return bytes(ord(x) for x in S)

def repeat_key_xor(message, key):
    n, m = len(message), len(key)
    result = []
    for i in range(n):
        byte = message[i] ^ key[i % m]
        result.append(byte)
    return bytes(result)


LETTERS = 'etaoinsrhldcumfpgwybvkxjqz'
SCORES = [x for x in range(len(LETTERS), 0, -1)]
def text_score(S):
    """
    Returns the score of a piece of text looking only at letter 
    frequency.
    """
    score = 0
    for i in S:
        ch = chr(i)
        if ch != '\n' and not ch.isprintable():
            return -1
        if ch in LETTERS:
            score += SCORES[LETTERS.index(ch)]
    return score


def popcount(x):
    return bin(x).count('1')


def hamming_distance(A, B):
    """
    (bytes, bytes) -> int
    Returns the hamming distance between 'bytes' A and B.
    """
    assert len(A) == len(B), 'lengths are not equal.'

    distance = sum(popcount(x ^ y) for x, y in zip(A, B))
    return distance


## Challenge 1.1
Convert hex to base64

In [146]:
hex_string = '49276d206b696c6c696e6720796f757220627261696e206c696b65206120706f69736f6e6f7573206d757368726f6f6d'
b = codecs.decode(hex_string, 'hex')
base64_string = codecs.encode(b, 'base64')

base64_string

b'SSdtIGtpbGxpbmcgeW91ciBicmFpbiBsaWtlIGEgcG9pc29ub3VzIG11c2hyb29t\n'

## Challenge 1.2: Fixed XOR
Write a function that takes two equal-length buffers and produces their XOR combination.

In [147]:
hex1 = '1c0111001f010100061a024b53535009181c'
hex2 = '686974207468652062756c6c277320657965'
b1 = codecs.decode(hex1, 'hex')
b2 = codecs.decode(hex2, 'hex')

result = [x ^ y for x, y in zip(b1, b2)]
bytes(result).hex()

'746865206b696420646f6e277420706c6179'

## Challenge 1.3: Single-byte XOR cipher

In [154]:
def single_key_xor(b):
    best = (-1, b'')
    for guess in range(256):
        b1 = repeat_key_xor(b, bytes([guess]))
        score = text_score(b1)
        best = max(best, (score, b1))
    return best

hex1 = '1b37373331363f78151b7f2b783431333d78397828372d363c78373e783a393b3736'
b1 = codecs.decode(hex1, 'hex')
    
single_key_xor(b1)

(428, b"Cooking MC's like a pound of bacon")

## Challenge 1.4: Detect single-character XOR

In [155]:
best = (-1, b'')
with open('4.txt', 'r') as fin:
    for line in fin:
        ans = single_key_xor(codecs.decode(line.rstrip(), 'hex'))
        best = max(best, ans)
best

(416, b'Now that the party is jumping\n')

## Challenge 1.5: Implement repeating-key XOR

In [162]:
msg = '''Burning 'em, if you ain't quick and nimble
I go crazy when I hear a cymbal'''

key = 'ICE'
b = repeat_key_xor(bytes(ord(x) for x in msg), bytes(ord(x) for x in key))

codecs.encode(b, 'hex')

b'0b3637272a2b2e63622c2e69692a23693a2a3c6324202d623d63343c2a26226324272765272a282b2f20430a652e2c652a3124333a653e2b2027630c692b20283165286326302e27282f'

## Challenge 1.6: Break repeating-key XOR

In [173]:
s1 = 'this is a test'
s2 = 'wokka wokka!!!'

hamming_distance(s1.encode('utf-8'), s2.encode('utf-8'))

37

In [164]:
with open('6.txt', 'r') as fin:
    msg = fin.read().replace('\n', '')
    msg = utility.base64_decode(msg.encode())
    n = len(msg)

keysizes = []
for k in range(2, min(n // 2, 42)):
    total = 0
    count = 0
    for i in range(0, len(msg), k):
        if i + k + k > len(msg):
            break
        total += hamming_distance(msg[i:i + k].decode(),
                                          msg[i + k:i + k + k].decode())
        count += 1
    keysizes.append((k, total / count / k))

keysizes.sort(key=lambda x: x[1])

max_score = None
answer = None

for i in range(4):
    k = keysizes[i][0]
    lines = [[] for _ in range(k)]
    for j in range(n):
        lines[j % k].append(msg[j])

    bytes_list = []
    valid = True
    for line in lines:
        score, ans = best_guess(bytes(line))
        if score is None:
            valid = False
            break
        bytes_list.append(ans)

    if not valid:
        continue

    bytes_rearranged = []
    for c in range((n + k - 1) // k):
        for r in range(k):
            if c < len(bytes_list[r]):
                bytes_rearranged.append(bytes_list[r][c])

    score = score_english(bytes(bytes_rearranged))
    if score is not None and (max_score is None or score > max_score):
        answer = bytes(bytes_rearranged).decode()
        max_score = score

assert (max_score is not None), 'answer not found.'
print(answer)


NameError: name 'utility' is not defined