# Le Chiffre Indéchiffrable
* **Event:** HackTX CTF
* **Problem Type:** Cryptography
* **Point Value / Difficulty:**
* **(Optional) Tools Required / Used:**


## Background Information
- https://en.wikipedia.org/wiki/Exclusive_or
- https://en.wikipedia.org/wiki/Vigen%C3%A8re_cipher

## Solution Idea

We are given a file called `ciphertext.txt` that consists of a base64-encoded message.
The problem statement tells us that the message was encrypted with a repeated key.
The hints make the problem easier by telling you that the key and the string 'utflag{' are aligned.
Therefore you can XOR the ciphertext with 'utflag{' using a sliding window.

In [28]:
# My solution

import base64
import string
from collections import defaultdict
from itertools import cycle

def xor(a, b):
    """XOR two byte arrays position by position.

    Both inputs must be byte arrays (iterables of ints). The result is a list
    of ints that stops at the end of the shorter input.
    """
    mixed = []
    for left, right in zip(a, b):
        mixed.append(left ^ right)
    return mixed

def repeated_xor(a, KEY):
    """XOR each byte of ``a`` with ``KEY``, repeating ``KEY`` as often as needed.

    Returns a list of ints. An empty ``KEY`` produces an empty list because
    there is nothing to pair the data bytes with.
    """
    keystream = cycle(KEY)
    mixed = []
    for data_byte, key_byte in zip(a, keystream):
        mixed.append(data_byte ^ key_byte)
    return mixed

# Scoring weights for candidate plaintexts: approximate English letter
# frequencies keyed by upper-case letter, plus a weight for the space character.
englishLetterFreq = {
    'E': 12.70, 'T': 9.06, 'A': 8.17, 'O': 7.51, 'I': 6.97,
    'N': 6.75,  'S': 6.33, 'H': 6.09, 'R': 5.99, 'D': 4.25,
    'L': 4.03,  'C': 2.78, 'U': 2.76, 'M': 2.41, 'W': 2.36,
    'F': 2.23,  'G': 2.02, 'Y': 1.97, 'P': 1.93, 'B': 1.29,
    'V': 0.98,  'K': 0.77, 'J': 0.15, 'X': 0.15, 'Q': 0.10,
    'Z': 0.07,  ' ': 18.0
}

def freq(s):
    """Score how English-like a sequence of byte values is.

    ``s`` is an iterable of ints (for example ``bytes`` or a list of ints).
    Each value that maps to a letter or a space adds its weight from
    ``englishLetterFreq`` once per occurrence; letters are matched
    case-insensitively. Every other value contributes nothing, so a higher
    score means more letter/space content.
    """
    # Tally the occurrences of each distinct byte value.
    counts = {}
    for byte in s:
        counts[byte] = counts.get(byte, 0) + 1

    score = 0
    for byte, occurrences in counts.items():
        char = chr(byte)
        if char not in string.printable:
            continue
        weight = englishLetterFreq.get(char.capitalize())
        if weight is not None:
            score += occurrences * weight
    return score


# Known plaintext ("crib"): the flag is known to start with this prefix.
crib = b'utflag{'

# Only the read needs the file handle; close it before doing any work.
with open('ciphertext.txt', 'r') as f:
    # The base64 text sits on the first line of the file.
    encoded = f.readline().strip().encode()

print('encoded:', encoded, '\n')
ciphertext = base64.b64decode(encoded)
print('ciphertext:', ciphertext, '\n')

# Do a sliding window: XOR-ing a ciphertext window with the crib yields the
# key bytes that would have produced the crib at that position.
# The "+ 1" makes the range include the final window; without it the last
# possible position of the crib is never tested.
keys = [
    bytes(xor(ciphertext[i:i + len(crib)], crib))
    for i in range(len(ciphertext) - len(crib) + 1)
]

# Broke: You can look through the keys array and notice that one of them says 'THE KEY'
# Woke: you can try every key and make a scoring function that favors plaintext
best_score = -1
best_plaintext = b''  # bytes, so the final decode also works when no key was tried
for key in keys:
    candidate_plaintext = repeated_xor(ciphertext, key)
    score = freq(candidate_plaintext)
    if score > best_score:
        best_score = score
        best_plaintext = bytes(candidate_plaintext)

# A wrong-but-best candidate may not be valid UTF-8; show it rather than crash.
print(best_plaintext.decode(errors='replace'))



encoded: b'GC1lQyMsPzI6IAAvIHkCISJFJYbxJi1lRTgxeSEmZVMyNi2X4ChFayE8dCstSS0jKzElIE4/ZSk7JDxBJzUxNSqGiT8sKCEtaQAop9nNLTZUazA3dCstSS0jKzElIE4/ZSk1OmVTPicqICExVT8sNjpkZU0qLCp0PStFayia/iUgACcgLSA6IAAvMHk5LTZTKiI8dCspQSI3eSQtMFRnZSohITNBJTF5JyllUCQ2MCAhKk5rITg6O2VDLiksPWUmSWdlmv48N0VrNzw5OClBKIbwMWg1QTllPTE7ZUwuMS0mLTYALyw/MovsUi4rLTE7aQAoKjcgOiRJOSA0MSYxAIjleSEmZVMyNi2X4ChFayE8dCstSS0jKzElIE4/ZTQ7JioAKikpPCkn4+IxMCU9IAAoKjQ5LWVMLmU6PCEjRjkgeTAtZWOI7Co1OmUIOjB+PSRlVT8sNT07IAAoICkxJiFBJTF5NycoTS5lOjslNU84JDcgYWsACCAtIC1lTYjsLTwnIUVrN5r9OyxTPyB5NSErUyJlmvRoKQcqKzg4MTZFayE8dC434+I0LDEmJkU4aXk3LWVRPix5MTsxAD4reTU+JE4/JD4xaCHj4iYwJyEjADgwK3QkIFNrJjE9LiNSLig8Ojw2ACYqNztoJEw7LTg2i+xUIjQsMTtrAAggKTEmIUElMXk4LWVDIyw/MjogAC8geQIhIkUlhvEmLWVBa4bwIIvsADsgKzeL7AA7JCt0JCAAJiQzOzplUDkwKichIE5rAys9LSFSIiYxdAMkUyI2Mj1oNFUiZTh0ODBCJyya/Wg2QWsomv08LU8vIHkxJmURc3NqemgBRTswMCdoJkU/MTx0i+xQJDQsMWRlSSdlN7bI3U8tIysxaDVMPjZ5NT0mVSUgeSeL7EM+NzAgi+wOa2V5ITwjTCoiIjh7GkMjdD8yOnZ/eis9l+EmSHojPyZ8J0x4OA==' 

ciphertext: b

This problem had two assumptions:
1. The plaintext contains the phrase `utflag{`
2. The key aligns perfectly with that known plaintext

The problem becomes only slightly harder if we remove the second assumption, since we can still potentially recover part of the key.
If we remove both assumptions, the problem becomes significantly harder.

To solve the more general problem take a look at:
https://cryptopals.com/sets/1/challenges/6

The code below contains spoilers:

In [46]:
import base64
import string
import itertools
from collections import defaultdict
from itertools import combinations
from itertools import cycle

def xor(a, b):
    """Return the element-wise XOR of two byte arrays as a list of ints.

    Pairs are taken in order; the output ends when the shorter input ends.
    """
    result = []
    for first, second in zip(a, b):
        result.append(first ^ second)
    return result

def repeated_xor(a, KEY):
    """Apply repeating-key XOR to ``a`` using ``KEY``.

    ``KEY`` is cycled so it covers all of ``a``. Returns a list of ints; an
    empty ``KEY`` gives an empty list.
    """
    result = []
    for plain_byte, key_byte in zip(a, cycle(KEY)):
        result.append(plain_byte ^ key_byte)
    return result

# Scoring weights for candidate plaintexts: approximate English letter
# frequencies keyed by upper-case letter, plus a weight for the space character.
englishLetterFreq = {
    'E': 12.70, 'T': 9.06, 'A': 8.17, 'O': 7.51, 'I': 6.97,
    'N': 6.75,  'S': 6.33, 'H': 6.09, 'R': 5.99, 'D': 4.25,
    'L': 4.03,  'C': 2.78, 'U': 2.76, 'M': 2.41, 'W': 2.36,
    'F': 2.23,  'G': 2.02, 'Y': 1.97, 'P': 1.93, 'B': 1.29,
    'V': 0.98,  'K': 0.77, 'J': 0.15, 'X': 0.15, 'Q': 0.10,
    'Z': 0.07,  ' ': 18.0
}

def freq(s):
    """Return an "English-likeness" score for a sequence of byte values.

    ``s`` is an iterable of ints. Every occurrence of a letter (either case)
    or a space adds that character's weight from ``englishLetterFreq``;
    all other values are ignored.
    """
    # Count how often each distinct value appears.
    tally = {}
    for value in s:
        tally[value] = tally.get(value, 0) + 1

    score = 0
    for value, count in tally.items():
        symbol = chr(value)
        if symbol not in string.printable:
            continue
        weight = englishLetterFreq.get(symbol.capitalize())
        if weight is not None:
            score += count * weight
    return score

def solve_single_xor(s):
    """Brute-force the single-byte XOR key that makes ``s`` look most English.

    Every key value 0..255 is tried; each decryption is scored with ``freq``
    and the first key reaching the highest score wins.

    Returns a tuple ``(best_score, best_key, best_candidate)`` where
    ``best_candidate`` is the decryption as produced by ``repeated_xor``.
    """
    winner = (-1, 0, "")
    for key_byte in range(256):
        decrypted = repeated_xor(s, [key_byte])
        rating = freq(decrypted)
        # Strict comparison: on a tie the earlier (smaller) key is kept.
        if rating > winner[0]:
            winner = (rating, key_byte, decrypted)
    return winner


def hamming_distance(a, b):
    """Count the bits that differ between two byte arrays.

    Assumes ``a`` and ``b`` are byte arrays. Bytes are compared pairwise; if
    the lengths differ, only the overlapping prefix is compared.
    """
    differing_bits = 0
    for left, right in zip(a, b):
        # The set bits of the XOR are exactly the positions where the bytes differ.
        differing_bits += bin(left ^ right).count('1')
    return differing_bits

# Why hamming distance: https://crypto.stackexchange.com/questions/8115/repeating-key-xor-and-hamming-distance/8118#8118
def determine_key_size(ciphertext):
    """Rank candidate key sizes for a repeating-key XOR ciphertext.

    For each key size ``k`` from 2 to 39, the first two ``k``-byte blocks of
    ``ciphertext`` are compared and their Hamming distance is normalised by
    ``k``. A lower score marks a more likely key size.

    Key sizes for which two complete blocks do not fit in ``ciphertext`` are
    left out. Comparing a partial block while still dividing by ``k`` would
    bias the score low, and a missing second block cannot be compared at all.

    Returns a list of ``(key_size, score)`` tuples sorted by ascending score.
    Ties keep ascending key-size order, and the list is empty when the
    ciphertext is shorter than 4 bytes.
    """
    candidates = []
    for k in range(2, 40):
        first_block = ciphertext[:k]
        second_block = ciphertext[k:2 * k]
        if len(second_block) < k:
            # k only grows from here, so no larger size can fit either.
            break
        score = hamming_distance(first_block, second_block) / float(k)
        candidates.append((k, score))

    candidates.sort(key=lambda x: x[1])
    return candidates

# Only the read needs the file handle; close it before doing any work.
with open('ciphertext.txt', 'r') as f:
    # The base64 text sits on the first line of the file.
    encoded = f.readline().strip().encode()

ciphertext = base64.b64decode(encoded)

# Rank the key sizes once and reuse the ranking below.
keysizes = determine_key_size(ciphertext)
print(keysizes)

answers = []
for k, _ in keysizes:
    # Transpose: column j holds every byte that was XOR-ed with key byte j.
    # Strided slicing keeps only real ciphertext bytes. Zero-padding a short
    # last block adds fake bytes that "decrypt" to the key byte itself and
    # bias the scoring toward keys that are letters or spaces.
    columns = [ciphertext[j::k] for j in range(min(k, len(ciphertext)))]

    # Solve single byte xor for each column
    key = [solve_single_xor(column)[1] for column in columns]
    print(bytes(key))

    # Decrypt ciphertext
    ans = repeated_xor(ciphertext, key)
    answers.append((bytes(ans), freq(ans)))

# Best-scoring decryption first.
answers.sort(key=lambda x: -x[1])
if answers:
    # A wrong-but-best candidate may not be valid UTF-8; show it rather than crash.
    print(answers[0][0].decode(errors='replace'))


[(28, 2.7142857142857144), (7, 2.857142857142857), (14, 2.857142857142857), (35, 2.914285714285714), (36, 2.9444444444444446), (33, 3.0303030303030303), (21, 3.0476190476190474), (17, 3.0588235294117645), (25, 3.08), (32, 3.1875), (31, 3.193548387096774), (29, 3.206896551724138), (24, 3.25), (12, 3.3333333333333335), (26, 3.3461538461538463), (27, 3.3703703703703702), (8, 3.375), (13, 3.3846153846153846), (18, 3.388888888888889), (5, 3.4), (10, 3.4), (11, 3.4545454545454546), (37, 3.4594594594594597), (19, 3.473684210526316), (4, 3.5), (39, 3.5128205128205128), (22, 3.5454545454545454), (16, 3.5625), (3, 3.6666666666666665), (34, 3.676470588235294), (23, 3.6956521739130435), (20, 3.7), (9, 3.7777777777777777), (15, 3.8), (38, 3.8157894736842106), (6, 4.0), (30, 4.033333333333333), (2, 5.5)]
b'THE KEYTHE KEYTHE KEYTHE KEY'
b'THE KEY'
b'THE KEYTHE KEY'
b'THE KEYTHE KEYTHE KE^THE KEYTHE KEY'
b'ECETlKYEEEETTEEEEeEeOeEEElTYesEeiEHs'
b'SEEroeEEETsHEEtEEETnTEHEeEEEnEEET'
b'THE KEYTHE KEYTHE K