### Single-byte XOR cipher

The hex encoded string:
```
1b37373331363f78151b7f2b783431333d78397828372d363c78373e783a393b3736
```
... has been XOR'd against a single character. Find the key, decrypt the message.

You can do this by hand. But don't: write code to do it for you.

How? Devise some method for "scoring" a piece of English plaintext. Character frequency is a good metric. Evaluate each output and choose the one with the best score.


In [1]:
def decrypt(encrypted, passcode):
    return bytes([byte ^ passcode for byte in encrypted])


def in_ascii_range(buffer):
    int_list = [ b for b in buffer]
    max_value = max(int_list)
    min_value = min(int_list)
    if min_value < 32 or max_value > 126:
        return False
    return True

def get_common_letters_score(buffer):
    letter_list = [chr(b).lower() for b in buffer]
    uniq_letters = set(letter_list)
    
    letters_with_count = []
    for letter in uniq_letters:
        letters_with_count.append((letter, letter_list.count(letter)))
    letters_with_count.sort(key=lambda item: item[1], reverse=True)

    score = 0
    most_common_letter = "ETAOINSRHLDCU"

    for i in range(len(most_common_letter)):
        if letters_with_count[i][0] in most_common_letter.lower():
            score += len(most_common_letter) - i
    return score


encrypted = bytes.fromhex("1b37373331363f78151b7f2b783431333d78397828372d363c78373e783a393b3736")

scores = []

for ch in range(32, 127):
    decrypted = decrypt(encrypted, ch)
    if not in_ascii_range(decrypted):
        continue
    scores.append(
        (chr(ch), get_common_letters_score(decrypted))
    )

scores.sort(key=lambda x: x[1], reverse=True)

key = ord(scores[0][0])

print(decrypt(encrypted, key))

b"Cooking MC's like a pound of bacon"


In [2]:
# Solution from https://laconicwolf.com/2018/05/29/cryptopals-challenge-3-single-byte-xor-cipher-in-python/
def get_english_score(input_bytes):
    """Compares each input byte to a character frequency 
    chart and returns the score of a message based on the
    relative frequency the characters occur in the English
    language
    """

    # From https://en.wikipedia.org/wiki/Letter_frequency
    # with the exception of ' ', which I estimated.
    character_frequencies = {
        'a': .08167, 'b': .01492, 'c': .02782, 'd': .04253,
        'e': .12702, 'f': .02228, 'g': .02015, 'h': .06094,
        'i': .06094, 'j': .00153, 'k': .00772, 'l': .04025,
        'm': .02406, 'n': .06749, 'o': .07507, 'p': .01929,
        'q': .00095, 'r': .05987, 's': .06327, 't': .09056,
        'u': .02758, 'v': .00978, 'w': .02360, 'x': .00150,
        'y': .01974, 'z': .00074, ' ': .13000
    }
    return sum([character_frequencies.get(chr(byte), 0) for byte in input_bytes.lower()])

def single_char_xor(input_bytes, char_value):
    """Returns the result of each byte being XOR'd with a single value.
    """
    output_bytes = b''
    for byte in input_bytes:
        output_bytes += bytes([byte ^ char_value])
    return output_bytes

hexstring = '1b37373331363f78151b7f2b783431333d78397828372d363c78373e783a393b3736'
ciphertext = bytes.fromhex(hexstring)
potential_messages = []
for key_value in range(256):
    message = single_char_xor(ciphertext, key_value)
    score = get_english_score(message)
    data = {
        'message': message,
        'score': score,
        'key': key_value
        }
    potential_messages.append(data)
best_score = sorted(potential_messages, key=lambda x: x['score'], reverse=True)[0]
for item in best_score:
    print("{}: {}".format(item.title(), best_score[item]))

Message: b"Cooking MC's like a pound of bacon"
Score: 2.14329
Key: 88
