## Convert hex to base64
The string:

49276d206b696c6c696e6720796f757220627261696e206c696b65206120706f69736f6e6f7573206d757368726f6f6d

Should produce:

SSdtIGtpbGxpbmcgeW91ciBicmFpbiBsaWtlIGEgcG9pc29ub3VzIG11c2hyb29t

Cryptopals Rule

**Always operate on raw bytes, never on encoded strings. Only use hex and base64 for pretty-printing.**

In [1]:
import base64

In [2]:
def hex2base64(raw: str):
    '''
    >>> hex2base64('49276d206b696c6c696e6720796f757220627261696e206c696b65206120706f69736f6e6f7573206d757368726f6f6d')
    SSdtIGtpbGxpbmcgeW91ciBicmFpbiBsaWtlIGEgcG9pc29ub3VzIG11c2hyb29t
    '''
    Str = ''.join([chr(int(raw[i:i+2], 16)) for i in range(0, len(raw), 2)])
    print(Str)
    return base64.b64encode(Str.encode()).decode()

In [31]:
def base64_to_byte(base64_string):
    return base64.b64decode(base64_string)

base64_string = 'SGVsbG8gd29ybGQ='  # 这是"Hello world"的Base64编码
byte_string = base64_to_byte(base64_string)
print(byte_string)


b'Hello world'


In [3]:
def hex2ascii(raw: str):
    '''
    >>> hex2ascii('61616161616161616161616161616161616161616161616161616161616161616161)
    aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
    '''
    return ''.join([chr(int(raw[i:i+2], 16)) for i in range(0, len(raw), 2)])

In [4]:
def ascii2hex(raw: str):
    '''
    >>> ascii2hex('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa')
    61616161616161616161616161616161616161616161616161616161616161616161
    >>> ascii2hex('\n')
    0a
    '''
    return ''.join([hex(ord(i))[2:].zfill(2) for i in raw])
ascii2hex('\n')

'0a'

In [5]:
def getHex(raw: int):
    tep = hex(raw)[2:]
    if len(tep) == 1:
        return '0' + tep
    return tep
getHex(int(ascii2hex('E'), 16) ^ int(ascii2hex('I'), 16))

'0c'

In [6]:
hexStr = '49276d206b696c6c696e6720796f757220627261696e206c696b65206120706f69736f6e6f7573206d757368726f6f6d'
hex2base64(hexStr)

I'm killing your brain like a poisonous mushroom


'SSdtIGtpbGxpbmcgeW91ciBicmFpbiBsaWtlIGEgcG9pc29ub3VzIG11c2hyb29t'

## Fixed XOR
Write a function that takes two equal-length buffers and produces their XOR combination.

If your function works properly, then when you feed it the string:

1c0111001f010100061a024b53535009181c

... after hex decoding, and when XOR'd against:

686974207468652062756c6c277320657965

... should produce:

746865206b696420646f6e277420706c6179

In [7]:
def fixedXor(str1: str, str2: str) -> str:
    '''
    >>> fixedXor('1c0111001f010100061a024b53535009181c', '686974207468652062756c6c277320657965')
    746865206b696420646f6e277420706c6179
    '''
    dec1 = [int(str1[i:i+2], 16) for i in range(0, len(str1), 2)]
    dec2 = [int(str2[i:i+2], 16) for i in range(0, len(str2), 2)]
    return ''.join([hex(i ^ j)[2:].zfill(2) for i,j in zip(dec1, dec2)])
fixedXor('1c0111001f010100061a024b53535009181c', '686974207468652062756c6c277320657965')

'746865206b696420646f6e277420706c6179'

## Single-byte XOR cipher

In [8]:
raw = '1b37373331363f78151b7f2b783431333d78397828372d363c78373e783a393b3736'
hex2base64(raw)

77316?x+x413=x9x(7-6<x7>x:9;76


'Gzc3MzE2P3gVG38reDQxMz14OXgoNy02PHg3Png6OTs3Ng=='

In [9]:
import string


In [10]:
latter_frequency = {
    'a': .08167, 'b': .01492, 'c': .02782, 'd': .04253,
        'e': .12702, 'f': .02228, 'g': .02015, 'h': .06094,
        'i': .06094, 'j': .00153, 'k': .00772, 'l': .04025,
        'm': .02406, 'n': .06749, 'o': .07507, 'p': .01929,
        'q': .00095, 'r': .05987, 's': .06327, 't': .09056,
        'u': .02758, 'v': .00978, 'w': .02360, 'x': .00150,
        'y': .01974, 'z': .00074, ' ': .15000
}

def scoring(t):
    '''
    >>> scoring("Cooking MC's like a pound of bacon")
    2.2632899999999996
    '''
    return sum([latter_frequency.get(i,0) for i in t.lower()])  


# 强烈谴责下面这种写法，漏掉了空格！ cnm
def scoring2(raw: str):
    letter = string.ascii_letters
    cnt = 0
    for i in raw:
        if i in letter:
            cnt += latter_frequency[i.lower()]
    return cnt 

scoring("Cooking MC's like a pound of bacon")

2.2632899999999996

In [11]:
def findSingleXor(raw: str):
    letter = string.ascii_letters
    cmp = 0
    returnAns = ''
    for i in letter:
        hh = ''.join([str(hex(ord(i))[2:]) for _ in range(len(raw)>>1)])
        res = hex2ascii(fixedXor(hh, raw))
        if scoring(res) > cmp:
            cmp = scoring(res)
            returnAns = res
    return returnAns, cmp

findSingleXor(raw)

("Cooking MC's like a pound of bacon", 2.2632899999999996)

Cooking MC's like a pound of bacon

In [12]:
raw = '1b37373331363f78151b7f2b783431333d78397828372d363c78373e783a393b3736'

def findSingleXor(raw: str):
    letter = [chr(i) for i in range(255)]
    cmp = 0
    returnAns = ''
    singleChar = ''
    for i in letter:
        hh = ''.join([str(hex(ord(i))[2:]) for _ in range(len(raw)>>1)])
        res = hex2ascii(fixedXor(hh, raw))
        if scoring(res) > cmp:
            cmp = scoring(res)
            returnAns = res
            singleChar = i
    return returnAns, cmp, singleChar

findSingleXor(raw)

("Cooking MC's like a pound of bacon", 2.2632899999999996, 'X')

In [13]:
# 打开文件
with open('4.txt', 'r') as file:
    # 读取每一行
    lines = file.readlines()

# 输出每一行
minni = 0
minn = 0
minnstr = ''
for i, line in enumerate(lines):
    line = line[0:-1]
    if (findSingleXor(line)[1] == 0):
        continue
    if (findSingleXor(line)[1] > minn):
        minni = i
        minnstr = findSingleXor(line)[0]
        minn = findSingleXor(line)[1]
print("The max score is the " + str(minni) + "th str and the plaintext is " + minnstr)

The max score is the 170th str and the plaintext is Now that the party is jumping


The max score of 170 is ('Now that the party is jumping\n', 1.3847900000000002)


In [14]:
minnstr

'Now that the party is jumping\n'

## Implement repeating-key XOR  

In [15]:
def repeatXor(raw: str, key: str):
    '''
    >>> repeatXor("Burning 'em, if you ain't quick and nimble\nI go crazy when I hear a cymbal", 'ICE')
    0b3637272a2b2e63622c2e69692a23693a2a3c6324202d623d63343c2a26226324272765272a282b2f20430a652e2c652a3124333a653e2b2027630c692b20283165286326302e27282f
    '''
    key = (key * (int(len(raw) / len(key)) + 1))[:len(raw)]
    return fixedXor(ascii2hex((raw)), ascii2hex(key))
raw = """Burning 'em, if you ain't quick and nimble
I go crazy when I hear a cymbal"""
key = 'ICE'
repeatXor(raw, key)

'0b3637272a2b2e63622c2e69692a23693a2a3c6324202d623d63343c2a26226324272765272a282b2f20430a652e2c652a3124333a653e2b2027630c692b20283165286326302e27282f'

主要bug：没有考虑到使用hex()函数的时候，输出有可能是单个字符，这时候需要在前面补0
涉及到两个函数: ascii2hex() 和 fixedXor()

In [16]:
def ascii2bin(raw: str):
    '''
    >>> ascii2bin('aa')
    0110000101100001
    '''
    return ''.join([bin(ord(i))[2:].zfill(8) for i in raw])
ascii2bin('aa')

'0110000101100001'

## Break repeating-key XOR

In [17]:
def hamming(raw1: str, raw2: str):
    '''
    >>> hamming('this is a test', 'wokka wokka!!!')
    37
    '''
    bias = 0
    if (len(raw1) > len(raw2)):
        bias = len(raw1) - len((raw2))
        raw1 = raw1[:len(raw2)]
    elif (len(raw1) < len(raw2)):
        bias = len(raw2) - len((raw1))
        raw2 = raw2[:len(raw1)]
    raw1 = ascii2bin(raw1)
    raw2 = ascii2bin(raw2)
    return sum([raw1[i] != raw2[i] for i in range(len(raw1))])+bias

In [18]:
cipherFile = ''
with open('6.txt', 'r') as file:
    # 读取每一行
    lines = file.readlines()
for line in lines:
    cipherFile += line[0:-1]
strCipherFile = base64.b64decode(cipherFile).decode()

In [19]:
candidate = []
strCipherFile = base64.b64decode(cipherFile).decode()
for KeySize in range(2, 40):
    ss1 = strCipherFile[:KeySize]
    ss2 = strCipherFile[KeySize: 2 * KeySize]
    candidate.append((hamming(ss1, ss2)/KeySize, KeySize))
sorted(candidate)

[(1.2, 5),
 (2.0, 3),
 (2.5, 2),
 (2.5384615384615383, 13),
 (2.6363636363636362, 11),
 (2.7, 20),
 (2.7777777777777777, 18),
 (2.8684210526315788, 38),
 (2.933333333333333, 15),
 (2.9411764705882355, 17),
 (3.0, 7),
 (3.0, 8),
 (3.0, 16),
 (3.0476190476190474, 21),
 (3.096774193548387, 31),
 (3.108108108108108, 37),
 (3.1739130434782608, 23),
 (3.206896551724138, 29),
 (3.2142857142857144, 14),
 (3.24, 25),
 (3.25, 12),
 (3.257142857142857, 35),
 (3.272727272727273, 33),
 (3.3, 10),
 (3.3076923076923075, 39),
 (3.3157894736842106, 19),
 (3.323529411764706, 34),
 (3.375, 24),
 (3.4166666666666665, 36),
 (3.433333333333333, 30),
 (3.4375, 32),
 (3.4814814814814814, 27),
 (3.5, 4),
 (3.5, 26),
 (3.5357142857142856, 28),
 (3.5555555555555554, 9),
 (3.727272727272727, 22),
 (4.0, 6)]

In [20]:
candidate = []
strCipherFile = base64.b64decode(cipherFile).decode()
for KeySize in range(1, 40):
    ss1 = strCipherFile[:KeySize]
    ss2 = strCipherFile[KeySize: 2 * KeySize]
    ss3 = strCipherFile[2 * KeySize: 3 * KeySize]
    ss4 = strCipherFile[3 * KeySize: 4 * KeySize]
    # 计算所有可能的汉明距离
    hamming_distances = [hamming(ss1, ss2), hamming(ss1, ss3), hamming(ss1, ss4), 
                    hamming(ss2, ss3), hamming(ss2, ss4), 
                    hamming(ss3, ss4)]
    # 计算平均汉明距离
    average_hamming_distance = sum(hamming_distances) / len(hamming_distances)

    candidate.append(((average_hamming_distance/KeySize), KeySize))
print(sorted(candidate))

[(2.7471264367816093, 29), (2.9, 5), (3.0, 2), (3.0208333333333335, 24), (3.0714285714285716, 7), (3.0833333333333335, 6), (3.0964912280701755, 19), (3.1, 20), (3.111111111111111, 3), (3.125, 8), (3.1547619047619047, 28), (3.1777777777777776, 30), (3.181372549019608, 34), (3.185185185185185, 9), (3.1965811965811968, 39), (3.2, 10), (3.2058823529411766, 17), (3.2083333333333335, 16), (3.25, 18), (3.25, 26), (3.2522522522522523, 37), (3.2604166666666665, 32), (3.260869565217391, 23), (3.267543859649123, 38), (3.2777777777777777, 15), (3.287878787878788, 33), (3.3015873015873014, 21), (3.304761904761905, 35), (3.3118279569892475, 31), (3.313333333333333, 25), (3.3205128205128203, 13), (3.3333333333333335, 4), (3.3452380952380953, 14), (3.3456790123456788, 27), (3.3787878787878785, 22), (3.384259259259259, 36), (3.424242424242424, 11), (3.4583333333333335, 12), (3.5, 1)]


keysize 的候选值：5，3

In [21]:
KeySize = 29
strCipherFile = strCipherFile.zfill(2871+29)
breakCipher = [strCipherFile[i:i + KeySize] for i in range(0, len(strCipherFile), KeySize)]
transBreakCipher = [None for _ in range(len(breakCipher[0]))]
for i in range(KeySize):
    transBreakCipher[i] = ''.join([ss[i] for ss in breakCipher])

key = ''
for i in transBreakCipher:
    key += findSingleXor(ascii2hex(i))[2]
print(key)

nator X: Bring the noiseTermi


这里卡住了好一会，方向搞错了。。。我以为 findSingleXor 的结果应该是有特征的。。。但是它是又一列的字符构成的字符串，当然不会有规律了。。。重点是猜出这一位的 key，然后拼起来，最后用来解密密文，这时的结果才是有规律的。。。

In [22]:
print(hex2ascii(repeatXor(strCipherFile, key)))

^QD_Bh
rBY^WDXU^_YCUI'm back and I'm ringin' the bell 
A rockin' on the mike while the fly girls yell 
In ecstasy in the back of me 
Well that's my DJ Deshay cuttin' all them Z's 
Hittin' hard and the girlies goin' crazy 
Vanilla's on the mike, man I'm not lazy. 

I'm lettin' my drug kick in 
It controls my mouth and I begin 
To just let it flow, let my concepts go 
My posse's to the side yellin', Go Vanilla Go! 

Smooth 'cause that's the way I will be 
And if you don't give a damn, then 
Why you starin' at me 
So get off 'cause I control the stage 
There's no dissin' allowed 
I'm in my own phase 
The girlies sa y they love me and that is ok 
And I can dance better than any kid n' play 

Stage 2 -- Yea the one ya' wanna listen to 
It's off my head so let the beat play through 
So I can funk it up and make it sound good 
1-2-3 Yo -- Knock on some wood 
For good luck, I like my rhymes atrocious 
Supercalafragilisticexpialidocious 
I'm an effect and that you can bet 
I can take a fly 

## AES in ECB mode

In [23]:
from Crypto.Cipher import AES
from Crypto.Util.Padding import pad, unpad

def encrypt(raw: str, key: str) -> bytes:
    """Encrypt a string using AES-128 ECB mode"""
    raw = pad(raw.encode(), 16)
    cipher = AES.new(key.encode('utf-8'), AES.MODE_ECB)
    return cipher.encrypt(raw)

def decrypt(enc: bytes, key: str) -> str:
    """Decrypt a ciphertext using AES-128 ECB mode"""
    cipher = AES.new(key.encode('utf-8'), AES.MODE_ECB)
    return unpad(cipher.decrypt(enc), 16).decode()

In [35]:
with open('7.txt', 'r') as file:
    lines = file.readlines()

cipherText = ''.join([i[:-1] for i in lines]) + 'H' # 原来的文件最后没有换行
cipherText = base64.b64decode(cipherText)

key = 'YELLOW SUBMARINE'

cipher = AES.new(key.encode('utf-8'), AES.MODE_ECB)
print(cipher.decrypt(cipherText).decode())

I'm back and I'm ringin' the bell 
A rockin' on the mike while the fly girls yell 
In ecstasy in the back of me 
Well that's my DJ Deshay cuttin' all them Z's 
Hittin' hard and the girlies goin' crazy 
Vanilla's on the mike, man I'm not lazy. 

I'm lettin' my drug kick in 
It controls my mouth and I begin 
To just let it flow, let my concepts go 
My posse's to the side yellin', Go Vanilla Go! 

Smooth 'cause that's the way I will be 
And if you don't give a damn, then 
Why you starin' at me 
So get off 'cause I control the stage 
There's no dissin' allowed 
I'm in my own phase 
The girlies sa y they love me and that is ok 
And I can dance better than any kid n' play 

Stage 2 -- Yea the one ya' wanna listen to 
It's off my head so let the beat play through 
So I can funk it up and make it sound good 
1-2-3 Yo -- Knock on some wood 
For good luck, I like my rhymes atrocious 
Supercalafragilisticexpialidocious 
I'm an effect and that you can bet 
I can take a fly girl and make her wet. 


## Detect AES in ECB mode

In [2]:
with open('8.txt', 'r') as file:
    lines = file.readlines()

lines = [i[:-1] for i in lines]
# 16 bytes == 16 hex values -> length of 32
splitLines = [set([ss[i:i+32] for i in range(0, len(ss), 32)]) for ss in lines]
print(sorted([(len(hh), i) for i, hh in enumerate(splitLines)]))

[(7, 132), (10, 0), (10, 1), (10, 2), (10, 3), (10, 4), (10, 5), (10, 6), (10, 7), (10, 8), (10, 9), (10, 10), (10, 11), (10, 12), (10, 13), (10, 14), (10, 15), (10, 16), (10, 17), (10, 18), (10, 19), (10, 20), (10, 21), (10, 22), (10, 23), (10, 24), (10, 25), (10, 26), (10, 27), (10, 28), (10, 29), (10, 30), (10, 31), (10, 32), (10, 33), (10, 34), (10, 35), (10, 36), (10, 37), (10, 38), (10, 39), (10, 40), (10, 41), (10, 42), (10, 43), (10, 44), (10, 45), (10, 46), (10, 47), (10, 48), (10, 49), (10, 50), (10, 51), (10, 52), (10, 53), (10, 54), (10, 55), (10, 56), (10, 57), (10, 58), (10, 59), (10, 60), (10, 61), (10, 62), (10, 63), (10, 64), (10, 65), (10, 66), (10, 67), (10, 68), (10, 69), (10, 70), (10, 71), (10, 72), (10, 73), (10, 74), (10, 75), (10, 76), (10, 77), (10, 78), (10, 79), (10, 80), (10, 81), (10, 82), (10, 83), (10, 84), (10, 85), (10, 86), (10, 87), (10, 88), (10, 89), (10, 90), (10, 91), (10, 92), (10, 93), (10, 94), (10, 95), (10, 96), (10, 97), (10, 98), (10, 99),

In [3]:
print(lines[132])

d880619740a8a19b7840a8a31c810a3d08649af70dc06f4fd5d2d69c744cd283e2dd052f6b641dbf9d11b0348542bb5708649af70dc06f4fd5d2d69c744cd2839475c9dfdbc1d46597949d9c7e82bf5a08649af70dc06f4fd5d2d69c744cd28397a93eab8d6aecd566489154789a6b0308649af70dc06f4fd5d2d69c744cd283d403180c98c8f6db1f2a3f9c4040deb0ab51b29933f2c123c58386b06fba186a


In [26]:
print(sorted([(len(hh), i) for i, hh in enumerate(splitLines)]))

[(7, 132), (10, 0), (10, 1), (10, 2), (10, 3), (10, 4), (10, 5), (10, 6), (10, 7), (10, 8), (10, 9), (10, 10), (10, 11), (10, 12), (10, 13), (10, 14), (10, 15), (10, 16), (10, 17), (10, 18), (10, 19), (10, 20), (10, 21), (10, 22), (10, 23), (10, 24), (10, 25), (10, 26), (10, 27), (10, 28), (10, 29), (10, 30), (10, 31), (10, 32), (10, 33), (10, 34), (10, 35), (10, 36), (10, 37), (10, 38), (10, 39), (10, 40), (10, 41), (10, 42), (10, 43), (10, 44), (10, 45), (10, 46), (10, 47), (10, 48), (10, 49), (10, 50), (10, 51), (10, 52), (10, 53), (10, 54), (10, 55), (10, 56), (10, 57), (10, 58), (10, 59), (10, 60), (10, 61), (10, 62), (10, 63), (10, 64), (10, 65), (10, 66), (10, 67), (10, 68), (10, 69), (10, 70), (10, 71), (10, 72), (10, 73), (10, 74), (10, 75), (10, 76), (10, 77), (10, 78), (10, 79), (10, 80), (10, 81), (10, 82), (10, 83), (10, 84), (10, 85), (10, 86), (10, 87), (10, 88), (10, 89), (10, 90), (10, 91), (10, 92), (10, 93), (10, 94), (10, 95), (10, 96), (10, 97), (10, 98), (10, 99),