In [90]:
import base64
import json
import hmac
import hashlib
import time

In [91]:
type('sbd')
type(b'sbd')

str

bytes

In [92]:
data = b'abc'
len(data)
data[0]
data[1]
data[2]

3

97

98

99

In [93]:
type(data[0])
chr(data[0])
chr(data[1])
chr(data[2])

int

'a'

'b'

'c'

In [94]:
chr(97)
chr(98)
chr(99)

'a'

'b'

'c'

In [95]:
chr(255)  # last char of ascii table

'Ã¿'

In [96]:
'\x41'
'\x42'
'\x43'

'A'

'B'

'C'

In [97]:
type('\x41')
'\x41' == 'A'

str

True

In [98]:
print('\x41 is the first alphabet')
print('\\x41 is the first alphabet')
print('x41 is the first alphabet')

A is the first alphabet
\x41 is the first alphabet
x41 is the first alphabet


In [99]:
print('launch\\missile')  # \ is used to escape symbols part of syntax
print(r'launch\missile')  # raw string

launch\missile
launch\missile


In [100]:
data
data.decode()

''.join([chr(byte) for byte in data])

b'abc'

'abc'

'abc'

In [101]:
f'{97}'
f'{97:.2f}'
f'{97:.3f}'

f'{23412}'
f'{23412:,}'
f'{23412:,.2f}'
f'{23412:,.3f}'

'97'

'97.00'

'97.000'

'23412'

'23,412'

'23,412.00'

'23,412.000'

In [102]:
f'{0:b}'
f'{1:b}'
f'{2:b}'
f'{4:b}'
f'{8:b}'
f'{16:b}'
f'{17:b}'

'0'

'1'

'10'

'100'

'1000'

'10000'

'10001'

In [103]:
f'{0:b}'
f'{0:5b}'
f'{0:10b}'
f'{0:12b}'

'0'

'    0'

'         0'

'           0'

In [104]:
for i in range(1, 10):
  f'{0:{i}b}'

'0'

' 0'

'  0'

'   0'

'    0'

'     0'

'      0'

'       0'

'        0'

In [105]:
f'{0:b}'
f'{0:5b}'
f'{2:05b}'
f'{15:05b}'  # 1 padding req
f'{16:05b}'  # from 16 no padding req
f'{31:05b}'  # 5 digits binary num till 31
f'{195:05b}'

'0'

'    0'

'00010'

'01111'

'10000'

'11111'

'11000011'

In [106]:
data = b'abc'
for i in data:
  print(i, chr(i), f'{i:08b}')

97 a 01100001
98 b 01100010
99 c 01100011


In [107]:
list(b'abc')

bytes([97, 98, 99])

[97, 98, 99]

b'abc'

In [108]:
for i in [97, 98, 99]:
  f'{i:b}'

' '.join([f'{i:b}' for i in list(data)])

'1100001'

'1100010'

'1100011'

'1100001 1100010 1100011'

In [109]:
int('1100001', 2)
chr(int('1100001', 2))

int('1100001')

97

'a'

1100001

In [110]:
binary_str = '1100001 1100010 1100011'

for byte in binary_str.split():
  print(byte, int(byte, 2), chr(int(byte, 2)))

1100001 97 a
1100010 98 b
1100011 99 c


# Implementing base64 encoding/decoding from scratch

In [111]:
import string
base64_symbols = string.ascii_uppercase + string.ascii_lowercase + string.digits + '+/'
base64_symbols
len(base64_symbols)

'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'

64

In [112]:
for i, symbol in enumerate(base64_symbols):
  print(i, f'{i:06b}', symbol)

0 000000 A
1 000001 B
2 000010 C
3 000011 D
4 000100 E
5 000101 F
6 000110 G
7 000111 H
8 001000 I
9 001001 J
10 001010 K
11 001011 L
12 001100 M
13 001101 N
14 001110 O
15 001111 P
16 010000 Q
17 010001 R
18 010010 S
19 010011 T
20 010100 U
21 010101 V
22 010110 W
23 010111 X
24 011000 Y
25 011001 Z
26 011010 a
27 011011 b
28 011100 c
29 011101 d
30 011110 e
31 011111 f
32 100000 g
33 100001 h
34 100010 i
35 100011 j
36 100100 k
37 100101 l
38 100110 m
39 100111 n
40 101000 o
41 101001 p
42 101010 q
43 101011 r
44 101100 s
45 101101 t
46 101110 u
47 101111 v
48 110000 w
49 110001 x
50 110010 y
51 110011 z
52 110100 0
53 110101 1
54 110110 2
55 110111 3
56 111000 4
57 111001 5
58 111010 6
59 111011 7
60 111100 8
61 111101 9
62 111110 +
63 111111 /


In [113]:
# '101' -> 5 -> 'F' -> 5 -> '000101' [data is corrupted after decoding]

bad_chunk = '101'

# encoding:
int(bad_chunk, 2)  # chunk -> num
base64_symbols[5]  # num -> symbol

# decoding:
base64_symbols.index('F')
f'{5:06b}'  # num -> chunk

5

'F'

5

'000101'

In [114]:
# '100101' -> 37 -> 'l' -> 37 -> '100101'

good_chunk = '100101'  # 6 bits is ok

# encoding
int(good_chunk, 2)
base64_symbols[37]

# decoding:
base64_symbols.index('l')
f'{37:06b}'  # num -> chunk

37

'l'

37

'100101'

In [115]:
# data = b'vidu is my hero'
data = b'vidu is my boy'
type(data)

bytes

### step 1: Bytes -> Binary str

In [116]:
binary_str = ''.join([f'{i:08b}' for i in data])
binary_str

len(binary_str)
len(binary_str) % 6

'0111011001101001011001000111010100100000011010010111001100100000011011010111100100100000011000100110111101111001'

112

4

In [117]:
r = len(data) % 3
if r == 1:
  binary_str += '0'*4
if r == 2:
  binary_str += '0'*2

### step 2: split binary string in chunks of 6 bits

In [118]:
# Problem: Split a str in chunks of length x

fruits = 'applemangolemon'

split_from_where = list(range(0, len(fruits), 5))
split_from_where

for i in split_from_where:
  print(i, i+5, fruits[i:i+5])

[0, 5, 10]

0 5 apple
5 10 mango
10 15 lemon


In [119]:
chunks = [binary_str[i:i+6] for i in range(0, len(binary_str), 6)]
print(chunks)

['011101', '100110', '100101', '100100', '011101', '010010', '000001', '101001', '011100', '110010', '000001', '101101', '011110', '010010', '000001', '100010', '011011', '110111', '100100']


### step 3: chunks -> nums (0-63)

In [120]:
nums = [int(chunk, 2) for chunk in chunks]
nums

[29, 38, 37, 36, 29, 18, 1, 41, 28, 50, 1, 45, 30, 18, 1, 34, 27, 55, 36]

### step 4: nums -> symbols

In [121]:
encoded_str = ''.join([base64_symbols[num] for num in nums])
encoded_str

'dmlkdSBpcyBteSBib3k'

In [122]:
base64.b64encode(data).decode()
base64.b64encode(data).decode() == encoded_str

'dmlkdSBpcyBteSBib3k='

False

# Decoding
### decoding is done to get back data from encoded string

### step 1: symbols -> nums

In [123]:
nums = [base64_symbols.index(symbol) for symbol in encoded_str]
nums

[29, 38, 37, 36, 29, 18, 1, 41, 28, 50, 1, 45, 30, 18, 1, 34, 27, 55, 36]

### step 2: nums -> chunks (6 bit binary)

In [124]:
chunks = [f'{num:06b}' for num in nums]
print(chunks)

['011101', '100110', '100101', '100100', '011101', '010010', '000001', '101001', '011100', '110010', '000001', '101101', '011110', '010010', '000001', '100010', '011011', '110111', '100100']


### step 3: join chunks

In [125]:
binary_str = ''.join(chunks)
binary_str

'011101100110100101100100011101010010000001101001011100110010000001101101011110010010000001100010011011110111100100'

### step 4: split in chunks (8 bits)

In [126]:
chunks = [binary_str[i:i+8] for i in range(0, len(binary_str), 8)]
print(chunks)

['01110110', '01101001', '01100100', '01110101', '00100000', '01101001', '01110011', '00100000', '01101101', '01111001', '00100000', '01100010', '01101111', '01111001', '00']


### step 5: chunks -> nums

In [127]:
nums = [int(chunk, 2) for chunk in chunks]
nums

[118, 105, 100, 117, 32, 105, 115, 32, 109, 121, 32, 98, 111, 121, 0]

### step 6: nums -> bytes

In [128]:
decoded_data = bytes(nums)
decoded_data

b'vidu is my boy\x00'

In [129]:
data == decoded_data

False

In [130]:
base64.b64decode(encoded_str)
base64.b64decode(encoded_str) == decoded_data

Error: Incorrect padding