Skip to content

Commit e177198

Browse files
laurent-pck authored and cclauss committed
Add unicode support in ciphers/base64_cipher.py script. (TheAlgorithms#1316)
* Add unicode support in ciphers/base64_cipher.py script.
* Add doctests and correct the padding length computation in base64_cipher.
1 parent 2197bfa commit e177198

File tree

1 file changed

+37
-20
lines changed

1 file changed

+37
-20
lines changed

ciphers/base64_cipher.py

+37-20
Original file line number | Diff line number | Diff line change
@@ -1,35 +1,52 @@
def encode_base64(text: str) -> str:
    r"""
    Encode ``text`` as base64 over its UTF-8 byte representation.

    The input is converted to UTF-8 bytes, zero-padded to a multiple of
    three bytes, and each 3-byte group is mapped to four characters of the
    base64 alphabet. A ``"\r\n"`` line break is inserted before every group
    that starts a new 76-character output line, and the characters produced
    by the zero padding are replaced by ``"="``.

    :param text: the string to encode
    :return: the base64 representation of ``text``

    >>> encode_base64('WELCOME to base64 encoding 😁')
    'V0VMQ09NRSB0byBiYXNlNjQgZW5jb2Rpbmcg8J+YgQ=='
    >>> encode_base64('AÅᐃ𐀏🤓')
    'QcOF4ZCD8JCAj/CfpJM='
    >>> encode_base64('A'*60)
    'QUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFB\r\nQUFB'
    >>> encode_base64('')
    ''
    """
    base64_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"

    byte_text = bytes(text, "utf-8")  # put text in bytes for unicode support
    c = -len(byte_text) % 3  # the length of padding (0, 1 or 2)
    p = "=" * c  # the padding
    s = byte_text + b"\x00" * c  # the bytes to encode, a multiple of 3 long

    pieces = []
    for i in range(0, len(s), 3):
        # 57 input bytes yield exactly 76 output characters, so a new output
        # line starts whenever the byte offset is a non-zero multiple of 57.
        if i > 0 and i % 57 == 0:
            pieces.append("\r\n")  # for unix newline, put "\n"

        # Pack three bytes into one 24-bit number, then cut it into four
        # 6-bit indexes into the alphabet.
        n = (s[i] << 16) + (s[i + 1] << 8) + s[i + 2]
        pieces.append(
            base64_chars[(n >> 18) & 63]
            + base64_chars[(n >> 12) & 63]
            + base64_chars[(n >> 6) & 63]
            + base64_chars[n & 63]
        )

    r = "".join(pieces)
    # The last ``c`` characters encode only the zero padding: swap them for "=".
    return r[: len(r) - c] + p
2534

2635

27-
def decodeBase64(text):
28-
base64chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
36+
def decode_base64(text):
37+
r"""
38+
>>> decode_base64('V0VMQ09NRSB0byBiYXNlNjQgZW5jb2Rpbmcg8J+YgQ==')
39+
'WELCOME to base64 encoding 😁'
40+
>>> decode_base64('QcOF4ZCD8JCAj/CfpJM=')
41+
'AÅᐃ𐀏🤓'
42+
>>> decode_base64("QUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFB\r\nQUFB")
43+
'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'
44+
"""
45+
base64_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
2946
s = ""
3047

3148
for i in text:
32-
if i in base64chars:
49+
if i in base64_chars:
3350
s += i
3451
c = ""
3552
else:
@@ -43,28 +60,28 @@ def decodeBase64(text):
4360
if c == "==":
4461
p = "AA"
4562

46-
r = ""
63+
r = b""
4764
s = s + p
4865

4966
i = 0
5067
while i < len(s):
5168
n = (
52-
(base64chars.index(s[i]) << 18)
53-
+ (base64chars.index(s[i + 1]) << 12)
54-
+ (base64chars.index(s[i + 2]) << 6)
55-
+ base64chars.index(s[i + 3])
69+
(base64_chars.index(s[i]) << 18)
70+
+ (base64_chars.index(s[i + 1]) << 12)
71+
+ (base64_chars.index(s[i + 2]) << 6)
72+
+ base64_chars.index(s[i + 3])
5673
)
5774

58-
r += chr((n >> 16) & 255) + chr((n >> 8) & 255) + chr(n & 255)
75+
r += bytes([(n >> 16) & 255]) + bytes([(n >> 8) & 255]) + bytes([n & 255])
5976

6077
i += 4
6178

62-
return r[0 : len(r) - len(p)]
79+
return str(r[0 : len(r) - len(p)], "utf-8")
6380

6481

6582
def main() -> None:
    """Print a sample string base64-encoded, then print its decoded round trip."""
    sample = "WELCOME to base64 encoding 😁"
    encoded = encode_base64(sample)
    print(encoded)
    print(decode_base64(encoded))
6885

6986

7087
if __name__ == "__main__":

0 commit comments

Comments
 (0)