## Pickle

    serialization
        python objects - > flattened content  -- string/file

            dump  -- into a file
            dumps -- into a python string

    De-serialization
        string/file --> unflattened content -> python objects

            load  -- from a file
            loads -- from a python string

In [1]:
import pickle

###  Pickle to string object

In [2]:
# Serialize an object
data = {"name": "John", "age": 30}
serialized_data = pickle.dumps(data)

serialized_data

b'\x80\x04\x95\x1b\x00\x00\x00\x00\x00\x00\x00}\x94(\x8c\x04name\x94\x8c\x04John\x94\x8c\x03age\x94K\x1eu.'

In [3]:
# Deserialize the object
deserialized_data = pickle.loads(serialized_data)

deserialized_data

{'name': 'John', 'age': 30}

### Pickle to file

In [4]:
# Write to a pickle file
data = {"name": "John", "age": 30}
with open("data.pickle", "wb") as file:
    pickle.dump(data, file)

In [6]:
# Read from a pickle file
with open("data.pickle", "rb") as file:
    loaded_data = pickle.load(file)


loaded_data

{'name': 'John', 'age': 30}

### Pickling Multiple Objects

In [7]:
# Pickle multiple objects
data1 = {"name": "John", "age": 30}
data2 = {"name": "Alice", "age": 25}
serialized_data = pickle.dumps((data1, data2))
print(f"{serialized_data =}")

# Unpickle multiple objects
deserialized_data = pickle.loads(serialized_data)
obj1, obj2 = deserialized_data

print(f"{obj1 =}")
print(f"{obj2 =}")

serialized_data =b'\x80\x04\x95/\x00\x00\x00\x00\x00\x00\x00}\x94(\x8c\x04name\x94\x8c\x04John\x94\x8c\x03age\x94K\x1eu}\x94(h\x01\x8c\x05Alice\x94h\x03K\x19u\x86\x94.'
obj1 ={'name': 'John', 'age': 30}
obj2 ={'name': 'Alice', 'age': 25}


### Pickling and Unpickling with Compression

In [8]:
import pickle
import gzip

data = {"name": "John", "age": 30}

# Pickle with compression
with gzip.open("data.pickle.gz", "wb") as file:
    pickle.dump(data, file)

# Unpickle with compression
with gzip.open("data.pickle.gz", "rb") as file:
    loaded_data = pickle.load(file)

print(f"{loaded_data =}")

loaded_data ={'name': 'John', 'age': 30}


### Pickling and Unpickling Objects with External Dependencies

In [9]:
import pickle
import numpy as np

data = {"array": np.array([1, 2, 3])}

# Pickle an object with NumPy array
serialized_data = pickle.dumps(data, protocol=pickle.HIGHEST_PROTOCOL)
print("serialized_data =", serialized_data)

# Unpickle an object with NumPy array
deserialized_data = pickle.loads(serialized_data)
print("deserialized_data =", deserialized_data)

serialized_data = b'\x80\x05\x95\x8a\x00\x00\x00\x00\x00\x00\x00}\x94\x8c\x05array\x94\x8c\x12numpy.core.numeric\x94\x8c\x0b_frombuffer\x94\x93\x94(\x96\x0c\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x02\x00\x00\x00\x03\x00\x00\x00\x94\x8c\x05numpy\x94\x8c\x05dtype\x94\x93\x94\x8c\x02i4\x94\x89\x88\x87\x94R\x94(K\x03\x8c\x01<\x94NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK\x00t\x94bK\x03\x85\x94\x8c\x01C\x94t\x94R\x94s.'
deserialized_data = {'array': array([1, 2, 3])}


### Handling Unpickling Errors

In [10]:
import pickle

# Try to unpickle an object, handle errors
try:
    with open("data.pickle", "rb") as file:
        loaded_data = pickle.load(file)
except (pickle.UnpicklingError, FileNotFoundError) as e:
    print(f"Error loading pickle file: {e}")

### Pickling and Unpickling Large Objects Incrementally

In [6]:
import pickle
import io

data = [1, 2, 3] * int(1e6)
chunk_size = 1000

# Pickle large object incrementally
serialized_data = io.BytesIO()
pickler= pickle.Pickler(serialized_data)
for i in range(0, len(data), chunk_size):
    pickler.dump(data[i : i + chunk_size])

In [7]:

# Unpickle large object incrementally
serialized_data.seek(0)
unpickler = pickle.Unpickler(serialized_data)

deserialized_data = []
for i in range(0, len(data), chunk_size):
    try:
        chunk = unpickler.load()
        deserialized_data.extend(chunk)
    except EOFError:
        break

deserialized_data

[1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,
 2,
 3,
 1,


## Serialization for all types

In [15]:
data_types = [123, 23.121, "python", True, None, [1, 2, 3], (1, 2, 3), {1, 2, 3}, {1:2, 3:4}]

for each in data_types:
    print( each, pickle.dumps(each), pickle.loads(pickle.dumps(each)))

123 b'\x80\x04K{.' 123
23.121 b'\x80\x04\x95\n\x00\x00\x00\x00\x00\x00\x00G@7\x1e\xf9\xdb"\xd0\xe5.' 23.121
python b'\x80\x04\x95\n\x00\x00\x00\x00\x00\x00\x00\x8c\x06python\x94.' python
True b'\x80\x04\x88.' True
None b'\x80\x04N.' None
[1, 2, 3] b'\x80\x04\x95\x0b\x00\x00\x00\x00\x00\x00\x00]\x94(K\x01K\x02K\x03e.' [1, 2, 3]
(1, 2, 3) b'\x80\x04\x95\t\x00\x00\x00\x00\x00\x00\x00K\x01K\x02K\x03\x87\x94.' (1, 2, 3)
{1, 2, 3} b'\x80\x04\x95\x0b\x00\x00\x00\x00\x00\x00\x00\x8f\x94(K\x01K\x02K\x03\x90.' {1, 2, 3}
{1: 2, 3: 4} b'\x80\x04\x95\r\x00\x00\x00\x00\x00\x00\x00}\x94(K\x01K\x02K\x03K\x04u.' {1: 2, 3: 4}


## Issue


In [26]:
person_details ={
    "name": "Gudo Van Russum", 
    "age": 67,
    "location": 'USA'
}

with open('person_details.pkl', 'wb') as fh:
    pickle.dump(person_details, fh)

In [27]:
! ls -ltr

total 48
drwxrwxrwx+ 2 codespace codespace  4096 Dec 30 14:00 01_pickle
-rw-rw-rw-  1 codespace codespace    38 Dec 30 14:06 data.pickle
-rw-rw-rw-  1 codespace codespace    68 Dec 30 14:10 a_using_shelve.py
-rw-rw-rw-  1 codespace codespace 29081 Dec 30 15:31 a_working_with_pickle.ipynb
-rw-rw-rw-  1 codespace codespace    66 Dec 30 15:31 person_details.pkl


In [28]:
! cat person_details.pkl

��7       }�(�name��Gudo Van Russum��age�KClocation��USA�u.

 Man in the Middle attack 

In [31]:
! cat person_details.pkl

��7       }�(�name��Gudo Van Russum��age�KClocation��UAE�u.

In [32]:
with open('person_details.pkl', 'rb') as fh:
    content = pickle.load(fh)

content

UnpicklingError: invalid load key, '\xef'.

In [33]:
# cpython - it is c implementation of python
# Pickle and cpickle has their importance in interfacing with c and C++.
# in python 2

# In python 3, cpickle is renamed as pickle