Skip to content

Commit

Permalink
minor changes from PR #554 (#573)
Browse files Browse the repository at this point in the history
* minor changes to the codebase from #475

* roll back some changes (moved to the refonfail PR)

* add _session to Pickler

---------

Co-authored-by: Leonardo Gama <leonardo.reis.gama@gmail.com>
  • Loading branch information
mmckerns and leogama committed Feb 8, 2023
1 parent 95cb794 commit 4911aff
Show file tree
Hide file tree
Showing 4 changed files with 95 additions and 47 deletions.
112 changes: 72 additions & 40 deletions dill/_dill.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,29 @@
"""
dill: a utility for serialization of python objects
The primary functions in `dill` are :func:`dump` and
:func:`dumps` for serialization ("pickling") to a
file or to a string, respectively, and :func:`load`
and :func:`loads` for deserialization ("unpickling"),
similarly, from a file or from a string. Other notable
functions are :func:`~dill.dump_module` and
:func:`~dill.load_module`, which are used to save and
restore module objects, including an intepreter session.
Based on code written by Oren Tirosh and Armin Ronacher.
Extended to a (near) full set of the builtin types (in types module),
and coded to the pickle interface, by <mmckerns@caltech.edu>.
Initial port to python3 by Jonathan Dobson, continued by mmckerns.
Test against "all" python types (Std. Lib. CH 1-15 @ 2.7) by mmckerns.
Test against CH16+ Std. Lib. ... TBD.
Tested against "all" python types (Std. Lib. CH 1-15 @ 2.7) by mmckerns.
Tested against CH16+ Std. Lib. ... TBD.
"""

from __future__ import annotations

__all__ = [
'Pickler','Unpickler',
'check','copy','dump','dumps','load','loads','pickle','pickles','register',
'DEFAULT_PROTOCOL','HIGHEST_PROTOCOL','CONTENTS_FMODE','FILE_FMODE','HANDLE_FMODE',
'dump','dumps','load','loads','copy',
'Pickler','Unpickler','register','pickle','pickles','check',
'DEFAULT_PROTOCOL','HIGHEST_PROTOCOL','HANDLE_FMODE','CONTENTS_FMODE','FILE_FMODE',
'PickleError','PickleWarning','PicklingError','PicklingWarning','UnpicklingError',
'UnpicklingWarning',
]
Expand All @@ -39,6 +51,7 @@
#XXX: get types from .objtypes ?
import builtins as __builtin__
from pickle import _Pickler as StockPickler, Unpickler as StockUnpickler
from pickle import GLOBAL, POP
from _thread import LockType
from _thread import RLock as RLockType
#from io import IOBase
Expand All @@ -58,6 +71,7 @@
import marshal
import gc
# import zlib
import dataclasses
from weakref import ReferenceType, ProxyType, CallableProxyType
from collections import OrderedDict
from functools import partial
Expand Down Expand Up @@ -158,22 +172,19 @@ def get_file_type(*args, **kwargs):
from socket import socket as SocketType
#FIXME: additionally calls ForkingPickler.register several times
from multiprocessing.reduction import _reduce_socket as reduce_socket
try:
try: #pragma: no cover
IS_IPYTHON = __IPYTHON__ # is True
ExitType = None # IPython.core.autocall.ExitAutocall
singletontypes = ['exit', 'quit', 'get_ipython']
ExitType = None # IPython.core.autocall.ExitAutocall
IPYTHON_SINGLETONS = ('exit', 'quit', 'get_ipython')
except NameError:
IS_IPYTHON = False
try: ExitType = type(exit) # apparently 'exit' can be removed
except NameError: ExitType = None
singletontypes = []
IPYTHON_SINGLETONS = ()

import inspect
import dataclasses
import typing

from pickle import GLOBAL


### Shims for different versions of Python and dill
class Sentinel(object):
Expand Down Expand Up @@ -320,7 +331,14 @@ class UnpicklingWarning(PickleWarning, UnpicklingError):
### Extend the Picklers
class Pickler(StockPickler):
"""python's Pickler extended to interpreter sessions"""
dispatch = MetaCatchingDict(StockPickler.dispatch.copy())
dispatch: typing.Dict[type, typing.Callable[[Pickler, typing.Any], None]] \
= MetaCatchingDict(StockPickler.dispatch.copy())
"""The dispatch table, a dictionary of serializing functions used
by Pickler to save objects of specific types. Use :func:`pickle`
or :func:`register` to associate types to custom functions.
:meta hide-value:
"""
_session = False
from .settings import settings

Expand All @@ -341,11 +359,12 @@ def __init__(self, file, *args, **kwds):
self._file = file

def save(self, obj, save_persistent_id=True):
# register if the object is a numpy ufunc
# thanks to Paul Kienzle for pointing out ufuncs didn't pickle
# numpy hack
obj_type = type(obj)
if NumpyArrayType and not (obj_type is type or obj_type in Pickler.dispatch):
if NumpyUfuncType and numpyufunc(obj_type):
# register if the object is a numpy ufunc
# thanks to Paul Kienzle for pointing out ufuncs didn't pickle
if numpyufunc(obj_type):
@register(obj_type)
def save_numpy_ufunc(pickler, obj):
logger.trace(pickler, "Nu: %s", obj)
Expand All @@ -359,7 +378,7 @@ def save_numpy_ufunc(pickler, obj):
# def uload(name): return getattr(numpy, name)
# copy_reg.pickle(NumpyUfuncType, udump, uload)
# register if the object is a numpy dtype
if NumpyDType and numpydtype(obj_type):
if numpydtype(obj_type):
@register(obj_type)
def save_numpy_dtype(pickler, obj):
logger.trace(pickler, "Dt: %s", obj)
Expand All @@ -372,7 +391,7 @@ def save_numpy_dtype(pickler, obj):
# def udump(f): return uload, (f.type,)
# copy_reg.pickle(NumpyDTypeType, udump, uload)
# register if the object is a subclassed numpy array instance
if NumpyArrayType and ndarraysubclassinstance(obj_type):
if ndarraysubclassinstance(obj_type):
@register(obj_type)
def save_numpy_array(pickler, obj):
logger.trace(pickler, "Nu: (%s, %s)", obj.shape, obj.dtype)
Expand All @@ -381,8 +400,9 @@ def save_numpy_array(pickler, obj):
pickler.save_reduce(_create_array, (f,args,state,npdict), obj=obj)
logger.trace(pickler, "# Nu")
return
# end hack
if GENERATOR_FAIL and type(obj) == GeneratorType:
# end numpy hack

if GENERATOR_FAIL and obj_type is GeneratorType:
msg = "Can't pickle %s: attribute lookup builtins.generator failed" % GeneratorType
raise PicklingError(msg)
StockPickler.save(self, obj, save_persistent_id)
Expand All @@ -392,7 +412,6 @@ def save_numpy_array(pickler, obj):
def dump(self, obj): #NOTE: if settings change, need to update attributes
logger.trace_setup(self)
StockPickler.dump(self, obj)

dump.__doc__ = StockPickler.dump.__doc__

class Unpickler(StockUnpickler):
Expand Down Expand Up @@ -436,12 +455,12 @@ def dispatch_table():
pickle_dispatch_copy = StockPickler.dispatch.copy()

def pickle(t, func):
"""expose dispatch table for user-created extensions"""
"""expose :attr:`~Pickler.dispatch` table for user-created extensions"""
Pickler.dispatch[t] = func
return

def register(t):
"""register type to Pickler's dispatch table """
"""decorator to register types to Pickler's :attr:`~Pickler.dispatch` table"""
def proxy(func):
Pickler.dispatch[t] = func
return func
Expand All @@ -460,7 +479,7 @@ def use_diff(on=True):
Reduces size of pickles by only including object which have changed.
Decreases pickle size but increases CPU time needed.
Also helps avoid some unpicklable objects.
Also helps avoid some unpickleable objects.
MUST be called at start of script, otherwise changes will not be recorded.
"""
global _use_diff, diff
Expand Down Expand Up @@ -1088,7 +1107,7 @@ def _save_with_postproc(pickler, reduction, is_pickler_dill=None, obj=Getattr.NO
else:
pickler.save_reduce(*reduction)
# pop None created by calling preprocessing step off stack
pickler.write(bytes('0', 'UTF-8'))
pickler.write(POP)

#@register(CodeType)
#def save_code(pickler, obj):
Expand Down Expand Up @@ -1158,7 +1177,7 @@ def save_code(pickler, obj):
return

def _repr_dict(obj):
"""make a short string representation of a dictionary"""
"""Make a short string representation of a dictionary."""
return "<%s object at %#012x>" % (type(obj).__name__, id(obj))

@register(dict)
Expand Down Expand Up @@ -1470,7 +1489,7 @@ def save_cell(pickler, obj):
# The result of this function call will be None
pickler.save_reduce(_shims._delattr, (obj, 'cell_contents'))
# pop None created by calling _delattr off stack
pickler.write(bytes('0', 'UTF-8'))
pickler.write(POP)
logger.trace(pickler, "# Ce3")
return
if is_dill(pickler, child=True):
Expand Down Expand Up @@ -1614,7 +1633,7 @@ def save_module(pickler, obj):
pass
else:
logger.trace(pickler, "M2: %s with diff", obj)
logger.trace(pickler, "Diff: %s", changed.keys())
logger.info("Diff: %s", changed.keys())
pickler.save_reduce(_import_module, (obj.__name__,), obj=obj,
state=changed)
logger.trace(pickler, "# M2")
Expand All @@ -1625,15 +1644,20 @@ def save_module(pickler, obj):
logger.trace(pickler, "# M1")
else:
builtin_mod = _is_builtin_module(obj)
if obj.__name__ not in ("builtins", "dill", "dill._dill") and not builtin_mod or \
is_dill(pickler, child=True) and obj is pickler._main:
is_session_main = is_dill(pickler, child=True) and obj is pickler._main
if (obj.__name__ not in ("builtins", "dill", "dill._dill") and not builtin_mod
or is_session_main):
logger.trace(pickler, "M1: %s", obj)
_main_dict = obj.__dict__.copy() #XXX: better no copy? option to copy?
[_main_dict.pop(item, None) for item in singletontypes
+ ["__builtins__", "__loader__"]]
# Hack for handling module-type objects in load_module().
mod_name = obj.__name__ if _is_imported_module(obj) else '__runtime__.%s' % obj.__name__
pickler.save_reduce(_import_module, (mod_name,), obj=obj,
state=_main_dict)
# Second references are saved as __builtin__.__main__ in save_module_dict().
main_dict = obj.__dict__.copy()
for item in ('__builtins__', '__loader__'):
main_dict.pop(item, None)
for item in IPYTHON_SINGLETONS: #pragma: no cover
if getattr(main_dict.get(item), '__module__', '').startswith('IPython'):
del main_dict[item]
pickler.save_reduce(_import_module, (mod_name,), obj=obj, state=main_dict)
logger.trace(pickler, "# M1")
elif obj.__name__ == "dill._dill":
logger.trace(pickler, "M2: %s", obj)
Expand All @@ -1643,7 +1667,6 @@ def save_module(pickler, obj):
logger.trace(pickler, "M2: %s", obj)
pickler.save_reduce(_import_module, (obj.__name__,), obj=obj)
logger.trace(pickler, "# M2")
return
return

@register(TypeType)
Expand All @@ -1669,7 +1692,7 @@ def save_type(pickler, obj, postproc_list=None):
elif obj is type(None):
logger.trace(pickler, "T7: %s", obj)
#XXX: pickler.save_reduce(type, (None,), obj=obj)
pickler.write(bytes('c__builtin__\nNoneType\n', 'UTF-8'))
pickler.write(GLOBAL + b'__builtin__\nNoneType\n')
logger.trace(pickler, "# T7")
elif obj is NotImplementedType:
logger.trace(pickler, "T7: %s", obj)
Expand Down Expand Up @@ -1710,9 +1733,18 @@ def save_type(pickler, obj, postproc_list=None):
else:
logger.trace(pickler, "T4: %s", obj)
if incorrectly_named:
warnings.warn('Cannot locate reference to %r.' % (obj,), PicklingWarning)
warnings.warn(
"Cannot locate reference to %r." % (obj,),
PicklingWarning,
stacklevel=3,
)
if obj_recursive:
warnings.warn('Cannot pickle %r: %s.%s has recursive self-references that trigger a RecursionError.' % (obj, obj.__module__, obj_name), PicklingWarning)
warnings.warn(
"Cannot pickle %r: %s.%s has recursive self-references that "
"trigger a RecursionError." % (obj, obj.__module__, obj_name),
PicklingWarning,
stacklevel=3,
)
#print (obj.__dict__)
#print ("%s\n%s" % (type(obj), obj.__name__))
#print ("%s\n%s" % (obj.__bases__, obj.__dict__))
Expand Down Expand Up @@ -1848,7 +1880,7 @@ def save_function(pickler, obj):
# Change the value of the cell
pickler.save_reduce(*possible_postproc)
# pop None created by calling preprocessing step off stack
pickler.write(bytes('0', 'UTF-8'))
pickler.write(POP)

logger.trace(pickler, "# F1")
else:
Expand Down
18 changes: 17 additions & 1 deletion dill/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,13 +194,29 @@ def dump_module(
>>> [foo.sin(x) for x in foo.values]
[0.8414709848078965, 0.9092974268256817, 0.1411200080598672]
- Use `refimported` to save imported objects by reference:
>>> import dill
>>> from html.entities import html5
>>> type(html5), len(html5)
(dict, 2231)
>>> import io
>>> buf = io.BytesIO()
>>> dill.dump_module(buf) # saves __main__, with html5 saved by value
>>> len(buf.getvalue()) # pickle size in bytes
71665
>>> buf = io.BytesIO()
>>> dill.dump_module(buf, refimported=True) # html5 saved by reference
>>> len(buf.getvalue())
438
*Changed in version 0.3.6:* Function ``dump_session()`` was renamed to
``dump_module()``. Parameters ``main`` and ``byref`` were renamed to
``module`` and ``refimported``, respectively.
Note:
Currently, ``dill.settings['byref']`` and ``dill.settings['recurse']``
don't apply to this function.`
don't apply to this function.
"""
for old_par, par in [('main', 'module'), ('byref', 'refimported')]:
if old_par in kwds:
Expand Down
10 changes: 5 additions & 5 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,15 +73,15 @@
'special-members': True,
'show-inheritance': True,
'imported-members': True,
'exclude-members': (
'__dict__,'
'exclude-members': ( #NOTE: this is a single string concatenation
'__dict__,' # may be verbose
'__slots__,'
'__weakref__,'
'__module__,'
'_abc_impl,'
'__init__,'
'__annotations__,'
'__dataclass_fields__,'
'__init__,' # redundant with class docstring by "autoclass_content=both"
'__annotations__,' # redundant with signature documentation
'__dataclass_fields__,' # redundant automatic attribute
)
}
autodoc_typehints = 'description'
Expand Down
2 changes: 1 addition & 1 deletion docs/source/dill.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ logger module
-------------

.. automodule:: dill.logger
.. :exclude-members: +
:exclude-members: +trace

objtypes module
---------------
Expand Down

0 comments on commit 4911aff

Please sign in to comment.