Example of how to persist a scikit-learn model with pickle.

# 3.4.1. Persistence example

## Pickle to string

In [6]:
from sklearn import svm
from sklearn import datasets
import pickle

# Fit a support-vector classifier on the iris data set.
clf = svm.SVC()
iris = datasets.load_iris()
X, y = iris.data, iris.target
# fit() returns the estimator itself, so this prints its parameters.
print(clf.fit(X, y))

# here we pickle to a string (not to disk), then rebuild the model from it
s = pickle.dumps(clf)
clf2 = pickle.loads(s)

# predict() expects a 2-D array of shape (n_samples, n_features).
# X[:1] keeps the first sample as a single-row 2-D array; a bare X[0]
# is 1-D, which scikit-learn deprecated and later rejects.
print(clf2.predict(X[:1]))
# True label of that same sample, for comparison with the prediction.
print(y[0])

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.0,
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)
[0]
0


## Pickle to disk
- Reference: the pickle usage example in the Python 2 standard-library documentation, https://docs.python.org/2/library/pickle.html#example

In [7]:
# Display the current working directory; the relative file names used below resolve against it.
pwd

u'/home/takanori/work-local/ipynb-notes/sklearn/user-guide'

In [8]:
# List the directory contents (hidden entries included) before anything is pickled to disk.
!ls -a

.
..
3.1 Cross-validation - evaluating estimator performance.ipynb
3.2 Grid search - searching for estimator parameters.ipynb
3.3 Model evaluation - quantifying the quality of predictions.ipynb
3.4 Model persistence.ipynb
.ipynb_checkpoints


### Pickle dump

In [10]:
# Sample dictionary mixing ints, a float, a complex number, byte and
# unicode strings, and None.
data1 = {'a': [1, 2.0, 3, 4+6j],
         'b': ('string', u'Unicode string'),
         'c': None}

# A list that contains itself, to show that pickle copes with
# self-referential objects.
selfref_list = [1, 2, 3]
selfref_list.append(selfref_list)

# The with-block closes the file even if one of the dumps raises.
with open('data.pkl', 'wb') as output:
    # Pickle dictionary using protocol 0. The protocol is passed
    # explicitly so this holds whatever the interpreter's default is.
    pickle.dump(data1, output, 0)

    # Pickle the list using the highest protocol available.
    pickle.dump(selfref_list, output, pickle.HIGHEST_PROTOCOL)

In [12]:
# Long-format listing of the directory, to check for data.pkl on disk.
!ls -al

total 68
drwxr-xr-x 3 takanori takanori  4096 Oct 13 22:56 .
drwxr-xr-x 5 takanori takanori  4096 Oct 13 17:42 ..
-rw-r--r-- 1 takanori takanori 11274 Oct 13 18:28 3.1 Cross-validation - evaluating estimator performance.ipynb
-rw-r--r-- 1 takanori takanori 18463 Oct 13 22:38 3.2 Grid search - searching for estimator parameters.ipynb
-rw-r--r-- 1 takanori takanori 15003 Oct 13 22:38 3.3 Model evaluation - quantifying the quality of predictions.ipynb
-rw-r--r-- 1 takanori takanori  3053 Oct 13 22:55 3.4 Model persistence.ipynb
-rw-r--r-- 1 takanori takanori   151 Oct 13 22:56 data.pkl
drwxr-xr-x 2 takanori takanori  4096 Oct 13 22:47 .ipynb_checkpoints


### Pickle load

In [14]:
import pprint

# The with-block closes the file even if unpickling raises.
# NOTE: pickle.load can execute arbitrary code while deserialising;
# only load pickle files that you created yourself or otherwise trust.
with open('data.pkl', 'rb') as pkl_file:
    # Each load() call reads the next pickled object from the file.
    data1 = pickle.load(pkl_file)
    pprint.pprint(data1)

    data2 = pickle.load(pkl_file)
    pprint.pprint(data2)

{'a': [1, 2.0, 3, (4+6j)], 'b': ('string', u'Unicode string'), 'c': None}
[1, 2, 3, <Recursion on list with id=139775925402152>]


# Using sklearn.externals.joblib

In [15]:
# NOTE(review): sklearn.externals.joblib is believed to be deprecated in
# scikit-learn 0.21 and removed in 0.23, where `import joblib` replaces it
# -- confirm against the installed scikit-learn version.
from sklearn.externals import joblib
# Write the classifier `clf` to the file 'filename.pkl'.
joblib.dump(clf, 'filename.pkl') 
# Drop the in-memory reference so the next line rebinds `clf` from the file.
del clf
clf = joblib.load('filename.pkl') 