## Load MNIST on Python 3.x

In [1]:
import numpy as np
import h5py
import pickle
from scipy import ndimage
import scipy
import matplotlib.pyplot as plt
%matplotlib inline

  from ._conv import register_converters as _register_converters


In [2]:
# Read the pickled MNIST archive and unpack its three splits.
# NOTE(review): pickle.load can execute arbitrary code -- only use a trusted mnist.pkl.
# encoding='latin1' is presumably needed because the file was pickled under Python 2 -- confirm.
with open('./mnist.pkl', 'rb') as mnist_file:
    mnist_splits = pickle.load(mnist_file, encoding='latin1')
training_data, validation_data, test_data = mnist_splits

In [3]:
!mkdir ../data

mkdir: cannot create directory ‘../data’: File exists


In [4]:
# Write each MNIST split to ../data/mnist.hdf5 as an image dataset and a label dataset.
# Each split is indexed as (images, labels); images are reshaped to a stack of 28x28 arrays.
mnist_layout = (
    ('Xtr', 'ytr', training_data),
    ('Xv', 'yv', validation_data),
    ('Xts', 'yts', test_data),
)
with h5py.File('../data/mnist.hdf5', 'w') as h5_out:
    for images_key, labels_key, split in mnist_layout:
        h5_out.create_dataset(images_key, data=split[0].reshape(-1,28,28))
        h5_out.create_dataset(labels_key, data=split[1])

In [8]:
from glob import glob
from tqdm import tqdm_notebook as tqdm

In [9]:
def transform(img, size=28, margin=0.2):
    """Binarize a glyph image, crop it tightly, pad it to a square and rescale it.

    Parameters
    ----------
    img : 2-D array
        Grayscale image. Every pixel whose value is not exactly 1 is treated
        as foreground (presumably white background with darker ink, as
        produced by ``plt.imread(...).mean(axis=2)`` -- confirm for other inputs).
    size : int, default 28
        Target side length passed to the final rescale (``zoom = size / side``).
    margin : float, default 0.2
        Extra border, as a fraction of the longer side of the cropped glyph,
        added around the glyph before rescaling.

    Returns
    -------
    numpy.ndarray
        ``uint8`` image of the rescaled, centred glyph; expected shape is
        ``(size, size)``.

    Raises
    ------
    ValueError
        If the image contains no foreground pixel, so there is nothing to crop.
    """
    # inverting and closing: non-zero entries of (1 - img) form the foreground mask
    img = ndimage.binary_closing(1 - img)

    # cropping tight
    aw = np.argwhere(img)
    if aw.size == 0:
        raise ValueError('transform: image has no foreground pixels to crop')
    mins = aw.min(axis=0)
    # argwhere gives inclusive indices; +1 turns the max into an exclusive slice
    # bound so the last foreground row/column is kept instead of being cut off.
    maxs = aw.max(axis=0) + 1
    img = img[mins[0]:maxs[0],
              mins[1]:maxs[1]]

    # padding: square canvas whose side is the longer glyph side plus the margin
    max_d = max(img.shape)
    max_d += int(max_d * margin)
    pad = max_d - np.r_[img.shape]
    # split the padding as evenly as possible so the glyph ends up centred
    padded = np.pad(img, ((pad[0]//2, pad[0] - pad[0]//2),
                          (pad[1]//2, pad[1] - pad[1]//2)),
                    mode='constant', constant_values=0).astype('uint8')

    return ndimage.zoom(padded, zoom=size / max_d, order=2)

In [11]:
# Load every USPS numeral PNG, grouped by digit: imgs[i] stacks the transformed
# samples found under ./USPSdata/Numerals/{i}/.
# Paths are sorted because glob() returns files in filesystem-dependent order,
# which would otherwise make the sample order differ between runs and machines.
imgs = [
    np.c_[[transform(plt.imread(path).mean(axis=2))  # average the colour channels to grayscale
           for path in sorted(glob(f'./USPSdata/Numerals/{i}/*png'))]]
    for i in tqdm(range(10))
]






In [81]:
# Label vector: one float entry per sample, equal to the digit whose group it came from.
y = np.concatenate(
    [np.full(len(imgs[digit]), float(digit)) for digit in range(10)]
)

In [66]:
# Join the per-digit stacks along the first (sample) axis, in digit order,
# so that rows line up with the labels built from the same groups.
X = np.concatenate(imgs)

In [88]:
# Open the output HDF5 file in 'w' mode; the handle is kept open across the
# next cells and is flushed/closed explicitly afterwards.
# NOTE(review): absolute, user-specific path -- consider a configurable data directory.
f = h5py.File(name='/user/vliunda/data/usp_processed.hdf5', mode='w')

In [89]:
# Store images and labels as unsigned 8-bit datasets; y is built as floats
# holding whole digits, so the cast to uint8 is presumably lossless -- confirm.
f.create_dataset(name='X', data=X.astype('uint8'))
f.create_dataset(name='y', data=y.astype('uint8'))

<HDF5 dataset "y": shape (19999,), type "|u1">

In [90]:
# Push buffered data to disk, then release the file handle opened earlier.
# The handle is not managed by a `with` block, so this cell must be run to
# finalize the file.
f.flush()
f.close()