In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from six.moves import cPickle as pickle
from scipy import ndimage
from scipy.misc import imresize
%matplotlib inline

# Divisor and centring constant used by load_image to normalise pixel values.
pixel_depth = 255.0
# Width and height, in pixels, of the resized crops (the loaders below
# produce 32x32 arrays).
screen_width = 32
screen_height = 32

In [2]:
# Load the pickled SVHN metadata. The loaders below index it as
# dataset[split][key] with the keys 'images', 'labels', 'tops', 'heights',
# 'lefts' and 'widths'.
# SECURITY NOTE: pickle.load can execute arbitrary code embedded in the file;
# only open a svhn.pickle that you produced yourself or otherwise trust.
try:
    with open('svhn.pickle', 'rb') as f:
        dataset = pickle.load(f)

except Exception as e:
    # Name the file that was actually opened, then re-raise so the notebook
    # stops here instead of failing later on an undefined `dataset`.
    print('Unable to process data from svhn.pickle', ':', e)
    raise

In [3]:
def load_image(image_file, path='train/', **box):
    """Read one image, crop it to a bounding box and return a normalised array.

    Parameters
    ----------
    image_file : str
        File name of the image; it is appended to ``path`` as-is.
    path : str
        Directory prefix, including its trailing separator.
    **box
        Crop bounds in pixels, all required:
        ``minTop`` / ``minLeft`` are the first row / column kept (negative
        values are clamped to 0); ``maxTopHeight`` / ``maxLeftWidth`` are the
        exclusive end row / column.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(screen_height, screen_width)`` holding
        ``(pixel - pixel_depth / 2) / pixel_depth`` for every resized pixel.

    Notes
    -----
    Relies on ``scipy.ndimage.imread`` and ``scipy.misc.imresize``, which only
    exist in old SciPy releases. NOTE(review): plan a migration before
    upgrading SciPy.
    """
    image_data = ndimage.imread(path + image_file)
    # Collapse colour channels to a single grey plane. A greyscale file is
    # already 2-D and has no channel axis to average over.
    if image_data.ndim == 3:
        image_data = np.average(image_data, axis=2)

    # Slice bounds must be integers; clamp the origin so a box that starts
    # slightly outside the picture does not wrap around via negative indexing.
    top = max(int(box['minTop']), 0)
    left = max(int(box['minLeft']), 0)
    bottom = int(box['maxTopHeight'])
    right = int(box['maxLeftWidth'])

    image_data = image_data[top:bottom, left:right]
    # Use the notebook-level size constants rather than a literal (32, 32).
    image_data = imresize(image_data, (screen_height, screen_width))
    return (image_data.astype(float) - pixel_depth / 2) / pixel_depth

In [4]:
def load_images(dataset, struct):
    """Load, crop and normalise every image of one dataset split.

    For each sample the bounding box enclosing all of its labelled digits is
    computed from the per-digit ``tops``/``heights``/``lefts``/``widths`` and
    handed to ``load_image``.

    Parameters
    ----------
    dataset : dict
        Mapping ``split -> {'images', 'labels', 'tops', 'heights', 'lefts',
        'widths'}``; every value is indexed per sample.
    struct : str
        Name of the split to load. Files are read from ``struct + '/'``,
        except for ``'valid'`` whose files are read from ``'extra/'``.

    Returns
    -------
    numpy.ndarray or None
        ``float32`` array of shape ``(n_images, screen_height, screen_width)``.
        If any sample fails, diagnostics for that sample are printed and
        ``None`` is returned immediately, so callers should check the result.
    """
    split = dataset[struct]
    images = split['images']
    labels = split['labels']
    tops = split['tops']
    widths = split['widths']
    heights = split['heights']
    lefts = split['lefts']

    # Loop-invariant: the 'valid' split has no directory of its own.
    path = 'extra/' if struct == 'valid' else struct + '/'
    data = np.ndarray(shape=(images.shape[0], screen_height, screen_width),
                      dtype=np.float32)

    for i in range(data.shape[0]):
        if i % 5000 == 0:
            print(i, "elapsed out of ", data.shape[0], "for: ", struct)

        # Pre-bind so the error report never trips over an unbound name.
        chrCount = None
        box = None
        try:
            # Only label entries greater than -1 count as digits; the
            # per-digit arrays are truncated to that many entries.
            chrCount = labels[i][labels[i] > -1].shape[0]
            # Row 0 holds the origins, row 1 the extents.
            topHeights = np.array([tops[i][:chrCount], heights[i][:chrCount]])
            leftWidths = np.array([lefts[i][:chrCount], widths[i][:chrCount]])
            box = {
                "minTop": min(topHeights[0, :]),
                # The left edge comes from the `lefts` row (row 0), not from
                # the widths row.
                "minLeft": min(leftWidths[0, :]),
                "maxTopHeight": topHeights.sum(axis=0).max(),
                "maxLeftWidth": leftWidths.sum(axis=0).max(),
            }
            data[i, :, :] = load_image(images[i], path, **box)
        except Exception as e:
            # Best-effort diagnostics: the image itself may be what failed,
            # so reading its shape must not raise a second error.
            try:
                img_shape = ndimage.imread(path + images[i]).shape[:2]
            except Exception:
                img_shape = None
            details = dict(box) if box is not None else {}
            details.update({
                "lefts": lefts[i],
                "widths": widths[i],
                "message": str(e),
            })
            print(i, chrCount, img_shape, details)
            return None
    return data

# Build the image array for every split. Each call reads all files of the
# split from disk, so this cell is slow (progress is printed every 5000 images).
# load_images may return None after printing diagnostics if an image fails,
# so check the results before using or saving them.
trX = load_images(dataset, 'train')
teX = load_images(dataset, 'test')
# The 'valid' split's image files are read from the 'extra/' directory.
vaX = load_images(dataset, 'valid')
exX = load_images(dataset, 'extra')

(0, 'elapsed out of ', 33402, 'for: ', 'train')




(5000, 'elapsed out of ', 33402, 'for: ', 'train')
(10000, 'elapsed out of ', 33402, 'for: ', 'train')
(15000, 'elapsed out of ', 33402, 'for: ', 'train')
(20000, 'elapsed out of ', 33402, 'for: ', 'train')
(25000, 'elapsed out of ', 33402, 'for: ', 'train')
(30000, 'elapsed out of ', 33402, 'for: ', 'train')
(0, 'elapsed out of ', 13068, 'for: ', 'test')
(5000, 'elapsed out of ', 13068, 'for: ', 'test')
(10000, 'elapsed out of ', 13068, 'for: ', 'test')
(0, 'elapsed out of ', 2000, 'for: ', 'valid')
(0, 'elapsed out of ', 202353, 'for: ', 'extra')
(5000, 'elapsed out of ', 202353, 'for: ', 'extra')
(10000, 'elapsed out of ', 202353, 'for: ', 'extra')
(15000, 'elapsed out of ', 202353, 'for: ', 'extra')
(20000, 'elapsed out of ', 202353, 'for: ', 'extra')
(25000, 'elapsed out of ', 202353, 'for: ', 'extra')
(30000, 'elapsed out of ', 202353, 'for: ', 'extra')
(35000, 'elapsed out of ', 202353, 'for: ', 'extra')
(40000, 'elapsed out of ', 202353, 'for: ', 'extra')
(45000, 'elapsed out o

In [5]:
# Label arrays for each split, taken unchanged from the loaded dataset.
# load_images treats label entries greater than -1 as digits when it counts
# the characters of a sample.
trY = dataset['train']['labels']
teY = dataset['test']['labels']
vaY = dataset['valid']['labels']
exY = dataset['extra']['labels']

In [8]:
# Persist the image arrays and their labels, grouped per split, into a single
# pickle file for later use.
try:
    with open('tensorflow_data.pickle', 'wb') as f:
        pickle.dump({
            'train': {'data': trX, 'label': trY},
            'test': {'data': teX, 'label': teY},
            'valid': {'data': vaX, 'label': vaY},
            'extra': {'data': exX, 'label': exY}
        }, f, pickle.HIGHEST_PROTOCOL)
except Exception as e:
    # Report the file that was actually being written (no `struct` variable
    # is defined at notebook scope here), then re-raise so a failed save is
    # not mistaken for success.
    print('Unable to save data to', 'tensorflow_data.pickle', ':', e)
    raise