In [1]:
import os
#os.environ['THEANO_FLAGS']='mode=FAST_RUN,device=' + device + ',floatX=float32,nvcc.flags=-D_FORCE_INLINES'

import argparse
import csv
import datetime
import math
import sys
import time
import json
import cPickle # for saving scaler and labelencoder
import numpy as np

import pandas as pd # Pandas for easier Data handling in preparation

from sklearn import preprocessing
from sklearn.cross_validation import ShuffleSplit, KFold, StratifiedKFold
from sklearn.metrics import accuracy_score, confusion_matrix

from theano import config
from theano import function as tfunction

import keras
from keras.optimizers import SGD, RMSprop, Adagrad
from keras.callbacks import Callback, History, EarlyStopping, ModelCheckpoint # BaseLogger,


Using Theano backend.
  "downsample module has been moved to the theano.tensor.signal.pool module.")


In [36]:
from keras.preprocessing import image

In [3]:

# seed handed to the cross-validation split (random_state) so that the folds are reproducible
RANDOM_SEED = 2107


# adapted from https://github.com/fchollet/keras/blob/master/keras/applications/imagenet_utils.py
# (skipping 0 mean as we do this separately)

def reorder_RGB(data, dim_ordering='default'):
    '''Reverse the colour-channel axis of a 4-D image batch ('RGB' -> 'BGR').

    data: 4-D array; the channel axis is axis 1 for 'th' and axis 3 for 'tf'
    dim_ordering: 'th' (Theano), 'tf' (Tensorflow) or 'default', which is treated as 'th'

    returns the input sliced with the channel axis reversed.
    An AssertionError is raised for any other dim_ordering value.
    '''
    # 'default' is pinned to Theano ordering (K.image_dim_ordering() would be the dynamic choice)
    ordering = 'th' if dim_ordering == 'default' else dim_ordering
    assert ordering in {'tf', 'th'}

    if ordering == 'tf':
        # channels last
        return data[:, :, :, ::-1]
    # channels first
    return data[:, ::-1, :, :]


In [4]:

# STANDARDIZE DATA

def standardize(data, return_scaler = True, copy=True):
    '''standardize the data with zero mean unit variance (feature attribute-wise)

    data: numpy array to be transformed
    return_scaler: if True, a tuple of (data, scaler) will be returned with the scaler object containing all necessary parameters to scale other data again
    copy = False means try to avoid a copy and do inplace scaling instead.

    returns: (scaled data, fitted scaler) if return_scaler is True, otherwise only the scaled data
    '''

    if return_scaler:
        # STANDARDIZATION (0 mean, unit var)
        # `copy` is passed by keyword: the StandardScaler constructor arguments are
        # keyword-only in current scikit-learn releases, a positional value raises TypeError there
        scaler = preprocessing.StandardScaler(copy=copy)
        # alternative: NORMALIZATION (min - max Normalization to (0,1))
        #scaler = preprocessing.MinMaxScaler()
        data = scaler.fit_transform(data)
        return (data, scaler)
    else:
        return preprocessing.scale(data,axis=0,copy=copy)
        # axis=0 means independently standardize each feature, otherwise (if 1) standardize each sample

    # how to get scaler parameters:
    #print scaler.mean_
    #print scaler.scale_


def standardize_flat(data):
    '''z-score the data with one mean and one standard deviation taken over all elements.'''
    from scipy.stats import mstats
    # axis=None: the statistics are computed over the whole array, not per feature
    # the manual version:
    #   (data - np.mean(data)) / np.std(data)
    flat_scores = mstats.zscore(data, axis=None)
    return flat_scores



In [5]:
    # settings
    folds = 3   # number of cross-validation folds
    size = 224  # edge length that appears in the name of the image archive

    PATH = '/data/images/monds-persondetect-7k'  # base path
    thumbs_path = os.path.join(PATH, 'thumbs')
    csv_file = os.path.join(PATH, 'labels.csv')

    # the archive name carries the size twice (<size>x<size>)
    file = "mon_style_persons_cropped_resized_%dx%d.npz" % (size, size)
    img_thumb_file = os.path.join(thumbs_path, file)

In [52]:
    # load meta-data

    image_list = pd.read_csv(csv_file)
    image_files = list(image_list.image)
    groundtruth = list(image_list.person)

    # load image data
    data = np.load(img_thumb_file)
    images = data['images']
    file_ids = data['filenames']
    data.close() #  the returned instance of NpzFile class must be closed to avoid leaking file descriptors
    
    # dimensions
    n_images = images.shape[0]
    img_size = images.shape[1:3]
    n_channels = images.shape[3]
    #images.shape
    
    print "Loaded", images.shape[0], "images with size", img_size, "and", n_channels, "color channels"

Loaded 7833 images with size (224, 224) and 3 color channels


In [53]:
    # re-sort groundtruth by file_ids order from image thumbs file
    groundtruth_df = pd.DataFrame(groundtruth, index=image_files) # labels indexed by image file name
    # .loc is strictly label-based; the deprecated .ix indexer could silently fall back
    # to positional lookup and no longer exists in current pandas
    groundtruth_df = groundtruth_df.loc[file_ids]
    groundtruth = groundtruth_df[0].values # is now np.ndarray

In [54]:
# plain Python list of the labels; the following cells call list.count() / len() on it
gt_list = list(groundtruth)

In [55]:
print "Groundtruth Statistics:"
#class_counts = 
for v in set(gt_list):
    print "Class", v, ":", gt_list.count(v)

Groundtruth Statistics:
Class 0 : 2164
Class 1 : 5669


In [56]:
# majority-class baseline: the accuracy reached by always predicting the most frequent class.
# The count is derived from the labels instead of being copied by hand from the
# statistics output (it was hard-coded as 5669), so it stays correct when the data changes.
baseline = max(gt_list.count(v) for v in set(gt_list)) * 1.0 / len(gt_list)
baseline

0.7237329248053108

In [41]:
# Probably not needed: casting the labels to Theano's floatX (left disabled)
#groundtruth=groundtruth.astype(config.floatX)

In [57]:
    # reorder data

    # color channel is the last axis in input images, we move it to axis 1
    # np.swapaxes(images, 1, 3) would additionally exchange the two spatial axes and
    # thereby transpose every image. np.transpose with an explicit axis order keeps
    # the spatial axes in their original order: (n, d1, d2, channels) -> (n, channels, d1, d2)
    images = np.transpose(images, (0, 3, 1, 2))
    images.shape

(7833, 3, 224, 224)

In [58]:
    # reverse the colour-channel axis ('RGB' -> 'BGR'); reorder_RGB treats 'default' as Theano ordering
    # TODO why is this done (e.g. in ResNet 50)
    # TODO the channel axis depends on the backend ordering (Theano vs Tensorflow): change the code above to incorporate K.image_dim_ordering()
    images = reorder_RGB(images, dim_ordering='default') 
    images.shape

(7833, 3, 224, 224)

In [59]:
# sanity check: value range of the image data before any scaling
images.min(), images.max()

(0, 255)

In [61]:
# cast to the float type configured in Theano (config.floatX); creates a new array
images = images.astype(config.floatX)

In [62]:
# standardize each RGB channel - inspired from keras ResNet50 / image_util preprocess

x = images  # alias, not a copy: the channels are rescaled in place

# iterate over the actual length of the channel axis (axis 1) instead of a hard-coded 3
for c in range(x.shape[1]):
    channel_mean = x[:, c, :, :].mean()  # computed once, used for the printout and the centering
    print(channel_mean)
    x[:, c, :, :] -= channel_mean
    x[:, c, :, :] /= x[:, c, :, :].std() # was not in ResNet implementation

images = x

186.14
188.196
194.957


In [63]:
# value range after the per-channel standardization
images.min(), images.max()

(-2.5346594, 0.86231875)

In [50]:
    # standardize FLAT (one mean / stdev over all values, see standardize_flat)
    # NOTE(review): the output recorded for this cell is a MemoryError - confirm whether this step is still wanted
    # TODO verify which kind of standardization we need
    # TODO verify if the data order is right for this standardization
    # TODO also check whether we need to standardize each channel separately
    images = standardize_flat(images)
    images.dtype

MemoryError: 

In [17]:
# manual standardization, done in place
# note: with axis=0 there is one mean / stdev per position across all images,
# i.e. this is not a single "flat" statistic over the whole array
position_mean = np.mean(images, axis=0)
images -= position_mean  # 0 center
position_std = np.std(images, axis=0)  # taken from the already centered data
images /= position_std  # normalize by stdev


In [18]:
# value range after the manual (axis=0) standardization
images.min(), images.max()

(-279.01828, 41.362053)

In [64]:
# mean over all values; should be close to 0 once the data is standardized
images.mean()

7.7521609e-06

In [65]:
    # create folds
    # stratified on the labels, shuffled, seeded with RANDOM_SEED; `folds` sets the number of folds
    splits = StratifiedKFold(groundtruth, n_folds=folds, shuffle=True, random_state=RANDOM_SEED)


In [66]:
    # print the size of every fold
    # note: after the loop train_ind / test_ind still hold the indices of the LAST fold,
    # and the cells below rely on exactly that
    for train_ind, test_ind in splits:
        print "Train:", len(train_ind), "Test:", len(test_ind)


Train: 5221 Test: 2612
Train: 5222 Test: 2611
Train: 5223 Test: 2610


In [113]:
# select the images of the last fold (train_ind / test_ind are left over from the fold loop)
train_img = images[train_ind]
test_img = images[test_ind]

In [68]:
# labels matching train_img / test_img (same index arrays)
train_gt = groundtruth[train_ind]
test_gt = groundtruth[test_ind]

In [18]:
# sanity check: number of training labels
train_gt.shape

(5223,)

In [19]:
# sanity check: the first dimension must match the number of training labels
train_img.shape

(5223, 3, 224, 224)

In [26]:
# sanity check: dtype of the image data that is fed to the network
train_img.dtype

dtype('float32')

In [27]:
# sanity check: dtype of the labels
train_gt.dtype

dtype('int64')

In [115]:
# shape of one sample (everything but the first axis of images); used as input_shape of the CNNs below
input_shape = images.shape[1:]
# one output unit -> the models below end in a single sigmoid unit
output_units = 1

# more than one output unit switches the models below to a softmax output layer
multi_class = output_units > 1
multi_class

False

In [70]:
# Keras 0.3
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout, Flatten # Reshape,
from keras.layers.convolutional import Convolution2D, MaxPooling2D # CNN
from keras.optimizers import SGD

In [None]:
# Keras 1.x
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.optimizers import SGD

In [83]:
# taking just one channel for full model
# index 0 on axis 1 selects the first channel of every image
imagesch1 = images[:,0,:,:]

In [91]:
# flatten every image into one feature vector: result has one row per image
images_flat = images.reshape(images.shape[0],-1)

In [92]:
# same fold selection as before, but on the flattened images;
# this overwrites the train_img / test_img taken from the 4-D array
train_img = images_flat[train_ind]
test_img = images_flat[test_ind]


In [95]:
# the parentheses do not build a tuple: input_shape becomes a plain number (length of one
# flattened image) and is passed as input_dim to the fully connected model below.
# It overwrites the per-sample shape that the CNN cells expect in input_shape.
input_shape = (images_flat.shape[1])

In [107]:
# simple Full network
# fully connected model; input_shape is passed as input_dim here, so it has to be the
# plain number derived from the flattened images
# NOTE(review): the two hidden Dense layers get no activation argument - confirm this is intended
model = Sequential()

model.add(Dense(256, input_dim=input_shape))
model.add(Dense(256))
# single sigmoid unit for the binary 0/1 label
model.add(Dense(1,activation='sigmoid'))



In [117]:
# simple CNN to test
# replaces any model built before; input_shape must be the per-sample shape of the 4-D image array

model = Sequential()
# (comment taken over from the Keras guide: input 100x100 images with 3 channels -> input_shape should be (3, 100, 100);
#  here the actual value comes from the input_shape variable)
# this applies 32 convolution filters of size 3x3 each.
model.add(Convolution2D(32, 3, 3, border_mode='valid', input_shape=input_shape))
# the following three layers are disabled:
#model.add(Activation('relu')) # WAS COMMENTED OUT WHEN WORKED
#model.add(MaxPooling2D(pool_size=(2, 2))) # NEW
#model.add(Dropout(0.1)) # NEW

model.add(Convolution2D(32, 3, 3))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.1))

model.add(Flatten())
# Note: Keras does automatic shape inference.
model.add(Dense(256))
# activation of the hidden Dense layer is disabled:
#model.add(Activation('relu'))
model.add(Dropout(0.1))

# output layer: softmax over output_units classes, or a single sigmoid unit for the binary case
if output_units > 1:
    model.add(Dense(output_units))
    model.add(Activation('softmax'))
else:
    model.add(Dense(1,activation='sigmoid'))


In [124]:
# VGG-like convnet:
# (from https://keras.io/getting-started/sequential-model-guide/)
# replaces any model built before; input_shape must be the per-sample shape of the 4-D image array

model = Sequential()
# (comment taken over from the Keras guide: input 100x100 images with 3 channels -> input_shape should be (3, 100, 100);
#  here the actual value comes from the input_shape variable)
# first block: two convolutions with 32 filters of size 3x3 each, then 2x2 max pooling and dropout
model.add(Convolution2D(32, 3, 3, border_mode='valid', input_shape=input_shape))
model.add(Activation('relu'))
model.add(Convolution2D(32, 3, 3))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# second block: same layout with 64 filters
model.add(Convolution2D(64, 3, 3, border_mode='valid'))
model.add(Activation('relu'))
model.add(Convolution2D(64, 3, 3))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# classifier part: flatten, one hidden Dense layer with dropout
model.add(Flatten())
# Note: Keras does automatic shape inference.
model.add(Dense(256))
model.add(Activation('relu'))
model.add(Dropout(0.5))

# output layer: softmax over output_units classes, or a single sigmoid unit for the binary case
if output_units > 1:
    model.add(Dense(output_units))
    model.add(Activation('softmax'))
else:
    model.add(Dense(1,activation='sigmoid'))


In [125]:
# print the layer-by-layer overview (output shapes, parameter counts) of the current model
model.summary()

--------------------------------------------------------------------------------
Initial input shape: (None, 3, 224, 224)
--------------------------------------------------------------------------------
Layer (name)                  Output Shape                  Param #             
--------------------------------------------------------------------------------
Convolution2D (Unnamed)       (None, 32, 222, 222)          896                 
Activation (Unnamed)          (None, 32, 222, 222)          0                   
Convolution2D (Unnamed)       (None, 32, 220, 220)          9248                
Activation (Unnamed)          (None, 32, 220, 220)          0                   
MaxPooling2D (Unnamed)        (None, 32, 110, 110)          0                   
Dropout (Unnamed)             (None, 32, 110, 110)          0                   
Convolution2D (Unnamed)       (None, 64, 108, 108)          18496               
Activation (Unnamed)          (None, 64, 108, 108)          0       

In [119]:
# we use test data as validation data to see direct results (usually not recommended)
validation_data = (test_img, test_gt)

In [128]:
if multi_class:
    loss = 'categorical_crossentropy' # multi class
    class_mode='categorical'
    #optimizer = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
    # try simpler first
    optimizer = SGD(lr=0.001)
else: # binary 0/1
    loss = 'binary_crossentropy' 
    class_mode='binary'
    optimizer = 'rmsprop'
    #optimizer = RMSprop(lr=0.0001)
    #optimizer = 'adam'
    #optimizer = SGD(lr=0.001)

print "Class mode:", class_mode, ", multi_class:", multi_class
    

Class mode: binary , multi_class: False


In [129]:

# compile the current model with the loss / optimizer / class_mode selected in the previous cell
model.compile(loss=loss, optimizer=optimizer, class_mode=class_mode)
    

In [None]:
    # train for a fixed number of epochs in batches of 32;
    # validation_data (the test fold) is evaluated alongside the training
    epochs = 5
    model.fit(train_img, train_gt, batch_size=32, nb_epoch=epochs, validation_data=validation_data, show_accuracy=True)

Train on 5223 samples, validate on 2610 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5

In [77]:
# verify Accuracy on Train set
# NOTE(review): the recorded result (0.7237...) is practically identical to the majority-class
# baseline computed further up - check whether the model predicts a single class only
train_pred = model.predict_classes(train_img)
accuracy_score(train_gt, train_pred)



0.72372199885123489

In [78]:
# TESTING
# accuracy on the held-out fold (the same data that served as validation data during training)
# NOTE(review): the recorded result (0.7237...) is practically identical to the majority-class
# baseline computed further up - check whether the model predicts a single class only
test_pred = model.predict_classes(test_img)
acc = accuracy_score(test_gt, test_pred)
acc



0.72375478927203063