In [1]:
import os

import numpy as np
import tensorflow as tf

from tensorflow_vgg import vgg16
from tensorflow_vgg import utils

data_dir = 'dogImages/'
contents = os.listdir(data_dir)
classes = [each for each in contents if os.path.isdir(data_dir + each)]

In [2]:
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

batch_size = 10
codes_list = []
labels = []
batch = []

codes = None

with tf.Session() as sess:   
    vgg = vgg16.Vgg16()
    input_ = tf.placeholder(tf.float32, [None, 224, 224, 3])
    with tf.name_scope("content_vgg"):
        vgg.build(input_)

    for each in classes:
        print("Starting {} images".format(each))
        class_path = data_dir + each
        files = os.listdir(class_path)
        for ii, file in enumerate(files, 1):
            # Add images to the current batch
            # utils.load_image crops the input images for us, from the center
            img = utils.load_image(os.path.join(class_path, file))
            batch.append(img.reshape((1, 224, 224, 3)))
            labels.append(each)
            
            # Running the batch through the network to get the codes
            if ii % batch_size == 0 or ii == len(files):
                
                # Image batch to pass to VGG network
                
                images = np.concatenate(batch)
                
                feed_dict = {input_: images}
                codes_batch = sess.run(vgg.relu7, feed_dict=feed_dict)
                
                # Here I'm building an array of the codes
                if codes is None:
                    codes = codes_batch
                else:
                    codes = np.concatenate((codes, codes_batch))
                
                # Reset to start building the next batch
                batch = []
                print('{} images processed'.format(ii))

/home/wangyao/Desktop/Transfer_Learning/tensorflow_vgg/vgg16.npy
npy file loaded
build model started
build model finished: 0s
Starting 056.Dachshund images
10 images processed
20 images processed
30 images processed
40 images processed
50 images processed
60 images processed
70 images processed
80 images processed
82 images processed
Starting 100.Lowchen images
10 images processed
20 images processed
30 images processed
40 images processed
42 images processed
Starting 001.Affenpinscher images
10 images processed
20 images processed
30 images processed
40 images processed
50 images processed
60 images processed
70 images processed
80 images processed
Starting 065.Entlebucher_mountain_dog images
10 images processed
20 images processed
30 images processed
40 images processed
50 images processed
53 images processed
Starting 092.Keeshond images
10 images processed
20 images processed
30 images processed
40 images processed
50 images processed
55 images processed
Starting 064.English_toy_spa

In [3]:
# write codes to file
with open('codes', 'w') as f:
    codes.tofile(f)
    
# write labels to file
import csv
with open('labels', 'w') as f:
    writer = csv.writer(f, delimiter='\n')
    writer.writerow(labels)

In [4]:
# read codes and labels from file
import csv

with open('labels') as f:
    reader = csv.reader(f, delimiter='\n')
    labels = np.array([each for each in reader]).squeeze()
with open('codes') as f:
    codes = np.fromfile(f, dtype=np.float32)
    codes = codes.reshape((len(labels), -1))

In [5]:
# One-hot encode labels
from sklearn import preprocessing

lb = preprocessing.LabelBinarizer()
lb.fit(labels)
labels_vecs = lb.transform(labels)

In [8]:
import sklearn
from sklearn import svm
from sklearn import cross_validation, grid_search
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.svm import SVC
from sklearn.externals import joblib
 
def train_svm_classifer(codes, labels):
    # save 20% of data for performance evaluation
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(codes, labels, train_size=0.8, test_size=0.1)
    print("Train shapes (x, y):", X_train.shape, y_train.shape)
    print("Test shapes (x, y):", X_test.shape, y_test.shape)
    
    clf = svm.SVC(C=1, kernel='rbf', probability=True)
    clf.fit(X_train, y_train)
        
    score = clf.score(X_test,y_test)
    print("Test accuracy:", score)
    
    y_predict=clf.predict(X_test)
    return y_predict

In [9]:
train_svm_classifer(codes,labels)

Train shapes (x, y): (6623, 4096) (6623,)
Test shapes (x, y): (828, 4096) (828,)
Test accuracy: 0.863526570048


array(['017.Bearded_collie', '002.Afghan_hound', '015.Basset_hound',
       '056.Dachshund', '045.Cardigan_welsh_corgi', '029.Border_collie',
       '044.Cane_corso', '115.Papillon', '131.Wirehaired_pointing_griffon',
       '038.Brussels_griffon', '015.Basset_hound',
       '113.Old_english_sheepdog', '058.Dandie_dinmont_terrier',
       '119.Petit_basset_griffon_vendeen', '050.Chinese_shar-pei',
       '034.Boxer', '026.Black_russian_terrier', '002.Afghan_hound',
       '024.Bichon_frise', '051.Chow_chow', '037.Brittany',
       '107.Norfolk_terrier', '063.English_springer_spaniel',
       '015.Basset_hound', '111.Norwich_terrier', '020.Belgian_malinois',
       '088.Irish_water_spaniel', '021.Belgian_sheepdog', '082.Havanese',
       '002.Afghan_hound', '014.Basenji', '010.Anatolian_shepherd_dog',
       '063.English_springer_spaniel', '115.Papillon',
       '049.Chinese_crested', '076.Golden_retriever',
       '068.Flat-coated_retriever', '084.Icelandic_sheepdog', '054.Collie',
   