# Classifying Caltech-101 with fixed pre-trained features

## 1. Preparing Caltech-101

* Download image data

In [None]:
%%bash
# Download and unpack Caltech-101 into ~/dataset/Caltech101/images.
mkdir -p ~/dataset/Caltech101  # dataset dir
cd ~/dataset/Caltech101 || exit 1  # never download/delete in the wrong directory
# -q: quiet, -nc: do not download again if the archive is already present.
# NOTE(review): --no-check-certificate disables TLS certificate verification; drop it if the server's certificate validates.
wget https://www.vision.caltech.edu/Image_Datasets/Caltech101/101_ObjectCategories.tar.gz -qnc --no-check-certificate  # get image data
# Only replace an existing images/ directory once extraction has succeeded,
# so a failed download or a corrupt archive cannot wipe a working dataset.
tar xzf 101_ObjectCategories.tar.gz && rm -rf images && mv 101_ObjectCategories images  # rename

* Scan the dataset and set up the training and testing splits

In [None]:
import os
import random
random.seed(20)  # fix the seed (we randomly sample training and testing examples)

# Expand '~' so this cell reads the same directory the download cell wrote to,
# whatever the current user's home directory is.
dataset_dir = os.path.expanduser('~/dataset/Caltech101/')
image_dir = dataset_dir + 'images/'

# os.listdir() returns entries in arbitrary order, so sort them: otherwise the
# category ids and the sampled split change between machines even though the
# seed is fixed. Only directories are categories; skip stray files.
labels = sorted(d for d in os.listdir(image_dir) if os.path.isdir(image_dir + d))
train_list, val_list = [], []

for c, category in enumerate(labels):
    files = sorted(os.listdir(image_dir + category))  # deterministic order before shuffling
    random.shuffle(files)
    for img in files[:30]:  # 30 training samples per class
        train_list.append(image_dir + category + '/' + img + ' ' + str(c))  # "image_path category_id" (following Caffe style)
    for img in files[30:50]:  # at most 20 testing samples per class
        val_list.append(image_dir + category + '/' + img + ' ' + str(c))

random.shuffle(train_list)  # Be sure to shuffle training images (otherwise fine-tuning will fail)

# One entry per line, no trailing newline.
with open(dataset_dir + 'train.txt', 'w') as f:
    f.write('\n'.join(train_list))
with open(dataset_dir + 'val.txt', 'w') as f:
    f.write('\n'.join(val_list))
with open(dataset_dir + 'labels.txt', 'w') as f:
    f.write('\n'.join(labels))  # line number (0-based) == category id

## 2. Set up the pre-trained network

* Just the same as in the previous exercise

In [None]:
import sys
import numpy as np

# Make pycaffe importable.
caffe_root = '/home/ubuntu/apps/caffe/'  # root of the Caffe installation (change this line if yours lives elsewhere)
sys.path.insert(0, caffe_root + 'python')
import caffe  # "No module named _caffe" means pycaffe is not built or caffe_root is wrong.
caffe.set_mode_gpu()

# Pre-trained model: network definition + learned weights.
model_dir = caffe_root + 'models/bvlc_reference_caffenet/'
model_def = model_dir + 'deploy.prototxt'
model_weights = model_dir + 'bvlc_reference_caffenet.caffemodel'
# Alternative model (VGG-16):
#model_def = caffe_root + 'models/VGG16/VGG_ILSVRC_16_layers_deploy.prototxt'
#model_weights = caffe_root + 'models/VGG16/VGG_ILSVRC_16_layers.caffemodel'

# Arguments: model structure, trained weights, test mode (e.g., no dropout).
net = caffe.Net(model_def, model_weights, caffe.TEST)

K, H, W = net.blobs['data'].shape[1:]  # input size (everything after the batch dimension)

# Mean ImageNet image distributed with Caffe, resized to the network input size.
# resize_image is given the mean with channels last, so transpose before and after.
mean_path = caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy'
mu = np.load(mean_path)
mu = caffe.io.resize_image(mu.transpose(1, 2, 0), (H, W)).transpose(2, 0, 1)
#mu = mu.mean(1).mean(1)  # average over pixels to obtain the mean (BGR) pixel values

# Pre-processing for the input blob called 'data'.
input_shape = net.blobs['data'].data.shape
transformer = caffe.io.Transformer({'data': input_shape})

transformer.set_transpose('data', (2, 0, 1))      # move image channels to outermost dimension
transformer.set_channel_swap('data', (2, 1, 0))   # swap channels from RGB to BGR
transformer.set_raw_scale('data', 255)            # rescale from [0, 1] to [0, 255]
transformer.set_mean('data', mu)                  # subtract the (resized) mean image


## 3. Extract features

In [None]:
bsize = 50  # mini batch size
feature_layer = 'fc7'  # blob whose responses are used as features (let's change this!)

(K,H,W) = net.blobs['data'].shape[1:]  # input size
net.blobs['data'].reshape(bsize, K, H, W)  # make the input blob hold one mini batch

features = {'train':[],'val':[]}  # per split: feature matrix, one row per image
crits = {'train':[],'val':[]}     # per split: category id of each image

for T in ['train', 'val']:
    # Each line is "image_path category_id". Drop blank lines (e.g. a trailing
    # newline) so they neither crash the parsing nor count as samples.
    with open(dataset_dir+T+'.txt','r') as f:
        lines = [line.rstrip() for line in f.read().split('\n')]
    lines = [line for line in lines if line]

    for idx, line in enumerate(lines):
        # Split on the LAST space only, so image paths containing spaces still parse.
        imgpath, cat_id = line.rsplit(' ', 1)
        crits[T].append(int(cat_id))
        image = caffe.io.load_image(imgpath)
        transformed_image = transformer.preprocess('data', image)
        slot = idx % bsize  # position of this image inside the current mini batch
        net.blobs['data'].data[slot] = transformed_image
        # Run the network when the batch is full or the list is exhausted.
        if (idx+1) % bsize == 0 or (idx+1) == len(lines):
            net.forward()  # feed forward
            # Keep only the rows filled in this round; the last batch may be partial.
            feat = np.copy(net.blobs[feature_layer].data[:slot+1])
            features[T].append(feat.reshape(feat.shape[0],-1))  # flatten feature matrix (for 'conv' layers)
            print('Feature extracted:  %s ~ %d' % (T, idx+1))

    features[T] = np.vstack(features[T])
    crits[T] = np.hstack(crits[T])
    assert(features[T].shape[0]==crits[T].shape[0])

## 4. Run SVM

In [None]:
import numpy as np
from sklearn import svm


def _fraction_correct(predicted, expected, n_samples):
    """Return the share of the `n_samples` samples whose prediction equals the expected label."""
    n_hits = np.count_nonzero(predicted == expected)
    return n_hits * 1.0 / n_samples


## train a linear SVM classifier
clf = svm.LinearSVC(C=1)  # you may need to change C
clf.fit(features['train'], crits['train'])

# Predict on both splits with the fitted classifier.
yPredTrain = clf.predict(features['train'])
yPredTest = clf.predict(features['val'])

train_score = _fraction_correct(yPredTrain, crits['train'], features['train'].shape[0])
val_score = _fraction_correct(yPredTest, crits['val'], features['val'].shape[0])

print('Training score:  %s' % train_score)
print('Test(validation) score:  %s' % val_score)

## 5. Exercise
* Change the layer from which features are extracted and see how it affects the performance.
* Change the pre-trained network to the VGG-16 model (and ResNet if possible).