In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline
from fastai.imports import *
from fastai.transforms import *
from fastai.conv_learner import *
from fastai.model import *
from fastai.dataset import *
from fastai.sgdr import *
from fastai.plots import *
from path import Path as p

In [12]:
# Root data directory; the image names built later are relative to it.
PATH = "../../../data/"
# Flickr8k images, used as the "neither cat nor dog" class. Derived from PATH
# so the two locations cannot drift apart (same value as before).
PATH2 = PATH + "Flicker8k_Dataset/"
sz = 224  # image size handed to tfms_from_model
bs = 64   # batch size handed to ImageClassifierData
n = 1000 # Number of dogs and cats to train model on
# Seed NumPy's global RNG so the np.random.choice sampling done in later cells
# picks the same images on every Restart & Run All.
SEED = 42
np.random.seed(SEED)
# Integer labels for the two classes
CATDOG = 0 # Dummy class variables
NOTCATDOG = 1 # Not Cat or Not Dog

In [4]:
def get_names(filelist, suffix):
    """Return ``suffix + f.name`` for every entry of ``filelist``, in order.

    Despite its name, ``suffix`` is placed *in front of* each file name, so it
    acts as a directory prefix.

    Args:
        filelist: iterable of objects exposing a ``name`` attribute.
        suffix: string prepended to each ``name``.

    Returns:
        A new list of strings, one per entry of ``filelist``.
    """
    names = []
    for entry in filelist:
        names.append(suffix + entry.name)
    return names

In [13]:
# List every image as a name relative to PATH. Each listing is sorted because
# directory-listing order is filesystem-dependent, and the random sampling done
# on these lists later is only repeatable if their order is.
train_cats = sorted(get_names(p(PATH + "dogscats/train/cats/").files(), "dogscats/train/cats/"))
train_dogs = sorted(get_names(p(PATH + "dogscats/train/dogs/").files(), "dogscats/train/dogs/"))
valid_cats = sorted(get_names(p(PATH + "dogscats/valid/cats/").files(), "dogscats/valid/cats/"))
valid_dogs = sorted(get_names(p(PATH + "dogscats/valid/dogs/").files(), "dogscats/valid/dogs/"))
flickr = sorted(get_names(p(PATH2).files(), "Flicker8k_Dataset/"))
# Sanity check: number of files found in each listing.
print(len(train_cats))
print(len(train_dogs))
print(len(valid_cats))
print(len(valid_dogs))
print(len(flickr))

11500
11500
1000
1000
8091


## Part 1: Not Cats

In [29]:
def prep(train, valid):
    """Build file names, labels and the validation split for a two-class model.

    The ``CATDOG`` class consists of ``n`` names sampled without replacement
    from ``train`` followed by every name in ``valid``. The ``NOTCATDOG`` class
    consists of ``n + len(valid)`` names sampled without replacement from
    ``flickr``. ``n``, ``flickr``, ``CATDOG`` and ``NOTCATDOG`` are read from
    the notebook's global namespace.

    Args:
        train: sequence of training image names for the animal class
            (must hold at least ``n`` entries).
        valid: sequence of validation image names for the animal class.

    Returns:
        A tuple ``(fnames, y, classes, val_idxs)``:
        fnames   -- animal names followed by non-animal names.
        y        -- numpy array of labels, aligned with ``fnames``.
        classes  -- list of the distinct labels found in ``y``.
        val_idxs -- positions in ``fnames`` that form the validation set.
    """
    animals = list(np.random.choice(train, n, False)) + list(valid)
    notanimals = list(np.random.choice(flickr, n + len(valid), False))
    # Bug fix: this used to be bound to `fname`, so the `fnames` read below was
    # either undefined (NameError) or a stale global left over from another cell.
    fnames = animals + notanimals
    y = np.array([CATDOG] * len(animals) + [NOTCATDOG] * len(notanimals))
    classes = list(set(y))
    # Within each class the first n entries are training images and the
    # remaining len(valid) entries are validation images.
    v_cat_dog_idx = range(n, n + len(valid))
    v_not_idx = range(len(animals) + n, len(fnames))
    val_idxs = list(v_cat_dog_idx) + list(v_not_idx)
    return (fnames, y, classes, val_idxs)

# fnames: file names
# y: numpy array which contains target labels ordered by filenames.
# classes: a list of all labels/classifications, [0, 1]
# val_idxs: indices of the images to be used for validation.

In [30]:
# Cats are the positive (CATDOG) class for this first experiment.
(fnames, y, classes, val_idxs) = prep(train=train_cats, valid=valid_cats)

In [14]:
# NOTE(review): this cell builds inline the same variables that
# prep(train_cats, valid_cats) returns; when run after that call it overwrites
# them with a fresh random sample.
catdogs = list(np.random.choice(train_cats, n, False)) + list(valid_cats)
# Draw as many negatives as there are positives. This was a hard-coded 1000,
# which only matched because valid_cats happens to hold 1000 files.
notcatdogs = list(np.random.choice(flickr, n + len(valid_cats), False))
fnames = catdogs + notcatdogs
y = np.array([CATDOG] * len(catdogs) + [NOTCATDOG] * len(notcatdogs))
classes = list(set(y))
# Within each class the first n entries are training images, the rest validation.
v_cat_dog_idx = range(n, n + len(valid_cats))
v_not_idx = range(len(catdogs) + n, len(fnames))
val_idxs = list(v_cat_dog_idx) + list(v_not_idx)

In [31]:
# Build the data object and the learner on top of the pretrained ResNet-34
# ImageNet model.
arch = resnet34
data = ImageClassifierData.from_names_and_array(
    PATH, fnames, y, classes, val_idxs,
    bs=bs,
    tfms=tfms_from_model(arch, sz),
)
learn = ConvLearner.pretrained(arch, data, precompute=True)

In [32]:
# Train for 5 epochs with a learning rate of 0.01.
lr, epochs = 0.01, 5
learn.fit(lr, epochs)

HBox(children=(IntProgress(value=0, description='Epoch', max=5), HTML(value='')))

epoch      trn_loss   val_loss   accuracy                                                                              
    0      0.122568   0.028269   0.9905    
    1      0.063907   0.02063    0.9925                                                                                
    2      0.037626   0.019258   0.9935                                                                                
    3      0.02648    0.018233   0.993                                                                                 
    4      0.021621   0.017585   0.9935                                                                                



[array([0.01758]), 0.9935]

In [25]:
# Create our prediction function
def predict(learner, pred_files):
    """Return the predicted class index for every image in ``pred_files``.

    The images are wrapped in a ``FilesIndexArrayDataset`` (with dummy zero
    labels and the validation transforms for the global ``arch``/``sz``),
    rooted at the global ``PATH``, and run through ``learner.predict_dl``.
    ``learner.precompute`` is switched off for the duration of the call and
    restored afterwards, even if prediction raises.

    Args:
        learner: object exposing a ``precompute`` attribute and a
            ``predict_dl(dataloader)`` method.
        pred_files: iterable of image names relative to ``PATH``.

    Returns:
        1-D numpy array with one class index per input file (empty when
        ``pred_files`` is empty).
    """
    # Materialise once so generators/iterators work as well as lists.
    files = list(pred_files)
    if not files:
        return np.array([], dtype=np.int64)
    orig_precompute = learner.precompute
    learner.precompute = False
    try:
        _, val_tfms = tfms_from_model(arch, sz)
        ds = FilesIndexArrayDataset(files, np.zeros(len(files)), val_tfms, PATH)
        dl = DataLoader(ds)
        log_preds = learner.predict_dl(dl)
    finally:
        # Always put the learner back the way we found it.
        learner.precompute = orig_precompute
    # exp() is strictly increasing, so taking argmax of the raw scores gives
    # the same result as exponentiating them first.
    return np.argmax(log_preds, axis=1)

In [27]:
# Run the model on the validation dogs. Predictions are class indices, so
# their sum counts the images assigned class 1 (NOTCATDOG).
pred_dogs = predict(learn, valid_dogs)
print(pred_dogs.sum())

876
