In [1]:
import os
import matplotlib.pyplot as plt
import nltk
import numpy as np
import pandas as pd
import pickle
from pprint import pprint
from IPython.display import clear_output
import traceback

import keras
from keras import backend as K
from keras.applications.vgg16 import VGG16
from keras.callbacks import ModelCheckpoint, ProgbarLogger, TensorBoard, CSVLogger
from keras.layers import Dense, GlobalAveragePooling2D, LSTM, Embedding, TimeDistributed, \
                         RepeatVector, Merge, Activation, Flatten
from keras.models import Model, Sequential
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing.text import text_to_word_sequence, Tokenizer
from keras.preprocessing.sequence import pad_sequences

# Viz
from keras.utils.vis_utils import model_to_dot
from IPython.display import SVG


rseed = 4444 # seed intended for any function that uses randomness
EMBEDDING_DIM = 128 # width of the Dense projections applied to the image and language branches in create_model
GLOVE_DIM = 100 # length of each GloVe word vector; must agree with the glove.6B.100d.txt file that is loaded

%matplotlib inline

Using TensorFlow backend.


In [2]:
DATASET_PATH = '../Flickr8k_Dataset' # path to where the images are located
CAPTION_PATH = '../Flickr8k_Captions/Flickr8k.lemma.token.txt' # Lemmatized tokens
TRAIN_PATH = '../Flickr8k_Captions/Flickr_8k.trainImages.txt' # Ids of Images used for training
TEST_PATH = '../Flickr8k_Captions/Flickr_8k.testImages.txt' # Ids of images used for testing
GLOVE_DIR = '../glove.6B' # directory holding the GloVe vector file read by build_embedding_layer

# Build Class For Captions

In [3]:
class CaptionGenerator():
    """Caption data preparation and model construction for the Flickr8k captioner.

    Constructing an instance loads the cached image encodings (when the pickle
    exists), reads the caption file and the train/test image-id lists, and derives
    the vocabulary, the sample counts and the maximum caption length that the data
    generator and the model rely on.
    """

    def __init__(self):
        # Everything set to None here is filled in by variable_initializer().
        self.max_cap_len = None       # token count of the longest tagged caption
        self.vocab_size = None        # number of distinct tokens (tags included)
        self.index_word = None        # index -> word
        self.word_index = None        # word  -> index
        self.total_samples = None     # (partial caption, next word) pairs overall
        self.training_samples = None  # ... within the 'train' rows
        self.test_samples = None      # ... within the 'test' rows
        self.df = None                # columns: image, caption, label
        self.encoded_images = self._load_encoded_images("encoded_images.pkl")
        self.variable_initializer()

    @staticmethod
    def _load_encoded_images(path):
        """Return the pickled image encodings stored at ``path``, or None if the file is missing.

        A missing file is tolerated so that an instance (and its ``df``) can be
        created before the image-encoding step has produced the pickle.
        """
        try:
            # NOTE: unpickling can execute arbitrary code; only load files you created yourself.
            with open(path, "rb") as f:
                return pickle.load(f)
        except FileNotFoundError:
            print("Encoded images not found at '%s'; run the image-encoding step "
                  "before calling data_generator()." % path)
            return None

    @staticmethod
    def _count_samples(captions):
        """Number of training pairs in ``captions``: every word except the first is predicted once."""
        return sum(len(text.split()) - 1 for text in captions)

    def _build_vocabulary(self, caps):
        """Populate vocab_size, word_index and index_word from the whitespace tokens in ``caps``.

        The words are sorted before indices are assigned. Iterating a plain set
        would give a different word -> index mapping on every interpreter start
        (string hashing is randomised), and weights saved by one session could not
        be interpreted by the next.
        """
        unique = set()
        for text in caps:
            unique.update(text.split())

        self.vocab_size = len(unique)
        self.word_index = {word: i for i, word in enumerate(sorted(unique))}
        self.index_word = {i: word for word, i in self.word_index.items()}

    def variable_initializer(self):
        """Load the caption data and derive every attribute declared in __init__."""
        import re  # only needed here; a local import keeps the class self-contained

        # Image ids in the caption file carry a '#<digit>' suffix; strip it so the ids
        # match the train/test lists. re.sub is used directly because the default of
        # pandas' str.replace(regex=...) differs between pandas versions.
        self.df = pd.read_csv(CAPTION_PATH,
                              sep='\t',
                              header=None,
                              names=['image', 'caption'])
        suffix = re.compile(r'#\d$')
        self.df['image'] = self.df.image.map(
            lambda name: suffix.sub('', name) if isinstance(name, str) else name)

        train_df = pd.read_csv(TRAIN_PATH,
                               header=None,
                               names=['image'])
        test_df = pd.read_csv(TEST_PATH,
                              header=None,
                              names=['image'])

        # Attach the captions to each split, label the rows, and wrap every caption
        # in start/end tags.
        train_df = pd.merge(train_df, self.df, on='image')
        train_df['label'] = 'train'
        test_df = pd.merge(test_df, self.df, on='image')
        test_df['label'] = 'test'
        self.df = pd.concat([train_df, test_df])
        self.df['caption'] = self.df.caption.apply(lambda cap: '<start> ' + cap + ' <end>')

        caps = self.df.caption.tolist()

        # One sample is produced per word of a caption except the first, because the
        # generator always predicts the next word from the words before it.
        self.total_samples = self._count_samples(caps)
        print("Total samples :", self.total_samples)

        self.training_samples = self._count_samples(self.df[self.df.label == 'train'].caption)
        print("Training samples:", self.training_samples)

        self.test_samples = self._count_samples(self.df[self.df.label == 'test'].caption)
        print("Test samples:", self.test_samples)

        self._build_vocabulary(caps)

        # Longest caption, measured in whitespace-separated tokens (tags included).
        self.max_cap_len = max((len(caption.split()) for caption in caps), default=0)

        print("Vocabulary size:", self.vocab_size)
        print("Maximum caption length:", self.max_cap_len)
        print("Variables initialization done!")

    def build_embedding_layer(self):
        """Return a frozen Keras Embedding layer initialised from GloVe vectors.

        Vocabulary words that have no GloVe vector keep an all-zero row.
        """
        print('Building embedding layer.')
        embeddings_index = {}
        # The file name follows GLOVE_DIM so the two cannot drift apart. The explicit
        # encoding avoids depending on the platform default.
        glove_file = os.path.join(GLOVE_DIR, 'glove.6B.%dd.txt' % GLOVE_DIM)
        with open(glove_file, encoding='utf-8') as f:
            for line in f:
                values = line.split()
                embeddings_index[values[0]] = np.asarray(values[1:], dtype='float32')

        print('Found %s word vectors within glove.' % len(embeddings_index))

        embedding_matrix = np.zeros((self.vocab_size, GLOVE_DIM))
        for word, index in self.word_index.items():
            embedding_vector = embeddings_index.get(word)
            if embedding_vector is not None:
                embedding_matrix[index] = embedding_vector

        embedding_layer = Embedding(self.vocab_size,
                            GLOVE_DIM,
                            weights=[embedding_matrix],
                            input_length=self.max_cap_len,
                            trainable=False)

        return embedding_layer

    def data_generator(self, data='train', batch_size = 32):
        """Endlessly yield ``[[images, partial_caps], next_words]`` batches.

        Args:
            data: value of the ``label`` column to draw captions from ('train' or 'test').
            batch_size: number of (partial caption, next word) samples per batch.

        Yields:
            ``[[images, partial_caps], next_words]`` where ``partial_caps`` is padded
            (post) to ``max_cap_len`` and ``next_words`` holds one-hot rows of length
            ``vocab_size``. Samples left over at the end of a pass over the captions
            are carried into the first batch of the next pass.

        Raises:
            RuntimeError: if the image encodings could not be loaded at construction.
        """
        if self.encoded_images is None:
            raise RuntimeError("encoded_images.pkl has not been loaded; encode the images first.")

        partial_caps = []  # index sequences, one per sample
        next_words = []    # one-hot target for each sample
        images = []        # image encoding for each sample
        batch_count = 0    # batches yielded so far
        samples_built = 0  # samples collected for the batch in progress

        print("Generating data...")

        subset = self.df[self.df.label == data]
        imgs = subset.image.tolist()
        caps = subset.caption.tolist()

        # Keras keeps pulling batches for as long as it trains, so never stop.
        while True:
            current_image = ''

            for index, text in enumerate(caps):

                # Consecutive captions usually share an image; look the encoding up
                # only when the image changes.
                if current_image != imgs[index]:
                    current_image = imgs[index]
                    img_encoding = self.encoded_images.loc[current_image]

                words_in_caption = text.split()
                for i in range(len(words_in_caption) - 1):

                    samples_built += 1

                    # Input: indices of the first i+1 words of the caption.
                    partial = [self.word_index[txt] for txt in words_in_caption[:i+1]]
                    partial_caps.append(partial)

                    # Target: one-hot vector marking the word that follows them.
                    next_word = np.zeros(self.vocab_size)
                    next_word[self.word_index[words_in_caption[i+1]]] = 1

                    next_words.append(next_word)
                    images.append(img_encoding)

                    if samples_built >= batch_size:

                        next_words = np.asarray(next_words)
                        images = np.asarray(images)

                        # Pad every partial caption to the length of the longest caption.
                        partial_caps = pad_sequences(partial_caps, maxlen=self.max_cap_len, padding='post')

                        batch_count += 1
                        if batch_count % 50 == 0:
                            with open("batch_watch.log", "a") as f:
                                f.write("Training on Batch #: {}\n".format(batch_count))

                        yield [[images, partial_caps], next_words]

                        # Start collecting the next batch.
                        partial_caps = []
                        next_words = []
                        images = []
                        samples_built = 0

    def load_image(self, path):
        """Load the image at ``path`` resized to 224x224 and return it as a numpy array."""
        img = image.load_img(path, target_size=(224,224))
        x = image.img_to_array(img)
        return np.asarray(x)

    def create_model(self, ret_model=False, include_base=False):
        """Build the captioning network.

        Args:
            ret_model: when truthy, return the model without compiling it.
            include_base: accepted for compatibility; not used by this method.

        Returns:
            The Keras model, compiled with categorical cross-entropy and Adam unless
            ``ret_model`` is truthy.
        """
        # Image branch: project the 4096-d encoding and repeat it for every time step.
        image_model = Sequential()
        image_model.add(Dense(EMBEDDING_DIM, input_dim = 4096, activation='relu'))
        image_model.add(RepeatVector(self.max_cap_len))

        # Language branch: frozen GloVe embedding -> LSTM -> per-step projection.
        lang_model = Sequential()
        lang_model.add(self.build_embedding_layer())
        lang_model.add(LSTM(256,return_sequences=True))
        lang_model.add(TimeDistributed(Dense(EMBEDDING_DIM)))

        # Concatenate both branches and predict the next word over the vocabulary.
        model = Sequential()
        model.add(Merge([image_model, lang_model], mode='concat'))
        model.add(LSTM(1000,return_sequences=False, dropout=0.2))
        model.add(Dense(self.vocab_size, activity_regularizer=keras.regularizers.l2()))
        model.add(Activation('softmax'))

        print("Model created!")

        if ret_model:
            return model

        model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
        return model

    def get_word(self,index):
        """Return the vocabulary word stored at ``index``."""
        return self.index_word[index]

In [6]:
caption_gen = CaptionGenerator()

Total samples : 446798
Training samples: 382760
Test samples: 64038
Vocabulary size: 6804
Maximum caption length: 40
Variables initialization done!


In [11]:
# Writes a diagram of the captioning network to model.png.
# NOTE: caption_model is only created by the `caption_gen.create_model()` cell further
# down the notebook, so that cell has to be executed before this one.
from keras.utils import plot_model
plot_model(caption_model, to_file='model.png', show_shapes=False, show_layer_names=False)

In [10]:
# Renders the model graph inline as SVG; requires caption_model to exist already.
# NOTE(review): the saved output of this cell is an ExpatError, i.e. the rendered
# bytes were not valid SVG/XML — presumably a Graphviz/pydot problem; confirm.
SVG(model_to_dot(caption_model, show_shapes=False).create(prog='dot', format='svg'))

ExpatError: not well-formed (invalid token): line 1, column 0

In [None]:
caption_model.summary()

# Encode Images
Encode each image once with the output of VGG16's `fc2` layer (or the equivalent late layer of another base model) and cache the result, so the captioning model is fed these encodings instead of raw pixels.

In [None]:
# Build the image encoder: a VGG16 network whose output is redirected to its 'fc2'
# layer, so predict() returns that layer's activations. The captioning model's image
# branch is declared with input_dim = 4096 to receive these encodings.
encoding_model = VGG16()
layer_name = 'fc2'
intermediate_layer_model = Model(inputs=encoding_model.input,
                                 outputs=encoding_model.get_layer(layer_name).output)
# intermediate_output = intermediate_layer_model.predict(data)

In [None]:
# Uncomment this if you want to check out the model
# SVG(model_to_dot(encoding_model, show_shapes=True).create(prog='dot', format='svg'))

In [None]:
def encode_img(img_path, model):
    """Encode one dataset image with the notebook-level ``intermediate_layer_model``.

    Args:
        img_path: file name of the image, relative to DATASET_PATH.
        model: accepted for call compatibility but not used; the encoding always
            comes from ``intermediate_layer_model``.

    Returns:
        The flattened encoder output with a leading batch axis of size 1.
    """
    full_path = DATASET_PATH + '/' + img_path
    pixels = np.asarray(image.img_to_array(image.load_img(full_path, target_size=(224,224))))
    single_batch = pixels[np.newaxis,:,:,:]
    features = intermediate_layer_model.predict(single_batch)
    return features.flatten()[np.newaxis,:]

In [None]:
# Encode every distinct image referenced by the captions and cache the result on disk.
encoded_images = []
for index, img_name in enumerate(caption_gen.df.image.unique()):
    print('Encoding image:', index)
    # the encoder expects a batch axis in front of the (224, 224, 3) image
    img = caption_gen.load_image(DATASET_PATH + '/' + img_name)[np.newaxis,:,:,:]
    pred = intermediate_layer_model.predict(img).flatten()
    encoded_images.append([img_name, pred])

# One row per image, indexed by file name, with the encoding in the 'encoding' column.
df = (pd.DataFrame(encoded_images, columns=['image', 'encoding'])
        .sort_values('image')
        .set_index('image'))
df.to_pickle('encoded_images.pkl')

# Training the model

In [None]:
def train_model(cg, model, weight=None, batch_size=32, epochs=10, initial_epoch=0):
    """Train ``model`` on batches from ``cg.data_generator`` and save the result.

    Args:
        cg: object providing ``data_generator``, ``training_samples`` and ``test_samples``.
        model: compiled Keras model to train.
        weight: optional path of a weights file to load before training.
        batch_size: samples per generated batch.
        epochs: epoch index at which training stops.
        initial_epoch: epoch index to resume from.

    A failure during training is written to ``error.log`` and reported on stdout.
    The model is still saved afterwards so partially trained weights are not lost.
    """
    if weight is not None:
        model.load_weights(weight)

    file_name = './weights_regularization/glove_weights.{epoch:02d}-{loss:.2f}.hdf5'

    # Make sure the output directories exist before anything tries to write to them.
    os.makedirs(os.path.dirname(file_name), exist_ok=True)
    os.makedirs('../Models', exist_ok=True)

    # Callbacks
    checkpoint = ModelCheckpoint(file_name, monitor='loss', verbose=1, save_best_only=False, mode='min')
    progbar = ProgbarLogger(count_mode='steps')
    # NOTE(review): some Keras releases refuse histogram_freq > 0 when validation data
    # comes from a generator — confirm against the installed version.
    tboard = TensorBoard(log_dir='./logs_regularization', histogram_freq=2,
                         write_graph=True, write_images=False)
    csv_logger = CSVLogger('keras_logs_regularization', separator=',', append=True)
    callbacks_list = [checkpoint, progbar, tboard, csv_logger]

    # Step counts must be whole numbers; round up so the final partial batch is included.
    train_steps = -(-cg.training_samples // batch_size)
    validation_steps = -(-cg.test_samples // batch_size)

    # Pass in the data generator and train
    trained = True
    try:
        model.fit_generator(cg.data_generator(batch_size=batch_size, data='train'),
                            steps_per_epoch=train_steps,
                            validation_data=cg.data_generator(batch_size=batch_size, data='test'),
                            validation_steps=validation_steps,
                            epochs=epochs,
                            verbose=2,
                            callbacks=callbacks_list,
                            initial_epoch=initial_epoch)
    except Exception:
        trained = False
        with open('error.log', 'w') as f:
            f.write(traceback.format_exc())
        print("Training stopped early; traceback written to error.log")

    # Save your work!
    try:
        model.save('../Models/WholeModel.h5', overwrite=True)
        model.save_weights('../Models/Weights.h5',overwrite=True)
    except Exception as e:
        print("Error in saving model.", e)

    if trained:
        print("Training complete...\n")
    else:
        print("Training did not complete.\n")

In [7]:
caption_model = caption_gen.create_model()

Building embedding layer.
Found 400000 word vectors within glove.




Model created!


In [None]:
train_model(caption_gen, caption_model, batch_size=32, epochs=30)

# Testing the Model

#### Functions for testing used

In [None]:
def process_caption(caption):
    """Strip the leading tag and everything from the first ``<end>`` tag onwards.

    Args:
        caption: whitespace-separated caption whose first token is dropped
            (expected to be the ``<start>`` tag).

    Returns:
        The remaining words joined by single spaces. If no ``<end>`` tag is
        present, every word after the first token is kept.
    """
    words = caption.split()[1:]

    try:
        words = words[:words.index('<end>')]
    except ValueError:
        # list.index raises ValueError when there is no '<end>' tag; keep all words
        pass

    return " ".join(words)

In [None]:
def get_best_caption(captions):
    """Return the text of the highest-scoring caption.

    Args:
        captions: list of ``[index_sequence, score]`` pairs. The list is left
            untouched; a sorted copy is used to pick the winner.

    Returns:
        The words of the best sequence, looked up in ``caption_gen.index_word``
        and joined by spaces.

    Raises:
        IndexError: if ``captions`` is empty.
    """
    # With a stable ascending sort, the last entry is the top score (the later one on ties).
    best_caption = sorted(captions, key = lambda l:l[1])[-1][0]
    return " ".join([caption_gen.index_word[index] for index in best_caption])

In [None]:
def get_all_captions(captions):
    """Convert every ``[index_sequence, score]`` pair to ``[text, score]``, lowest score first.

    Args:
        captions: list of ``[index_sequence, score]`` pairs. The list is left
            untouched; the result is built from a sorted copy.

    Returns:
        A new list of ``[caption_text, score]`` pairs in ascending score order,
        with words looked up in ``caption_gen.index_word``.
    """
    return [
        [" ".join([caption_gen.index_word[index] for index in sequence]), score]
        for sequence, score in sorted(captions, key = lambda l:l[1])
    ]

In [None]:
def generate_captions(model, image, beam_size):
    """Beam-search decode a caption for one encoded image.

    Args:
        model: captioning model taking ``[image_batch, partial_caption_batch]``.
        image: encoding of a single image (a batch axis is added here).
        beam_size: number of candidate sequences kept after every step.

    Returns:
        A list of ``[index_sequence, score]`` pairs sorted by ascending score,
        each sequence having ``caption_gen.max_cap_len`` entries.

    The vocabulary and the caption length come from the notebook-level
    ``caption_gen`` object, and padding uses the ``pad_sequences`` function
    imported at the top of the notebook.
    """
    # Every beam starts from the <start> tag with a score of zero.
    start = [caption_gen.word_index['<start>']]
    captions = [[start, 0.0]]

    while len(captions[0][0]) < caption_gen.max_cap_len:
        temp_captions = []
        for caption in captions:
            partial_caption = pad_sequences([caption[0]], maxlen=caption_gen.max_cap_len, padding='post')
            next_words_pred = model.predict([np.asarray([image]), np.asarray(partial_caption)])[0]
            # indices of the beam_size most probable next words
            next_words = np.argsort(next_words_pred)[-beam_size:]

            for word in next_words:
                new_partial_caption, new_partial_caption_prob = caption[0][:], caption[1]
                new_partial_caption.append(word)
                # NOTE(review): scores are sums of raw probabilities, not of log-probabilities.
                new_partial_caption_prob += next_words_pred[word]
                temp_captions.append([new_partial_caption, new_partial_caption_prob])

        # Keep only the beam_size best candidates for the next step.
        captions = sorted(temp_captions, key = lambda l:l[1])[-beam_size:]

    return captions

In [None]:
def test_model(weight, img_name, beam_size = 3):
    """Caption one image with a freshly built model loaded from ``weight``.

    Args:
        weight: path of the weights file to load.
        img_name: image file name used as the key into the cached encodings.
        beam_size: beam width passed to ``generate_captions``.

    Returns:
        The best caption with its start/end tags removed.
    """
    # Same cache file that the encoding step writes and CaptionGenerator loads.
    # NOTE: unpickling can execute arbitrary code; only load files you created yourself.
    with open("encoded_images.pkl", "rb") as f:
        encoded_images = pickle.load(f)

    model = caption_gen.create_model(ret_model = True)
    model.load_weights(weight)

    # Row lookup by image name, the same access data_generator uses.
    # NOTE(review): assumes the pickle is the image-indexed pandas object written by
    # the encoding cell — confirm the exact shape the model receives.
    image = encoded_images.loc[img_name]
    captions = generate_captions(model, image, beam_size)
    return process_caption(get_best_caption(captions))

In [None]:
def bleu_score(hypotheses, references):
    """Return NLTK's corpus-level BLEU for ``hypotheses`` against ``references``.

    Note the argument order: this wrapper takes the hypotheses first, while
    ``corpus_bleu`` is called with the references first.
    """
    corpus_bleu = nltk.translate.bleu_score.corpus_bleu
    score = corpus_bleu(references, hypotheses)
    return score

In [None]:
def test_model_on_images(cg, model, beam_size = 3):
    """Greedily caption test images and return the caption of the last one processed.

    Only the first test image is currently processed (``imgs[:1]``).

    Args:
        cg: CaptionGenerator providing ``df``, ``word_index``, ``index_word`` and
            ``max_cap_len``.
        model: captioning model taking ``[image_encoding, partial_caption]``.
        beam_size: accepted for compatibility; decoding here is greedy.

    Returns:
        The predicted words (without the leading start tag) joined by spaces, or
        an empty string when there are no test images.
    """
    imgs = cg.df[cg.df.label == 'test'].image
    start = cg.word_index['<start>']
    captions = [start]

    for count, img_name in enumerate(imgs[:1]):
        print("Predicting for image:", count)

        # The model's image input is the 4096-d encoding, not raw pixels, so encode
        # the image the same way the single-image prediction cell does.
        image_encoding = encode_img(img_name, model)

        # Start from the <start> tag and append the most probable next word until
        # the sequence reaches the maximum caption length.
        captions = [start]
        while len(captions) < cg.max_cap_len:
            # pad so the sequence matches the model's fixed input length
            partial_caption = pad_sequences([captions], maxlen=cg.max_cap_len, padding='post')
            next_word_pred = model.predict([image_encoding, partial_caption])[0]
            captions.append(int(np.argmax(next_word_pred)))

    return " ".join(cg.index_word[word] for word in captions[1:])
        
        
#         best_caption = process_caption(get_best_caption(image_captions))
#         captions[img_name] = best_caption
#         print img_name+" : "+str(best_caption)
#         f_pred_caption.write(img_name+"\t"+str(best_caption))
#         f_pred_caption.flush()
    
    # close the predictions we're making
#     f_pred_caption.close()
    
#     # getting the caption for each image?
#     f_captions = open('Flickr8k_text/Flickr8k.token.txt', 'rb')
#     captions_text = f_captions.read().strip().split('\n')
#     image_captions_pair = {}
#     for row in captions_text:
#         row = row.split("\t")
#         row[0] = row[0][:len(row[0])-2]
#         try:
#             image_captions_pair[row[0]].append(row[1])
#         except:
#             image_captions_pair[row[0]] = [row[1]]
#     f_captions.close()
    
#     # building the bleu score
#     hypotheses=[]
#     references = []
#     for img_name in imgs:
#         hypothesis = captions[img_name]
#         reference = image_captions_pair[img_name]
#         hypotheses.append(hypothesis)
#         references.append(reference)

#     return bleu_score(hypotheses, references)

In [None]:
def predict_caption(cg, model, image):
    """Greedily decode a caption for ``image``.

    Starting from the ``<start>`` tag, the most probable next word is appended
    until the sequence holds ``cg.max_cap_len`` indices.

    Args:
        cg: object providing ``word_index``, ``index_word`` and ``max_cap_len``.
        model: model whose ``predict`` accepts ``[image, padded_partial_caption]``.
        image: image input, passed to the model unchanged.

    Returns:
        The decoded words, without the leading start tag, joined by spaces.
    """
    token_ids = [cg.word_index['<start>']]

    while not len(token_ids) >= cg.max_cap_len:
        # the model expects a fixed-length, post-padded index sequence
        padded = pad_sequences([token_ids], maxlen=cg.max_cap_len, padding='post')
        scores = model.predict([image, padded])[0]
        # the last position of the ascending argsort is the best-scoring word
        token_ids.append(np.argsort(scores)[-1])

    decoded = [cg.index_word[token] for token in token_ids]
    return " ".join(decoded[1:])

In [None]:
61750/(caption_gen.total_samples/32)

In [None]:
# pickle.dump(caption_gen.word_index, open('word_index.pkl', 'wb'))
# pickle(caption_gen.word_index
# caption_gen.index_word

In [None]:
# Build an uncompiled copy of the captioning network and load trained weights into it.
# NOTE: binding the model to the name `test_model` replaces the test_model() function
# defined earlier in the notebook; re-run that cell if the function is needed again.
weight = 'weights/glove_weights.19-5.71.hdf5'
test_model = caption_gen.create_model(ret_model=True)
test_model.load_weights(weight)

In [None]:
test_image = '241374292_11e3198daa.jpg'
predict_caption(caption_gen, test_model, encode_img(test_image, test_model))

In [None]:
test_model_on_images(caption_gen, test_model, beam_size=3)

# Testing

In [None]:
# words_in_caption = text.split()
# for i in range(len(text.split())-1): # cycle through the entire string length of the caption
#     total_count+=1 # i'm assuming this keeps track of the 'windows' you have
#     partial = [self.word_index[txt] for txt in text.split()[:i+1]] # this builds the 
#     partial_caps.append(partial)
#     next1 = np.zeros(self.vocab_size)
#     next1[ self.word_index[ text.split()[i+1] ]] = 1

# Toy re-creation of the partial-caption / next-word batching on a tiny vocabulary.
word_index = {'the':0, 'cat':1, 'ran':2, 'fast':3, 'and':4,
              'leapt':5, 'off':6, 'couch':7, 'cow':8, 'dog':9,
              'car':10}

caps = ['the cat ran fast and leapt off the couch',
        'couch and cow and dog car ran and leapt']
max_cap_len = 9
batch_size = 5
vocab_size = len(word_index)
total_count = 0
partial_caps = list()
next_words = list()
gen_count = 0

# text = each individual caption
for text in caps:
    words_in_caption = text.split() # split them into words
    for i in range(len(words_in_caption) - 1):
        total_count += 1

        # indices of the first i+1 words form the model input
        partial = [word_index[txt] for txt in words_in_caption[:i+1]]
        partial_caps.append(partial)

        # one-hot target for the word that follows; the builtin int replaces the
        # np.int alias, which current NumPy releases no longer provide
        next1 = np.zeros(vocab_size, dtype=int)
        next1[ word_index[ words_in_caption[i+1]] ] = 1
        print(next1)

        next_words.append(next1)

        # NOTE(review): this flushes after batch_size + 1 samples ('>'), whereas
        # CaptionGenerator.data_generator flushes at batch_size ('>=').
        if total_count > batch_size:
            partial_caps = pad_sequences(partial_caps, maxlen=max_cap_len, padding='post')

            total_count = 0
            gen_count += 1

            # reset the collected samples for the next batch
            partial_caps = []
            next_words = []
            images = []

In [None]:
partial

In [None]:
np.asarray(next_words)

In [None]:
len(pd.Index(sum([x.split()[:-1] for x in caps], [])))

In [None]:
pd.DataFrame(next_words, columns=list(word_index.keys()), 
             index=pd.Index(sum([x.split()[:-1] for x in caps], [])))

In [None]:
print(partial_caps)

In [None]:
next1

In [None]:
caption_gen = CaptionGenerator()

In [None]:
# Peek at the index stored for the 11th vocabulary key.
# NOTE(review): `test1` is not defined in any visible cell — presumably an earlier
# CaptionGenerator instance; confirm, or use caption_gen instead.
keys = list(test1.word_index.keys())
test1.word_index[keys[10]]

In [None]:
keys[:10]

In [None]:
test_model = caption_gen.create_model()

In [None]:
test_model.layers[0].layers[1].layers[0].input

# Prepare Model

In [None]:
# Xception is not among the imports at the top of the notebook, so bring it in here.
from keras.applications.xception import Xception

# Pre-trained convolutional base without its classification head.
base_model = Xception(include_top=False, weights='imagenet')

# Create your own input format (here 3x200x200)
# input = Input(shape=(3,200,200),name = 'image_input')
layer1 = base_model.output
layer1 = GlobalAveragePooling2D()(layer1) # add a global spatial average pooling layer
layer1 = Dense(1024, activation='relu')(layer1) # let's add a fully-connected layer

# and a softmax output layer -- let's say we have 200 classes
predictions = Dense(200, activation='softmax')(layer1)

# this is the model we will train
model = Model(inputs=base_model.input, outputs=predictions)

In [None]:
model.summary()

In [None]:
# first: train only the top layers (which were randomly initialized)
# i.e. freeze every layer of the pre-trained base model (Xception in this notebook)
for layer in base_model.layers:
    layer.trainable = False

In [None]:
# compile the model (should be done *after* setting layers to non-trainable)
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [None]:
def generate_arrays_from_file(path):
    """Endlessly yield ``(img, y)`` pairs built from the lines of the file at ``path``.

    The file is re-read from the start every time its end is reached. Each line
    is handed to ``process_line`` and the first returned value to ``load_images``.
    Both helpers must be defined elsewhere, as neither is provided in this notebook.
    """
    while 1:
        # 'with' closes the file even when the consumer abandons the generator mid-pass
        with open(path) as f:
            for line in f:
                # create numpy arrays of input data
                # and labels, from each line in the file
                x, y = process_line(line)
                img = load_images(x)
                yield (img, y)

In [None]:
# train the model on the new data for a few epochs
model.fit_generator(...)

In [None]:
# at this point, the top layers are well trained and we can start fine-tuning
# the convolutional layers of the base model. We will freeze the bottom N layers
# and train the remaining top layers.

# let's visualize layer names and layer indices to see how many layers
# we should freeze:
for i, layer in enumerate(base_model.layers):
    print(i, layer.name)

# freeze the first 172 layers and unfreeze the rest.
# NOTE(review): 172 appears to come from an InceptionV3 example, while base_model here
# is Xception — pick the cut-off from the layer listing printed above.
for layer in model.layers[:172]:
    layer.trainable = False
for layer in model.layers[172:]:
    layer.trainable = True

# we need to recompile the model for these modifications to take effect
# we use SGD with a low learning rate
from keras.optimizers import SGD
model.compile(optimizer=SGD(lr=0.0001, momentum=0.9), loss='categorical_crossentropy')

# we train our model again (this time fine-tuning the unfrozen top layers of the
# base model alongside the top Dense layers).
# NOTE: `...` is a placeholder; supply a generator and step/epoch arguments before running.
model.fit_generator(...)