# Development: Fine-Tuning Pre-Trained Models
Load a previously trained model, freeze most of its layers, and then train it on data from a different city.
  
Date: 2018-12-06  
Author: Peter Kerins

### Import statements
(may be over-inclusive)

In [1]:
# typical, comprehensive imports
import warnings
warnings.filterwarnings('ignore')
#
import os
import sys
import json
import itertools
import pickle
from pprint import pprint
#
import numpy as np
import shapely
import cartopy
import geojson
import fiona
import gdal
import h5py
get_ipython().magic(u'matplotlib inline')
import matplotlib.pyplot as plt
import sklearn
from sklearn.preprocessing import StandardScaler 
import ogr, gdal
from keras.models import load_model
import math
from keras.utils import to_categorical
from keras.models import load_model
from keras import models
from keras import layers
from keras.layers import Dropout
from keras.utils import to_categorical
from skimage import measure
from skimage import filters
from scipy import stats

import tensorflow as tf

import keras
import keras.backend as K
from keras.models import Model
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Input, Add, Lambda
from keras.callbacks import EarlyStopping, ModelCheckpoint, History

import collections

import descarteslabs as dl
# Smoke test: a successful places lookup shows the Descartes Labs client is usable.
print dl.places.find('illinois') ## TEST

# The project utility modules imported below are located via this hard-coded,
# user-specific path; the commented-out lines are the environment-variable
# alternative.
#ULU_REPO = os.environ["ULU_REPO"]
#sys.path.append(ULU_REPO+'/utils')
sys.path.append('/home/Peter.Kerins/UrbanLandUse/utils')
print sys.path

import util_descartes
import util_ml
import util_rasters
import util_vectors
import util_workflow
import util_keras


Using TensorFlow backend.


[{u'name': u'Illinois', u'id': 85688697, u'placetype': u'region', u'bbox': [-91.512974, 36.970298, -87.019935, 42.508302], u'path': u'continent:north-america_country:united-states_region:illinois', u'slug': u'north-america_united-states_illinois'}]
['', '/usr/lib/python2.7', '/usr/lib/python2.7/plat-x86_64-linux-gnu', '/usr/lib/python2.7/lib-tk', '/usr/lib/python2.7/lib-old', '/usr/lib/python2.7/lib-dynload', '/home/Taufiq.Rashid/.local/lib/python2.7/site-packages', '/usr/local/lib/python2.7/dist-packages', '/usr/lib/python2.7/dist-packages', '/usr/lib/python2.7/dist-packages/PILcompat', '/usr/lib/python2.7/dist-packages/gtk-2.0', '/usr/local/lib/python2.7/dist-packages/IPython/extensions', '/home/Taufiq.Rashid/.ipython', '/home/Peter.Kerins/UrbanLandUse/utils']


### Set key variables

In [2]:
# Root directory for per-city data sets and the shared models/ folder.
data_root = '/data/phase_iii/'
#data_path=data_root+place+'/'

# Visible/infrared imagery bands and the short code naming them.  # S2, Lx
bands = ['blue', 'green', 'red', 'nir', 'swir1', 'swir2', 'alpha']
suffix = 'BGRNS1S2A'
resolution = 5  # Lx:15 S2:10

# SAR bands use the same resolution as the optical imagery.
s1_bands = ['vv', 'vh']
s1_suffix = 'VVVH'
s1_resolution = resolution

ndvi_bands = ['raw']
#ndvi_bands = ['raw','min','max'];
# ndbi_bands = ['raw','min','max'];

# Tiling parameters.
tile_resolution = resolution
tile_size = 256
tile_pad = 16

# ground truth source: aue, aue+osm, aue+osm2
label_suffix = 'aue'

# NYU AoUE land use/land cover categories
category_label = {
    0: 'Open Space',
    1: 'Non-Residential',
    2: 'Residential Atomistic',
    3: 'Residential Informal Subdivision',
    4: 'Residential Formal Subdivision',
    5: 'Residential Housing Project',
    6: 'Roads',
    7: 'Study Area',
    8: 'Labeled Study Area',
    254: 'No Data',
    255: 'No Label',
}

### Specify training data & training parameters

In [3]:
# Side length, in pixels, of the square window stored for every sample.
window = 17

# Source category -> target category. All residential types (2-5) collapse
# into category 4; open space, non-residential and roads keep their codes.
cats_map = {
    0: 0,
    1: 1,
    2: 4,
    3: 4,
    4: 4,
    5: 4,
    6: 6,
}

# Band groups that make up the input stack; a group set to None is left out.
bands_vir = bands[:-1]    # every imagery band except the trailing 'alpha'
bands_sar = None          # s1_bands
bands_ndvi = ndvi_bands   # None
bands_ndbi = None
bands_osm = None

haze_removal = False

# Training hyper-parameters.
epochs = 500
batch_size = 128
balancing = None

In [4]:
# Ask the workflow helper for the label that names this combination of band
# groups and for the number of feature bands in the resulting stack. Both are
# used below to locate stored data sets and to reshape the flat samples.
stack_label, feature_count = util_workflow.build_stack_label(
        bands_vir=bands_vir,
        bands_sar=bands_sar,
        bands_ndvi=bands_ndvi,
        bands_ndbi=bands_ndbi,
        bands_osm=bands_osm,)
print stack_label, feature_count

vir+ndvi 7


### Build training data "superset"
The "superset" of training data used to actually train a model can be a combination of any number of stored training data sets built from particular input stacks. These constituent sets can come from one or many cities. The only requirement is that they are all consistent in construction, i.e., they are built from the same input stack.

In [5]:
# Pick up any edits made to the helper module since it was first imported.
reload(util_workflow)

# City name -> identifiers of the images whose stored samples are combined.
place_images = {'sitapur': ['Q', 'R', 'T', 'U', 'V']}

# Concatenate the stored per-image train/validation sets into the supersets.
X_train_raw, Y_train_raw, X_valid_raw, Y_valid_raw = util_workflow.load_datasets(
    place_images,
    data_root,
    label_suffix,
    stack_label,
    window,
    resolution=resolution)

calculate total size of training and validation supersets
/data/phase_iii/sitapur/sitapur_train_aue_5m_vir+ndvi_17w_Q.pkl
/data/phase_iii/sitapur/sitapur_valid_aue_5m_vir+ndvi_17w_Q.pkl
(120785, 2023) (120785,) (51766, 2023) (51766,)
/data/phase_iii/sitapur/sitapur_train_aue_5m_vir+ndvi_17w_R.pkl
/data/phase_iii/sitapur/sitapur_valid_aue_5m_vir+ndvi_17w_R.pkl
(120785, 2023) (120785,) (51766, 2023) (51766,)
/data/phase_iii/sitapur/sitapur_train_aue_5m_vir+ndvi_17w_T.pkl
/data/phase_iii/sitapur/sitapur_valid_aue_5m_vir+ndvi_17w_T.pkl
(120785, 2023) (120785,) (51766, 2023) (51766,)
/data/phase_iii/sitapur/sitapur_train_aue_5m_vir+ndvi_17w_U.pkl
/data/phase_iii/sitapur/sitapur_valid_aue_5m_vir+ndvi_17w_U.pkl
(120785, 2023) (120785,) (51766, 2023) (51766,)
/data/phase_iii/sitapur/sitapur_train_aue_5m_vir+ndvi_17w_V.pkl
/data/phase_iii/sitapur/sitapur_valid_aue_5m_vir+ndvi_17w_V.pkl
(120785, 2023) (120785,) (51766, 2023) (51766,)
603925 258830
construct np arrays for supersets
populate super

### Scale training data

In [6]:
# Scale the training and validation features. The returned scaler object is
# kept because it is pickled next to the model at the end of the notebook.
# NOTE(review): presumably the scaler is fit on the training set only - confirm
# in util_ml.scale_learning_data.
X_train_raw_scaled, X_valid_raw_scaled, scaler = util_ml.scale_learning_data(X_train_raw, X_valid_raw)
print X_train_raw_scaled.shape,  X_valid_raw_scaled.shape

(603925, 2023) (258830, 2023)
(603925, 2023) (258830, 2023)


In [7]:
# Drop the unscaled arrays; only the scaled copies are used from here on.
del X_train_raw, X_valid_raw

In [8]:
# Unflatten every sample from feature_count*window*window values into a
# (feature_count, window, window) array, i.e. bands-first per sample.
X_train = X_train_raw_scaled.reshape((X_train_raw_scaled.shape[0],feature_count,window,window))
X_valid = X_valid_raw_scaled.reshape((X_valid_raw_scaled.shape[0],feature_count,window,window))
#print(X_train_raw[0])
#print(X_train[0])

In [9]:
# Remove the flat-array names now that the reshaped arrays are bound.
del X_train_raw_scaled, X_valid_raw_scaled

In [10]:
# Sanity check: expect (samples, feature_count, window, window).
print X_train.shape

(603925, 7, 17, 17)


### Prepare training data for ingestion
This version of the workflow does not include class-balancing

Remap the original set of categories (from the Atlas of Urban Expansion) encoded in the ground-truth files and the training data files to a target typology (e.g., collapsing all residential LULC types into a single category).

In [11]:
# Work on copies so the raw labels stay intact; the remapping step below
# builds its masks from the untouched raw arrays.
Y_train = Y_train_raw.copy()
Y_valid = Y_valid_raw.copy()

In [12]:
for k, v in cats_map.items():
    Y_train[Y_train_raw==k] = v
    Y_valid[Y_valid_raw==k] = v
    
print Y_train_raw.shape
print Y_train.shape

(603925,)
(603925,)


In [13]:
# The raw labels are no longer needed once the remapped copies exist.
del Y_train_raw, Y_valid_raw

Remove roads

In [14]:
# Discard every sample labelled as roads (category 6), keeping features and
# labels aligned by indexing both with the same positions.
non_roads = np.nonzero(Y_train != 6)
X_train, Y_train = X_train[non_roads], Y_train[non_roads]

non_roads = np.nonzero(Y_valid != 6)
X_valid, Y_valid = X_valid[non_roads], Y_valid[non_roads]

Now reduce the remapped values to the categorical values required by the Keras library. These pre-processing steps could be consolidated, but they represent two separate conceptual parts of the workflow, so they are executed separately.

In [15]:
# Translate the remapped category codes into the class indices fed to Keras.
# Masks are taken from Y_train / Y_valid, which are never modified here, so the
# order of the assignments is irrelevant.
Y_t, Y_v = Y_train.copy(), Y_valid.copy()

for remapped_code, class_index in ((0, 0), (1, 1), (4, 2), (6, 3)):
    Y_t[Y_train == remapped_code] = class_index
    Y_v[Y_valid == remapped_code] = class_index

Change Y data to one-hot structure

In [16]:
# Class indices the network predicts; roads were removed above.
categories_reduced = [0,1,2]

# One-hot encode the labels. Without num_classes, to_categorical sizes the
# matrix from the largest label present, so a split that happens to lack the
# highest class would come out narrower than the network's output layer.
# Pinning the width also makes an unexpected label fail here, not during fit.
Y_t_cat = to_categorical(Y_t, num_classes=len(categories_reduced))
Y_v_cat = to_categorical(Y_v, num_classes=len(categories_reduced))

In [17]:
# Compare label shapes before and after one-hot encoding; the second axis of
# the encoded array should equal the number of classes.
print Y_t.shape
print Y_t_cat.shape

(541725,)
(541725, 3)


### Prepare Keras variables

In [18]:
# The samples are stored bands-first (feature_count, window, window), so tell
# Keras to treat axis 1 as the channel axis.
# Under Python 2 these print calls show a tuple, as seen in the cell output.
print("DEFAULT:",K.image_data_format())
K.set_image_data_format('channels_first')
print("UPDATED:",K.image_data_format())

('DEFAULT:', 'channels_last')
('UPDATED:', 'channels_first')


In [19]:
# NOTE(review): these names are not referenced anywhere else in the visible
# notebook; presumably convolution filter counts kept from the training
# notebook - confirm whether util_keras.build_model reads them.
NB_FILTERS_1=32
NB_FILTERS_2=64

### Load pre-trained model

In [36]:
# Identify the previously trained model and the weight file written for it.
original_model_id = '3cat_Hin_U-AB'
weights_label = 'WCC_weights.best'
# <data_root>models/<model id>_<weights label>.hdf5
filepath = '{}models/{}_{}.hdf5'.format(data_root, original_model_id, weights_label)

In [37]:
# rebuild network
network=util_keras.build_model(util_keras.doubleres_block,input_shape=X_train.shape[-3:],output_nodes=len(categories_reduced))
# load weights from fast learning
network.load_weights(filepath)

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            (None, 7, 17, 17)    0                                            
__________________________________________________________________________________________________
conv2d_21 (Conv2D)              (None, 32, 17, 17)   5632        input_3[0][0]                    
__________________________________________________________________________________________________
conv2d_23 (Conv2D)              (None, 32, 17, 17)   2048        input_3[0][0]                    
__________________________________________________________________________________________________
activation_21 (Activation)      (None, 32, 17, 17)   0           conv2d_21[0][0]                  
__________________________________________________________________________________________________
activation

### Define variables and prepare objects for new, fine-tuned model

In [31]:
# Identifier and free-text notes under which the fine-tuned model is saved and
# recorded in the scorecard.
model_id = '3cat_Sit_QRTUV_tuned'
notes = 'test of fine-tuning. sitapur QRTUV based on trained hindupur model \'3cat_Hin_U-AB\'.3-category; 5m bilinear with plain aue, green images from 2017 for Sitapur'
# Rebinds weights_label: from here on it names the tuned model's checkpoint
# file, not the pre-trained weights loaded earlier.
weights_label='WCC_weights_tuned.best'

In [32]:
# weights and loss
weights = util_ml.generate_category_weights(place_images,category_label,label_suffix,stack_label,window,data_root,use_log=True,
                                   columns=['image_name','Open Space','Non-Residential','Residential-Total'],resolution=resolution)
print 'weights', weights
loss = util_keras.weighted_categorical_crossentropy(weights)

weights [1.0, 2.7794092921593903, 1.2122760272800606]


### Prepare network and training objects

In [38]:
# Create the training callbacks for the tuned model and compile the network
# with the weighted loss at a learning rate of 0.0001.
# NOTE(review): `patience=3` presumably configures early stopping - confirm in
# util_keras.create_callbacks.
callbacks, history_path = util_keras.create_callbacks(data_root, model_id, weights_label=weights_label, patience=3)
util_keras.compile_network(network, loss, LR=0.0001)

### "Option 1": training classifier from scratch
Not included here, as there is a separate notebook for that (should be titled something like "core_train-model-3category")

### "Option 2": starting with pre-trained weights but training all parameters

In [26]:
# train slow
history_tuned = network.fit(X_train, Y_t_cat, batch_size=batch_size, epochs=epochs, validation_data=(X_valid, Y_v_cat),shuffle=True,callbacks=callbacks)

Train on 541725 samples, validate on 232420 samples
Epoch 1/500
Epoch 2/500
Epoch 3/500

KeyboardInterrupt: 

In [None]:
# Plot the validation accuracy and validation loss recorded per epoch for the
# "Option 2" run. The fit call in this section stores its History object in
# `history_tuned`; `history_slow` is never assigned in this notebook.
plt.plot(history_tuned.history['val_acc'])
plt.show()
plt.plot(history_tuned.history['val_loss'])
plt.show()

In [None]:
print "evaluate training"
Yhat_t_prob = network.predict(X_train)
Yhat_t = Yhat_t_prob.argmax(axis=-1)
train_confusion = util_ml.calc_confusion(Yhat_t,Y_t,categories_reduced)
train_recalls, train_precisions, train_accuracy = util_ml.calc_confusion_details(train_confusion)

# Calculate f-score
beta = 2
train_f_score = (beta**2 + 1) * train_precisions * train_recalls / ( (beta**2 * train_precisions) + train_recalls )
train_f_score_open = train_f_score[0] 
train_f_score_nonres = train_f_score[1]  
train_f_score_res = train_f_score[2]  
train_f_score_roads = None#train_f_score[3]  
train_f_score_average = np.mean(train_f_score)

print "evaluate validation"
Yhat_v_prob = network.predict(X_valid)
Yhat_v = Yhat_v_prob.argmax(axis=-1)
valid_confusion = util_ml.calc_confusion(Yhat_v,Y_v,categories_reduced)
valid_recalls, valid_precisions, valid_accuracy = util_ml.calc_confusion_details(valid_confusion)

# Calculate f-score
valid_f_score = (beta**2 + 1) * valid_precisions * valid_recalls / ( (beta**2 * valid_precisions) + valid_recalls )
valid_f_score_open = valid_f_score[0] 
valid_f_score_nonres = valid_f_score[1] 
valid_f_score_res = valid_f_score[2] 
valid_f_score_roads = None# valid_f_score[3] 
valid_f_score_average = np.mean(valid_f_score)

# expanding lists to match expected model_record stuff
train_recalls_expanded = [train_recalls[0],train_recalls[1],train_recalls[2],None]
valid_recalls_expanded = [valid_recalls[0],valid_recalls[1],valid_recalls[2],None]
train_precisions_expanded = [train_precisions[0],train_precisions[1],train_precisions[2],None]
valid_precisions_expanded = [valid_precisions[0],valid_precisions[1],valid_precisions[2],None]

End "Option 2"

### "Option 3": freeze all but final (flattened) layers and train only those parameters

In [39]:
# freeze relevant layers
for layer in network.layers[:-5]:
    layer.trainable = False

# Check the trainable status of the individual layers
for layer in network.layers:
    print(layer, layer.trainable)

(<keras.engine.topology.InputLayer object at 0x7f5047419a50>, False)
(<keras.layers.convolutional.Conv2D object at 0x7f52221bcf10>, False)
(<keras.layers.convolutional.Conv2D object at 0x7f50473e96d0>, False)
(<keras.layers.core.Activation object at 0x7f5047435ed0>, False)
(<keras.layers.core.Activation object at 0x7f5047380b90>, False)
(<keras.layers.convolutional.Conv2D object at 0x7f5047435c50>, False)
(<keras.layers.convolutional.Conv2D object at 0x7f50903809d0>, False)
(<keras.layers.core.Activation object at 0x7f5047435650>, False)
(<keras.layers.core.Activation object at 0x7f5090380850>, False)
(<keras.layers.convolutional.Conv2D object at 0x7f50474bb990>, False)
(<keras.layers.core.Lambda object at 0x7f50473847d0>, False)
(<keras.layers.core.Lambda object at 0x7f504736f110>, False)
(<keras.layers.core.Lambda object at 0x7f504736f8d0>, False)
(<keras.layers.merge.Add object at 0x7f504736f150>, False)
(<keras.layers.pooling.MaxPooling2D object at 0x7f50903c98d0>, False)
(<keras.l

In [40]:
# train slow
history_tuned = network.fit(X_train, Y_t_cat, batch_size=batch_size, epochs=epochs, validation_data=(X_valid, Y_v_cat),shuffle=True,callbacks=callbacks)

Train on 541725 samples, validate on 232420 samples
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Train on 541725 samples, validate on 232420 samples
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Train on 541725 samples, validate on 232420 samples
Epoch 1/500

KeyboardInterrupt: 

In [41]:
# One figure per recorded validation metric, in epoch order. Requires the
# fit call above to have completed so that `history_tuned` exists.
for curve in ('val_acc', 'val_loss'):
    plt.plot(history_tuned.history[curve])
    plt.show()

NameError: name 'history_tuned' is not defined

In [42]:
print "evaluate training"
Yhat_t_prob = network.predict(X_train)
Yhat_t = Yhat_t_prob.argmax(axis=-1)
train_confusion = util_ml.calc_confusion(Yhat_t,Y_t,categories_reduced)
train_recalls, train_precisions, train_accuracy = util_ml.calc_confusion_details(train_confusion)

# Calculate f-score
beta = 2
train_f_score = (beta**2 + 1) * train_precisions * train_recalls / ( (beta**2 * train_precisions) + train_recalls )
train_f_score_open = train_f_score[0] 
train_f_score_nonres = train_f_score[1]  
train_f_score_res = train_f_score[2]  
train_f_score_roads = None#train_f_score[3]  
train_f_score_average = np.mean(train_f_score)

print "evaluate validation"
Yhat_v_prob = network.predict(X_valid)
Yhat_v = Yhat_v_prob.argmax(axis=-1)
valid_confusion = util_ml.calc_confusion(Yhat_v,Y_v,categories_reduced)
valid_recalls, valid_precisions, valid_accuracy = util_ml.calc_confusion_details(valid_confusion)

# Calculate f-score
valid_f_score = (beta**2 + 1) * valid_precisions * valid_recalls / ( (beta**2 * valid_precisions) + valid_recalls )
valid_f_score_open = valid_f_score[0] 
valid_f_score_nonres = valid_f_score[1] 
valid_f_score_res = valid_f_score[2] 
valid_f_score_roads = None# valid_f_score[3] 
valid_f_score_average = np.mean(valid_f_score)

# expanding lists to match expected model_record stuff
train_recalls_expanded = [train_recalls[0],train_recalls[1],train_recalls[2],None]
valid_recalls_expanded = [valid_recalls[0],valid_recalls[1],valid_recalls[2],None]
train_precisions_expanded = [train_precisions[0],train_precisions[1],train_precisions[2],None]
valid_precisions_expanded = [valid_precisions[0],valid_precisions[1],valid_precisions[2],None]

evaluate training
0 247965
1 89215
2 204545
[[238332   5467   4166]
 [   136  89049     30]
 [  1708   1638 201199]]
541725 528580 0.9757349208546772
evaluate validation
0 105720
1 38780
2 87920
[[99676  3339  2705]
 [  576 37986   218]
 [ 1599  1216 85105]]
232420 222767 0.958467429653214


End "Option 3"

### Save objects and record model in scorecard

In [43]:
scaler_filename = data_root+'models/'+model_id+'_scaler.pkl'
#model_filename  = data_root+'models/'+model_id+'_SVM.pkl'
network_filename = data_root+'models/'+model_id+'DLv3.hd5'

if os.path.exists(scaler_filename):
    print 'Aborting all pickle operations: file already exists at specified path ('+scaler_filename+')'
#elif os.path.exists(model_filename):
#    print 'Aborting all pickle operations: file already exists at specified path ('+model_filename+')'
elif os.path.exists(network_filename):
    print 'Aborting all pickle operations: file already exists at specified path ('+network_filename+')'
else:
    print scaler_filename
#    print model_filename
    print network_filename
    pickle.dump(scaler, open(scaler_filename, 'wb'))
#    pickle.dump(model, open(model_filename, 'wb'))
    network.save(network_filename)
    # tracking only occurs if all saves are successful
    util_workflow.record_model_creation(
        model_id, notes, place_images, label_suffix, resolution, stack_label, feature_count, window, cats_map, balancing, 
        network.get_config(), epochs, batch_size,
        train_confusion, train_recalls_expanded, train_precisions_expanded, train_accuracy,
        train_f_score_open, train_f_score_nonres, train_f_score_res, train_f_score_roads, train_f_score_average,
        valid_confusion, valid_recalls_expanded, valid_precisions_expanded, valid_accuracy,
        valid_f_score_open, valid_f_score_nonres, valid_f_score_res, valid_f_score_roads, valid_f_score_average,)

/data/phase_iii/models/3cat_Sit_QRTUV_tuned_scaler.pkl
/data/phase_iii/models/3cat_Sit_QRTUV_tunedDLv3.hd5
