In [1]:
import os
from keras import preprocessing
import matplotlib.pyplot as plt
import shutil
import numpy as np
import keras
import pandas as pd
import cv2

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
# data_dir = '/scratch/zhang.chi9/architectural-styles-dataset/'

# def split_files(source, destination):
#     files = os.listdir(source)
#     for f in files:
#         if np.random.rand(1) < 0.2:
#             shutil.move(source + '/'+ f, destination + '/'+ f)

# for folder_name in os.listdir(data_dir):
#     source = data_dir + folder_name
#     destination = '/scratch/zhang.chi9/test/' + folder_name
#     if not os.path.isdir(destination):
#         os.makedirs(destination)
#         split_files(source,destination)

In [3]:
def sketchify(jc):

    jc = np.uint8(jc)
    
    scale_percent = 1

    width = int(jc.shape[1]*scale_percent)
    height = int(jc.shape[0]*scale_percent)

    dim = (width,height)
    resized = cv2.resize(jc,dim,interpolation = cv2.INTER_AREA)

    kernel_sharpening = np.array([[-1,-1,-1], 
                                  [-1, 9,-1],
                                  [-1,-1,-1]])
    sharpened = cv2.filter2D(resized,-1,kernel_sharpening)



    gray = cv2.cvtColor(sharpened , cv2.COLOR_BGR2GRAY)
    inv = 255-gray
    gauss = cv2.GaussianBlur(inv,ksize=(15,15),sigmaX=0,sigmaY=0)

    def dodgeV2(image,mask):
        return cv2.divide(image,255-mask,scale=256)

    pencil_jc = 255 - dodgeV2(gray,gauss)
    img = np.zeros(jc.shape)
    img[:,:,0] = pencil_jc
    img[:,:,1] = pencil_jc
    img[:,:,2] = pencil_jc
    return img

In [4]:
traingen = preprocessing.image.ImageDataGenerator(rescale=1/255.0,horizontal_flip=True,preprocessing_function=sketchify) #
testgen = preprocessing.image.ImageDataGenerator(rescale=1/255.0,horizontal_flip=True,preprocessing_function=sketchify)

In [5]:
train = '/scratch/zhang.chi9/train/'
test = '/scratch/zhang.chi9/test/'
train_generator = traingen.flow_from_directory(train)
test_generator = testgen.flow_from_directory(test)

Found 12308 images belonging to 25 classes.
Found 3124 images belonging to 25 classes.


In [6]:
x_train,y_train = next(train_generator)

In [7]:
from keras.losses import categorical_crossentropy
from keras.metrics import categorical_accuracy
from keras.callbacks import ModelCheckpoint

In [8]:
base_model = keras.applications.Xception(
    weights='imagenet',  # Load weights pre-trained on ImageNet.
    include_top=False) 

base_model.trainable = False

inputs = keras.Input(shape=(None, None, 3))
x = base_model(inputs)
x = keras.layers.GlobalAveragePooling2D()(x)
x = keras.layers.Dropout(0.8)(x)
outputs = keras.layers.Dense(25,activation='softmax')(x)
model = keras.Model(inputs, outputs)

In [9]:
model.compile(optimizer=keras.optimizers.Adam(),
              loss=categorical_crossentropy,
              metrics=[categorical_accuracy])

weights_path = '/scratch/zhang.chi9/arch_weights/'
best_acc = ModelCheckpoint(f'{weights_path}best_acc_model.h5', monitor='val_categorical_accuracy',save_best_only=True, mode='max',verbose = 1)
model.fit_generator(train_generator, validation_data = test_generator, epochs = 10,callbacks=[best_acc])

Epoch 1/10

Epoch 00001: val_categorical_accuracy improved from -inf to 0.11076, saving model to /scratch/zhang.chi9/arch_weights/best_acc_model.h5
Epoch 2/10

Epoch 00002: val_categorical_accuracy improved from 0.11076 to 0.13924, saving model to /scratch/zhang.chi9/arch_weights/best_acc_model.h5
Epoch 3/10

Epoch 00003: val_categorical_accuracy improved from 0.13924 to 0.14309, saving model to /scratch/zhang.chi9/arch_weights/best_acc_model.h5
Epoch 4/10

Epoch 00004: val_categorical_accuracy improved from 0.14309 to 0.15109, saving model to /scratch/zhang.chi9/arch_weights/best_acc_model.h5
Epoch 5/10

Epoch 00005: val_categorical_accuracy did not improve from 0.15109
Epoch 6/10

Epoch 00006: val_categorical_accuracy did not improve from 0.15109
Epoch 7/10

Epoch 00007: val_categorical_accuracy did not improve from 0.15109
Epoch 8/10

Epoch 00008: val_categorical_accuracy did not improve from 0.15109
Epoch 9/10

Epoch 00009: val_categorical_accuracy improved from 0.15109 to 0.16293,

<keras.callbacks.History at 0x2aba1f6de438>

In [10]:
# fine tuning
base_model.trainable = True
model.compile(optimizer=keras.optimizers.Adam(1e-5),
              loss=categorical_crossentropy,
              metrics=[categorical_accuracy])
weights_path = '/scratch/zhang.chi9/arch_weights/'
best_acc = ModelCheckpoint(f'{weights_path}best_acc_model.h5', monitor='val_categorical_accuracy',save_best_only=True, mode='max',verbose = 1)

In [None]:
model.fit_generator(train_generator, validation_data = test_generator, epochs = 20,callbacks=[best_acc])

Epoch 1/20

Epoch 00001: val_categorical_accuracy improved from 0.80730 to 0.80954, saving model to /scratch/zhang.chi9/arch_weights/best_acc_model.h5
Epoch 2/20

Epoch 00002: val_categorical_accuracy improved from 0.80954 to 0.81402, saving model to /scratch/zhang.chi9/arch_weights/best_acc_model.h5
Epoch 3/20

Epoch 00003: val_categorical_accuracy improved from 0.81402 to 0.81658, saving model to /scratch/zhang.chi9/arch_weights/best_acc_model.h5
Epoch 4/20

Epoch 00004: val_categorical_accuracy improved from 0.81658 to 0.82490, saving model to /scratch/zhang.chi9/arch_weights/best_acc_model.h5
Epoch 5/20

Epoch 00005: val_categorical_accuracy did not improve from 0.82490
Epoch 6/20

Epoch 00006: val_categorical_accuracy did not improve from 0.82490
Epoch 7/20

In [12]:
from keras.models import load_model

In [13]:
weights_path = '/scratch/zhang.chi9/arch_weights/'
model = load_model(f'{weights_path}best_acc_model.h5')

KeyboardInterrupt: 

In [None]:
model.summary()

In [None]:
from keras import Model

In [None]:
embedding_model = Model(inputs = model.input, outputs = model.layers[2].output)

In [None]:
test_generator.batch_size = 3124
x_test, y_test = next(test_generator)

In [None]:
y_embedding = embedding_model.predict(x_test)

In [None]:
y_embedding.shape

In [None]:
train_generator.batch_size = 12308
x_train, y_train = next(train_generator)

In [None]:
train_embedding = embedding_model.predict(x_train)

In [None]:
embedding = np.concatenate([y_embedding,train_embedding],axis = 0)

In [None]:
embedding.shape

In [None]:
df = pd.DataFrame(embedding)

In [None]:
df.to_csv('./all_embedding.csv',index=False)

In [None]:
import matplotlib.image as mpimg

img = mpimg.imread('building.jpg')

In [None]:
plt.figure(figsize = (8,8))
plt.imshow(img)

In [None]:
img = img/255;

In [None]:
xx = embedding_model.predict(img[np.newaxis,:])

In [None]:
from scipy import spatial

cosine_similarity = []
for i in range(len(embedding)):
    vector1 = embedding[i,:]
    cosine_similarity.append(1 - spatial.distance.cosine(vector1, xx))

In [None]:
print(np.argmin(cosine_similarity),np.max(cosine_similarity))

In [None]:
11310 - 3124

In [None]:
plt.imshow(x_test[1171,:])

In [None]:
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
principalComponents = pca.fit_transform(y_embedding)

In [None]:
y_label = np.argmax(y_test,axis=1)

In [None]:
data = np.concatenate([principalComponents,y_label[:,np.newaxis]],axis = -1)

In [None]:
df = pd.DataFrame(data,columns=['PC1','PC2','label'])

In [None]:
df.to_csv('pca.csv',index=False)

In [None]:
plt.figure(figsize=(16,16))
for class_ in range(7):
    plt.scatter(df.loc[df.label==class_,'PC1'],df.loc[df.label==class_,'PC2'])

In [None]:
y_pred = model.predict(x_test)

In [None]:
y_pred = np.argmax(y_pred,axis=1)
y_test = np.argmax(y_test,axis=1)

In [None]:
import itertools
from sklearn.metrics import accuracy_score
print(f'The accuracy score is {accuracy_score(y_test, y_pred)}')

class_names = ['Achaemenid architecture', 'American Foursquare architecture', 'American craftsman style', 'Ancient Egyptian architecture', 'Art Deco architecture', 'Art Nouveau architecture', 'Baroque architecture', 'Bauhaus architecture', 'Beaux-Arts architecture', 'Byzantine architecture', 'Chicago school architecture', 'Colonial architecture', 'Deconstructivism', 'Edwardian architecture', 'Georgian architecture', 'Gothic architecture', 'Greek Revival architecture', 'International style', 'Novelty architecture', 'Palladian architecture', 'Postmodern architecture', 'Queen Anne architecture', 'Romanesque architecture', 'Russian Revival architecture', 'Tudor Revival architecture']
from sklearn.metrics import confusion_matrix
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)
    fig,ax = plt.subplots(figsize=(16,16))
    ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.grid(False)
    plt.title(title)
    #plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45, fontsize=12)
    plt.yticks(tick_marks, classes, fontsize=15)
    ax.set_aspect('equal', 'datalim')

    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

# Compute confusion matrix
cnf_matrix = confusion_matrix(y_test, y_pred)
np.set_printoptions(precision=2)

# Plot non-normalized confusion matrix
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=class_names,
                      title='Confusion matrix, without normalization')

# Plot normalized confusion matrix
plt.figure(figsize=(16,16))
plot_confusion_matrix(cnf_matrix, classes=class_names, normalize=True,
                      title='Normalized confusion matrix')

plt.show()

In [None]:
test_embeddings = embedding_model.predict_generator(test_generator)

In [None]:
test_generator.classes[0:32]

In [None]:
test_embeddings.shape

In [None]:
fig,ax = plt.subplots(4,4,figsize = (16,16))
for i in range(0,16):
    x, y = int(i/4), i%4
    ax[x][y].imshow(x_train[i])

In [None]:
fig,ax = plt.subplots(4,4,figsize = (16,16))
for i in range(0,16):
    x, y = int(i/4), i%4
    ax[x][y].imshow(x_test[i])

In [None]:
y_pred = model.predict(x_train)