In [37]:
from keras.applications.vgg16 import VGG16
import numpy as np
import matplotlib.pyplot as plt

import time

In [2]:
model = VGG16()
model.summary()

Instructions for updating:
Colocations handled automatically by placer.
Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_______________________________________________________

In [3]:
print("The num of layers: %d" % (len(model.layers)))

The num of layers: 23


In [4]:
from keras.applications.vgg16 import preprocess_input
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from numpy import expand_dims

In [21]:
## Change the image name
import os

relative_path = './Flickr_Data/images/'
img_count = 0
old_name_list = [] # In order
for directory, subdir, files in os.walk(relative_path):
    for file in files:
        img_count += 1
        oldname = relative_path + os.sep + file
        newname = relative_path + os.sep + str(img_count) + file[-4:]
        old_name_list.append(oldname)
        os.rename(oldname, newname)

In [34]:
print(len(old_name_list))
print(old_name_list[:10])
print(img_count)

8091
['./Flickr_Data/images//2387197355_237f6f41ee.jpg', './Flickr_Data/images//2609847254_0ec40c1cce.jpg', './Flickr_Data/images//2046222127_a6f300e202.jpg', './Flickr_Data/images//2853743795_e90ebc669d.jpg', './Flickr_Data/images//2696951725_e0ae54f6da.jpg', './Flickr_Data/images//3421131122_2e4bde661e.jpg', './Flickr_Data/images//3229730008_63f8ca2de2.jpg', './Flickr_Data/images//3220009216_10f088185e.jpg', './Flickr_Data/images//3415578043_03d33e6efd.jpg', './Flickr_Data/images//3437273677_47d4462974.jpg']
8091


In [33]:
def predict_vector(model_using, image_path):
    img = load_img(image_path, target_size=(224, 224))
    
    img = img_to_array(img)
    
    img = expand_dims(img, axis=0)
    
    img = preprocess_input(img)
    
    vector = model.predict(img)
    
    return vector.reshape(-1)

In [38]:
vector_values = []
relative_path = './Flickr_Data/images'
start_time = time.time()
for i in range(img_count):
    input_image = relative_path + '/' + str(i+1) + '.jpg'
    vector_values.append(predict_vector(model, input_image))
    if (i+1) % 50 == 0:
        print("The %dth image finished!" % (i))
end_time = time.time()

The 49th image finished!
The 99th image finished!
The 149th image finished!
The 199th image finished!
The 249th image finished!
The 299th image finished!
The 349th image finished!
The 399th image finished!
The 449th image finished!
The 499th image finished!
The 549th image finished!
The 599th image finished!
The 649th image finished!
The 699th image finished!
The 749th image finished!
The 799th image finished!
The 849th image finished!
The 899th image finished!
The 949th image finished!
The 999th image finished!
The 1049th image finished!
The 1099th image finished!
The 1149th image finished!
The 1199th image finished!
The 1249th image finished!
The 1299th image finished!
The 1349th image finished!
The 1399th image finished!
The 1449th image finished!
The 1499th image finished!
The 1549th image finished!
The 1599th image finished!
The 1649th image finished!
The 1699th image finished!
The 1749th image finished!
The 1799th image finished!
The 1849th image finished!
The 1899th image finish

In [47]:
print("The time is: %.2fs." % (end_time - start_time))

The time is: 3841.03s.


In [44]:
vector_values = np.array(vector_values)

In [82]:
vector_values[vector_values < 1e-4] = 0

In [83]:
# Save this array to the local folder
np.savetxt('All_vectors', vector_values)

In [84]:
from sklearn.cluster import KMeans

K = 100
estimator = KMeans(n_clusters=K)
fit_start = time.time()
estimator.fit(vector_values)
fit_end = time.time()

label_pred = estimator.labels_
centroids = estimator.cluster_centers_

print("The fitting time for %d clusters is %.2fs." % (K, fit_end-fit_start))

The fitting time for 100 clusters is 64.77s.


In [89]:
np.savetxt('labels_100', label_pred)
np.savetxt('center_100', centroids)

In [85]:
estimator.n_iter_

50

In [87]:
'''
dic = {}
for i in range(len(label_pred)):
    if label_pred[i] not in dic:
        dic[label_pred[i]] = 1
    else:
        dic[label_pred[i]] += 1
print(dic)
'''

{19: 3071, 61: 43, 46: 842, 16: 34, 11: 95, 37: 12, 54: 28, 51: 101, 85: 74, 99: 15, 95: 100, 50: 73, 29: 48, 27: 24, 98: 24, 10: 28, 55: 45, 26: 18, 57: 29, 30: 83, 0: 259, 20: 43, 87: 41, 33: 24, 60: 32, 90: 43, 39: 54, 14: 80, 96: 69, 4: 75, 40: 20, 8: 125, 43: 49, 78: 29, 58: 29, 89: 47, 63: 31, 86: 25, 84: 85, 28: 21, 81: 14, 6: 73, 22: 81, 49: 34, 15: 53, 65: 24, 18: 35, 75: 36, 3: 38, 72: 60, 76: 21, 9: 54, 48: 41, 25: 34, 1: 69, 62: 61, 56: 43, 23: 37, 67: 29, 13: 95, 7: 61, 88: 31, 74: 30, 42: 10, 47: 113, 21: 49, 24: 35, 36: 29, 64: 28, 31: 31, 79: 22, 2: 115, 71: 26, 52: 37, 73: 16, 82: 32, 32: 27, 12: 29, 38: 25, 93: 19, 35: 27, 45: 38, 92: 20, 5: 33, 34: 44, 66: 14, 59: 6, 69: 18, 68: 15, 77: 21, 97: 28, 44: 20, 91: 15, 17: 21, 70: 36, 41: 12, 83: 9, 94: 12, 53: 15, 80: 22}


In [90]:
#sorted(dic.items(), key=lambda item:item[1])