In [1]:
import numpy as np
import pandas as pd
import cv2
from tqdm import tqdm
from matplotlib import pyplot as plt
import pickle

import tensorflow as tf
#from keras.applications.resnet_v2 import *
from keras.applications.vgg19 import *
from keras.preprocessing.image_dataset import *
from keras.preprocessing import image
from keras.models import Model

In [2]:
IMAGE_SIZE = (224, 224)  # (height, width) every image is resized to while loading


def load_preprocessed_images(ids, folder, target_size=IMAGE_SIZE):
    """Load ``<folder><id>.jpg`` for every id and run it through ``preprocess_input``.

    Parameters
    ----------
    ids : iterable of str
        Image ids, i.e. file names without the ``.jpg`` extension.
    folder : str
        Directory prefix including the trailing slash (e.g. ``'./train/'``).
    target_size : tuple of int
        (height, width) passed to ``image.load_img``.

    Returns
    -------
    list of numpy.ndarray
        One array per id, in the same order as ``ids``. Each array carries a
        leading batch axis of length 1 so it can be fed to the model directly.
    """
    batches = []
    for image_id in ids:
        img = image.load_img(folder + image_id + '.jpg', target_size=target_size)
        pixels = np.expand_dims(image.img_to_array(img), axis=0)
        batches.append(preprocess_input(pixels))
    return batches


# train.csv holds the training image ids (plus the x / y targets used later);
# imagenames.csv lists the ids of the images to predict.
train_xy = pd.read_csv('train.csv')
train_path = train_xy['id'].values
test_path = pd.read_csv('imagenames.csv')['id'].values

# Same loading pipeline for both splits; list order follows the id arrays above.
train_i = load_preprocessed_images(train_path, './train/')
test_i = load_preprocessed_images(test_path, './test/')

In [3]:
#model = ResNet152V2(weights = 'imagenet', include_top = False)
# Only the convolutional stack up to block4_pool is used as a feature extractor,
# so the fully connected classifier head is not loaded (include_top=False).
# input_shape is pinned to the 224x224 RGB size the images are loaded at, which
# keeps the model's input/output shapes fixed.
base_model = VGG19(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
# Truncated network: image in, block4_pool feature map out.
model = Model(inputs=base_model.input, outputs=base_model.get_layer('block4_pool').output)

In [4]:
# Print the layer table of the truncated network (input through block4_pool)
# as a sanity check on the layers and output shapes.
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0     

In [5]:
def extract_features(images, model, batch_size=32):
    """Run ``model`` over ``images`` in chunks and return one feature array per image.

    Calling ``model.predict`` once per image pays the per-call overhead for
    every single image; grouping images into chunks amortises that overhead
    while only materialising ``batch_size`` images at a time.

    Parameters
    ----------
    images : list of numpy.ndarray
        Preprocessed images, each with a leading batch axis of length 1.
    model : object with a Keras-style ``predict`` method
        The feature extractor.
    batch_size : int
        Number of images concatenated into one ``predict`` call.

    Returns
    -------
    list of numpy.ndarray
        One array per input image, in input order, each keeping a leading
        axis of length 1 (the same layout a per-image ``predict`` call returns).
    """
    features = []
    with tqdm(total=len(images)) as progress:
        for start in range(0, len(images), batch_size):
            chunk = images[start:start + batch_size]
            batch = np.concatenate(chunk, axis=0)
            preds = model.predict(batch, batch_size=batch_size, verbose=0)
            # Re-add the length-1 batch axis so downstream cells see the same
            # per-image shape as before.
            features.extend(pred[np.newaxis] for pred in preds)
            progress.update(len(chunk))
    return features


train_preds = extract_features(train_i, model)
test_preds = extract_features(test_i, model)

100%|██████████| 7500/7500 [26:55<00:00,  4.64it/s]
100%|██████████| 1200/1200 [04:31<00:00,  4.43it/s]


In [6]:
# Disabled: optional on-disk cache of the extracted features (pickle dumps of
# test_preds and train_preds), presumably so the slow prediction cell need not
# be re-run after a kernel restart. Uncomment to write the files below.
# f = open(f'./test_vgg19_2.pckl','wb')
# pickle.dump(test_preds,f)
# f.close()

# f = open(f'./train_vgg19_2.pckl','wb')
# pickle.dump(train_preds,f)
# f.close()

In [7]:
# For every test image, rank all training images by the Euclidean distance
# between their feature arrays and keep the ids of the three closest ones.
# train_preds is in the same order as the rows of train_xy, so positions
# returned by argsort index train_xy directly.
best_train = []
for query_feat in tqdm(test_preds):
    distances = []
    for train_feat in train_preds:
        delta = train_feat - query_feat
        distances.append(np.sum(delta ** 2) ** 0.5)
    nearest_idx = np.argsort(distances)[:3]
    best_train.append(train_xy['id'].iloc[nearest_idx].values)

# One row per test image; columns '1'..'3' run from closest to third closest.
df = pd.DataFrame(best_train, columns=['1', '2', '3'])
df.to_csv('vgg19_matches_2.csv', index=False)

100%|██████████| 1200/1200 [1:48:02<00:00,  5.40s/it]


In [9]:
# Predict each test image's (x, y) as the mean of the coordinates of its three
# matched training images, then write one row per test image to disk.
neighbour_cols = ['1', '2', '3']
out = []
for row_idx, neighbour_ids in enumerate(df[neighbour_cols].values):
    # Collect the train_xy rows for the three matched ids (closest first).
    matched = pd.concat([train_xy[train_xy['id'] == nid] for nid in neighbour_ids])
    out.append([test_path[row_idx], np.mean(matched['x']), np.mean(matched['y'])])

out_csv = pd.DataFrame(out, columns=['id', 'x', 'y'])
out_csv.to_csv('vgg19_top3_out.csv', index=False)

In [8]:
# Disabled experiment: joblib-parallel variant of the nearest-neighbour search
# (same distance / top-3 logic as the sequential loop).
# NOTE(review): get_best appends to the module-level best_train from inside the
# workers; with a process-based joblib backend those appends would presumably
# not reach the parent process. If this is re-enabled, return the row from
# get_best and build the DataFrame from Parallel(...)'s return value — confirm.
# from joblib import Parallel, delayed

# best_train = []
# def get_best(test):
#     match = [np.sum((train-test)**2)**0.5 for train in train_preds]
#     best_match = np.argsort(match)[:3]
#     row = train_xy.iloc[best_match]['id'].values
#     best_train.append(row)

# Parallel(n_jobs=2,verbose=10)(delayed(get_best)(test) for test in test_preds)

# df = pd.DataFrame(best_train, columns=['1','2','3'])
# df.to_csv('vgg19_matches_2.csv',index=False)