In [1]:
import os
import re
import shutil
import time
from glob import glob
from pathlib import Path

import cv2
import matplotlib.pyplot as plt
import numpy as np
from numpy import dot
from numpy.linalg import norm
from PIL import Image
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics.pairwise import cosine_distances, euclidean_distances, manhattan_distances
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing import image
from tqdm.auto import tqdm

# NOTE: each `preprocess_input` import below rebinds the same name, so only the
# last binding (ResNet50's) is visible afterwards. The import order is kept
# unchanged on purpose; code that needs one specific model's preprocessing
# should import that function under its own alias.
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.applications.vgg19 import VGG19, preprocess_input
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input
from tensorflow.keras.applications.inception_resnet_v2 import InceptionResNetV2
from tensorflow.keras.applications.inception_resnet_v2 import preprocess_input as preprocess_input_irv2
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input



# VGG16

In [None]:
class FeatureExtractor:
  """Turn a PIL image into the VGG16 `fc1` activation vector (ImageNet weights)."""

  def __init__(self):
    # `preprocess_input` is imported from several keras.applications modules in
    # the import cell and every import rebinds the name. Bind VGG16's own
    # function here so this extractor does not depend on the import order.
    from tensorflow.keras.applications.vgg16 import preprocess_input as vgg16_preprocess_input
    self.preprocess = vgg16_preprocess_input

    # Use VGG-16 as the architecture and ImageNet for the weights
    base_model = VGG16(weights='imagenet')
    # Expose the output of the `fc1` layer instead of the network's final output
    self.model = Model(inputs=base_model.input, outputs=base_model.get_layer('fc1').output)

  def extract(self, img):
    """Return the feature vector of a single image.

    Args:
      img: PIL image; it is resized to 224x224 and converted to RGB here.

    Returns:
      The first (and only) row of `self.model.predict` for the one-image batch.
    """
    # Resize, then force a 3-channel RGB image
    img = img.resize((224, 224)).convert('RGB')
    # Image -> array -> batch of one -> model-specific preprocessing
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    x = self.preprocess(x)
    # verbose=0 keeps predict() from printing a progress bar for every image
    return self.model.predict(x, verbose=0)[0]

In [None]:
# Folder containing the images to index
tot_image_path = r"merged_tot_image"
# os.listdir returns names in arbitrary order; sort them so that position-based
# loops over file_list address the same files on every run.
file_list = sorted(os.listdir(tot_image_path))
len(file_list)

In [None]:
# Folder under which the per-image feature vectors (.npy) are saved
tot_feature_path = "merged_features"

In [None]:
features = []
img_paths = []

fe = FeatureExtractor()

# np.save does not create missing folders; make sure the target exists once
# instead of failing (and printing an error) for every single image.
os.makedirs(tot_feature_path, exist_ok=True)

# Positions of file_list handled by this run. Slicing clamps to the list length,
# so a folder with fewer files no longer produces one IndexError per missing index.
start_idx, end_idx = 1609, 10000

# Save an image feature vector for every database image in the selected slice
for file_name in tqdm(file_list[start_idx:end_idx]):
  image_path = tot_image_path + '/{}'.format(file_name)  # path of the image file inside the folder
  try:
    # Context manager so the file handle is released after extraction
    with Image.open(image_path) as img:
      feature = fe.extract(img=img)

    # Append both only after extraction succeeded so features[k] always
    # belongs to img_paths[k], even when some images fail.
    img_paths.append(image_path)
    features.append(feature)

    # splitext removes just the extension; str.strip('.jpg') would strip any of
    # the characters '.', 'j', 'p', 'g' from BOTH ends of the name.
    feature_path = tot_feature_path + "/" + os.path.splitext(file_name)[0] + ".npy"  # where the feature is stored
    np.save(feature_path, feature)
  except Exception as e:
    print('예외가 발생했습니다.', image_path, e)

## L2: 유클리디안 거리

In [None]:
# Load the query image (path of the image to look up) and compute its feature vector
img = Image.open("2928418_36357327_0.jpg")
query = fe.extract(img)

# Euclidean (L2) distance between the query and every stored feature vector
dists = np.linalg.norm(features - query, axis = 1)

# Indices of the 30 images with the smallest distance, closest first
ids = np.argsort(dists)[:30]
scores = [(dists[idx], img_paths[idx], idx) for idx in ids]

# Show the hits on a 5 x 6 grid, each titled "<distance>/<first 4 chars of the file name>"
fig = plt.figure(figsize=(8, 8))
axes = []
for a in range(5 * 6):
    score = scores[a]
    hit_dist, hit_path = score[0], score[1]
    ax = fig.add_subplot(5, 6, a + 1)
    axes.append(ax)
    ax.set_title(str(round(hit_dist, 2)) + "/" + hit_path.split('/')[-1][:4])
    ax.axis('off')
    ax.imshow(Image.open(hit_path))
fig.tight_layout()
plt.show()

## Cosine similarity : 코사인 유사도

In [None]:
# Load the query image (path of the image to look up) and compute its feature vector
img = Image.open("2928418_36357327_0.jpg")
query = fe.extract(img)

# Cosine similarity between every stored feature vector and the query
sims = cosine_similarity(features, query.reshape(1, -1))

# Convert similarity scores to distances (smaller = more similar)
dists = 1 - sims.flatten()

# The 30 images with the lowest distance, closest first. The previous trailing
# [::-1] displayed the best match last, unlike the other ranking cells.
ids = np.argsort(dists)[:30]

scores = [(dists[idx], img_paths[idx], idx) for idx in ids]

# Visualize the result on a 5 x 6 grid
fig = plt.figure(figsize=(8, 8))
axes = []
# enumerate() rather than a fixed range(30): with fewer than 30 results the
# remaining grid cells just stay empty instead of raising IndexError.
for a, score in enumerate(scores):
    ax = fig.add_subplot(5, 6, a + 1)
    axes.append(ax)
    subplot_title = str(round(score[0], 2)) + "/" + score[1].split('/')[-1][:4]
    ax.set_title(subplot_title)
    ax.axis('off')
    ax.imshow(Image.open(score[1]))
fig.tight_layout()
plt.show()


## L1 : 맨해튼 거리

In [None]:
# Load the query image (path of the image to look up) and compute its feature vector
img = Image.open("2928418_36357327_0.jpg")
query = fe.extract(img)

# Manhattan (L1) distance between every stored feature vector and the query.
# These values are already distances: the former `1 - value` step (copied from
# the cosine-similarity cell) inverted the ranking and returned the 30 most
# dissimilar images.
dists = manhattan_distances(features, query.reshape(1, -1)).ravel()

# The 30 images with the lowest distance, closest first
ids = np.argsort(dists)[:30]

scores = [(dists[idx], img_paths[idx], idx) for idx in ids]

# Visualize the result on a 5 x 6 grid
fig = plt.figure(figsize=(8, 8))
axes = []
# enumerate() rather than a fixed range(30): with fewer than 30 results the
# remaining grid cells just stay empty instead of raising IndexError.
for a, score in enumerate(scores):
    ax = fig.add_subplot(5, 6, a + 1)
    axes.append(ax)
    subplot_title = str(round(score[0], 2)) + "/" + score[1].split('/')[-1][:4]
    ax.set_title(subplot_title)
    ax.axis('off')
    ax.imshow(Image.open(score[1]))
fig.tight_layout()
plt.show()


# VGG19

In [None]:
# VGG19
class FeatureExtractor:
    """Turn a PIL image into the output vector of VGG19's `predictions` layer."""

    def __init__(self):
        # `preprocess_input` is imported from several keras.applications modules
        # in the import cell and every import rebinds the name. Bind VGG19's own
        # function here so this extractor does not depend on the import order.
        from tensorflow.keras.applications.vgg19 import preprocess_input as vgg19_preprocess_input
        self.preprocess = vgg19_preprocess_input

        base_model = VGG19(weights='imagenet')
        # NOTE(review): the selected layer is `predictions`, i.e. presumably the
        # network's final output rather than an intermediate fully-connected
        # feature layer -- confirm this is the intended descriptor. Left
        # unchanged because previously saved vectors come from this layer.
        self.model = Model(inputs=base_model.input, outputs=base_model.get_layer('predictions').output)

    def extract(self, img):
        """Return the model output for a single image.

        Args:
            img: PIL image; it is resized to 224x224 and converted to RGB here.

        Returns:
            The first (and only) row of `self.model.predict` for the one-image batch.
        """
        # Resize, then force a 3-channel RGB image
        img = img.resize((224, 224)).convert('RGB')
        # Image -> array -> batch of one -> model-specific preprocessing
        x = image.img_to_array(img)
        x = np.expand_dims(x, axis=0)
        x = self.preprocess(x)
        # verbose=0 keeps predict() from printing a progress bar for every image
        return self.model.predict(x, verbose=0)[0]

In [None]:
features = []
img_paths = []

fe = FeatureExtractor()

# Image folder. Changing into it and listing it are loop-invariant, so both are
# done once here instead of on every iteration. The listing is sorted because
# os.listdir order is arbitrary and the loop below selects files by position.
path = 'C:/Users/NT550/Desktop/DL_project/image'
os.chdir(path)
files = sorted(os.listdir(path))

# Output location for the feature vectors; the folder is created if missing.
# NOTE(review): there is no '/' after the last "features", so each file is
# written as ".../DL_project/features/features<name>.npy" -- confirm that the
# file-name prefix is intended.
feature_prefix = "C:/Users/NT550/Desktop/DL_project/features/features"
os.makedirs(os.path.dirname(feature_prefix), exist_ok=True)

# Save an image feature vector for each database image in the selected slice
# (slicing clamps to the number of files actually present).
for file_name in tqdm(files[9990:10010]):
    img_file = path + '/' + file_name  # full path of the image file
    try:
        # Context manager so the file handle is released after extraction
        with Image.open(img_file) as img:
            feature = fe.extract(img=img)

        # Append both only after extraction succeeded so features[k] always
        # belongs to img_paths[k], even when some images fail.
        img_paths.append(img_file)
        features.append(feature)

        # splitext removes just the extension; str.strip('.jpg') would strip any
        # of the characters '.', 'j', 'p', 'g' from BOTH ends of the name.
        feature_path = feature_prefix + os.path.splitext(file_name)[0] + ".npy"
        np.save(feature_path, feature)
    except Exception as e:
        print('예외가 발생했습니다.', img_file, e)

In [None]:
# Load the first 10000 saved VGG19 feature vectors.
# The listing is sorted because os.listdir order is arbitrary, and these vectors
# are later matched by position against a separately built list of image paths.
features_dir_vgg19 = 'C:/Users/NT550/Desktop/DL_project/features_VGG19'
file_list = sorted(os.listdir(features_dir_vgg19))
features_vgg19 = [np.load(f'{features_dir_vgg19}/{file}') for file in file_list[:10000]]

In [None]:
# Build the full paths of the first 10000 images.
# The per-file name must not be called `image`: that would overwrite the
# `tensorflow.keras.preprocessing.image` module at notebook level and break
# every later FeatureExtractor.extract() call (`image.img_to_array`).
# Sorted because os.listdir order is arbitrary and these paths are matched by
# position against the loaded feature vectors.
path = "C:/Users/NT550/Desktop/DL_project/image"
image_list = sorted(os.listdir(path))
image_path = [path + '/' + file_name for file_name in image_list[:10000]]

In [None]:
# Query image (path of the image to look up) and its VGG19 output vector
img = Image.open("C:/Users/NT550/Desktop/DL_project/bg_1.jpg")
fe = FeatureExtractor()
query_vgg19 = fe.extract(img)

# Distance from the query to every loaded VGG19 vector under three metrics:
# Euclidean (L2), cosine and Manhattan (L1). The sklearn helpers expect a 2-D
# query, hence the single-row reshape.
query_row = query_vgg19.reshape(1, -1)
dists_vgg19 = np.linalg.norm(features_vgg19 - query_vgg19, axis=1)
dists_vgg19_cosine = cosine_distances(features_vgg19, query_row).ravel()
dists_vgg19_manhattan = manhattan_distances(features_vgg19, query_row).ravel()

## cosine

In [None]:
# Load the query image (path of the image to look up)
img = Image.open("C:/Users/NT550/Desktop/DL_project/bg_0.jpg")

fe = FeatureExtractor()
# Extract its features
query = fe.extract(img)

# Cosine distance between the query and the loaded VGG19 vectors.
# features_vgg19 is used (not `features`) because it is the list loaded with the
# same [:10000] slice as image_path; `features` only holds the vectors extracted
# in the loop above and is not index-aligned with image_path.
# NOTE(review): alignment assumes both folders list their files in the same
# order -- verify.
dists = cosine_distances(features_vgg19, query.reshape(1, -1)).ravel()

# The 30 images with the lowest distance, closest first
ids = np.argsort(dists)[:30]

scores = [(dists[idx], image_path[idx], idx) for idx in ids]

# Visualize the result on a 5 x 6 grid
fig = plt.figure(figsize=(8, 8))
axes = []
# enumerate() rather than a fixed range(30): with fewer than 30 results the
# remaining grid cells just stay empty instead of raising IndexError.
for a, score in enumerate(scores):
    ax = fig.add_subplot(5, 6, a + 1)
    axes.append(ax)
    subplot_title = str(round(score[0], 2)) + "/" + score[1].split('/')[-1][:4]
    ax.set_title(subplot_title)
    ax.axis('off')
    ax.imshow(Image.open(score[1]))
fig.tight_layout()
plt.show()

## 맨해튼

# Load the query image (path of the image to look up)
img = Image.open("C:/Users/NT550/Desktop/DL_project/bg_0.jpg")

fe = FeatureExtractor()
# Extract its features
query = fe.extract(img)

# Manhattan (L1) distance between the query and the loaded VGG19 vectors.
# features_vgg19 is used (not `features`) because it is the list loaded with the
# same [:10000] slice as image_path; `features` only holds the vectors extracted
# in the loop above and is not index-aligned with image_path.
# NOTE(review): alignment assumes both folders list their files in the same
# order -- verify.
dists = manhattan_distances(features_vgg19, query.reshape(1, -1)).ravel()

# The 30 images with the lowest distance, closest first
ids = np.argsort(dists)[:30]

scores = [(dists[idx], image_path[idx], idx) for idx in ids]

# Visualize the result on a 5 x 6 grid
fig = plt.figure(figsize=(8, 8))
axes = []
# enumerate() rather than a fixed range(30): with fewer than 30 results the
# remaining grid cells just stay empty instead of raising IndexError.
for a, score in enumerate(scores):
    ax = fig.add_subplot(5, 6, a + 1)
    axes.append(ax)
    subplot_title = str(round(score[0], 2)) + "/" + score[1].split('/')[-1][:4]
    ax.set_title(subplot_title)
    ax.axis('off')
    ax.imshow(Image.open(score[1]))
fig.tight_layout()
plt.show()

# InceptionV3

In [None]:
class FeatureExtractor:
  """Turn a PIL image into the output vector of InceptionV3's `predictions` layer."""

  def __init__(self):
    # The notebook-level name `preprocess_input` is rebound by every
    # keras.applications import and ends up being ResNet50's function, which
    # does not apply the input scaling InceptionV3 expects. Bind InceptionV3's
    # own function explicitly.
    # NOTE(review): vectors cached on disk with the old preprocessing are not
    # comparable with vectors produced now -- regenerate them.
    from tensorflow.keras.applications.inception_v3 import preprocess_input as inception_v3_preprocess_input
    self.preprocess = inception_v3_preprocess_input

    base_model = InceptionV3(weights='imagenet')
    # NOTE(review): the selected layer is `predictions`, i.e. presumably the
    # network's final output rather than an intermediate feature layer --
    # confirm this is the intended descriptor.
    self.model = Model(inputs=base_model.input, outputs=base_model.get_layer('predictions').output)
    # Last layer names of InceptionV3, for reference:
    # ['batch_normalization_93', 'activation_85', 'mixed9_1', 'concatenate_1', 'activation_93', 'mixed10', 'avg_pool', 'predictions']

  def extract(self, img):
    """Return the model output for a single image.

    Args:
      img: PIL image; it is resized to 299x299 and converted to RGB here.

    Returns:
      The first (and only) row of `self.model.predict` for the one-image batch
      (returned as-is, without L2 normalisation).
    """
    # Resize, then force a 3-channel RGB image
    img = img.resize((299, 299)).convert('RGB')
    # Image -> array -> batch of one -> model-specific preprocessing
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    x = self.preprocess(x)
    # verbose=0 keeps predict() from printing a progress bar for every image
    return self.model.predict(x, verbose=0)[0]
  


In [None]:
# Start the section with empty stores and a freshly built extractor.
# NOTE(review): no cell in this section appends to img_paths, yet the ranking
# cells index it -- confirm where it is meant to be filled.
features, img_paths = [], []
fe = FeatureExtractor()


In [None]:
# Load the feature vectors (.npy) stored in the features folder
features_dir = r"C:\Users\NT550009\Desktop\features"
os.chdir(features_dir)  # working-directory change kept as before; loading below does not rely on it
# Sorted for a reproducible order (os.listdir order is arbitrary); anything
# that is not a .npy file is skipped so np.load is never handed a stray file.
feature_list = sorted(name for name in os.listdir(features_dir) if name.endswith('.npy'))
# Show the count and a short sample rather than dumping every file name
print(len(feature_list), feature_list[:3])
# e.g. ['2734389_3.npy', '2734389_4.npy', '2734391_0.npy', ...

# Read every file into `features`. The list is rebuilt from scratch so that
# re-running this cell does not append the same vectors a second time.
features = [np.load(os.path.join(features_dir, file_path)) for file_path in feature_list]

## L2

In [None]:
# Load the query image (path of the image to look up)
img = Image.open(r"C:\Users\NT550009\Desktop\recommendation\cropped_image_2.jpg")
# Extract the feature vector of the query
query = fe.extract(img)
# Euclidean (L2) distance between the query and every stored feature vector
dists = np.linalg.norm(features - query, axis=1)

# Results are looked up by position in img_paths, so it must line up with
# `features`; fail with a clear message instead of an IndexError (or silently
# wrong pictures) further down.
if len(img_paths) != len(features):
    raise ValueError(
        "img_paths has {} entries but features has {}; fill img_paths in the same "
        "order as features before ranking".format(len(img_paths), len(features)))

# The 30 images with the lowest distance, closest first
ids = np.argsort(dists)[:30]

scores = [(dists[idx], img_paths[idx], idx) for idx in ids]

# File name without directory and extension, used as the label in the title
pattern = r'[^/\\]+(?=\.[^.]+$)'

# Visualize the result on a 5 x 6 grid
fig = plt.figure(figsize=(8, 8))
axes = []
# enumerate() rather than a fixed range(30): with fewer than 30 results the
# remaining grid cells just stay empty instead of raising IndexError.
for a, score in enumerate(scores):
    ax = fig.add_subplot(5, 6, a + 1)  # keep a handle on every filled subplot
    axes.append(ax)

    string = score[1]
    match = re.search(pattern, string)
    # Fall back to the bare file name when the pattern does not match (e.g. no
    # extension); before, product_num was then undefined or left over from the
    # previous image.
    product_num = match.group() if match else os.path.basename(string)

    subplot_title = str(round(score[0], 2)) + "/" + str(product_num)

    ax.set_title(subplot_title)
    ax.axis('off')  # hide ticks and labels of this subplot
    ax.imshow(Image.open(score[1]))
fig.tight_layout()
plt.show()


## cosine

In [None]:
from sklearn.metrics.pairwise import cosine_distances

# Cosine distance from the query (computed in the previous cell) to every stored vector
query_row = query.reshape(1, -1)
dists = cosine_distances(features, query_row).ravel()

# Indices of the 30 images with the smallest distance, closest first
ids = np.argsort(dists)[:30]
scores = [(dists[idx], img_paths[idx], idx) for idx in ids]

# Show the hits on a 5 x 6 grid, each titled "<distance>/m<1-based index>"
fig = plt.figure(figsize=(8, 8))
axes = []
for a in range(5 * 6):
    score = scores[a]
    hit_dist, hit_path, hit_idx = score
    ax = fig.add_subplot(5, 6, a + 1)
    axes.append(ax)
    ax.set_title(str(round(hit_dist, 2)) + "/m" + str(hit_idx + 1))
    ax.axis('off')
    ax.imshow(Image.open(hit_path))
fig.tight_layout()
plt.show()


## Manhattan

In [None]:
# Manhattan (L1) distance from the query (computed in an earlier cell) to every
# stored vector: sum of absolute coordinate differences per row.
# The unused grand-total `distance` and a first `dists` computation that was
# immediately overwritten have been removed.
dists = np.sum(np.abs(features - query), axis=1)

# The 30 images with the lowest distance, closest first
ids = np.argsort(dists)[:30]

scores = [(dists[idx], img_paths[idx], idx) for idx in ids]

# Visualize the result on a 5 x 6 grid, each titled "<distance>/m<1-based index>"
fig = plt.figure(figsize=(8, 8))
axes = []
# enumerate() rather than a fixed range(30): with fewer than 30 results the
# remaining grid cells just stay empty instead of raising IndexError.
for a, score in enumerate(scores):
    ax = fig.add_subplot(5, 6, a + 1)
    axes.append(ax)
    subplot_title = str(round(score[0], 2)) + "/m" + str(score[2] + 1)
    ax.set_title(subplot_title)
    ax.axis('off')
    ax.imshow(Image.open(score[1]))
fig.tight_layout()
plt.show()


# ResNet50

In [None]:
class FeatureExtractor:
    """Turn a PIL image into the output vector of ResNet50's `predictions` layer."""

    def __init__(self):
        # `preprocess_input` is imported from several keras.applications modules
        # in the import cell and every import rebinds the name. Bind ResNet50's
        # own function here so this extractor does not depend on the import order.
        from tensorflow.keras.applications.resnet50 import preprocess_input as resnet50_preprocess_input
        self.preprocess = resnet50_preprocess_input

        base_model = ResNet50(weights='imagenet')
        # NOTE(review): the selected layer is `predictions`, i.e. presumably the
        # network's final output rather than an intermediate feature layer --
        # confirm this is the intended descriptor.
        self.model = Model(inputs=base_model.input, outputs=base_model.get_layer('predictions').output)

    def extract(self, img):
        """Return the model output for a single image.

        Args:
            img: PIL image; it is resized to 224x224 and converted to RGB here.

        Returns:
            The first (and only) row of `self.model.predict` for the one-image batch.
        """
        # Resize, then force a 3-channel RGB image. (A leftover debug
        # `print(img)` that ran for every image has been removed.)
        img = img.resize((224, 224)).convert('RGB')
        # Image -> array -> batch of one -> model-specific preprocessing
        x = image.img_to_array(img)
        x = np.expand_dims(x, axis=0)
        x = self.preprocess(x)
        # verbose=0 keeps predict() from printing a progress bar for every image
        return self.model.predict(x, verbose=0)[0]

In [None]:
features = []
img_paths = []

fe = FeatureExtractor()

# Image folder. Changing into it and listing it are loop-invariant, so both are
# done once here instead of on every iteration. The listing is sorted because
# os.listdir order is arbitrary and the loop below selects files by position.
path = 'C:/Users/NT550/Desktop/DL_project/image'
os.chdir(path)
files = sorted(os.listdir(path))

# Output location for the feature vectors; the folder is created if missing.
# NOTE(review): there is no '/' after the last "features", so each file is
# written as ".../DL_project/features/features<name>.npy" -- confirm that the
# file-name prefix is intended.
feature_prefix = "C:/Users/NT550/Desktop/DL_project/features/features"
os.makedirs(os.path.dirname(feature_prefix), exist_ok=True)

# Save an image feature vector for each database image in the selected slice
# (slicing clamps to the number of files actually present).
for file_name in tqdm(files[31082:45000]):
    img_file = path + '/' + file_name  # full path of the image file
    try:
        # Context manager so the file handle is released after extraction
        with Image.open(img_file) as img:
            feature = fe.extract(img=img)

        # Append both only after extraction succeeded so features[k] always
        # belongs to img_paths[k], even when some images fail.
        img_paths.append(img_file)
        features.append(feature)

        # splitext removes just the extension; str.strip('.jpg') would strip any
        # of the characters '.', 'j', 'p', 'g' from BOTH ends of the name.
        feature_path = feature_prefix + os.path.splitext(file_name)[0] + ".npy"
        np.save(feature_path, feature)
    except Exception as e:
        print('예외가 발생했습니다.', img_file, e)

## L2

In [None]:
# Plain (L2) similarity
img = Image.open("C:/Users/NT550/Desktop/DL_project/bg_0.jpg")  # path of the query image
fe = FeatureExtractor()
# Extract its features
query = fe.extract(img)
query_row = query.reshape(1, -1)  # the sklearn helpers expect a 2-D query

# Distance comparison (L2 norm) -- this is the metric used for the ranking below
dists = np.linalg.norm(features - query, axis=1)

# Cosine distance
dists_cosine = cosine_distances(features, query_row).ravel()

# Euclidean distance
dists_euclidean = euclidean_distances(features, query_row).ravel()

# Manhattan distance
dists_manhattan = manhattan_distances(features, query_row).ravel()

# The 30 images with the lowest distance, closest first
ids = np.argsort(dists)[:30]

# Look the paths up in img_paths, the list built together with `features`;
# `image_path` is not index-aligned with these vectors.
scores = [(dists[idx], img_paths[idx], idx) for idx in ids]

# Visualize the result on a 5 x 6 grid
fig = plt.figure(figsize=(8, 8))
axes = []
# enumerate() rather than a fixed range(30): with fewer than 30 results the
# remaining grid cells just stay empty instead of raising IndexError.
for a, score in enumerate(scores):
    ax = fig.add_subplot(5, 6, a + 1)
    axes.append(ax)
    subplot_title = str(round(score[0], 2)) + "/" + score[1].split('/')[-1][:4]
    ax.set_title(subplot_title)
    ax.axis('off')
    ax.imshow(Image.open(score[1]))
fig.tight_layout()
plt.show()

## cosine

In [None]:
# Cosine similarity
img = Image.open("C:/Users/NT550/Desktop/DL_project/bg_0.jpg")  # path of the query image
fe = FeatureExtractor()
# Extract its features
query = fe.extract(img)

# Cosine distance between the query and every stored feature vector
dists = cosine_distances(features, query.reshape(1, -1)).ravel()

# The 30 images with the lowest distance, closest first
ids = np.argsort(dists)[:30]

# Look the paths up in img_paths, the list built together with `features`;
# `image_path` is not index-aligned with these vectors.
scores = [(dists[idx], img_paths[idx], idx) for idx in ids]

# Visualize the result on a 5 x 6 grid
fig = plt.figure(figsize=(8, 8))
axes = []
# enumerate() rather than a fixed range(30): with fewer than 30 results the
# remaining grid cells just stay empty instead of raising IndexError.
for a, score in enumerate(scores):
    ax = fig.add_subplot(5, 6, a + 1)
    axes.append(ax)
    subplot_title = str(round(score[0], 2)) + "/" + score[1].split('/')[-1][:4]
    ax.set_title(subplot_title)
    ax.axis('off')
    ax.imshow(Image.open(score[1]))
fig.tight_layout()
plt.show()

## manhattan

In [None]:
# Load the query image (path of the image to look up).
# Image, np, plt and manhattan_distances come from the notebook's import cell,
# so they are no longer re-imported here.
img = Image.open("C:/Users/NT550/Desktop/DL_project/bg_0.jpg")

fe = FeatureExtractor()
# Extract its features
query = fe.extract(img)

# Manhattan (L1) distance between the query and every stored feature vector
dists = manhattan_distances(features, query.reshape(1, -1)).ravel()

# The 30 images with the lowest distance, closest first
ids = np.argsort(dists)[:30]

# Look the paths up in img_paths, the list built together with `features`;
# `image_path` is not index-aligned with these vectors.
scores = [(dists[idx], img_paths[idx], idx) for idx in ids]

# Visualize the result on a 5 x 6 grid
fig = plt.figure(figsize=(8, 8))
axes = []
# enumerate() rather than a fixed range(30): with fewer than 30 results the
# remaining grid cells just stay empty instead of raising IndexError.
for a, score in enumerate(scores):
    ax = fig.add_subplot(5, 6, a + 1)
    axes.append(ax)
    subplot_title = str(round(score[0], 2)) + "/" + score[1].split('/')[-1][:4]
    ax.set_title(subplot_title)
    ax.axis('off')
    ax.imshow(Image.open(score[1]))
fig.tight_layout()
plt.show()