In [5]:
from PIL import Image
from tqdm import tqdm
import numpy as np
from tqdm import tqdm
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from tensorflow.keras.models import Model
import tensorflow as tf
import glob
from umap import UMAP
import pandas as pd

In [6]:
# Instantiate VGG16 with its pretrained ImageNet weights.
base_model = VGG16(weights='imagenet')
# Build a second model that shares the same input but stops at the 'fc1'
# fully-connected layer, so predict() returns that layer's activations.
model = Model(
    inputs=base_model.input,
    outputs=base_model.get_layer('fc1').output,
)

In [7]:
def extract(model, img):
    """Compute an L2-normalised feature vector for a single image.

    Args:
        model: Object whose ``predict`` method accepts a batch of
            preprocessed image arrays and returns one feature row per image.
        img: PIL-style image exposing ``resize`` and ``convert``.

    Returns:
        The first row returned by ``model.predict`` divided by its Euclidean
        norm. When that norm is exactly zero the (all-zero) vector is
        returned unscaled, so the result never contains NaNs produced by a
        0/0 division.
    """
    # Resize the image to the 224x224 input size used by this notebook
    img = img.resize((224, 224))
    # Force three colour channels (greyscale / CMYK / palette inputs included)
    img = img.convert('RGB')
    # Reformat the image: array -> batch of one -> VGG16 preprocessing
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)
    # Extract features for the single item in the batch
    feature = model.predict(x)[0]
    norm = np.linalg.norm(feature)
    if norm == 0:
        # An all-zero activation cannot be normalised; dividing would yield
        # NaNs that break downstream consumers such as UMAP.
        return feature
    return feature / norm

def get_feature(model, image_data:list):
    """Extract one feature vector per image path.

    Args:
        model: Feature extractor forwarded unchanged to ``extract``.
        image_data: Image file paths to process, in order.

    Returns:
        A list with exactly one entry per input path, in the same order:
        the vector returned by ``extract``, or ``None`` when the image could
        not be opened or processed (a warning naming the path is emitted).
        Keeping the ``None`` placeholders preserves positional alignment
        with ``image_data``.
    """
    import warnings  # local import: only needed to report skipped files

    features = []
    for img_path in tqdm(image_data): # Iterate through images
        try:
            # The context manager closes the underlying file handle even
            # when feature extraction fails.
            with Image.open(img_path) as img:
                feature = extract(model, img=img)
        except Exception as exc:
            # Best effort: record the failure and keep going. Catching
            # Exception (not a bare except) lets Ctrl-C / SystemExit
            # still interrupt a long run.
            warnings.warn(f"Skipping {img_path!r}: {exc!r}")
            feature = None
        features.append(feature)
    return features

In [9]:
# Collect the JPEG paths for this image set. glob returns matches in a
# file-system-dependent order, so sort them to make the row order of every
# downstream table reproducible. Backslashes (Windows separators) are
# normalised to forward slashes before sorting.
files = sorted(
    path.replace("\\", "/")
    for path in glob.glob("static/images/kba/kba-0016-1/*.jpg")
)
# Fail with a readable message instead of an IndexError on the preview below.
assert files, "No images matched static/images/kba/kba-0016-1/*.jpg"
# Preview the first path as the cell output
files[0]

'static/images/kba/kba-0016-1/0001.jpg'

In [10]:
# Read temp.txt in text mode and keep its contents as a list with one
# entry per line (line terminators removed by splitlines()).
with open("temp.txt", mode="r") as f:
    urls = f.read().splitlines()

In [11]:
# Run the feature extractor over every collected image path; the result has
# one entry per path, in the same order as `files`.
features = get_feature(model, image_data=files)

100%|████████████████████████████████████████████████████████████████████████████████| 140/140 [00:11<00:00, 12.23it/s]


In [14]:
# One-column table holding the image paths; coordinates are added later.
df = pd.DataFrame({'image': files})

In [15]:
# Reduce the dimensions with UMAP
# get_feature() stores None for images it could not process; UMAP needs a
# dense numeric matrix, so fit only on the rows that have a feature vector.
valid = np.array([feature is not None for feature in features], dtype=bool)
if not valid.any():
    raise ValueError("No image features were extracted; nothing to embed with UMAP.")
feature_matrix = np.stack([feature for feature in features if feature is not None])

# UMAP is stochastic; a fixed seed makes the layout reproducible across runs.
umap = UMAP(random_state=42)
embedding = umap.fit_transform(feature_matrix)

# Keep X_tfm aligned row-for-row with `features` / `df`: images without a
# feature vector get NaN coordinates instead of shifting the other rows.
X_tfm = np.full((len(features), embedding.shape[1]), np.nan, dtype=embedding.dtype)
X_tfm[valid] = embedding

# Apply coordinates
df['x'] = X_tfm[:, 0]
df['y'] = X_tfm[:, 1]
df.to_csv("ready.csv", index=False)