# Introduction

This notebook contains:
* a color analyser for checking out the three color channels
* K-Means clustering

With K-Means clustering, single images can be inspected individually, and all images listed in a dataframe can be transformed in one pass.

## Imports

In [None]:
import imageio
import matplotlib.pyplot as plt
import numpy as np
from sklearn.cluster import KMeans
import cv2
from skimage.color import rgb2lab, deltaE_cie76
from collections import Counter
import os 
import pandas as pd

## Color Analyser

Output: single color channels (RGB) compared to the original image

In [None]:
# Load the first sample image and report its overall and per-channel values.
pic = imageio.imread('../images/ID_0C6ABQ6L.JPG')

print('Maximum RGB value in this image {}'.format(pic.max()))
print('Minimum RGB value in this image {}'.format(pic.min()))

# One message template per channel index along the last axis (0, 1, 2).
channel_reports = (
    ('Value of only R channel {}', 0),
    ('Value of only G channel {}', 1),
    ('Value of only B channel {}', 2),
)
for template, channel_idx in channel_reports:
    print(template.format(pic[:, :, channel_idx].max()))

# Panel 0 shows the full image, panels 1-3 the individual channels.
fig, axs = plt.subplots(1, 4, figsize=(20, 4), sharey=True)
axs[0].imshow(pic)
for channel_idx in range(3):
    axs[channel_idx + 1].imshow(pic[:, :, channel_idx])
fig.suptitle('color channels (full, R, G, B)')
plt.show()

In [None]:
# Load the second sample image and report its overall and per-channel values.
pic = imageio.imread('../images/ID_0BY2FNBS.JPG')

print('Maximum RGB value in this image {}'.format(pic.max()))
print('Minimum RGB value in this image {}'.format(pic.min()))

# One message template per channel index along the last axis (0, 1, 2).
channel_reports = (
    ('Value of only R channel {}', 0),
    ('Value of only G channel {}', 1),
    ('Value of only B channel {}', 2),
)
for template, channel_idx in channel_reports:
    print(template.format(pic[:, :, channel_idx].max()))

# Panel 0 shows the full image, panels 1-3 the individual channels.
fig, axs = plt.subplots(1, 4, figsize=(20, 4), sharey=True)
axs[0].imshow(pic)
for channel_idx in range(3):
    axs[channel_idx + 1].imshow(pic[:, :, channel_idx])
fig.suptitle('color channels (full, R,G,B)')
plt.show()

In [None]:
# Load the third sample image and report its overall and per-channel values.
# `pic` from this cell is the image reused by the K-Means cells further down.
pic = imageio.imread('../images/ID_0FEWYAAG.JPG')

print('Maximum RGB value in this image {}'.format(pic.max()))
print('Minimum RGB value in this image {}'.format(pic.min()))

# One message template per channel index along the last axis (0, 1, 2).
channel_reports = (
    ('Value of only R channel {}', 0),
    ('Value of only G channel {}', 1),
    ('Value of only B channel {}', 2),
)
for template, channel_idx in channel_reports:
    print(template.format(pic[:, :, channel_idx].max()))

# Panel 0 shows the full image, panels 1-3 the individual channels.
fig, axs = plt.subplots(1, 4, figsize=(20, 4), sharey=True)
axs[0].imshow(pic)
for channel_idx in range(3):
    axs[channel_idx + 1].imshow(pic[:, :, channel_idx])
fig.suptitle('color channels (full, R,G,B)')
plt.show()

In [None]:
# Report the Python type and the array shape of the most recently loaded image.
image_facts = (
    ("The type of image is {}", type(pic)),
    ("Shape of the image: {}", pic.shape),
)
for template, fact in image_facts:
    print(template.format(fact))

## K-Means

* Clustering a single image with K-Means to fine-tune the hyperparameters.
* Clustering all images listed in a dataframe of image paths to transform the whole dataset.

### Single image to find best parameters of K-Means, blur and threshold

In [None]:
# View the loaded image as a NumPy array for the clustering steps below.
picarray = np.asarray(pic)

# Only the last expression of a cell is displayed, so the type has to be
# printed explicitly; as a bare expression it was silently discarded.
print(type(picarray))
picarray.shape

In [None]:
# Collapse the first two axes so each row holds one pixel and each column
# one entry of the third axis.
n_pixels = picarray.shape[0] * picarray.shape[1]
n_channels = picarray.shape[2]
pic2d = picarray.reshape(n_pixels, n_channels)

In [None]:
# Display the shape of the flattened pixel array to verify the reshape.
pic2d.shape

In [None]:
# K-Means: fit 5 clusters on the flattened pixels (fixed seed for repeatability).
kmeans_orig = KMeans(n_clusters=5, n_init=5, max_iter=50, random_state=42)
kmeans_orig.fit(pic2d)

# Arrange the per-pixel cluster labels back into the image's 2-D layout.
img_orig = kmeans_orig.labels_.astype(np.uint8).reshape(picarray.shape[:2])

In [None]:
# Blur filter: average the label map over a square window.
kerne_size = 10
kernel_shape = (kerne_size, kerne_size)
# Every weight is 1 / window area, so the weights sum to one.
kernel = np.ones(kernel_shape, np.float32) / (kerne_size ** 2)
dst = cv2.filter2D(img_orig, ddepth=-1, kernel=kernel)

In [None]:
# Binary output: pixels above the mean of the blurred map become 255,
# all remaining pixels become 0.
threshold = dst.mean()
pred_mask = np.where(dst > threshold, 255, 0).astype(dst.dtype)

In [None]:
# Inspect the largest value in the blurred label map.
dst.max()

In [None]:
# Save the binary mask of the single test image.
# The target folder is spelled out here because `images_path` is only assigned
# in a later cell, so using it at this point fails on a fresh top-to-bottom run.
single_out_dir = os.path.join('../images/', 'clustered_images')
os.makedirs(single_out_dir, exist_ok=True)  # the folder may not exist yet
imageio.imsave(os.path.join(single_out_dir, 'ID_0FEWYAAG.JPG'), pred_mask)

In [None]:
# Show the original image next to the three processing stages:
# cluster labels, blurred labels and the final binary mask.
fig, axs = plt.subplots(1, 4, figsize=(20, 4), sharey=True)
stages = (pic, img_orig, dst, pred_mask)
for ax, stage in zip(axs, stages):
    ax.imshow(stage)
fig.suptitle('Clustering with binary output')
plt.show()

### Transform dataset

In [None]:
# Load the CSV that lists the images to be transformed; its `image_id`
# column is turned into file names in the next cell.
df = pd.read_csv('../data/test.csv')

In [None]:
# Build the list of image file names: each id with surrounding whitespace
# removed and the suffix .JPG appended.
def _with_jpg_suffix(raw_id):
    return raw_id.strip() + ".JPG"


df2 = df.copy()  # keep the loaded dataframe untouched
df2['image_id'] = df2['image_id'].apply(_with_jpg_suffix)
images = list(df2['image_id'])

In [None]:
# Path to the folder holding the images to be transformed. The trailing slash
# matters: file names are appended to it by plain string concatenation.
images_path = '../images/'

In [None]:
# Transformation of all the images in the dataframe.
# For every image: cluster its pixels with K-Means, blur the label map with a
# box filter and threshold the result at its mean into a black/white mask.

# The output folder must exist before the masks can be written into it.
output_dir = os.path.join(images_path, 'clust_test')
os.makedirs(output_dir, exist_ok=True)

# The blur kernel does not depend on the image, so it is built once up front.
kerne_size = 8
kernel = np.ones((kerne_size, kerne_size), np.float32) / (kerne_size**2)

for i, path in enumerate(images):
    # Progress report; `i` counts the images finished before this one.
    if i % 10 == 0:
        print(i, 'images processed')

    # Read the image and flatten it to one row per pixel. The trailing -1
    # also covers single-channel images that have no third axis.
    img = imageio.imread(images_path + path)
    img_reshaped = img.reshape((img.shape[0] * img.shape[1], -1))

    # K-Means fit for this image; labels go back into the 2-D pixel layout.
    kmeans_orig = KMeans(n_clusters=5, n_init=5,
                         max_iter=50, random_state=42).fit(img_reshaped)
    img_orig = np.reshape(np.array(kmeans_orig.labels_, dtype=np.uint8),
                          (img.shape[0], img.shape[1]))

    # Blur filter
    dst = cv2.filter2D(img_orig, -1, kernel)

    # Threshold at the mean of the blurred map: above -> 255, otherwise 0.
    threshold = dst.mean()
    pred_mask = np.where(dst > threshold, 255, 0).astype(dst.dtype)

    # Save the clustered image under the same file name as the input.
    imageio.imsave(os.path.join(output_dir, path), pred_mask)