# Image Posterization using K-means Clustering Algorithm

In [9]:
#import PIL as pil
from PIL import Image
from skimage import io
import numpy as np
import pandas as pd
import random
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import sklearn
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn import metrics
from sklearn.cluster import AgglomerativeClustering
import joblib
import time
import os 


Image compression is implemented as a function that takes an image and the percentage of pixels to sample. Its outputs are the selected k and the compressed image.

In [None]:
def smartPostDebug(oldImage, k, percentage, fileName):
    """Posterize an image with K-means and print how long each stage took.

    A jittered grid of pixels is sampled from the image, K-means is fitted on
    the sampled colours, and every pixel of the image is then replaced by the
    (rounded) centre of the cluster it is assigned to. The result is saved as
    a PNG and the fitted model is dumped to ``kmeans.pkl``.

    Args:
        oldImage: PIL image to posterize. It is not modified; an RGB copy is
            processed instead.
        k: Number of clusters, i.e. number of colours in the output.
        percentage: Approximate fraction of pixels to sample, must be > 0.
            The grid stride is ``int(1 / sqrt(percentage))`` in each
            direction (at least 1).
        fileName: Path of the input file. The output path is this string with
            every "Inputs" replaced by "Outputs".

    Raises:
        ValueError: If ``percentage`` is not positive or ``k`` is below 1.
    """
    if not percentage > 0:
        raise ValueError("percentage must be greater than 0")
    if k < 1:
        raise ValueError("k must be at least 1")

    # convert() always returns a copy, so the caller's image stays untouched
    # and grayscale / palette / RGBA inputs all yield 3-tuples below.
    image = oldImage.convert("RGB")
    width, height = image.size
    pixels = image.load()

    # A private generator gives the same sequence as random.seed(100) would,
    # without resetting the global random state for the rest of the program.
    rng = random.Random(100)

    # max(1, ...) keeps the stride valid when percentage >= 1.
    skip = max(1, int(1 / np.sqrt(percentage)))

    timeBeforeSample = time.perf_counter()
    samples = []
    for i in range(0, width, skip):
        for j in range(0, height, skip):
            # Jitter each grid point by up to 3 pixels, clamped to the image.
            i_ = min(max(i + rng.randint(-3, 3), 0), width - 1)
            j_ = min(max(j + rng.randint(-3, 3), 0), height - 1)
            samples.append(pixels[i_, j_])
    timeAfterSample = time.perf_counter()

    kmeans = KMeans(n_clusters=k, random_state=1, n_init=1)
    kmeans.fit(np.asarray(samples, dtype=np.float64))
    timeAfterKmeansFit = time.perf_counter()

    # Persist the fitted model; reloading it straight away is unnecessary.
    joblib.dump(kmeans, 'kmeans.pkl')

    # Classify all pixels in one call instead of one predict() per pixel.
    allPixels = np.asarray(image, dtype=np.float64).reshape(-1, 3)
    labels = kmeans.predict(allPixels)
    # np.rint rounds half to even, like round() on a numpy float.
    palette = np.clip(np.rint(kmeans.cluster_centers_), 0, 255).astype(np.uint8)
    posterized = Image.fromarray(palette[labels].reshape(height, width, 3))
    timeAfterRemap = time.perf_counter()

    # NOTE: if fileName does not contain "Inputs", this equals fileName and
    # the input file is overwritten.
    saveLocation = fileName.replace("Inputs","Outputs")
    outputDir = os.path.dirname(saveLocation)
    if outputDir:
        os.makedirs(outputDir, exist_ok=True)

    posterized.save(saveLocation, format="png")

    print(f"sampling:   {timeAfterSample - timeBeforeSample:.3f} s")
    print(f"kmeans fit: {timeAfterKmeansFit - timeAfterSample:.3f} s")
    print(f"remapping:  {timeAfterRemap - timeAfterKmeansFit:.3f} s")
    #return kmeans.inertia_

In [10]:
#How to decide which pixels are selected:
#Original plan: if the number of pixels to be sampled is 100*k, divide the number of pixels in the image by that
#value and call the result n. Every nth pixel is selected for sampling. To determine whether pixel[i,j] is an nth
#pixel, compute index = j*width + i and check whether index % n == 0.
#NOTE: smartPost below instead samples a regular grid with a stride of about 1/sqrt(percentage) pixels in each
#direction and jitters every grid point by up to 3 pixels.

#inputs are the image, k, the sampling percentage and the file name

def smartPost(oldImage, k, percentage, fileName):
    """Posterize an image with K-means clustering and save it as a PNG.

    A jittered grid of pixels is sampled from the image, K-means is fitted on
    the sampled colours, and every pixel of the image is then replaced by the
    (rounded) centre of the cluster it is assigned to. The fitted model is
    also dumped to ``kmeans.pkl``.

    Args:
        oldImage: PIL image to posterize. It is not modified; an RGB copy is
            processed instead.
        k: Number of clusters, i.e. number of colours in the output.
        percentage: Approximate fraction of pixels to sample, must be > 0.
            The grid stride is ``int(1 / sqrt(percentage))`` in each
            direction (at least 1).
        fileName: Path of the input file. The output path is this string with
            every "Inputs" replaced by "Outputs".

    Returns:
        The ``inertia_`` of the fitted K-means model, computed on the sampled
        pixels only, not on the whole image.

    Raises:
        ValueError: If ``percentage`` is not positive or ``k`` is below 1.
    """
    if not percentage > 0:
        raise ValueError("percentage must be greater than 0")
    if k < 1:
        raise ValueError("k must be at least 1")

    # convert() always returns a copy, so the caller's image stays untouched
    # (important when the same image is posterized for several k) and
    # grayscale / palette / RGBA inputs all yield 3-tuples below.
    image = oldImage.convert("RGB")
    width, height = image.size
    pixels = image.load()

    # A private generator gives the same sequence as random.seed(100) would,
    # without resetting the global random state for the rest of the program.
    rng = random.Random(100)

    # max(1, ...) keeps the stride valid when percentage >= 1.
    skip = max(1, int(1 / np.sqrt(percentage)))

    samples = []
    for i in range(0, width, skip):
        for j in range(0, height, skip):
            # Jitter each grid point by up to 3 pixels, clamped to the image.
            i_ = min(max(i + rng.randint(-3, 3), 0), width - 1)
            j_ = min(max(j + rng.randint(-3, 3), 0), height - 1)
            samples.append(pixels[i_, j_])

    kmeans = KMeans(n_clusters=k, random_state=1, n_init=1)
    kmeans.fit(np.asarray(samples, dtype=np.float64))

    # Persist the fitted model; reloading it straight away is unnecessary.
    joblib.dump(kmeans, 'kmeans.pkl')

    # Classify all pixels in one call instead of one predict() per pixel.
    allPixels = np.asarray(image, dtype=np.float64).reshape(-1, 3)
    labels = kmeans.predict(allPixels)
    # np.rint rounds half to even, like round() on a numpy float.
    palette = np.clip(np.rint(kmeans.cluster_centers_), 0, 255).astype(np.uint8)
    posterized = Image.fromarray(palette[labels].reshape(height, width, 3))

    # NOTE: if fileName does not contain "Inputs", this equals fileName and
    # the input file is overwritten.
    saveLocation = fileName.replace("Inputs","Outputs")
    outputDir = os.path.dirname(saveLocation)
    if outputDir:
        os.makedirs(outputDir, exist_ok=True)

    posterized.save(saveLocation, format="png")
    return kmeans.inertia_

Testing the speed of the program.

In [11]:
# Posterize one exploration image with a fixed k and sampling percentage.
percentage = 0.10
k = 100

currDir = 'Inputs/Explorations'
filename = 'landscape.png'

# Build the input path once; it is used both to open the image and to derive
# the output location inside smartPost.
imagePath = f"{currDir}/{filename}"
currImg = Image.open(imagePath)
smartPost(currImg, k, percentage, imagePath)


KeyboardInterrupt: 

In [None]:
#percentage = 0.02
#
#currDir = 'Inputs/Explorations' 
#distortions = []
#for filename in os.listdir(currDir):
#    if filename.endswith(".png"):
#        currImg = Image.open(str(currDir)+'/'+str(filename))
#        for k in range(2,20):
#            distortions.append(smartPost(currImg, k, percentage, str(currDir)+'/'+str(filename)))
#
#graph = plt.figure(figsize=(15,5))
#graph.plot(range(2,20), distortions)
#graph.title('Elbow Curve')

In [None]:
#percentage = 0.01
#k=100 #remove later
#
#currDir = 'Inputs/Landscapes' 
#for filename in os.listdir(currDir):
#    if filename.endswith(".png"):
#        currImg = Image.open(str(currDir)+'/'+str(filename))
#        smartPost(currImg, k, percentage, str(currDir)+'/'+str(filename))
#
#currDir = 'Inputs/Animals' 
#for filename in os.listdir(currDir):
#    if filename.endswith(".png"):
#        currImg = Image.open(str(currDir)+'/'+str(filename))
#        smartPost(currImg, k, percentage, str(currDir)+'/'+str(filename))
#
#currDir = 'Inputs/Cars' 
#for filename in os.listdir(currDir):
#    if filename.endswith(".png"):
#        currImg = Image.open(str(currDir)+'/'+str(filename))
#        smartPost(currImg, k, percentage, str(currDir)+'/'+str(filename))