<a href="https://cognitiveclass.ai/">
    <img src="https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/CV0101/Logo/SNLogo.png" width="200" align="center">
</a>

<h1>Image Compression and Color Quantization using K Means Clustering</h1>


<hr>

In [None]:
# importing OpenCV and urllib for downloading and displaying the bunny image
import urllib.request
import cv2
bunny_image_url = "http://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/CV0101/Dataset/bunny.png"
urllib.request.urlretrieve(bunny_image_url, "bunny.png") # downloads file as "bunny.png"
# cv2.imread reports failure by returning None instead of raising, and `im`
# is used by several later cells. Check here so that a bad download is
# reported where it happens rather than as an unrelated error further down.
im = cv2.imread("bunny.png")
if im is None:
    raise OSError("bunny.png could not be read as an image; re-run this cell to download it again")

In [None]:
# loading standard python modules
import os
import math
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Show the bunny image. The pixels in `im` are converted from BGR to RGB
# channel order first, then drawn without axis ticks.
img_corrected = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
plt.axis('off')
plt.imshow(img_corrected)
# Size of the downloaded file on disk, rounded up to whole units of 1000 bytes
print("Original size of bunny's image is: {} Kilo Bytes".format(math.ceil(os.path.getsize('bunny.png') / 1000)))

In [None]:
# Download the K-Means clustering picture and display it
k_means_url = "http://i.stack.imgur.com/cIDB3.png"
urllib.request.urlretrieve(k_means_url, "K_Means_clustering.png") # downloads file as "K_Means_clustering.png"
# cv2.imread returns None (it does not raise) when the file cannot be decoded,
# so fail here with a clear message instead of inside cvtColor.
k_means_im = cv2.imread("K_Means_clustering.png")
if k_means_im is None:
    raise OSError("K_Means_clustering.png could not be read as an image; re-run this cell to download it again")
# Convert from BGR to RGB channel order before handing the image to matplotlib
k_means_im_corrected = cv2.cvtColor(k_means_im, cv2.COLOR_BGR2RGB)
plt.axis('off')
plt.imshow(k_means_im_corrected)

<h2>Compressing Images using K Means Clustering</h2>

In [None]:
# We are using the sklearn python module and are importing the in built KMeans
# function from it
from sklearn.cluster import KMeans

In [None]:
# we import numpy here to transform image dimensions
import numpy as np

In [None]:
# Take the row and column counts from the bunny image (the `im` variable),
# then flatten it so that each row of the result holds one pixel's 3 channel
# values. This is the 2-D layout passed to KMeans.fit below.
num_rows, num_cols = im.shape[:2]
transform_image_for_KMeans = im.reshape((num_rows * num_cols, 3))

In [None]:
# Perform KMeans to compress image, here K = 8 clusters.
# random_state is fixed so that re-running the notebook gives repeatable
# clusters, and therefore a repeatable compressed file size.
kmeans = KMeans(n_clusters=8, random_state=0)
kmeans.fit(transform_image_for_KMeans)

# The centroids are floating-point values. Round them to the nearest integer
# before converting to uint8, because a direct cast truncates the fraction
# (e.g. 127.9 would become 127).
cluster_centroids = np.rint(kmeans.cluster_centers_).astype(np.uint8)

# labels represent the label of each pixel and which cluster it belongs to.
# uint8 is sufficient here because there are only 8 clusters.
labels = np.asarray(kmeans.labels_, dtype=np.uint8)
# Back to the image's 2-D layout: one label per (row, column) position
labels = labels.reshape(num_rows, num_cols)

In [None]:
# Replace every pixel with the colour of the centroid of its cluster.
# Indexing the centroid table with the (num_rows, num_cols) label array builds
# the whole (num_rows, num_cols, 3) image in a single NumPy operation, which
# gives the same result as assigning the pixels one at a time in a nested
# Python loop but without the per-pixel interpreter overhead.
compressed_image = cluster_centroids[labels]

# Save the compressed image, then read it back from disk and display it
cv2.imwrite("compressed_bunny.png", compressed_image)
compressed_bunny_im = cv2.imread("compressed_bunny.png")
# Convert from BGR to RGB channel order before handing the image to matplotlib
compressed_bunny_im_corrected = cv2.cvtColor(compressed_bunny_im, cv2.COLOR_BGR2RGB)
plt.axis('off')
plt.imshow(compressed_bunny_im_corrected)

In [None]:
# Size of the compressed file on disk, rounded up to whole units of 1000 bytes
print("Compressed size of bunny's image is: {} Kilo Bytes".format(math.ceil(os.path.getsize('compressed_bunny.png') / 1000)))

<h2>Exercise 1</h2>

<p>Below, I have provided a few pictures for you to play with. Feel free to use K Means Clustering (try it out with different values of K) to compress an image. Notice how the image size varies when you increase the value of K.</p>

In [None]:
# We read a fish image here and display it
fish_image_url = "http://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/CV0101/Dataset/fish.png"
urllib.request.urlretrieve(fish_image_url, "fish.png") # downloads file as "fish.png"
# cv2.imread returns None (it does not raise) when the file cannot be decoded,
# so fail here with a clear message instead of inside cvtColor.
im2 = cv2.imread("fish.png")
if im2 is None:
    raise OSError("fish.png could not be read as an image; re-run this cell to download it again")
# Convert from BGR to RGB channel order before handing the image to matplotlib
fish_im_corrected = cv2.cvtColor(im2, cv2.COLOR_BGR2RGB)
plt.axis('off')
plt.imshow(fish_im_corrected)
# Size of the downloaded file on disk, rounded up to whole units of 1000 bytes
print("Original size of fish image is: {} Kilo Bytes".format(str(math.ceil((os.stat('fish.png').st_size)/1000))))

In [None]:
# Write your code here to Perform K Means Clustering on the fish image 
# (stored in variable im2) and compress its size 

#Take the row and column counts from the fish image (the im2 variable), then
#flatten it so that each row of the result holds one pixel's 3 channel values


num_rows_fish, num_cols_fish = im2.shape[:2]
transform_fish_image_for_KMeans = im2.reshape((num_rows_fish * num_cols_fish, 3))


#Perform KMeans to compress fish image here, feel free to choose
#any value for K, (i.e. K < 256) for compressing the image size. Change the
#value of n_clusters_fish below to try a different K.


n_clusters_fish = 16  # exercise: replace 16 with your own value of K
#The labels are stored as uint8 further down, so K has to stay below 256 as
#stated above; larger label values would not fit and would silently wrap.
if not 1 <= n_clusters_fish < 256:
    raise ValueError("n_clusters_fish must be at least 1 and less than 256")

#random_state is fixed so that re-running the cell with the same K gives
#repeatable clusters and a repeatable compressed file size
kmeans_fish = KMeans(n_clusters=n_clusters_fish, random_state=0)
kmeans_fish.fit(transform_fish_image_for_KMeans)
#Round the floating-point centroids to the nearest integer before converting
#to uint8; a direct cast would truncate the fraction instead
cluster_centroids_fish = np.rint(kmeans_fish.cluster_centers_).astype(np.uint8)


#labels represent the label of each pixel and which cluster it belongs to


labels_fish = np.asarray(kmeans_fish.labels_, dtype=np.uint8)
labels_fish = labels_fish.reshape(num_rows_fish, num_cols_fish)

#After running the above code, run the code below


#Replace every pixel with the colour of the centroid of its cluster. Indexing
#the centroid table with the 2-D label array builds the whole
#(num_rows_fish, num_cols_fish, 3) image in a single NumPy operation, which
#gives the same result as filling the pixels one by one in a nested loop.
compressed_image_fish = cluster_centroids_fish[labels_fish]
#Save the compressed image, then read it back from disk and display it
cv2.imwrite("compressed_fish.png", compressed_image_fish)
compressed_fish_im = cv2.imread('compressed_fish.png')
compressed_fish_im_corrected = cv2.cvtColor(compressed_fish_im, cv2.COLOR_BGR2RGB)
plt.axis('off')
plt.imshow(compressed_fish_im_corrected)
#Size of the compressed file on disk, rounded up to whole units of 1000 bytes
print("Compressed size of fish image is: {} Kilo Bytes".format(str(math.ceil((os.stat('compressed_fish.png').st_size)/1000))))




<h2>Exercise 2</h2>

In [None]:
# We read a butterfly image here and display it
butterfly_image_url = "http://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/CV0101/Dataset/butterfly.png"
urllib.request.urlretrieve(butterfly_image_url, "butterfly.png") # downloads file as "butterfly.png"
# cv2.imread returns None (it does not raise) when the file cannot be decoded,
# so fail here with a clear message instead of inside cvtColor.
im3 = cv2.imread("butterfly.png")
if im3 is None:
    raise OSError("butterfly.png could not be read as an image; re-run this cell to download it again")
# Convert from BGR to RGB channel order before handing the image to matplotlib
butterfly_im_corrected = cv2.cvtColor(im3, cv2.COLOR_BGR2RGB)
plt.axis('off')
plt.imshow(butterfly_im_corrected)
# Size of the downloaded file on disk, rounded up to whole units of 1000 bytes
print("Original size of butterfly image is: {} Kilo Bytes".format(str(math.ceil((os.stat('butterfly.png').st_size)/1000))))

In [None]:
# Write your code here to Perform K Means Clustering on the butterfly image 
# (stored in variable im3) and compress its size

#Take the row and column counts from the butterfly image (the im3 variable),
#then flatten it so that each row of the result holds one pixel's 3 channel values


num_rows_butterfly, num_cols_butterfly = im3.shape[:2]
transform_butterfly_image_for_KMeans = im3.reshape((num_rows_butterfly * num_cols_butterfly, 3))


#Perform KMeans to compress butterfly image here, feel free to choose
#any value for K, (i.e. K < 256) for compressing the image size. Change the
#value of n_clusters_butterfly below to try a different K.


n_clusters_butterfly = 16  # exercise: replace 16 with your own value of K
#The labels are stored as uint8 further down, so K has to stay below 256 as
#stated above; larger label values would not fit and would silently wrap.
if not 1 <= n_clusters_butterfly < 256:
    raise ValueError("n_clusters_butterfly must be at least 1 and less than 256")

#random_state is fixed so that re-running the cell with the same K gives
#repeatable clusters and a repeatable compressed file size
kmeans_butterfly = KMeans(n_clusters=n_clusters_butterfly, random_state=0)
kmeans_butterfly.fit(transform_butterfly_image_for_KMeans)
#Round the floating-point centroids to the nearest integer before converting
#to uint8; a direct cast would truncate the fraction instead
cluster_centroids_butterfly = np.rint(kmeans_butterfly.cluster_centers_).astype(np.uint8)


#labels represent the label of each pixel and which cluster it belongs to


labels_butterfly = np.asarray(kmeans_butterfly.labels_, dtype=np.uint8)
labels_butterfly = labels_butterfly.reshape(num_rows_butterfly, num_cols_butterfly)


#After running the above code, run the code below


#Replace every pixel with the colour of the centroid of its cluster. Indexing
#the centroid table with the 2-D label array builds the whole
#(num_rows_butterfly, num_cols_butterfly, 3) image in a single NumPy operation,
#which gives the same result as filling the pixels one by one in a nested loop.
compressed_image_butterfly = cluster_centroids_butterfly[labels_butterfly]
#Save the compressed image, then read it back from disk and display it
cv2.imwrite("compressed_image_butterfly.png", compressed_image_butterfly)
compressed_butterfly_im = cv2.imread('compressed_image_butterfly.png')
compressed_butterfly_im_corrected = cv2.cvtColor(compressed_butterfly_im, cv2.COLOR_BGR2RGB)
plt.axis('off')
plt.imshow(compressed_butterfly_im_corrected)
#Size of the compressed file on disk, rounded up to whole units of 1000 bytes
print("Compressed size of butterfly image is: {} Kilo Bytes".format(str(math.ceil((os.stat('compressed_image_butterfly.png').st_size)/1000))))




<h3>Credits</h3>

This lab was written by <a href="https://www.linkedin.com/in/sacchitchadha/" target="_blank">Sacchit Chadha</a> and revised by Nayef Abou Tayoun.

<p><a href="https://www.linkedin.com/in/sacchitchadha/" target="_blank">Sacchit Chadha</a> is a Software Engineer at IBM and a rising senior pursuing a Bachelor's degree in Computer Science at the University of Waterloo. His work at IBM is focused on Computer Vision, Cloud Computing and Blockchain.</p>
<p>Nayef Abou Tayoun is a Cognitive Data Scientist at IBM, and is pursuing a Master's degree in Artificial Intelligence.</p>