## Image Compression using k-means Clustering

#### Refi Revina (G5501211006)
#### Yusti Qomah (G5501211008)

In [1]:
import numpy as np
import matplotlib.image as img
import imageio
import os
from os import listdir
from PIL import Image
import pandas as pd
from sewar.full_ref import mse

In [2]:
def read_image():
    img_list = []
    file_name = []
    file_size = []
    # Menentukan path/directory tempat gambar
    folder_dir = "C:/Users/acer/trial q/Data/mat1621"
    for img in os.listdir(folder_dir):
 
        # Melakukan pengecekan apakah ekstensi file gambar jpg
        if (img.endswith(".jpg")):
            imgread=imageio.imread(img)
            # scaling
            imgscal = imgread / 255
            img1 = Image.open(img)
            fbyte=len(img1.fp.read())
            img_list.append(imgscal)
            file_name.append(img)
            file_size.append(fbyte)

    return img_list,file_name,file_size

In [3]:
def initialize_means(img, clusters):

    # pembentukan ulang/perubahan matriks mejadi 2d
    points = np.reshape(img, (img.shape[0] * img.shape[1],
                                            img.shape[2]))
    m, n = points.shape
    
    # means adalah array dari centroid
    means = np.zeros((clusters, n))

    # inisialisasi means secara acak
    for i in range(clusters):
        rand1 = int(np.random.random(1)*10)
        rand2 = int(np.random.random(1)*8)
        means[i, 0] = points[rand1, 0]
        means[i, 1] = points[rand2, 1]
        means[i, 0] = points[rand1, 0] 
        means[i, 1] = points[rand2, 1]
        means[i, 2] = points[rand2, 2]
       
    return points, means

In [4]:
# Menghitung jarak
def distance(x1, y1, x2, y2):

    dist = np.square(x1 - x2) + np.square(y1 - y2)
    dist = np.sqrt(dist)

    return dist

In [5]:
def k_means(points, means, clusters):

    iterations = 10 # jumlah iterasi
    m, n = points.shape
    
    # indeks yang berhubungan dengan cluster tempat setiap piksel
    index = np.zeros(m)

    # algoritme k-means
    while(iterations > 0):

        for j in range(len(points)):

            # inisialisasi nilai minimum ke nilai yang besar
            minv = 1000
            temp = None

            for k in range(clusters):

                x1 = points[j, 0]
                y1 = points[j, 1]
                x2 = means[k, 0]
                y2 = means[k, 1]

                if(distance(x1, y1, x2, y2) < minv):
                    minv = distance(x1, y1, x2, y2)
                    temp = k
                    index[j] = k

        for k in range(clusters):

            sumx = 0
            sumy = 0
            count = 0

            for j in range(len(points)):

                if(index[j] == k):
                    sumx += points[j, 0]
                    sumy += points[j, 1]
                    count += 1

            if(count == 0):
                count = 1
                
            means[k, 0] = float(sumx / count)
            means[k, 1] = float(sumy / count)

        iterations -= 1

    return means, index

In [6]:
def compress_image(means, index, img, fnm):

    # mengembalikan gambar hasil kompresi dengan menetapkan setiap piksel ke centroid (pusat) yang sesuai
    centroid = np.array(means)
    recovered = centroid[index.astype(int), :]

    # mengembalikan bentuk matriks 3d (row, col, rgb(3))
    recovered = np.reshape(recovered, (img.shape[0], img.shape[1],
                                                    img.shape[2]))

    # menyimpan gambar hasil kompresi
    imageio.imsave('C:/Users/acer/trial q/Data/mat1621/Hasil C/compressed_' + fnm + '_' + str(clusters) +
                        '_colors.jpg', recovered)
    
    return recovered

In [12]:
def size_hasil():
    file_nameh = []
    file_sizeh = []
    
    # Menentukan path/directory tempat gambar
    folder_dir = "C:/Users/acer/trial q/Data/mat1621/Hasil C/"
    for img in os.listdir(folder_dir):
        print(img)
        # Melakukan pengecekan apakah ekstensi file gambar jpg
        if (img.endswith(".jpg")):
            print(img)
            img1 = Image.open(str(folder_dir)+img)
            fbyte=len(img1.fp.read())
            file_nameh.append(img)
            file_sizeh.append(fbyte)

    return file_nameh,file_sizeh

In [8]:
if __name__ == '__main__':
    idx = 0
    fn=[]
    fs=[]
    __mse_result=[]
    time_process=[]
    df=pd.DataFrame()
    img_list, filename, file_size = read_image()
    c=[]
    ctest=[]
    
    while True:
        import time
        start_time = time.time()
        clusters = int(input('Enter the number of colors in the compressed image\n'))
        for (img,fname,fsize) in zip(img_list,filename,file_size):
            points, means = initialize_means(img, clusters)
            means, index = k_means(points, means, clusters)
            try :
                img2=compress_image(means, index, img, fname)  
                end_time = time.time() - start_time
                print("--- %s seconds ---" % (time.time() - start_time))
                mse_result = mse(img,img2)
                print('mse ' , mse_result)
                print ('means ',means)
                print ('index ',index)
                print ('clusters : ',clusters)
                time_process.append(end_time)
                fn.append(fname)
                fs.append(fsize)
                __mse_result.append(mse_result)
                ctest.append(clusters)
                
            except Exception as e :
                print(" error : ",e)
                pass
            
        if clusters == 0:
            break
            
        print ('idx : ', idx)
        idx= idx+1

Enter the number of colors in the compressed image
4




--- 2371.4773523807526 seconds ---
mse  0.11561920237549124
means  [[0.66188282 0.6345883  1.        ]
 [0.42361587 0.41234642 1.        ]
 [0.24223277 0.26711713 1.        ]
 [0.13871896 0.17188851 1.        ]]
index  [0. 0. 0. ... 1. 2. 2.]
clusters :  4




--- 6908.527924537659 seconds ---
mse  0.01525297110790749
means  [[0.73252469 0.73267503 0.75294118]
 [0.82598916 0.83254987 0.75294118]
 [0.06868296 0.06162824 0.75294118]
 [0.75048804 0.38129148 0.74901961]]
index  [0. 0. 0. ... 1. 1. 1.]
clusters :  4




--- 7120.948247432709 seconds ---
mse  0.042477717805029784
means  [[0.10872262 0.12405242 0.40784314]
 [0.53500485 0.54264743 0.47058824]
 [0.86614444 0.80744679 0.3254902 ]
 [0.35917399 0.32459289 0.54117647]]
index  [2. 1. 1. ... 3. 3. 1.]
clusters :  4




--- 7416.853531122208 seconds ---
mse  0.013324950005558871
means  [[0.38980516 0.57711457 0.19607843]
 [0.25719147 0.37497461 0.18823529]
 [0.72301599 0.77129021 0.23921569]
 [0.12493277 0.21320958 0.20784314]]
index  [0. 0. 0. ... 1. 1. 1.]
clusters :  4




--- 7620.241701364517 seconds ---
mse  0.04795134364464624
means  [[0.28141485 0.34104559 0.03529412]
 [0.58612044 0.49156442 0.03137255]
 [0.91989758 0.88391525 0.03529412]
 [0.14098319 0.17352185 0.03137255]]
index  [3. 3. 3. ... 0. 0. 0.]
clusters :  4




--- 10897.19723892212 seconds ---
mse  0.030179916550303627
means  [[0.76066201 0.68497685 0.14509804]
 [0.27502911 0.24579064 0.14901961]
 [0.51107379 0.46354574 0.14509804]
 [0.12335095 0.1207934  0.1254902 ]]
index  [3. 3. 3. ... 0. 2. 2.]
clusters :  4




--- 11092.72267627716 seconds ---
mse  0.022262009295705012
means  [[0.52718716 0.4495395  0.54901961]
 [0.20155942 0.1452082  0.54901961]
 [0.72752196 0.66057208 0.57647059]
 [0.86785709 0.82629286 0.57647059]]
index  [2. 2. 2. ... 0. 0. 2.]
clusters :  4




--- 18187.131343364716 seconds ---
mse  0.01825537938825847
means  [[0.83853795 0.55275681 0.49803922]
 [0.31272351 0.35176622 0.49803922]
 [0.5842894  0.61281696 0.50196078]
 [0.1120403  0.23380955 0.49803922]]
index  [2. 2. 2. ... 3. 3. 3.]
clusters :  4




--- 18424.89339184761 seconds ---
mse  0.017211668951891378
means  [[0.42155207 0.27203314 0.09019608]
 [0.14681857 0.10531875 0.31372549]
 [0.6711318  0.48577283 0.31372549]
 [0.89457795 0.77062863 0.4       ]]
index  [0. 1. 2. ... 3. 3. 3.]
clusters :  4




--- 19980.34034538269 seconds ---
mse  0.019882742113220712
means  [[0.05026848 0.2245417  0.54901961]
 [0.89983468 0.73768535 0.55294118]
 [0.05220797 0.35490461 0.55294118]
 [0.63581297 0.34397159 0.55294118]]
index  [2. 2. 2. ... 0. 0. 0.]
clusters :  4
idx :  0
Enter the number of colors in the compressed image
8




--- 8666.65765285492 seconds ---
mse  0.11560475512867634
means  [[0.66189084 0.63460073 1.        ]
 [0.42388203 0.41256634 1.        ]
 [0.24408747 0.26876831 1.        ]
 [0.1402993  0.17373268 1.        ]
 [0.09396863 0.0692126  1.        ]
 [0.03826288 0.0436939  1.        ]
 [0.         0.         1.        ]
 [0.         0.         1.        ]]
index  [0. 0. 0. ... 1. 2. 2.]
clusters :  8




--- 42209.46343755722 seconds ---
mse  0.01505746934953346
means  [[0.82682893 0.83331121 0.75294118]
 [0.32708737 0.25950755 0.75294118]
 [0.09378903 0.08590308 0.75294118]
 [0.03902083 0.03638212 0.75294118]
 [0.73500374 0.73847221 0.75294118]
 [0.01642979 0.01433644 0.75294118]
 [0.82393969 0.39658848 0.74901961]
 [0.00267795 0.00131864 0.74901961]]
index  [4. 4. 4. ... 0. 0. 0.]
clusters :  8




--- 42551.71320581436 seconds ---
mse  0.020772274156799598
means  [[0.04292012 0.05415483 0.54117647]
 [0.89508983 0.83454489 0.58431373]
 [0.48379677 0.42315198 0.23529412]
 [0.61320635 0.60889311 0.40784314]
 [0.37225293 0.47616783 0.47058824]
 [0.2917848  0.30370618 0.16078431]
 [0.16509364 0.18114826 0.23529412]
 [0.57296627 0.15623906 0.16078431]]
index  [3. 3. 3. ... 5. 2. 3.]
clusters :  8




--- 43091.99394464493 seconds ---
mse  0.011683375063326567
means  [[0.49886713 0.69568774 0.25490196]
 [0.79559191 0.81961971 0.21960784]
 [0.42700099 0.55985621 0.23529412]
 [0.26957734 0.43311122 0.18431373]
 [0.32798554 0.57352771 0.23529412]
 [0.18016698 0.28945027 0.18823529]
 [0.10319843 0.16965744 0.23529412]
 [0.55680882 0.40859349 0.20784314]]
index  [4. 4. 4. ... 3. 3. 3.]
clusters :  8




--- 43401.753668785095 seconds ---
mse  0.046703850266787535
means  [[0.6119755  0.51245963 0.03137255]
 [0.30791137 0.38321607 0.03529412]
 [0.34567294 0.18714132 0.03529412]
 [0.9284329  0.89571555 0.03529412]
 [0.09980246 0.14612204 0.03529412]
 [0.18130972 0.257179   0.03137255]
 [0.15583445 0.04992495 0.03529412]
 [0.03981268 0.02121739 0.03529412]]
index  [4. 4. 4. ... 5. 5. 5.]
clusters :  8




--- 43725.556324481964 seconds ---
mse  0.02921263494914311
means  [[0.13055291 0.12234075 0.14509804]
 [0.78628217 0.70528117 0.14509804]
 [0.17238577 0.18004048 0.14901961]
 [0.2477047  0.20944991 0.1372549 ]
 [0.06835581 0.09079376 0.14509804]
 [0.55988926 0.51112421 0.14509804]
 [0.06069099 0.02731725 0.1372549 ]
 [0.35565421 0.32081875 0.14901961]]
index  [2. 0. 0. ... 5. 5. 5.]
clusters :  8




--- 44004.78750371933 seconds ---
mse  0.02058150281104414
means  [[0.68026326 0.60637773 0.55294118]
 [0.36163323 0.29293644 0.54901961]
 [0.1697402  0.11723027 0.5372549 ]
 [0.74587757 0.68151026 0.57647059]
 [0.82061702 0.76876303 0.55294118]
 [0.51851172 0.44176823 0.54901961]
 [0.91143823 0.8801514  0.57647059]
 [0.61794542 0.53355524 0.53333333]]
index  [7. 7. 7. ... 5. 7. 0.]
clusters :  8




--- 50897.421916246414 seconds ---
mse  0.016711343134347443
means  [[0.60656486 0.65081763 0.50196078]
 [0.2216762  0.35319222 0.49803922]
 [0.09645542 0.24122652 0.49803922]
 [0.50045386 0.45749307 0.50196078]
 [0.84440569 0.55916575 0.49803922]
 [0.31928884 0.20849608 0.49803922]
 [0.12287654 0.09209272 0.50196078]
 [0.02212386 0.00399088 0.49803922]]
index  [3. 3. 3. ... 6. 6. 6.]
clusters :  8




--- 52082.58871674538 seconds ---
mse  0.015799078682987532
means  [[0.09932544 0.06969243 0.4       ]
 [0.18510825 0.13491101 0.04313725]
 [0.46555617 0.35175118 0.4       ]
 [0.92674494 0.81462132 0.4       ]
 [0.30347712 0.2179835  0.31372549]
 [0.46202867 0.22886189 0.09019608]
 [0.75640583 0.58798108 0.4       ]
 [0.62158131 0.41896091 0.31372549]]
index  [2. 0. 6. ... 6. 3. 3.]
clusters :  8




--- 55183.61092042923 seconds ---
mse  0.018869574513661284
means  [[0.05193369 0.35797967 0.55294118]
 [0.91508948 0.77159366 0.55294118]
 [0.70138177 0.39588586 0.55294118]
 [0.36552642 0.19564601 0.55294118]
 [0.11869406 0.05617082 0.55294118]
 [0.04038309 0.02669772 0.55294118]
 [0.04272723 0.24252062 0.54901961]
 [0.01802582 0.01264232 0.55294118]]
index  [0. 0. 0. ... 6. 6. 6.]
clusters :  8
idx :  1
Enter the number of colors in the compressed image
0
 error :  index 0 is out of bounds for axis 0 with size 0
 error :  index 0 is out of bounds for axis 0 with size 0
 error :  index 0 is out of bounds for axis 0 with size 0
 error :  index 0 is out of bounds for axis 0 with size 0
 error :  index 0 is out of bounds for axis 0 with size 0
 error :  index 0 is out of bounds for axis 0 with size 0
 error :  index 0 is out of bounds for axis 0 with size 0
 error :  index 0 is out of bounds for axis 0 with size 0
 error :  index 0 is out of bounds for axis 0 with size 0
 error :  index

In [9]:
print (fn)
print (fs)
print (__mse_result)
print(time_process)
print (ctest)

['Bird1.jpg', 'Bird2.jpg', 'Butterfly1.jpg', 'Butterfly2.jpg', 'Butterfly3.jpg', 'Elephant1.jpg', 'Elephant2.jpg', 'Fox1.jpg', 'Goat1.jpg', 'Turtle1.jpg', 'Bird1.jpg', 'Bird2.jpg', 'Butterfly1.jpg', 'Butterfly2.jpg', 'Butterfly3.jpg', 'Elephant1.jpg', 'Elephant2.jpg', 'Fox1.jpg', 'Goat1.jpg', 'Turtle1.jpg']
[301421, 311100, 117696, 115380, 91603, 124721, 93551, 232427, 227612, 224931, 301421, 311100, 117696, 115380, 91603, 124721, 93551, 232427, 227612, 224931]
[0.11561920237549124, 0.01525297110790749, 0.042477717805029784, 0.013324950005558871, 0.04795134364464624, 0.030179916550303627, 0.022262009295705012, 0.01825537938825847, 0.017211668951891378, 0.019882742113220712, 0.11560475512867634, 0.01505746934953346, 0.020772274156799598, 0.011683375063326567, 0.046703850266787535, 0.02921263494914311, 0.02058150281104414, 0.016711343134347443, 0.015799078682987532, 0.018869574513661284]
[2371.4773523807526, 6908.527924537659, 7120.948247432709, 7416.853531122208, 7620.241701364517, 1089

In [13]:
a,b=size_hasil()
print(a,b)

compressed_Bird1.jpg_4_colors.jpg
compressed_Bird1.jpg_4_colors.jpg
compressed_Bird1.jpg_8_colors.jpg
compressed_Bird1.jpg_8_colors.jpg
compressed_Bird2.jpg_4_colors.jpg
compressed_Bird2.jpg_4_colors.jpg
compressed_Bird2.jpg_8_colors.jpg
compressed_Bird2.jpg_8_colors.jpg
compressed_Butterfly1.jpg_4_colors.jpg
compressed_Butterfly1.jpg_4_colors.jpg
compressed_Butterfly1.jpg_8_colors.jpg
compressed_Butterfly1.jpg_8_colors.jpg
compressed_Butterfly2.jpg_4_colors.jpg
compressed_Butterfly2.jpg_4_colors.jpg
compressed_Butterfly2.jpg_8_colors.jpg
compressed_Butterfly2.jpg_8_colors.jpg
compressed_Butterfly3.jpg_4_colors.jpg
compressed_Butterfly3.jpg_4_colors.jpg
compressed_Butterfly3.jpg_8_colors.jpg
compressed_Butterfly3.jpg_8_colors.jpg
compressed_Elephant1.jpg_4_colors.jpg
compressed_Elephant1.jpg_4_colors.jpg
compressed_Elephant1.jpg_8_colors.jpg
compressed_Elephant1.jpg_8_colors.jpg
compressed_Elephant2.jpg_4_colors.jpg
compressed_Elephant2.jpg_4_colors.jpg
compressed_Elephant2.jpg_8_color

In [14]:
hasil_recovered = []
for (i,j) in zip(a,b):
    words=i.split("_")
    ff = words[1]
    kk = words[2]
    print ("ff,kk ",ff,kk)
    hasil_recovered.append([ff,kk,j])
df_result = pd.DataFrame(hasil_recovered,columns=['file_name','K','Size_Result'])   

ff,kk  Bird1.jpg 4
ff,kk  Bird1.jpg 8
ff,kk  Bird2.jpg 4
ff,kk  Bird2.jpg 8
ff,kk  Butterfly1.jpg 4
ff,kk  Butterfly1.jpg 8
ff,kk  Butterfly2.jpg 4
ff,kk  Butterfly2.jpg 8
ff,kk  Butterfly3.jpg 4
ff,kk  Butterfly3.jpg 8
ff,kk  Elephant1.jpg 4
ff,kk  Elephant1.jpg 8
ff,kk  Elephant2.jpg 4
ff,kk  Elephant2.jpg 8
ff,kk  Fox1.jpg 4
ff,kk  Fox1.jpg 8
ff,kk  Goat1.jpg 4
ff,kk  Goat1.jpg 8
ff,kk  Turtle1.jpg 4
ff,kk  Turtle1.jpg 8


In [15]:
my_dictionary = {'file_name':fn
                         ,'file_size':fs
                         ,'K':ctest
                         ,'mse':__mse_result
                         ,'time_process':time_process}

In [16]:
my_dictionary

{'file_name': ['Bird1.jpg',
  'Bird2.jpg',
  'Butterfly1.jpg',
  'Butterfly2.jpg',
  'Butterfly3.jpg',
  'Elephant1.jpg',
  'Elephant2.jpg',
  'Fox1.jpg',
  'Goat1.jpg',
  'Turtle1.jpg',
  'Bird1.jpg',
  'Bird2.jpg',
  'Butterfly1.jpg',
  'Butterfly2.jpg',
  'Butterfly3.jpg',
  'Elephant1.jpg',
  'Elephant2.jpg',
  'Fox1.jpg',
  'Goat1.jpg',
  'Turtle1.jpg'],
 'file_size': [301421,
  311100,
  117696,
  115380,
  91603,
  124721,
  93551,
  232427,
  227612,
  224931,
  301421,
  311100,
  117696,
  115380,
  91603,
  124721,
  93551,
  232427,
  227612,
  224931],
 'K': [4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
 'mse': [0.11561920237549124,
  0.01525297110790749,
  0.042477717805029784,
  0.013324950005558871,
  0.04795134364464624,
  0.030179916550303627,
  0.022262009295705012,
  0.01825537938825847,
  0.017211668951891378,
  0.019882742113220712,
  0.11560475512867634,
  0.01505746934953346,
  0.020772274156799598,
  0.011683375063326567,
  0.046703850266787535,

In [17]:
print(len(fn), len(fs), len(ctest), len(__mse_result), len(time_process))

20 20 20 20 20


In [18]:
df2 = pd.DataFrame.from_dict(my_dictionary)
df2

Unnamed: 0,file_name,file_size,K,mse,time_process
0,Bird1.jpg,301421,4,0.115619,2371.477352
1,Bird2.jpg,311100,4,0.015253,6908.527925
2,Butterfly1.jpg,117696,4,0.042478,7120.948247
3,Butterfly2.jpg,115380,4,0.013325,7416.853531
4,Butterfly3.jpg,91603,4,0.047951,7620.241701
5,Elephant1.jpg,124721,4,0.03018,10897.197239
6,Elephant2.jpg,93551,4,0.022262,11092.722676
7,Fox1.jpg,232427,4,0.018255,18187.131343
8,Goat1.jpg,227612,4,0.017212,18424.893392
9,Turtle1.jpg,224931,4,0.019883,19980.340345


In [19]:
df_result

Unnamed: 0,file_name,K,Size_Result
0,Bird1.jpg,4,204881
1,Bird1.jpg,8,205709
2,Bird2.jpg,4,191862
3,Bird2.jpg,8,213927
4,Butterfly1.jpg,4,78419
5,Butterfly1.jpg,8,79956
6,Butterfly2.jpg,4,75208
7,Butterfly2.jpg,8,77893
8,Butterfly3.jpg,4,62708
9,Butterfly3.jpg,8,64305


In [20]:
df_result['K']=df_result['K'].astype(int)

In [21]:
df = pd.merge(df2, df_result, how="left", on=["file_name", "K"])
df

Unnamed: 0,file_name,file_size,K,mse,time_process,Size_Result
0,Bird1.jpg,301421,4,0.115619,2371.477352,204881
1,Bird2.jpg,311100,4,0.015253,6908.527925,191862
2,Butterfly1.jpg,117696,4,0.042478,7120.948247,78419
3,Butterfly2.jpg,115380,4,0.013325,7416.853531,75208
4,Butterfly3.jpg,91603,4,0.047951,7620.241701,62708
5,Elephant1.jpg,124721,4,0.03018,10897.197239,76563
6,Elephant2.jpg,93551,4,0.022262,11092.722676,49448
7,Fox1.jpg,232427,4,0.018255,18187.131343,147994
8,Goat1.jpg,227612,4,0.017212,18424.893392,138044
9,Turtle1.jpg,224931,4,0.019883,19980.340345,174592
