In [13]:
#importation des bibliothèques nécessaire
import cv2 
import os
import time
import numpy as np
import pandas as pd
from skimage.io import imread
from scipy.stats import kurtosis, entropy
from skimage.feature import greycomatrix, greycoprops

In [2]:
#REPOSITORY PATH OF PROJECT
(PATH_PROJECT,PATH_DEV) = os.path.split(os.getcwd())
PATH_DATA = os.path.join(PATH_PROJECT,'Data')
PATH_DATA_CBIRH = os.path.join(PATH_DATA,'DATA_CBIRH')
LIST_CLASS = os.listdir(PATH_DATA_CBIRH)
PATH_DATA_CSV = os.path.join(PATH_DATA,'Data_csv')

PATH_LIST =[]

for f in ['fruit', 'car', 'dog', 'person'] :
    PATH_LIST.append(os.path.join(PATH_DATA_CBIRH,f))

## Descripteurs 

In [15]:
def EXTRACT_FEATURS(hist,centroids):

    Vector = []
    Features = []
    for (percent, color) in zip(hist, centroids):     
        color = list(color.flatten())
        color = [np.round(item,2) for item in color ]
        color.append(np.round(percent*100,3) )
        Features.append(color)    
        #print("percent :",np.round(percent*100,3),"\tcolor : ",color)
    return Features

def DOMINANT_COLOR_DESCRIPTOR(img):
    """
    Input : Image
    Output : Vector of Features : ('Color_1','Percent_1','Color_2','Percent_2'','Color_3','Percent_3')
    """
    #start = time.time()
    img = cv2.cvtColor(cv2.imread(img), cv2.COLOR_BGR2RGB)
    # Convert Image from RGB to CIE LUV
    img_Luv = cv2.cvtColor(img, cv2.COLOR_BGR2LUV)
    pixels = np.float32(img_Luv.reshape(-1, 3))
    # Nuber of class chosen by the elbow method
    n_colors = 3
    # Generate cluster centers using the kmeans() methode of opencv
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 200, .1)
    flags = cv2.KMEANS_RANDOM_CENTERS
    _, labels, centers = cv2.kmeans(pixels, n_colors, None, criteria, 10, flags)
    # Generate the histogram to calculate the percent of each color 
    numLabels = np.arange(0, len(np.unique(labels)) + 1)
    hist, bins = np.histogram(labels, bins = numLabels)
    hist = hist.astype("float")
    hist /= hist.sum()
    Features = EXTRACT_FEATURS(hist,centers)
    Flat_Features = [np.round(item,3) for Sublist_Features in Features for item in Sublist_Features]
    #end = time.time()
    #print("[INFO] Applying took {:.2f} seconds".format(end - start))
    return Flat_Features

def DESCRIPTOR_TEXTURE_MATRIX_COCCURRENCE(img):
    """
    Input : Image
    Output : Vector of Features : ('dissimilarity','correlation', 'contrast','homogeneity','energy')
    Compute the contrast for GLCMs with distances [1] and angles [0 degrees]
    """
    #start = time.time()
    Vector = []
    Features = ['dissimilarity','correlation', 'contrast','homogeneity','energy']  
    image = cv2.imread (img,0)
    Greycomatrix = greycomatrix(image, distances=[1], angles=[0], levels=256,symmetric=True, normed=False)
    for f in Features:
        item = greycoprops(Greycomatrix, f)[0, 0]
        Vector.append(np.round(item,3))   
    #end = time.time()
    #print("[INFO] Applying took {:.2f} seconds".format(end - start))
    return Vector

In [5]:
# Extration des caratérisqtiques de texture via hISTOGRAMME

def descriprot_color_hitogramme(image) :
    """
    Input : Image
    
    Output : Vector  : ('mean_color_1', 'var_color_1', 'kurtosis_color_1', 'entropy_color_1',
                        'mean_color_2', 'var_color_2', 'kurtosis_color_2', 'entropy_color_2'
                        'mean_color_3', 'var_color_3', 'kurtosis_color_3', 'entropy_color_3')
    
    calcule fréquence des pixels (comprises entre 0 et 255) en échelle de color
    """
    #start = time.time()
    Vector = []
    #cv2.calcHist (images, canaux, masque, histSize, plages )
    for i, col in enumerate(['b', 'g', 'r']):
        dst = cv2.calcHist(image, [i], None, [256], [0, 256])
        Vector.append(np.mean(dst))
        Vector.append(np.var(dst))
        k = kurtosis(dst,axis = 0)
        Vector.append(k[0])
        e = entropy(dst)
        Vector.append(e[0])
                
    #end = time.time()
    #print("[INFO] Applying took {:.2f} seconds".format(end - start))
    return Vector

In [6]:
def descriprot_texture_hitogramme(image) :
    """
    Input : Image
    
    Output : Vector  : ('mean_color_gray', 'var_color_gray', 'kurtosis_color_gray', 'entropy_color_gray',)
    
    calcule fréquence des pixels (comprises entre 0 et 255) en échelle de color
    """
    #start = time.time()
    Vector = []
    #cv2.calcHist (images, canaux, masque, histSize, plages )
    dst = cv2.calcHist(image, [0], None, [256], [0, 256])
    Vector.append(np.mean(dst))
    Vector.append(np.var(dst))
    k = kurtosis(dst,axis = 0)
    Vector.append(k[0])
    e = entropy(dst)
    Vector.append(e[0])
                
    #end = time.time()
    #print("[INFO] Applying took {:.2f} seconds".format(end - start))
    return Vector

In [7]:
def create_data_feautures_1_bis_color_dominant(PATH_LIST,columns,path_data_csv, name_file):
    """
    Input : 
        *PATH_LIST : list Image Directory Path
        *columns : header of data
        *path_data_csv : CSV File Directory Path
        *name_file : The name of the file (Data_Features)
    Output : 
        *dataframe
        *CSV File "The descriptor database " save in directory 'Data_CSV' 
    """
    start = time.time()
    df = pd.DataFrame(columns = columns)
    for f in range(0,4):
        for i in range(0,25):
            ligne = []
            path_img = os.path.join(PATH_LIST[f],os.listdir(PATH_LIST[f])[i])
            name_img = os.listdir(PATH_LIST[f])[i].split('.')[0]
            ligne.append(name_img)

            #img_color = cv2.cvtColor(cv2.imread(path_img), cv2.COLOR_BGR2RGB)
            ligne.extend(DOMINANT_COLOR_DESCRIPTOR(path_img))

            ligne.append(f)

            # Using append to add the list to DataFrame
            df = df.append(pd.DataFrame([ligne], columns=columns), ignore_index=True)
    
    if os.path.isfile(os.path.join(path_data_csv,name_file))==False:
        df.to_csv(os.path.join(path_data_csv,name_file), header=True, index = False, sep =',')
    else : 
        print("file already exict")
    end = time.time()
    print("[INFO] Applying took {:.2f} seconds".format(end - start))
    return df

In [8]:
def create_data_feautures_2_bis_matrix_coccurrence(PATH_LIST,columns,path_data_csv, name_file):
    """
    Input : 
        *PATH_LIST : list Image Directory Path
        *columns : header of data
        *path_data_csv : CSV File Directory Path
        *name_file : The name of the file (Data_Features)
    Output : 
        *dataframe
        *CSV File "The descriptor database " save in directory 'Data_CSV' 
    """
    start = time.time()
    df = pd.DataFrame(columns = columns)
    for f in range(0,4):
        for i in range(0,25):
            ligne = []
            path_img = os.path.join(PATH_LIST[f],os.listdir(PATH_LIST[f])[i])
            name_img = os.listdir(PATH_LIST[f])[i].split('.')[0]
            ligne.append(name_img)

            #img_gray=cv2.imread (path_img)
            ligne.extend(DESCRIPTOR_TEXTURE_MATRIX_COCCURRENCE(path_img))

            ligne.append(f)

            # Using append to add the list to DataFrame
            df = df.append(pd.DataFrame([ligne], columns=columns), ignore_index=True)
    
    if os.path.isfile(os.path.join(path_data_csv,name_file))==False:
        df.to_csv(os.path.join(path_data_csv,name_file), header=True, index = False, sep =',')
        #print('hola')
    else : 
        print("file already exict")
    end = time.time()
    print("[INFO] Applying took {:.2f} seconds".format(end - start))
    return df

In [9]:
def create_data_feautures_3_bis_hitogramme_color(PATH_LIST,columns,path_data_csv, name_file):
    """
    Input : 
        *PATH_LIST : list Image Directory Path
        *columns : header of data
        *path_data_csv : CSV File Directory Path
        *name_file : The name of the file (Data_Features)
    Output : 
        *dataframe
        *CSV File "The descriptor database " save in directory 'Data_CSV' 
    """
    start = time.time()
    df = pd.DataFrame(columns = columns)
    for f in range(0,4):
        for i in range(0,25):
            ligne = []
            path_img = os.path.join(PATH_LIST[f],os.listdir(PATH_LIST[f])[i])
            name_img = os.listdir(PATH_LIST[f])[i].split('.')[0]
            ligne.append(name_img)

            img_color = cv2.cvtColor(cv2.imread(path_img), cv2.COLOR_BGR2RGB)
            ligne.extend(descriprot_color_hitogramme(img_color))

            ligne.append(f)

            # Using append to add the list to DataFrame
            df = df.append(pd.DataFrame([ligne], columns=columns), ignore_index=True)
    
    if os.path.isfile(os.path.join(path_data_csv,name_file))==False:
        df.to_csv(os.path.join(path_data_csv,name_file), header=True, index = False, sep =',')
    else : 
        print("file already exict")
    end = time.time()
    print("[INFO] Applying took {:.2f} seconds".format(end - start))
    return df


In [10]:
def create_data_feautures_4_bis_hitogramme_texture(PATH_LIST,columns,path_data_csv, name_file):
    """
    Input : 
        *PATH_LIST : list Image Directory Path
        *columns : header of data
        *path_data_csv : CSV File Directory Path
        *name_file : The name of the file (Data_Features)
    Output : 
        *dataframe
        *CSV File "The descriptor database " save in directory 'Data_CSV' 
    """
    start = time.time()
    df = pd.DataFrame(columns = columns)
    for f in range(0,4):
        for i in range(0,25):
            ligne = []
            path_img = os.path.join(PATH_LIST[f],os.listdir(PATH_LIST[f])[i])
            name_img = os.listdir(PATH_LIST[f])[i].split('.')[0]
            ligne.append(name_img)

            img_gray=cv2.imread (path_img,0)
            ligne.extend(descriprot_texture_hitogramme(img_gray))

            ligne.append(f)

            # Using append to add the list to DataFrame
            df = df.append(pd.DataFrame([ligne], columns=columns), ignore_index=True)
    
    if os.path.isfile(os.path.join(path_data_csv,name_file))==False:
        df.to_csv(os.path.join(path_data_csv,name_file), header=True, index = False, sep =',')
    else : 
        print("file already exict")
    end = time.time()
    print("[INFO] Applying took {:.2f} seconds".format(end - start))
    return df


In [11]:
columns = ['name_img','L_1','U_1','V_1','Percent_1','L_2','U_2','V_2','Percent_2','L_3','U_3','V_3','Percent_3','label']

create_data_feautures_1_bis_color_dominant(PATH_LIST,columns,PATH_DATA_CSV, 'Data_Features_1_bis.csv')

[INFO] Applying took 9.26 seconds


Unnamed: 0,name_img,L_1,U_1,V_1,Percent_1,L_2,U_2,V_2,Percent_2,L_3,U_3,V_3,Percent_3,label
0,fruit_0000,252.460007,94.709999,135.419998,36.990,122.260002,77.150002,124.360001,18.210,189.520004,70.339996,121.910004,44.800,0
1,fruit_0001,249.979996,96.089996,135.889999,28.560,16.230000,95.120003,115.339996,38.340,62.349998,93.349998,79.300003,33.100,0
2,fruit_0002,98.930000,81.739998,94.529999,8.290,253.500000,95.790001,136.050003,39.890,200.449997,57.169998,106.739998,51.820,0
3,fruit_0003,249.380005,95.980003,134.059998,29.850,67.910004,92.000000,83.389999,10.850,110.720001,84.519997,19.389999,59.300,0
4,fruit_0004,39.230000,92.570000,106.570000,38.100,122.150002,84.690002,96.070000,38.190,251.720001,95.949997,135.880005,23.710,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,person_0020,19.219999,95.650002,127.139999,38.622,192.699997,101.010002,144.039993,23.036,77.120003,90.599998,108.970001,38.342,3
96,person_0021,104.800003,91.830002,124.099998,30.812,182.750000,93.040001,136.919998,11.467,36.730000,96.029999,133.550003,57.721,3
97,person_0022,104.239998,89.309998,91.559998,30.656,237.929993,95.820000,135.860001,41.733,22.150000,94.949997,130.490005,27.611,3
98,person_0023,22.090000,96.320000,130.179993,37.051,181.850006,85.739998,92.750000,22.575,115.739998,86.489998,82.110001,40.373,3


In [16]:
columns = ['name_img','dissimilarity','correlation', 'contrast','homogeneity','energy','label']

create_data_feautures_2_bis_matrix_coccurrence(PATH_LIST,columns,PATH_DATA_CSV, 'Data_Features_2_bis.csv')

[INFO] Applying took 0.82 seconds


Unnamed: 0,name_img,dissimilarity,correlation,contrast,homogeneity,energy,label
0,fruit_0000,3.738,0.973,135.925,0.479,0.207,0
1,fruit_0001,5.714,0.982,327.895,0.428,0.126,0
2,fruit_0002,4.748,0.944,225.034,0.547,0.301,0
3,fruit_0003,3.999,0.980,161.597,0.500,0.198,0
4,fruit_0004,6.703,0.974,337.205,0.341,0.123,0
...,...,...,...,...,...,...,...
95,person_0020,3.351,0.995,43.784,0.404,0.038,3
96,person_0021,3.171,0.992,37.554,0.419,0.041,3
97,person_0022,4.224,0.993,110.043,0.453,0.063,3
98,person_0023,3.160,0.997,29.807,0.383,0.032,3


In [17]:
columns = ['name_img','mean_color_1', 'var_color_1', 'kurtosis_color_1', 'entropy_color_1',
                        'mean_color_2', 'var_color_2', 'kurtosis_color_2', 'entropy_color_2',
                        'mean_color_3', 'var_color_3', 'kurtosis_color_3', 'entropy_color_3','label']

create_data_feautures_3_bis_hitogramme_color(PATH_LIST,columns,PATH_DATA_CSV, 'Data_Features_3_bis.csv')

[INFO] Applying took 0.64 seconds


Unnamed: 0,name_img,mean_color_1,var_color_1,kurtosis_color_1,entropy_color_1,mean_color_2,var_color_2,kurtosis_color_2,entropy_color_2,mean_color_3,var_color_3,kurtosis_color_3,entropy_color_3,label
0,fruit_0000,1.171875,189.150146,234.013916,1.051480,1.171875,202.243896,236.767822,0.923059,1.171875,212.040771,239.448227,0.890240,0
1,fruit_0001,1.171875,129.157959,216.100266,1.536723,1.171875,109.603271,194.628632,1.598176,1.171875,80.197021,210.176575,2.346885,0
2,fruit_0002,1.171875,167.907959,240.901321,1.479838,1.171875,170.939209,241.658997,1.487373,1.171875,149.095459,239.989014,1.811911,0
3,fruit_0003,1.171875,149.704834,223.507874,1.315909,1.171875,172.993896,222.622864,1.066722,1.171875,152.392334,220.307968,1.264182,0
4,fruit_0004,1.171875,117.181396,212.469849,1.705785,1.171875,79.243896,215.504089,2.578870,1.171875,62.853271,215.221497,2.998443,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,person_0020,3.000000,10.515625,1.931457,4.989875,3.000000,9.593750,0.994186,5.010484,3.000000,10.656250,2.823411,4.984765,3
96,person_0021,3.000000,14.578125,1.419034,4.756175,3.000000,15.335938,2.072972,4.737643,3.000000,16.382812,3.129020,4.706685,3
97,person_0022,3.000000,62.914062,17.125673,3.957088,3.000000,63.562500,18.444830,4.002325,3.000000,64.367188,21.021227,4.008346,3
98,person_0023,3.000000,12.648438,2.773515,4.863359,3.000000,12.382812,1.217864,4.860567,3.000000,12.156250,1.120935,4.873137,3


In [18]:
columns = ['name_img','mean_color_gray', 'var_color_gray', 'kurtosis_color_gray', 'entropy_color_gray','label']

create_data_feautures_4_bis_hitogramme_texture(PATH_LIST,columns,PATH_DATA_CSV, 'Data_Features_4_bis.csv')

[INFO] Applying took 0.60 seconds


Unnamed: 0,name_img,mean_color_gray,var_color_gray,kurtosis_color_gray,entropy_color_gray,label
0,fruit_0000,0.390625,24.753662,228.664688,0.655818,0
1,fruit_0001,0.390625,11.261475,120.360558,1.402632,0
2,fruit_0002,0.390625,26.378662,246.016937,0.803683,0
3,fruit_0003,0.390625,27.331787,240.592346,0.593292,0
4,fruit_0004,0.390625,12.925537,159.098663,1.472703,0
...,...,...,...,...,...,...
95,person_0020,1.000000,3.328125,6.136811,4.305597,3
96,person_0021,1.000000,2.578125,4.059394,4.454858,3
97,person_0022,1.000000,9.234375,30.282726,3.651691,3
98,person_0023,1.000000,2.843750,8.069195,4.449944,3


In [3]:
PATH_DATA_CSV

'C:\\Users\\yousra.amrani_adm\\Documents\\Data\\Data_csv'