In [1]:
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
import os
import time
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
import shutil
from collections import Counter
from keras.preprocessing.image import ImageDataGenerator
from sklearn.utils import class_weight
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from contour_features import load_image, process_image, cut_section, cut_image, calculate_features, show_process
from time import time

In [2]:
seed = 2802
test_size = 0.2
batch_size = 4
num_classes = 3

input_dir = "data/images"
target_dir = "data/mask"

input_shape = (320,320)

features = ['dist', 'extent', 'slope', 'pwp_limit', 'pwp_middle']

PROCESS_IMAGES = False

In [3]:
df = pd.read_csv('data/dataset_train_test.csv') 
df['path'] = input_dir+'/'+df['image_name']+'.jpg'
df['category'] = np.where(df['class']==1, 'NB', np.where(df['class']==2, 'NB', 'B'))
df['binary_class'] = np.where(df['class']==1, 0, np.where(df['class']==2, 0, 1))

df.head()

Unnamed: 0.1,Unnamed: 0,image_name,part,section,min_height,max_height,diff_height,piece_position,class,image_id,set,path,category,binary_class
0,0,P01_001_0,P01,0,181,200,19,Down,2,1,Train,data/images/P01_001_0.jpg,NB,0
1,1,P01_001_1,P01,1,198,208,10,Down,1,1,Train,data/images/P01_001_1.jpg,NB,0
2,2,P01_001_2,P01,2,204,214,10,Down,1,1,Train,data/images/P01_001_2.jpg,NB,0
3,3,P01_001_3,P01,3,200,212,12,Down,1,1,Train,data/images/P01_001_3.jpg,NB,0
4,4,P01_002_0,P01,0,232,238,6,Down,1,2,Train,data/images/P01_002_0.jpg,NB,0


In [4]:
if PROCESS_IMAGES:
    data = pd.DataFrame(columns=['img_name', 'category','set']+features)
    for _, row in df.iterrows():
        print(row['path'])
        image = load_image(row['path'])
        img_process = process_image(image)
        data_vector =  cut_section(img_process[-1])
        if "dist" in data_vector:  
                img_cut = cut_image(img_process[0],data_vector['y1'],data_vector['y2'])
                if img_cut.shape[0]>10:
                    img_process_cut = process_image(img_cut, clahe_tile = (5,5), kernel_size=(17,39))            
                    data_vector_cut =  cut_section(img_process_cut[-1], True, True)

                    if "contour_line" in data_vector_cut:
                        calculate_features(img_process_cut[-1], data_vector_cut, True)  
                        if "extent" in data_vector_cut:
                            info = {'img_name': row['image_name'],'category': row['class'], 'binary_class': row['binary_class'], 'set': row['set']}
                            for feature in features:
                                info[feature] = data_vector_cut[feature]
                            data = data.append(info, ignore_index=True)
    data.to_csv('results/traditional_data_complete.csv', index=False, sep=';', decimal=".")
    data.head()

In [5]:
data = pd.read_csv('results/traditional_data_complete.csv', delimiter=';', decimal=".")

In [6]:
data_train = data[data['set']=='Train']
data_test = data[data['set']=='Test']

X_train = data_train[features]
y_train = data_train[['binary_class']].values.ravel()

X_test = data_test[features]
y_test = data_test[['binary_class']].values.ravel()

In [7]:
model = RandomForestClassifier(random_state=seed,n_jobs=-1, criterion = 'entropy', max_depth= 14, n_estimators=95)
start = time()
model = model.fit(X_train, y_train)
y_predict_train = model.predict(X_train)
print(classification_report(y_train,y_predict_train, digits=4))
y_predict= model.predict(X_test)
print(classification_report(y_test,y_predict, digits=4))
train_time = time()-start
print(train_time)

              precision    recall  f1-score   support

         0.0     1.0000    1.0000    1.0000       661
         1.0     1.0000    1.0000    1.0000       185

    accuracy                         1.0000       846
   macro avg     1.0000    1.0000    1.0000       846
weighted avg     1.0000    1.0000    1.0000       846

              precision    recall  f1-score   support

         0.0     0.9162    0.9053    0.9107       169
         1.0     0.6522    0.6818    0.6667        44

    accuracy                         0.8592       213
   macro avg     0.7842    0.7936    0.7887       213
weighted avg     0.8616    0.8592    0.8603       213

0.4052236080169678


In [8]:
tn, fp, fn, tp = confusion_matrix(y_test,y_predict).ravel()

sensivity = tp /(tp+fn)
specificity = tn /(tn+fp)

print('Sensivity', sensivity, '\tSpecificity', specificity)

Sensivity 0.6818181818181818 	Specificity 0.9053254437869822


In [9]:
image = load_image(input_dir+'/'+data.loc[0].img_name+'.jpg')

times = []
for _ in range(15):
    start = time()
    img_process = process_image(image)
    data_vector =  cut_section(img_process[-1])
    img_cut = cut_image(img_process[0],data_vector['y1'],data_vector['y2'])
    img_process_cut = process_image(img_cut, clahe_tile = (5,5), kernel_size=(17,39))            
    data_vector_cut =  cut_section(img_process_cut[-1], True, True)
    calculate_features(img_process_cut[-1], data_vector_cut, True)  
    info = []
    for feature in features:
        info.append(data_vector_cut[feature])
    X_input = np.expand_dims(info, axis=0)

    a = model.predict(X_input)
    evaluation_time = time()-start
    times.append(evaluation_time)
mean_time = np.median(np.array(times))
print(mean_time,'seconds')
print(mean_time*1000,'milliseconds')

  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"


0.8891596794128418 seconds
889.1596794128418 milliseconds


  "X does not have valid feature names, but"
