In [7]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from pathlib import Path
from sklearn.model_selection import train_test_split
import tensorflow as tf
import os
from sklearn.metrics import confusion_matrix,classification_report

In [8]:
filepath=Path('/kaggle/input/surface-crack-detection')

In [9]:
negative_path=Path('/kaggle/input/surface-crack-detection/Negative')
positive_path=Path('/kaggle/input/surface-crack-detection/Positive')

# Creating DataFrames

In [10]:
def generate_df(image_dir,label):
    filepath=pd.Series(list(positive_path.glob(r'*.jpg')),name='Filepath').astype(str)
    
    label=pd.Series(label,name='Label',index=filepath.index)
    
    img_dir=pd.concat([filepath,label],axis=1)
    
    return img_dir
    
    

In [11]:
positive_dataframe=generate_df(positive_path,'POSITIVE')
negative_dataframe=generate_df(negative_path,'NEGATIVE')
df=pd.concat([positive_dataframe,negative_dataframe],axis=0).reset_index(drop=True)
df


Unnamed: 0,Filepath,Label
0,/kaggle/input/surface-crack-detection/Positive...,POSITIVE
1,/kaggle/input/surface-crack-detection/Positive...,POSITIVE
2,/kaggle/input/surface-crack-detection/Positive...,POSITIVE
3,/kaggle/input/surface-crack-detection/Positive...,POSITIVE
4,/kaggle/input/surface-crack-detection/Positive...,POSITIVE
...,...,...
39995,/kaggle/input/surface-crack-detection/Positive...,NEGATIVE
39996,/kaggle/input/surface-crack-detection/Positive...,NEGATIVE
39997,/kaggle/input/surface-crack-detection/Positive...,NEGATIVE
39998,/kaggle/input/surface-crack-detection/Positive...,NEGATIVE


In [12]:
df['Filepath'][1]

'/kaggle/input/surface-crack-detection/Positive/19812.jpg'

In [14]:
train_df,test_df=train_test_split(df.sample(6000,random_state=1),train_size=0.7,shuffle=True)
print(train_df.shape)
print(test_df.shape)

(4200, 2)
(1800, 2)


# Loading Image Data

In [15]:
train_gen=tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255,validation_split=0.2
)
test_gen=tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255

)

# Flow from Dataframe

In [16]:
train_data=train_gen.flow_from_dataframe(train_df,subset='training',x_col='Filepath',y_col='Label',target_size=(120,120),color_mode='rgb',
                                         class_mode='binary',batch_size=32,shuffle=True,seed=42)

val_data=train_gen.flow_from_dataframe(train_df,subset='validation',x_col='Filepath',y_col='Label',target_size=(120,120),color_mode='rgb',
                                       class_mode='binary',batch_size=32,shuffle=True,seed=42)
                                       
test_data=test_gen.flow_from_dataframe(test_df,x_col='Filepath',y_col='Label',target_size=(120,120),color_mode='rgb',class_mode='binary',
                                       batch_size=32,shuffle=False,seed=42)


Found 3360 validated image filenames belonging to 2 classes.
Found 840 validated image filenames belonging to 2 classes.
Found 1800 validated image filenames belonging to 2 classes.


# Training the Model

In [23]:
inputs=tf.keras.Input(shape=(120,120,3))
x=tf.keras.layers.Conv2D(filters=16,kernel_size=(3,3),activation='relu')(inputs)
x=tf.keras.layers.MaxPool2D(pool_size=(2,2))(x)

x=tf.keras.layers.Conv2D(filters=16,kernel_size=(3,3),activation='relu')(x)
x=tf.keras.layers.MaxPool2D(pool_size=(2,2))(x)

x=tf.keras.layers.GlobalAveragePooling2D()(x)
outputs=tf.keras.layers.Dense(1,activation='sigmoid')(x)
model=tf.keras.Model(inputs=inputs,outputs=outputs)
model.compile(optimizer='adam',loss='binary_crossentropy',
             metrics=['accuracy'])
model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_4 (InputLayer)        [(None, 120, 120, 3)]     0         
                                                                 
 conv2d_3 (Conv2D)           (None, 118, 118, 16)      448       
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 59, 59, 16)       0         
 2D)                                                             
                                                                 
 conv2d_4 (Conv2D)           (None, 57, 57, 16)        2320      
                                                                 
 max_pooling2d_3 (MaxPooling  (None, 28, 28, 16)       0         
 2D)                                                             
                                                                 
 global_average_pooling2d_1   (None, 16)               0   

# Training the Model

In [25]:
history=model.fit(train_data,validation_data=val_data,
                  epochs=100,
                  callbacks=[tf.keras.callbacks.EarlyStopping(
                  monitor='val_loss',
                  patience=3,
                  restore_best_weights=True)]
                 )

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100


In [26]:
fig=px.line(
history.history,
y=['loss','val_loss'],
labels={'index':'Epochs','value':'Loss'},
title='Training and Validation Loss')
fig.show()

In [30]:
prediction=np.squeeze(model.predict(test_data))
print(prediction)

[0.50764817 0.5078826  0.5085171  ... 0.507825   0.50933504 0.5077733 ]


In [32]:
prediction=(prediction>=0.5).astype(np.int)
print(prediction)

[1 1 1 ... 1 1 1]



Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations



# Confusion Matrix

In [39]:
cm=confusion_matrix(prediction,np.array(test_df['Label'].replace({'POSITIVE':1,'NEGATIVE':0})))

In [43]:
cm

array([[  0,   0],
       [887, 913]])