# (2) CV WINTER 2023 - Blurred Image Detection via Supervised Learning

### Project Description
There is a dataset of images, par of which is blurred. The task is to develop a machine learning algorithm to detect whether the image is blurred or sharp on the unknown dataset.

### Data Description
The dataset consists of images, and some of them are blurred. The images are blurred using augmentation.

Data files:
- ```train``` - a folder for training
- ```test``` - a folder with images, for which we make predicitons
- ```train.csv``` - labels (answers) to the train sample: **if 1, the image is blurred.**

### Papers:
- https://jiaya.me/all_final_papers/blur_detect_cvpr08.pdf
- https://fled.github.io/paper/blur.pdf
- http://graphics.im.ntu.edu.tw/docs/mmm08.pdf


In [206]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
import cv2

from keras.preprocessing import image
from sklearn import svm
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Input, Conv2D, Dense, Flatten, Dropout, Activation
from tensorflow.keras.layers import GlobalMaxPooling2D, MaxPooling2D
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model
from tensorflow.keras import layers
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

import warnings
warnings.simplefilter("ignore")

# show the version of tf
print(tf.__version__)
print(cv2.__version__)

2.11.0
4.7.0


## 1 Data Load

### 1.1 Loading images and labels from given datasets

In [207]:
labels = pd.read_csv('/Users/yuliabezginova/PycharmProjects/00_files-for_NLP/shift-cv-winter-2023/train.csv')
labels.head()

Unnamed: 0,filename,blur
0,kagouracdzwrjjxzzedi.jpg,0.0
1,ahnamimqdfqoqdnozabc.jpg,0.0
2,gwhdadvghuzinmzhzssx.jpg,0.0
3,onqwabwwckubrydgbzly.jpg,0.0
4,ewpqdruddbokqyzzupcw.jpg,1.0


In [208]:
train_datagen = ImageDataGenerator(
    validation_split=0.25,
    rescale=1./255,
    horizontal_flip=True,
    vertical_flip=True,
    rotation_range=90,
    width_shift_range=0.2,
    height_shift_range=0.2
)

validation_datagen = ImageDataGenerator(
    validation_split=0.25,
    rescale=1./255)

# Чтобы загрузчик извлёк данные из папки, вызовем функцию 
# flow_from_directory() (англ. «поток из директории»):

train_datagen_flow = train_datagen.flow_from_directory(
    '/Users/yuliabezginova/PycharmProjects/00_files-for_NLP/shift-cv-winter-2023/',
    target_size=(150, 150),
    batch_size=16,
    class_mode='sparse',
    subset='training',
    seed=12345)

val_datagen_flow = validation_datagen.flow_from_directory(
    '/Users/yuliabezginova/PycharmProjects/00_files-for_NLP/shift-cv-winter-2023/',
    target_size=(150, 150),
    batch_size=16,
    class_mode='sparse',
    subset='validation',
    seed=12345)

Found 2579 images belonging to 2 classes.
Found 859 images belonging to 2 classes.


### 1.2 Exploratory Data Analysis

#### 1.2.1 Understanding the variables type, labels structure, number of target classes.

In [None]:
labels.info()

In [None]:
labels['blur'].unique()

In [None]:
print(len(labels))

#### 1.2.2 Descriptive statistics of labels

In [None]:
labels.describe().T

In [None]:
labels['blur'].hist(bins=100);

#### 1.2.3 Checking the target value for missing and duplicates

In [None]:
labels.isna().sum()

In [None]:
labels.duplicated().sum()

#### 1.2.4  Checking the target value for disbalance

In [None]:
labels_df = pd.DataFrame(labels)

In [None]:
labels_df['blur'].value_counts()

In [None]:
train_blur = labels[labels["blur"]==1]
train_sharp = labels[labels["blur"]==0]

print("Blur images:", len(train_blur))
print("Sharp images:", len(train_sharp))

### ***Conclusion:*** Type of variable in the target values is ```float64``` with two unique values - 0, 1. This is fine for solving classification problem, but the target should be converted into ```string``` type. Number of observations if 2664. The target dataset is balanced.

## 2 Constructing CNN models

In [310]:
path = '/Users/yuliabezginova/PycharmProjects/00_files-for_NLP/shift-cv-winter-2023/'

In [311]:
def load_train(path):
    labels = pd.read_csv(path + 'train.csv')
    train_datagen = ImageDataGenerator(validation_split=0.25, horizontal_flip=True, rescale=1./255)
    train_gen_flow = train_datagen.flow_from_dataframe(
        dataframe=labels,
        directory=path + 'train/train/',
        x_col='filename',
        y_col='blur',
        target_size=(96, 96),
        batch_size=32,
        class_mode='raw',
        subset='training',
        seed=5)

    return train_gen_flow


def load_test(path):
    labels = pd.read_csv(path + 'train.csv')
    test_datagen = ImageDataGenerator(validation_split=0.25, rescale=1./255)
    test_gen_flow = test_datagen.flow_from_dataframe(
        dataframe=labels,
        directory=path + 'train/train/',
        x_col='filename',
        y_col='blur',
        target_size=(96, 96),
        batch_size=32,
        class_mode='raw',
        subset='validation',
        seed=5)

    return test_gen_flow

In [312]:
train = load_train(path)

Found 1998 validated image filenames.


In [313]:
valid = load_test(path)

Found 666 validated image filenames.


In [314]:
from tensorflow.keras.applications.resnet import ResNet50

In [354]:
def create_model_ResNet50(input_shape):    
    backbone = ResNet50(input_shape=input_shape,
                    weights='imagenet', 
                    include_top=False)
    
#     замораживаем ResNet50 без верхушки
#     backbone.trainable = False

    model = Sequential()
    model.add(backbone)
    model.add(GlobalAveragePooling2D())
    model.add(Dropout(0.25))
#     model.add(BatchNormalization())
    model.add(Dense(32, activation='relu'))
    model.add(Dense(1, activation='softmax')) 

    optimizer = Adam(learning_rate=0.0001)
    model.compile(optimizer=optimizer, loss = "binary_crossentropy",
                  metrics=['accuracy'])

    return model

In [355]:
def create_model(input_shape):
    model = Sequential()
#     model.add(layers.Reshape((96, 96, 2), input_shape=input_shape))
    model.add(Conv2D(64, (3, 3), padding='same', input_shape=input_shape))
    model.add(Activation('relu'))
    model.add(Conv2D(32, (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
#     model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
#     model.add(BatchNormalization())
    model.add(Flatten())    
    model.add(Dense(10))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
#     model.add(BatchNormalization())
    model.add(Dense(1, activation='softmax'))
    
    optimizer = Adam(learning_rate=0.0001)
    
    model.compile(optimizer, loss = "categorical_crossentropy",
                  metrics=['accuracy'])
    return model

In [347]:
def train_model(model, train_data, test_data, batch_size=None, epochs=30,
                steps_per_epoch=None, validation_steps=None):

    if steps_per_epoch is None:
        steps_per_epoch = len(train_data)
    if validation_steps is None:
        validation_steps = len(test_data)

    model.fit(train_data,
              validation_data=test_data,
              batch_size=batch_size,
              epochs=epochs,
              steps_per_epoch=steps_per_epoch,
              validation_steps=validation_steps,
              verbose=2)

    return model

In [356]:
input_shape = (96, 96, 3)

In [357]:
# Initiating the model
model = create_model(input_shape)

In [358]:
model.summary()

Model: "sequential_39"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_69 (Conv2D)          (None, 96, 96, 64)        1792      
                                                                 
 activation_79 (Activation)  (None, 96, 96, 64)        0         
                                                                 
 conv2d_70 (Conv2D)          (None, 94, 94, 32)        18464     
                                                                 
 activation_80 (Activation)  (None, 94, 94, 32)        0         
                                                                 
 max_pooling2d_32 (MaxPoolin  (None, 47, 47, 32)       0         
 g2D)                                                            
                                                                 
 dropout_45 (Dropout)        (None, 47, 47, 32)        0         
                                                     

In [359]:
# Initiating the model
model_ResNet50 = create_model_ResNet50(input_shape)

In [360]:
# training the model
ResNet50_model = train_model(model_ResNet50, 
                             train, 
                             valid, 
                             batch_size=32, 
                             epochs=10, 
                             steps_per_epoch=None, 
                             validation_steps=None)

Epoch 1/10
63/63 - 316s - loss: 0.6404 - accuracy: 0.4850 - val_loss: 1.4745 - val_accuracy: 0.4925 - 316s/epoch - 5s/step
Epoch 2/10
63/63 - 271s - loss: 0.2949 - accuracy: 0.4850 - val_loss: 0.8885 - val_accuracy: 0.4925 - 271s/epoch - 4s/step
Epoch 3/10
63/63 - 273s - loss: 0.1924 - accuracy: 0.4850 - val_loss: 1.0560 - val_accuracy: 0.4925 - 273s/epoch - 4s/step
Epoch 4/10
63/63 - 298s - loss: 0.1337 - accuracy: 0.4850 - val_loss: 0.7147 - val_accuracy: 0.4925 - 298s/epoch - 5s/step
Epoch 5/10
63/63 - 295s - loss: 0.1337 - accuracy: 0.4850 - val_loss: 3.5372 - val_accuracy: 0.4925 - 295s/epoch - 5s/step
Epoch 6/10
63/63 - 334s - loss: 0.1000 - accuracy: 0.4850 - val_loss: 3.1512 - val_accuracy: 0.4925 - 334s/epoch - 5s/step
Epoch 7/10
63/63 - 410s - loss: 0.0860 - accuracy: 0.4850 - val_loss: 0.8635 - val_accuracy: 0.4925 - 410s/epoch - 7s/step
Epoch 8/10
63/63 - 380s - loss: 0.0840 - accuracy: 0.4850 - val_loss: 1.0129 - val_accuracy: 0.4925 - 380s/epoch - 6s/step
Epoch 9/10
63/63

In [362]:
STEP_SIZE_TRAIN=train.n//train.batch_size
STEP_SIZE_VALID=valid.n//valid.batch_size

In [363]:
# training the model
model_2 = train_model(model, 
                        train,
                        valid, 
                        batch_size=32, 
                        epochs=30, 
                        steps_per_epoch=STEP_SIZE_TRAIN, 
                        validation_steps=STEP_SIZE_VALID)

Epoch 1/30
62/62 - 81s - loss: 0.0000e+00 - accuracy: 0.4842 - val_loss: 0.0000e+00 - val_accuracy: 0.4925 - 81s/epoch - 1s/step
Epoch 2/30
62/62 - 65s - loss: 0.0000e+00 - accuracy: 0.4852 - val_loss: 0.0000e+00 - val_accuracy: 0.4925 - 65s/epoch - 1s/step
Epoch 3/30
62/62 - 77s - loss: 0.0000e+00 - accuracy: 0.4842 - val_loss: 0.0000e+00 - val_accuracy: 0.4925 - 77s/epoch - 1s/step
Epoch 4/30
62/62 - 62s - loss: 0.0000e+00 - accuracy: 0.4868 - val_loss: 0.0000e+00 - val_accuracy: 0.4925 - 62s/epoch - 994ms/step
Epoch 5/30
62/62 - 57s - loss: 0.0000e+00 - accuracy: 0.4847 - val_loss: 0.0000e+00 - val_accuracy: 0.4925 - 57s/epoch - 926ms/step
Epoch 6/30
62/62 - 59s - loss: 0.0000e+00 - accuracy: 0.4832 - val_loss: 0.0000e+00 - val_accuracy: 0.4925 - 59s/epoch - 951ms/step
Epoch 7/30
62/62 - 59s - loss: 0.0000e+00 - accuracy: 0.4858 - val_loss: 0.0000e+00 - val_accuracy: 0.4925 - 59s/epoch - 947ms/step
Epoch 8/30
62/62 - 65s - loss: 0.0000e+00 - accuracy: 0.4847 - val_loss: 0.0000e+00 -

In [391]:
# Ruba's CNN
def create_model_3(input_shape):
    model = Sequential()

    model.add(Conv2D(32, kernel_size=(2, 2), activation ='relu', input_shape=input_shape))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(BatchNormalization()) # added
    model.add(Conv2D(64, kernel_size = (2,2), activation ='relu'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(BatchNormalization()) # added
    model.add(Flatten())
    model.add(Dense(64, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dense(1, activation='sigmoid'))

    optimizer = Adam(learning_rate=0.0001) # updated to lr=0.0001

    model.compile(loss = "binary_crossentropy", 
              optimizer=optimizer, metrics=['accuracy'])
    return model

In [392]:
# Initiating the model
model_3 = create_model_3(input_shape)

In [371]:
# training the model
model_3 = train_model(model_3, 
                        train,
                        valid, 
                        batch_size=32, 
                        epochs=50, 
                        steps_per_epoch=STEP_SIZE_TRAIN, 
                        validation_steps=STEP_SIZE_VALID)

Epoch 1/50
62/62 - 38s - loss: 0.7034 - accuracy: 0.6516 - val_loss: 0.7453 - val_accuracy: 0.4922 - 38s/epoch - 605ms/step
Epoch 2/50
62/62 - 41s - loss: 0.4514 - accuracy: 0.8006 - val_loss: 0.7386 - val_accuracy: 0.4922 - 41s/epoch - 657ms/step
Epoch 3/50
62/62 - 52s - loss: 0.3687 - accuracy: 0.8423 - val_loss: 0.7503 - val_accuracy: 0.5422 - 52s/epoch - 836ms/step
Epoch 4/50
62/62 - 29s - loss: 0.3161 - accuracy: 0.8739 - val_loss: 0.6689 - val_accuracy: 0.6203 - 29s/epoch - 471ms/step
Epoch 5/50
62/62 - 32s - loss: 0.2379 - accuracy: 0.9125 - val_loss: 0.9495 - val_accuracy: 0.5703 - 32s/epoch - 521ms/step
Epoch 6/50
62/62 - 29s - loss: 0.1998 - accuracy: 0.9329 - val_loss: 0.7145 - val_accuracy: 0.6516 - 29s/epoch - 471ms/step
Epoch 7/50
62/62 - 31s - loss: 0.1718 - accuracy: 0.9379 - val_loss: 0.5623 - val_accuracy: 0.7109 - 31s/epoch - 498ms/step
Epoch 8/50
62/62 - 33s - loss: 0.1744 - accuracy: 0.9349 - val_loss: 0.6352 - val_accuracy: 0.7328 - 33s/epoch - 527ms/step
Epoch 9/

In [388]:
def create_model_4(input_shape):
    model1 = Sequential()
    model1.add(Conv2D(32, (3, 3),
                     input_shape=input_shape))
    model1.add(Activation('relu'))
    model1.add(MaxPooling2D(pool_size=(2, 2)))
    model1.add(Dropout(0.25))
    model1.add(Conv2D(64, (3, 3)))
#     model1.add(Activation('relu'))
#     model1.add(Conv2D(128, (3, 3)))
    model1.add(Activation('relu'))
    model1.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(BatchNormalization()) # added
    model1.add(Dropout(0.25))

    model1.add(Flatten())

    model1.add(Dense(1, activation='softmax'))

    optimizer = Adam(learning_rate=0.0001) # updated to lr=0.0001
    model1.compile(optimizer=optimizer, 
                  loss="binary_crossentropy", 
              metrics=["accuracy"])
    return model

In [389]:
# Initiating the model
model_4 = create_model_4(input_shape)

# training the model
model_4 = train_model(model_4, 
                        train,
                        valid, 
                        batch_size=32, 
                        epochs=30, 
                        steps_per_epoch=STEP_SIZE_TRAIN, 
                        validation_steps=STEP_SIZE_VALID)

Epoch 1/30
62/62 - 64s - loss: 0.0000e+00 - accuracy: 0.4817 - val_loss: 0.0000e+00 - val_accuracy: 0.4922 - 64s/epoch - 1s/step
Epoch 2/30
62/62 - 60s - loss: 0.0000e+00 - accuracy: 0.4842 - val_loss: 0.0000e+00 - val_accuracy: 0.4969 - 60s/epoch - 967ms/step
Epoch 3/30
62/62 - 63s - loss: 0.0000e+00 - accuracy: 0.4837 - val_loss: 0.0000e+00 - val_accuracy: 0.4922 - 63s/epoch - 1s/step
Epoch 4/30
62/62 - 62s - loss: 0.0000e+00 - accuracy: 0.4863 - val_loss: 0.0000e+00 - val_accuracy: 0.4906 - 62s/epoch - 1s/step
Epoch 5/30
62/62 - 59s - loss: 0.0000e+00 - accuracy: 0.4847 - val_loss: 0.0000e+00 - val_accuracy: 0.4984 - 59s/epoch - 957ms/step
Epoch 6/30
62/62 - 60s - loss: 0.0000e+00 - accuracy: 0.4858 - val_loss: 0.0000e+00 - val_accuracy: 0.4953 - 60s/epoch - 974ms/step
Epoch 7/30
62/62 - 62s - loss: 0.0000e+00 - accuracy: 0.4852 - val_loss: 0.0000e+00 - val_accuracy: 0.4891 - 62s/epoch - 995ms/step
Epoch 8/30
62/62 - 61s - loss: 0.0000e+00 - accuracy: 0.4852 - val_loss: 0.0000e+00 -

In [399]:
def create_model_5(input_shape):
    model = Sequential()
    model.add(Conv2D(32, (3, 3), padding='same', input_shape=input_shape))
    model.add(Activation('relu'))
    
    model.add(Conv2D(32, (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
    
    model.add(BatchNormalization()) # added
    
#     model.add(Conv2D(64, (3, 3), padding='same'))
#     model.add(Activation('relu'))
    
    model.add(Conv2D(64, (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    
    model.add(Dropout(0.25)) # regularization
    model.add(BatchNormalization()) # added

    model.add(Flatten())

    model.add(Dense(1, activation='sigmoid'))

    model.compile(optimizer='Adamax', 
                  loss="binary_crossentropy", 
              metrics=["accuracy"])
    return model

In [None]:
# Initiating the model
model_5 = create_model_5(input_shape)

# training the model
model_5 = train_model(model_5, 
                        train,
                        valid, 
                        batch_size=32, 
                        epochs=30, 
                        steps_per_epoch=STEP_SIZE_TRAIN, 
                        validation_steps=STEP_SIZE_VALID)

Epoch 1/30


## 3 Choosing the best CNN model

- Model 1 - ResNet50

In [None]:
f, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
t = f.suptitle('Basic ResNet50 CNN Performance', fontsize=12)
f.subplots_adjust(top=0.85, wspace=0.3)

epoch_list = list(range(0, 10))
ax1.plot(epoch_list, ResNet50_model.history['accuracy'], label='Train Accuracy')
ax1.plot(epoch_list, ResNet50_model.history['val_accuracy'], label='Validation Accuracy')

ax1.set_xticks(np.arange(0, 10, 1))
ax1.set_ylabel('Accuracy Value')
ax1.set_xlabel('Epoch')
ax1.set_title('Accuracy')
l1 = ax1.legend(loc="best")

ax2.plot(epoch_list, ResNet50_model.history['loss'], label='Train Loss')
ax2.plot(epoch_list, ResNet50_model.history['val_loss'], label='Validation Loss')
ax2.set_xticks(np.arange(0, 10, 1))
ax2.set_ylabel('Loss Value')
ax2.set_xlabel('Epoch')
ax2.set_title('Loss')
l2 = ax2.legend(loc="best")

- Model 2 CNN

In [None]:
f, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
t = f.suptitle('Basic CNN Performance', fontsize=12)
f.subplots_adjust(top=0.85, wspace=0.3)

epoch_list = list(range(0, 10))
ax1.plot(epoch_list, model.history['accuracy'], label='Train Accuracy')
ax1.plot(epoch_list, model.history['val_accuracy'], label='Validation Accuracy')

ax1.set_xticks(np.arange(0, 10, 1))
ax1.set_ylabel('Accuracy Value')
ax1.set_xlabel('Epoch')
ax1.set_title('Accuracy')
l1 = ax1.legend(loc="best")

ax2.plot(epoch_list, model.history['loss'], label='Train Loss')
ax2.plot(epoch_list, model.history['val_loss'], label='Validation Loss')
ax2.set_xticks(np.arange(0, 10, 1))
ax2.set_ylabel('Loss Value')
ax2.set_xlabel('Epoch')
ax2.set_title('Loss')
l2 = ax2.legend(loc="best")

In [None]:
plt.plot(r.history['accuracy'], label='acc', color='red')
plt.plot(r.history['val_accuracy'], label='val_acc', color='darkblue')
plt.legend()