<a href="https://colab.research.google.com/github/viniciusrpb/116319_estruturasdedados/blob/main/cap11_2_cnn_alexnet_document_classification_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#from google.colab import drive
#drive.mount('/content/drive')

In [2]:
#!pip install -U keras
#!pip install tensorflow

In [3]:
#!cp -r "/content/drive/My Drive/ocr/full_3_balanced/" "documents"

In [4]:
from keras.models import Sequential
from keras.layers import Dense,Activation,Conv2D,MaxPooling2D,BatchNormalization,Flatten
from tensorflow.keras.optimizers import SGD
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import layers
import numpy as np
import os
import pandas as pd

In [5]:
path_data = "documents"
list_subfolders = os.listdir(path_data)
    
list_subfolders.sort()

dataset_dict = {}

dataset_dict['filename'] = []
dataset_dict['label'] = []

for folder in list_subfolders:

    list_images_path = os.listdir(path_data+"/"+folder)
    
    list_images_path.sort()

    for image_name in list_images_path:

        dataset_dict['filename'].append(folder+"/"+image_name)

        dataset_dict['label'].append(folder)

In [6]:
df = pd.DataFrame.from_dict(dataset_dict)

In [7]:
df

Unnamed: 0,filename,label
0,advertisement/0000035285.tif,advertisement
1,advertisement/00001168.tif,advertisement
2,advertisement/0000160476.tif,advertisement
3,advertisement/0000171775.tif,advertisement
4,advertisement/0000183298.tif,advertisement
...,...,...
895,scientific_publication/9575.tif,scientific_publication
896,scientific_publication/98793269_3285.tif,scientific_publication
897,scientific_publication/9893.tif,scientific_publication
898,scientific_publication/99026124_6146.tif,scientific_publication


In [8]:
X = df['filename'].values
y = df['label'].values
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.3, random_state=42)
X_valid, X_test, y_valid, y_test = train_test_split(X_test, y_test, test_size=0.5, random_state=42)

In [9]:
from keras.utils.np_utils import to_categorical

labels_train = pd.Categorical(y_train)
labels_valid = pd.Categorical(y_valid)
labels_test = pd.Categorical(y_test)

y_train_int = labels_train.codes
y_valid_int = labels_valid.codes
y_test_int = labels_test.codes

y_train_logits = to_categorical(y_train_int)
y_valid_logits = to_categorical(y_valid_int)
y_test_logits = to_categorical(y_test_int)

In [10]:
agnostic_datagen = ImageDataGenerator(rescale=1./255)

In [11]:
train_list = []
valid_list = []
test_list = []

for ind in range(0,len(X_train)):
    train_list.append([X_train[ind],y_train[ind]])
        
for ind in range(0,len(X_valid)):
    valid_list.append([X_valid[ind],y_valid[ind]])

for ind in range(0,len(X_test)):
    test_list.append([X_test[ind],y_test[ind]])

df_train = pd.DataFrame(train_list, columns =['filename','label'])
df_valid = pd.DataFrame(valid_list, columns =['filename','label'])
df_test = pd.DataFrame(test_list, columns =['filename','label'])

train_generator = agnostic_datagen.flow_from_dataframe(df_train, directory = path_data,
                                                        x_col = "filename", y_col = "label",
                                                        class_mode = "categorical", shuffle = True)
        
validation_generator = agnostic_datagen.flow_from_dataframe(df_valid, directory = path_data,
                                                            x_col = "filename", y_col = "label",
                                                            class_mode = "categorical", shuffle = True)

test_generator = agnostic_datagen.flow_from_dataframe(df_test, directory = path_data,
                                                            x_col = "filename", y_col = "label",
                                                            class_mode = "categorical", shuffle = True)

Found 630 validated image filenames belonging to 3 classes.
Found 135 validated image filenames belonging to 3 classes.
Found 135 validated image filenames belonging to 3 classes.


Hyperparameters

In [12]:
dims = (224,224,3)
num_classes = 3

In [13]:
model = Sequential()
# Parte 1 da AlexNet
model.add(layers.experimental.preprocessing.Resizing(224,224,interpolation="bilinear",input_shape=(224,224,3)))
model.add(Conv2D(96,kernel_size=(11,11), strides=(4,4),input_shape=dims,padding="same"))
model.add(Activation("relu"))
model.add(MaxPooling2D((3, 3),strides=(2,2),padding="same"))
#model.add(BatchNormalization())

# Parte 2 da AlexNet
model.add(Conv2D(256,kernel_size=(5,5), strides=(1,1),padding="same"))
model.add(Activation("relu"))
model.add(MaxPooling2D((3, 3),strides=(2,2)))
#model.add(BatchNormalization())

# Parte 3 da AlexNet
model.add(Conv2D(384,kernel_size=(3,3), strides=(1,1),padding="same"))
model.add(Activation("relu"))

model.add(Conv2D(384,kernel_size=(3,3), strides=(1,1),padding="same"))
model.add(Activation("relu"))

model.add(Conv2D(256,kernel_size=(3,3), strides=(1,1),padding="same"))
model.add(Activation("relu"))

model.add(MaxPooling2D((3, 3),strides=(2,2),padding="same"))

model.add(Flatten())

model.add(Dense(4096, input_shape=(224*224*3,)))
model.add(Activation("relu"))

model.add(Dense(4096))
model.add(Activation("relu"))

model.add(Dense(num_classes))
model.add(Activation("softmax"))

In [14]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 resizing (Resizing)         (None, 224, 224, 3)       0         
                                                                 
 conv2d (Conv2D)             (None, 56, 56, 96)        34944     
                                                                 
 activation (Activation)     (None, 56, 56, 96)        0         
                                                                 
 max_pooling2d (MaxPooling2D  (None, 28, 28, 96)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 28, 28, 256)       614656    
                                                                 
 activation_1 (Activation)   (None, 28, 28, 256)       0         
                                                        

In [15]:
early_stopping = EarlyStopping(monitor='loss', patience=5)

In [16]:
sgd = SGD(lr = 0.001)

model.compile(optimizer = sgd, 
              loss = 'categorical_crossentropy',
              metrics='accuracy')

history_fine = model.fit(train_generator,
                         epochs=60,
                         batch_size=16,
                         validation_data=validation_generator,
                         callbacks=[early_stopping]
                         )

  super(SGD, self).__init__(name, **kwargs)


Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60


In [18]:
y_prob = model.predict(test_generator)
y_pred= np.argmax(y_prob,axis=1)
y_true = test_generator.classes

#Plotting the confusion matrix
from sklearn.metrics import confusion_matrix

print(classification_report(y_true,y_pred))

print(confusion_matrix(y_true, y_pred))

              precision    recall  f1-score   support

           0       0.24      0.21      0.23        38
           1       0.35      0.29      0.32        48
           2       0.39      0.49      0.43        49

    accuracy                           0.34       135
   macro avg       0.33      0.33      0.33       135
weighted avg       0.33      0.34      0.33       135

[[ 8 11 19]
 [15 14 19]
 [10 15 24]]


Com transfer learning

In [20]:
#from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.applications.resnet50 import ResNet50

#pre_trained_model = InceptionV3(input_shape=(224,224,3),include_top=False,weights='imagenet')
pre_trained_model = ResNet50(input_shape=(224,224,3),include_top=False,pooling ='avg',weights='imagenet', classes=3)

for layer in pre_trained_model.layers:
  layer.trainable = False

In [22]:
model_resnet = Sequential()

model_resnet.add(pre_trained_model)
model_resnet.add(BatchNormalization())

model_resnet.add(Dense(3,activation="softmax"))

In [24]:
sgd2 = SGD(lr = 0.01)

model_resnet.compile(optimizer = sgd2, 
              loss = 'categorical_crossentropy', 
              metrics = ['accuracy'])

history_fine = model_resnet.fit(train_generator,
                         epochs=6,
                         batch_size=16,
                         validation_data=validation_generator,
                         callbacks=[early_stopping]
                         )

Epoch 1/6


  super(SGD, self).__init__(name, **kwargs)


Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


In [25]:
y_prob = model_resnet.predict(test_generator)
y_pred= np.argmax(y_prob,axis=1)
y_true = test_generator.classes

#Plotting the confusion matrix
from sklearn.metrics import confusion_matrix

print(classification_report(y_true,y_pred))

print(confusion_matrix(y_true, y_pred))

              precision    recall  f1-score   support

           0       0.33      0.34      0.34        38
           1       0.00      0.00      0.00        48
           2       0.36      0.71      0.48        49

    accuracy                           0.36       135
   macro avg       0.23      0.35      0.27       135
weighted avg       0.23      0.36      0.27       135

[[13  0 25]
 [12  0 36]
 [14  0 35]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
