<a href="https://colab.research.google.com/github/ytg000629/Dacon_Dobae/blob/main/modeling/InceptionV3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#라이브러리

import tensorflow as tf
import numpy as np

from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.vgg19 import VGG19, preprocess_input
from tensorflow.keras.layers import Dense, Flatten, GlobalAveragePooling2D
from tensorflow.keras.models import Model

from sklearn.metrics import f1_score

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import tensorflow as tf
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalMaxPooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import AUC, Precision, Recall
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
from sklearn.metrics import classification_report, confusion_matrix, f1_score

# define constants
img_width, img_height = 299, 299
batch_size = 32
epochs = 20
num_classes = 19

train_data_dir = "/content/drive/MyDrive/dacon/data/data_split/train"
val_data_dir = '/content/drive/MyDrive/dacon/data/data_split/val'
test_data_dir = '/content/drive/MyDrive/dacon/data/test'


In [5]:
# create the base pre-trained model
base_model = InceptionV3(weights='imagenet', include_top=False, input_shape=(img_width, img_height, 3))

# add a global spatial max pooling layer
x = base_model.output
x = GlobalMaxPooling2D()(x)

# add a fully-connected layer
x = Dense(1024, activation='relu')(x)

# and a logistic layer
predictions = Dense(num_classes, activation='softmax')(x)

# this is the model we will train
model = Model(inputs=base_model.input, outputs=predictions)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5


In [6]:
# first: train only the top layers (which were randomly initialized)
# i.e. freeze all convolutional InceptionV3 layers
for layer in base_model.layers:
    layer.trainable = False

In [7]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 299, 299, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv2d (Conv2D)                (None, 149, 149, 32  864         ['input_1[0][0]']                
                                )                                                                 
                                                                                                  
 batch_normalization (BatchNorm  (None, 149, 149, 32  96         ['conv2d[0][0]']                 
 alization)                     )                                                             

In [8]:
!pip install tensorflow-addons==0.16.1

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tensorflow-addons==0.16.1
  Downloading tensorflow_addons-0.16.1-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m30.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting typeguard>=2.7 (from tensorflow-addons==0.16.1)
  Downloading typeguard-4.0.0-py3-none-any.whl (33 kB)
Installing collected packages: typeguard, tensorflow-addons
Successfully installed tensorflow-addons-0.16.1 typeguard-4.0.0


In [9]:
import tensorflow_addons.metrics
w_f1 = tensorflow_addons.metrics.F1Score(num_classes=19, average = 'weighted')

 The versions of TensorFlow you are currently using is 2.12.0 and is not supported. 
Some things might work, some things might not.
If you were to encounter a bug, do not file an issue.
If you want to make sure you're using a tested and supported configuration, either change the TensorFlow version or the TensorFlow Addons's version. 
You can find the compatibility matrix in TensorFlow Addon's readme:
https://github.com/tensorflow/addons


In [11]:
# compile the model (should be done *after* setting layers to non-trainable)
from tensorflow.keras.losses import CategoricalCrossentropy
model.compile(optimizer=Adam(learning_rate=0.001), 
              loss= CategoricalCrossentropy(label_smoothing=0.1),
              metrics=['accuracy', w_f1])

In [4]:
# apply data augmentation
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True)

val_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='categorical')

val_generator = val_datagen.flow_from_directory(
    val_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='categorical')

Found 2410 images belonging to 19 classes.
Found 1047 images belonging to 19 classes.


In [12]:
# implement early stopping
callbacks = [
    ReduceLROnPlateau(monitor = 'val_f1_score', factor = 0.1, patience = 5, mode = 'min', verbose = 1),
    EarlyStopping(patience=10, verbose=1),
    ModelCheckpoint('model.h5', verbose=1, save_best_only=True, save_weights_only=True)
]

# train the model on the new data for a few epochs
model.fit(
    train_generator,
    epochs=50,
    validation_data=val_generator,
    callbacks=callbacks)

Epoch 1/50
Epoch 1: val_loss improved from inf to 1.88141, saving model to model.h5
Epoch 2/50
Epoch 2: val_loss improved from 1.88141 to 1.68882, saving model to model.h5
Epoch 3/50
Epoch 3: val_loss improved from 1.68882 to 1.62956, saving model to model.h5
Epoch 4/50
Epoch 4: val_loss did not improve from 1.62956
Epoch 5/50
Epoch 5: val_loss did not improve from 1.62956
Epoch 6/50
Epoch 6: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.

Epoch 6: val_loss improved from 1.62956 to 1.58449, saving model to model.h5
Epoch 7/50
Epoch 7: val_loss improved from 1.58449 to 1.49289, saving model to model.h5
Epoch 8/50
Epoch 8: val_loss improved from 1.49289 to 1.48419, saving model to model.h5
Epoch 9/50
Epoch 9: val_loss did not improve from 1.48419
Epoch 10/50
Epoch 10: val_loss did not improve from 1.48419
Epoch 11/50
Epoch 11: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.

Epoch 11: val_loss did not improve from 1.48419
Epoch 12/50
Epoch 12: val

<keras.callbacks.History at 0x7f3480988250>

In [13]:
# Evaluate the model
# Load the best saved model based on validation data
model.load_weights('model.h5')

In [16]:
import numpy as np
import os
from tensorflow.keras.preprocessing import image


# get all the image filenames
image_filenames = [f for f in os.listdir(test_data_dir) if os.path.isfile(os.path.join(test_data_dir, f))]

# sort the filenames
image_filenames.sort()

# for each image filename
for image_filename in image_filenames:
    # load the image
    img = image.load_img(os.path.join(test_data_dir, image_filename), target_size=(img_width, img_height))
    # convert image to array
    x = image.img_to_array(img)
    # scale the image
    x *= 1./255
    # add an extra dimension (for the batch)
    x = np.expand_dims(x, axis=0)
    # get the prediction from the model
    pred = model.predict(x)
    # get the most likely class
    pred_class = np.argmax(pred, axis=1)
    # print the predicted class
    print(f"The image {image_filename} is predicted to be in class {pred_class}")

The image 000.png is predicted to be in class [18]
The image 001.png is predicted to be in class [18]
The image 002.png is predicted to be in class [18]
The image 003.png is predicted to be in class [3]
The image 004.png is predicted to be in class [10]
The image 005.png is predicted to be in class [18]
The image 006.png is predicted to be in class [10]
The image 007.png is predicted to be in class [18]
The image 008.png is predicted to be in class [3]
The image 009.png is predicted to be in class [11]
The image 010.png is predicted to be in class [7]
The image 011.png is predicted to be in class [11]
The image 012.png is predicted to be in class [18]
The image 013.png is predicted to be in class [10]
The image 014.png is predicted to be in class [18]
The image 015.png is predicted to be in class [18]
The image 016.png is predicted to be in class [18]
The image 017.png is predicted to be in class [1]
The image 018.png is predicted to be in class [18]
The image 019.png is predicted to b

In [40]:
import pandas as pd

# ... (the rest of your code)

# for each image filename
image_predictions = []
for image_filename in image_filenames:
    # load the image
    img = image.load_img(os.path.join(test_data_dir, image_filename), target_size=(img_width, img_height))
    # convert image to array
    x = image.img_to_array(img)
    # scale the image
    x *= 1./255
    # add an extra dimension (for the batch)
    x = np.expand_dims(x, axis=0)
    # get the prediction from the model
    pred = model.predict(x)
    # get the most likely class
    pred_class = np.argmax(pred, axis=1)
    # append the filename and its prediction to the list
    image_predictions.append((image_filename, pred_class[0]))

# convert the list to a DataFrame
df = pd.DataFrame(image_predictions, columns=['filename', 'predicted_class'])

# save the DataFrame to a csv file
df.to_csv('inceptionsV3.csv', index=False)




In [19]:
from google.colab import files

files.download('inceptionsV3.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [39]:
##여기 코드 숫자랑 label이름 매칭이 안됨
##다시 짜야됨

# Rename the columns
df = df.rename(columns={'filename': 'id', 'predicted_class': 'label'})

# Replace the values in the 'id' column
df['id'] = df['id'].str.replace('.png', '').str.zfill(3).apply(lambda x: f'TEST_{x}')

# Replace the values in the 'label' column
class_names = ['가구수정', '걸레받이수정', '곰팡이', '꼬임', '녹오염', '들뜸', '면불량', '몰딩수정', '반점', '석고수정', '오염', '오타공', '울음', '이음부불량', '창틀&문틀수정', '터짐', '틈새과다', '피스', '훼손']
class_dict = {i: class_name for i, class_name in enumerate(class_names, 0)}
df['label'] = df['label'].replace(class_dict)

# save the DataFrame to a csv file
df.to_csv('inceptionV3.csv', index=False, encoding= 'utf-8-sig')

  df['id'] = df['id'].str.replace('.png', '').str.zfill(3).apply(lambda x: f'TEST_{x}')


In [23]:
train_generator.class_indices

{'가구수정': 0,
 '걸레받이수정': 1,
 '곰팡이': 2,
 '꼬임': 3,
 '녹오염': 4,
 '들뜸': 5,
 '면불량': 6,
 '몰딩수정': 7,
 '반점': 8,
 '석고수정': 9,
 '오염': 10,
 '오타공': 11,
 '울음': 12,
 '이음부불량': 13,
 '창틀,문틀수정': 14,
 '터짐': 15,
 '틈새과다': 16,
 '피스': 17,
 '훼손': 18}

In [37]:
class_dict

{0: '가구수정',
 1: '걸레받이수정',
 2: '곰팡이',
 3: '꼬임',
 4: '녹오염',
 5: '들뜸',
 6: '면불량',
 7: '몰딩수정',
 8: '반점',
 9: '석고수정',
 10: '오염',
 11: '오타공',
 12: '울음',
 13: '이음부불량',
 14: '창틀&문틀수정',
 15: '터짐',
 16: '틈새과다',
 17: '피스',
 18: '훼손'}