## project : yoga position type classification with image dataset / skku 기계학습 수업
info : 요가에 관한 이미지가 주어졌을 때, 각 요가 이미지가 어떤 자세 레이블인지를 예측하는 프로젝트.
name : 조병웅  
model : 해당 프로젝트에는 사전학습된 VGG 모델을 사용.  

## 라이브러리 및 사전 학습 모델 호출

In [1]:
!pip install split-folders

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting split-folders
  Downloading split_folders-0.5.1-py3-none-any.whl (8.4 kB)
Installing collected packages: split-folders
Successfully installed split-folders-0.5.1


In [2]:
from keras import models, layers
from keras.applications import VGG16
from keras import Input
from keras.models import Model
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers, initializers, regularizers, metrics
from keras.callbacks import ModelCheckpoint
import os
from glob import glob
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import math
import splitfolders

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## 데이터 전처리 및 학습set 나누기

In [4]:
#test셋 validation 셋 나누기
splitfolders.ratio("/content/drive/MyDrive/Colab Notebooks/train", output="output", seed=1337, ratio=(.8, .2))

Copying files: 2498 files [00:39, 63.06 files/s] 


In [5]:
#데이터 증강 및 제너레이터 형성
train_datagen = ImageDataGenerator(rescale=1./255, rotation_range=40,
                                      width_shift_range=0.2,
                                      height_shift_range=0.2,
                                      shear_range=0.2,
                                      zoom_range=0.2,
                                      horizontal_flip=True,
                                      fill_mode='nearest')
val_datagen = ImageDataGenerator(rescale=1./255)

In [6]:
train_dir = os.path.join('/content/output/train')
val_dir = os.path.join('/content/output/val')

In [7]:
#제너레이터 형성
train_generator = train_datagen.flow_from_directory(train_dir, batch_size=16, target_size=(224, 224), color_mode='rgb')
val_generator = val_datagen.flow_from_directory(val_dir, batch_size=16, target_size=(224, 224), color_mode='rgb')


Found 1993 images belonging to 7 classes.
Found 503 images belonging to 7 classes.


In [8]:
train_generator.classes

array([1, 1, 1, ..., 6, 6, 6], dtype=int32)

In [9]:
train_generator.class_indices

{'.ipynb_checkpoints': 0, '0': 1, '1': 2, '2': 3, '3': 4, '4': 5, '5': 6}

In [10]:
input_tensor = Input(shape=(224, 224, 3), dtype='float32', name='input')


## 모델 불러오기

In [11]:
#VGG 모듈 불러오기
pre_trained_vgg = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
pre_trained_vgg.trainable = False
pre_trained_vgg.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)    

In [12]:
#fc레이어 설정
additional_model = models.Sequential()
additional_model.add(pre_trained_vgg)
additional_model.add(layers.Flatten())
additional_model.add(layers.Dense(4096, activation='relu'))
additional_model.add(layers.Dense(2048, activation='relu'))
additional_model.add(layers.Dense(1024, activation='relu'))
additional_model.add(layers.Dense(7, activation='softmax'))
 
 
additional_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vgg16 (Functional)          (None, 7, 7, 512)         14714688  
                                                                 
 flatten (Flatten)           (None, 25088)             0         
                                                                 
 dense (Dense)               (None, 4096)              102764544 
                                                                 
 dense_1 (Dense)             (None, 2048)              8390656   
                                                                 
 dense_2 (Dense)             (None, 1024)              2098176   
                                                                 
 dense_3 (Dense)             (None, 7)                 7175      
                                                                 
Total params: 127,975,239
Trainable params: 113,260,551


In [13]:
#메모리 용량 오류 방지 코드
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

In [None]:
#모델 훈련
checkpoint = ModelCheckpoint(filepath='pretrained_VGG_weight.hdf5', 
            monitor='loss', 
            mode='min', 
            save_best_only=True)
#lr2e-5 : 0.88 / 90 / 0.0001 / 0.0001 : 0.86 91 / +70 : 92
additional_model.compile(loss='categorical_crossentropy', optimizer=optimizers.RMSprop(lr=0.0001), metrics=['acc'])
 
 
history = additional_model.fit_generator(train_generator, 
            steps_per_epoch=math.ceil(train_generator.n / train_generator.batch_size), 
            epochs=70, 
            validation_data=val_generator, 
            validation_steps=math.ceil(val_generator.n / val_generator.batch_size), 
            callbacks=[checkpoint])


  super().__init__(name, **kwargs)
  history = additional_model.fit_generator(train_generator,


Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
Epoch 13/70
Epoch 14/70
Epoch 15/70
Epoch 16/70
Epoch 17/70
Epoch 18/70
Epoch 19/70
Epoch 20/70
Epoch 21/70
Epoch 22/70
Epoch 23/70
Epoch 24/70
Epoch 25/70
Epoch 26/70
Epoch 27/70
Epoch 28/70
Epoch 29/70
Epoch 30/70
Epoch 31/70
Epoch 32/70
Epoch 33/70
Epoch 34/70
Epoch 35/70
Epoch 36/70

## 결과

In [None]:
#결과 visualization
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
 
epochs = range(1, len(acc) + 1)
 
plt.plot(epochs, acc, 'b', label='Training acc')
plt.plot(epochs, val_acc, 'r', label='Validation acc')
plt.title('Accuracy')
plt.legend()
plt.figure()
 
plt.plot(epochs, loss, 'b', label='Training loss')
plt.plot(epochs, val_loss, 'r', label='Validation loss')
plt.title('Loss')
plt.legend()
 
plt.show()


In [None]:
#test_data 불러오기 및 sub파일 생성
import cv2
from keras.utils import load_img, img_to_array
sub_list = []
sub_name_list = []
for i in range(389):
  path = '/content/drive/MyDrive/Colab Notebooks/test/'+str(i)+'.jpg'
  img = load_img(path, target_size=(224, 224))
  test_data = img_to_array(img)
  test_data_name = str(i)+'.jpg'
  test_data = test_data / 255
  test_data = np.expand_dims(test_data, axis = 0)
  pred_probability = additional_model.predict(test_data)
  pred = np.argmax(pred_probability)
  for i in range(1,7):
    if pred == i:
      pred = (i-1)
  sub_list.append(pred)
  sub_name_list.append(test_data_name)




In [None]:
sub_name_list

In [None]:
#make_sub
"""
def make_sub(self, x):
  z = self.predict(x)
  z = z.astype(np.int64)
  sub_df = pd.DataFrame({'label' : z})
  sub_df.index = range(0,z.shape[0])
  sub_df.to_csv('submission_example.csv', index_label=['id'])
"""
import pandas as pd

sub_list = np.array(sub_list)
sub_list = sub_list.astype(np.int64)
sub_df = pd.DataFrame({'Id' : sub_name_list, 'Category' : sub_list})
sub_df.to_csv('submission-example.csv', index = False)

## 캐글로 전송

In [None]:
!pip install kaggle
from google.colab import files
files.upload()
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
!kaggle competitions submit -c skku-2023-1-machine-learning-third-project -f submission-example.csv -m "Message"