#### Creating File DataFrame

- 데이터 출처
    - https://open.selectstar.ai/ko/?page_id=5976
    - https://aihub.or.kr/aihubdata/data/view.do?currMenu=&topMenu=&aihubDataSe=realm&dataSetSn=242
    - https://aihub.or.kr/aihubdata/data/view.do?currMenu=&topMenu=&aihubDataSe=realm&dataSetSn=79

In [1]:
import pandas as pd
import numpy as np
import os
import json
import random
from natsort import natsorted
from pathlib import Path

In [2]:
# Root folder holding the food1 / food2 / food3 image datasets, given
# relative to the notebook's working directory.
BASE_PATH = '../data'

# pathlib view of BASE_PATH; the cells below use it for joinpath()/rglob().
dir_path = Path(BASE_PATH)

In [3]:
# --- food1: collect PNG images and derive one label per image -------------
food1_root = dir_path.joinpath('food1')

# File-name prefixes to leave out of the dataset. '._' is presumably macOS
# metadata ("AppleDouble") files; the remaining prefixes are categories the
# original author chose to exclude (reason not recorded in the notebook).
FOOD1_EXCLUDED_PREFIXES = (
    '._', 'BBQ', 'bingsu', 'cake', 'chicken', 'coffee_hot', 'coffee_ice',
    'galbi', 'serial', 'soup', 'pasta',
)

# Every PNG found anywhere below food1/.
food1_ps = list(food1_root.rglob("*.png"))

# Keep only the files whose stem does not start with an excluded prefix.
food1_paths = [
    p for p in food1_ps
    if not p.stem.startswith(FOOD1_EXCLUDED_PREFIXES)
]

# The label is the first directory below food1/ (e.g. 'baek_sook' for
# food1/baek_sook/png/baek_sook_0001.png). Using Path.parts instead of
# splitting on '\\' keeps this correct on every OS and for any BASE_PATH depth.
food1_labels = [p.parent.relative_to(food1_root).parts[0] for p in food1_paths]

print(len(food1_paths), len(food1_labels))

84748 84748


In [4]:
# --- food2: collect the names of all sub-directories ----------------------
# os.walk already yields bare directory names, so they are used directly.
# The previous join-then-split('\\') round trip only returned the bare name
# on Windows; on other systems it produced the whole joined path.
food2_cate = []

for _root, directories, _files in os.walk(dir_path.joinpath('food2')):
    food2_cate.extend(directories)

len(food2_cate)

130

In [5]:
# --- food2: gather JPGs per category, capping large categories ------------
MAX_PER_CATEGORY = 1300   # categories with more images are down-sampled to this
SAMPLE_SEED = 42          # fixed seed so the sampled subset is reproducible

food2_root = dir_path.joinpath('food2')

# Dedicated generator: reproducible sampling without touching the global
# `random` state.
sampler = random.Random(SAMPLE_SEED)

food2_paths = []

for category in food2_cate:
    # Scan the directory once (the tree is large) and sort, because rglob()
    # order depends on the filesystem and would otherwise change the sample.
    candidates = sorted(food2_root.joinpath(category).rglob("*.jpg"))
    if len(candidates) > MAX_PER_CATEGORY:
        candidates = sampler.sample(candidates, MAX_PER_CATEGORY)
    food2_paths.extend(candidates)

# The label is the first directory below food2/. Path.parts is OS-independent,
# unlike splitting the string on '\\' and taking a fixed index.
food2_labels = [p.parent.relative_to(food2_root).parts[0] for p in food2_paths]

print(len(food2_paths), len(food2_labels))

143574 143574


In [6]:
# --- food3: locate the image files ----------------------------------------
food3_root = dir_path / 'food3'

# JPG files are the ones used for the dataset.
food3_paths = [jpg for jpg in food3_root.rglob("*.jpg")]

# PNG files are collected only so their count can be reported below.
food3_png = [png for png in food3_root.rglob("*.png")]

print(len(food3_paths), len(food3_png))

40261 39


In [7]:
# The label of each food3 image is the first directory below food3/.
# Path.parts works on every OS and does not depend on how deep BASE_PATH is,
# unlike splitting the path string on '\\' and taking index 3.
food3_labels = [
    p.parent.relative_to(dir_path.joinpath('food3')).parts[0]
    for p in food3_paths
]

len(food3_labels)

40261

In [8]:
# Merge the three datasets; paths and labels are concatenated in the same
# order (food1, food2, food3) so they stay aligned index by index.
all_image_paths = [*food1_paths, *food2_paths, *food3_paths]
all_labels = [*food1_labels, *food2_labels, *food3_labels]

print(len(all_image_paths), len(all_labels))

268583 268583


In [9]:
# Column of image locations; Path objects are converted to plain strings.
image_series = pd.Series(all_image_paths, name='imagepath').astype(str)
# Column of class labels, aligned positionally with image_series.
labels_series = pd.Series(all_labels, name='label')

# Two-column frame: one row per image.
images_df = pd.DataFrame({
    image_series.name: image_series,
    labels_series.name: labels_series,
})

In [10]:
# Preview the first five rows to sanity-check the path/label pairing.
images_df.head(5)

Unnamed: 0,imagepath,label
0,..\data\food1\baek_sook\png\baek_sook_0001.png,baek_sook
1,..\data\food1\baek_sook\png\baek_sook_0002.png,baek_sook
2,..\data\food1\baek_sook\png\baek_sook_0003.png,baek_sook
3,..\data\food1\baek_sook\png\baek_sook_0004.png,baek_sook
4,..\data\food1\baek_sook\png\baek_sook_0005.png,baek_sook


In [11]:
# Number of images per label, largest class first, to inspect class balance.
images_df['label'].value_counts(sort=True, ascending=False)

label
caesar_salad            1300
toast                   1300
dumpling                1300
fish_cutlet             1300
fried_chicken           1300
                        ... 
chilled_jokbal_salad     312
sundaegukbap             305
mushroom                 248
fried_food               223
tteokgalbi               164
Name: count, Length: 253, dtype: int64

#### Setting

In [16]:
from sklearn.model_selection import train_test_split

import tensorflow as tf

In [17]:
# Seed every random number generator the notebook relies on. The stdlib
# `random` module is imported and used by this notebook, so it is seeded
# together with NumPy and TensorFlow.
SEED = 42

random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [18]:
from tensorflow.python.client import device_lib

# Print the list of available local devices (CPU and GPU).
local_devices = device_lib.list_local_devices()
print(local_devices)
print("───────────────────────────────────────")

# True when TensorFlow can see at least one physical GPU
# (the recorded run lists NVIDIA Quadro RTX 5000 cards).
visible_gpus = tf.config.list_physical_devices('GPU')
print("am I using GPU?: ", bool(visible_gpus))

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 18265116905075338660
xla_global_id: -1
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 13059315712
locality {
  bus_id: 1
  links {
  }
}
incarnation: 6817138135249836071
physical_device_desc: "device: 0, name: Quadro RTX 5000, pci bus id: 0000:03:00.0, compute capability: 7.5"
xla_global_id: 416903419
, name: "/device:GPU:1"
device_type: "GPU"
memory_limit: 13059315712
locality {
  bus_id: 1
  links {
  }
}
incarnation: 16060737681748408194
physical_device_desc: "device: 1, name: Quadro RTX 5000, pci bus id: 0000:81:00.0, compute capability: 7.5"
xla_global_id: 2144165316
]
───────────────────────────────────────
am I using GPU?:  True


In [19]:
# Short aliases for the Keras generator class and the preprocessing function.
ImageDataGenerator = tf.keras.preprocessing.image.ImageDataGenerator
# TODO: this setting needs to be changed (original author's note).
mobilenet_preprocess = tf.keras.applications.mobilenet_v2.preprocess_input

# Training generator: applies the preprocessing function and is configured
# with validation_split=0.2.
train_generator = ImageDataGenerator(
    preprocessing_function=mobilenet_preprocess,
    validation_split=0.2,
)

# Test generator: same preprocessing, no validation split.
test_generator = ImageDataGenerator(
    preprocessing_function=mobilenet_preprocess,
)