In [None]:
import pandas as pd

# Load the tab-separated dataset. Because column labels are passed via `names`,
# pandas treats every row of the file as data rather than as a header.
column_labels = ['size', 'width', 'height', 'data']
df = pd.read_csv('dataset.tsv', sep='\t', names=column_labels)
df.head()

Unnamed: 0,size,width,height,data
0,a,60,60,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBw...
1,a,60,60,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBw...
2,a,60,60,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBw...
3,a,60,60,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBw...
4,a,60,60,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBw...


In [None]:
# Import TensorFlow and tf.keras
import tensorflow as tf
from tensorflow import keras

# Import helper libraries
import numpy as np
import matplotlib.pyplot as plt

import base64
from sklearn.model_selection import train_test_split

In [None]:
# Distinct labels in sorted order; a label's position in this list is its integer class id.
class_names = sorted(df['size'].unique().tolist())

# Per-label weight: one minus the label's share of all rows, so frequent labels weigh less.
row_count = len(df)
weight = {
    label: 1 - (count / row_count)
    for label, count in df['size'].value_counts().items()
}
# The same weights re-keyed by integer class id.
weights = {class_id: weight[label] for class_id, label in enumerate(class_names)}

# Swap the label strings for their class ids and the base64 text for raw bytes.
label_to_id = {label: class_id for class_id, label in enumerate(class_names)}
df['size'] = [label_to_id[label] for label in df['size']]
df['data'] = [base64.b64decode(encoded.encode()) for encoded in df['data']]
DATASET_SIZE = len(df)

In [None]:

# Both splits use the same hold-out fraction and seed, and are stratified on the class id.
SPLIT_SEED = 10101
HOLDOUT_FRACTION = 0.1

df_train, df_test = train_test_split(
    df, test_size=HOLDOUT_FRACTION, random_state=SPLIT_SEED, stratify=df['size'])
df_train, df_valid = train_test_split(
    df_train, test_size=HOLDOUT_FRACTION, random_state=SPLIT_SEED, stratify=df_train['size'])


def _pop_source_target_dataset(frame):
    """Remove the 'data' and 'size' columns from `frame` and wrap them in a tf.data.Dataset.

    Returns the popped 'data' Series, the popped 'size' Series and a dataset of
    (data, size) pairs. `frame` itself loses both columns.
    """
    source = frame.pop('data')
    target = frame.pop('size')
    dataset = tf.data.Dataset.from_tensor_slices((source, target.values))
    return source, target, dataset


train_source, train_target, train_dataset = _pop_source_target_dataset(df_train)
test_source, test_target, test_dataset = _pop_source_target_dataset(df_test)
valid_source, valid_target, valid_dataset = _pop_source_target_dataset(df_valid)

In [None]:
def decode_data(data, target):
  """Decode one JPEG-encoded sample into a float image for the model.

  Args:
    data: scalar string tensor holding the JPEG bytes of one image.
    target: label belonging to the image; passed through unchanged.

  Returns:
    A tuple (image, target) where image is a float32 tensor of shape
    [60, 60, 3] with pixel values on the original 0-255 scale.
  """
  image = tf.image.decode_jpeg(data, channels=3)
  # tf.image.resize returns float32 and keeps the 0-255 value scale.
  image = tf.image.resize(image, [60, 60])
  # Deliberately NOT divided by 255: EfficientNetV2 with its default
  # include_preprocessing=True rescales inside the model and expects
  # [0, 255] inputs. Normalizing here as well would scale the pixels twice.
  return image, target

In [None]:
BATCH_SIZE = 12

# Training set: shuffle across the whole dataset (a tiny buffer would leave the
# order almost unchanged between epochs). Shuffling happens before decoding so
# the buffer only holds the compact encoded bytes, not decoded float images.
train_dataset = (
    train_dataset
    .shuffle(len(train_dataset))
    .map(decode_data, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(BATCH_SIZE)
)

# Evaluation sets: no shuffling, since order does not affect the metrics and a
# stable order keeps predictions aligned with labels across iterations.
test_dataset = (
    test_dataset
    .map(decode_data, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)
valid_dataset = (
    valid_dataset
    .map(decode_data, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

In [None]:
# Random augmentation pipeline, applied to training images only.
# The layers are taken from tf.keras.layers directly; the
# tf.keras.layers.experimental.preprocessing namespace is a deprecated alias.
data_augmentation = tf.keras.Sequential([
  # Random mirror along both axes.
  tf.keras.layers.RandomFlip("horizontal_and_vertical"),
  # Rotation factor is a fraction of a full turn (0.4 -> up to +/-144 degrees).
  tf.keras.layers.RandomRotation(0.4, fill_mode='reflect'),
  tf.keras.layers.RandomZoom(height_factor=0.2, fill_mode='reflect'),
  tf.keras.layers.RandomTranslation(height_factor=0.2, width_factor=0.2, fill_mode="reflect"),
  tf.keras.layers.RandomContrast(factor=0.2),
  # RandomHeight / RandomWidth change the spatial size of the batch, so the
  # output is not guaranteed to keep the input height and width.
  tf.keras.layers.RandomHeight(factor=0.2),
  tf.keras.layers.RandomWidth(factor=0.2)
])

In [None]:
def _augment_batch(images, labels):
  """Run the augmentation pipeline on `images` in training mode; labels pass through."""
  augmented = data_augmentation(images, training=True)
  return augmented, labels


train_dataset = train_dataset.map(_augment_batch, num_parallel_calls=tf.data.AUTOTUNE)

In [None]:
# EfficientNetV2-B0 backbone without its classification head.
efn2 = tf.keras.applications.EfficientNetV2B0(input_shape=(60, 60, 3), include_top=False)

# Backbone -> global average pooling -> linear layer with one unit per class.
# The Dense layer has no activation, so the model outputs raw logits.
model = tf.keras.Sequential([
    efn2,
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(len(class_names))])

# from_logits=True is required because the head emits logits; the string alias
# 'sparse_categorical_crossentropy' assumes probabilities (from_logits=False)
# and would compute the wrong loss on un-normalized outputs.
model.compile(optimizer=tf.keras.optimizers.Adam(1e-4),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 efficientnetv2-b0 (Function  (None, 2, 2, 1280)       5919312   
 al)                                                             
                                                                 
 global_average_pooling2d (G  (None, 1280)             0         
 lobalAveragePooling2D)                                          
                                                                 
 dense (Dense)               (None, 8)                 10248     
                                                                 
Total params: 5,929,560
Trainable params: 5,868,952
Non-trainable params: 60,608
_________________________________________________________________


In [None]:
# Train for a fixed number of epochs, validating after each one and weighting
# the loss per class with `weights`.
epochs = 50
history = model.fit(
    train_dataset,
    epochs=epochs,
    validation_data=valid_dataset,
    class_weight=weights,
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
