In [None]:
import os
import glob
import shutil

from tqdm import tqdm

import datetime

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, BatchNormalization, Activation
from keras.constraints import maxnorm
from keras.layers.convolutional import Conv2D, MaxPooling2D
from tensorflow.keras.preprocessing import image_dataset_from_directory

from sklearn.model_selection import train_test_split

In [None]:
# Directory one level above the current working directory.
_working_dir = os.getcwd()
PARENT_FOLDER = os.path.split(_working_dir)[0]

In [None]:
# Location of the raw images. The `...` placeholder must be replaced with a real
# path string before running the cells below; os.path.join() cannot accept it.
# NOTE(review): the split cell globs one level below DATA_PATH and strips/replaces
# the literal 'Mushrooms' in each matched path, so this presumably should be
# something like 'Mushrooms/*' relative to the working directory — confirm.
DATA_PATH = ... # todo

In [None]:
# Collect every file matched one level below DATA_PATH. Sorting makes the input
# order deterministic (glob order is unspecified), and the isfile() filter keeps
# stray directories out of the split, since shutil.copy() cannot copy them.
data = sorted(
    path for path in glob.glob(os.path.join(DATA_PATH,'*'))
    if os.path.isfile(path)
)

# A fixed random_state (together with the sorted input) makes the partition
# reproducible. Without it, re-running this cell would copy a *different* split
# on top of the previous one and the same image could end up in both 'train'
# and 'test'.
# NOTE: files copied by earlier runs with another split are not removed; delete
# the 'train' and 'test' folders first if that applies.
train, test = train_test_split(data, test_size = 0.2, random_state = 42)


def _class_name(path):
  """Return the name of the directory that directly contains `path`."""
  return os.path.basename(os.path.dirname(path))


def _copy_into_split(paths, set_name):
  """Copy every file in `paths` to '<set_name>/<class name>/<file name>'."""
  for source in tqdm(paths):
    target = os.path.join(set_name, _class_name(source), os.path.basename(source))
    shutil.copy(source,target)


# The class label of an image is the name of its parent folder. Deriving it with
# os.path (instead of replacing the literal 'Mushrooms/' and splitting on '/')
# also works for absolute paths and for other path separators.
names = {_class_name(file) for file in data}

for name in tqdm(names):
  for set_name in ('train','test'):
    os.makedirs(os.path.join(set_name,name), exist_ok = True)

_copy_into_split(train, 'train')
_copy_into_split(test, 'test')

In [None]:
# Shared settings for every dataset loaded with image_dataset_from_directory.
dataset_config = {
    'labels'            : 'inferred',
    'label_mode'        : 'categorical',
    'class_names'       : ['Russula', 'Entoloma', 'Amanita', 'Lactarius', 'Cortinarius', 'Hygrocybe', 'Agaricus', 'Suillus', 'Boletus'],
    'color_mode'        : 'grayscale',
    'batch_size'        : 64,
    'shuffle'           : True,
    'seed'              : 42,
    'validation_split'  : 0.25,
    'image_size'        : (256, 256),
    'interpolation'     : 'bilinear',
    'follow_links'      : False
    }


def _load_split(directory, subset = None):
  """Load a batched image dataset from `directory` using `dataset_config`.

  Args:
    directory: folder that contains one sub-folder per class.
    subset: None to load every image, or 'training' / 'validation' to load one
      side of the dataset_config['validation_split'] partition.

  Returns:
    The dataset produced by image_dataset_from_directory.
  """
  return image_dataset_from_directory(
      directory,
      labels            = dataset_config['labels'],
      label_mode        = dataset_config['label_mode'],
      class_names       = dataset_config['class_names'],
      color_mode        = dataset_config['color_mode'],
      batch_size        = dataset_config['batch_size'],
      image_size        = dataset_config['image_size'],
      shuffle           = dataset_config['shuffle'],
      seed              = dataset_config['seed'],
      # validation_split and subset have to be given together (or not at all).
      validation_split  = dataset_config['validation_split'] if subset else None,
      subset            = subset,
      interpolation     = dataset_config['interpolation'],
      follow_links      = dataset_config['follow_links'],
  )


# The training and validation sets are the two complementary parts of the
# 'train' folder. Both calls share the same seed, so the parts do not overlap.
# Previously the training set was the *whole* folder, which meant every
# validation image was also trained on and validation metrics were inflated.
train_dataset = _load_split('train', subset = 'training')
val_dataset   = _load_split('train', subset = 'validation')

# The held-out test folder is used in full.
test_dataset  = _load_split('test')

In [None]:
# Create the model
def create_model(input_shape = None, num_classes = None):
  """Build and compile the convolutional image classifier.

  Args:
    input_shape: (height, width, channels) of one input image. When omitted it
      is derived from dataset_config: 'image_size' plus the channel count that
      corresponds to 'color_mode'.
    num_classes: number of softmax outputs. When omitted it is the number of
      entries in dataset_config['class_names'].

  Returns:
    A Sequential model compiled with categorical cross-entropy, the Adam
    optimizer and the accuracy metric.
  """
  if input_shape is None:
    # Keep the network input in step with how the images are actually loaded
    # instead of hard-coding a single channel.
    channels = {'grayscale': 1, 'rgb': 3, 'rgba': 4}[dataset_config['color_mode']]
    height, width = dataset_config['image_size']
    input_shape = (height, width, channels)

  if num_classes is None:
    # One output unit per class known to the datasets.
    num_classes = len(dataset_config['class_names'])

  model = Sequential()

  def add_conv_stage(filters, pool, **conv_kwargs):
    # Conv -> ReLU -> (optional 2x2 max-pool) -> Dropout -> BatchNorm
    model.add(Conv2D(filters, (3, 3), padding='same', **conv_kwargs))
    model.add(Activation('relu'))
    if pool:
      model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.2))
    model.add(BatchNormalization())

  # Feature extractor: only the two middle stages down-sample.
  add_conv_stage(32, pool=False, input_shape=input_shape)
  add_conv_stage(64, pool=True)
  add_conv_stage(64, pool=True)
  add_conv_stage(128, pool=False)

  model.add(Flatten())
  model.add(Dropout(0.2))

  # Classifier head: two max-norm constrained dense stages.
  for units in (256, 128):
    model.add(Dense(units, kernel_constraint=maxnorm(3)))
    model.add(Activation('relu'))
    model.add(Dropout(0.2))
    model.add(BatchNormalization())

  model.add(Dense(num_classes))
  model.add(Activation('softmax'))

  optimizer = 'Adam'

  model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

  return model

In [None]:
# Build a fresh model and train it for 20 epochs, validating after each epoch.
model = create_model()

# train_dataset is already batched by image_dataset_from_directory, so no
# batch_size is passed here: Keras documents that batch_size must not be
# specified for dataset inputs (it is ignored or rejected depending on version).
# The returned History object is kept so the training curves can be inspected.
history = model.fit(train_dataset, validation_data = val_dataset, epochs = 20)

In [None]:
# Evaluate on the test dataset. The model is compiled with a single metric
# (accuracy), so scores[0] is the loss and scores[1] is the accuracy.
scores = model.evaluate(
    test_dataset,
    verbose = 0,
)
test_accuracy_percent = scores[1] * 100
print("Accuracy: %.2f%%" % test_accuracy_percent)

In [None]:
# Name the saved model after the current timestamp and the test accuracy (in %).
model_name = '{}_{}'.format(datetime.datetime.now(), 100 * scores[1])

# Fixed typo: this used to read `os.path,join(...)` (comma instead of dot),
# which builds a tuple and fails with NameError because `join` is undefined.
model_path = os.path.join(PARENT_FOLDER,'model',model_name)

# Make sure the '<PARENT_FOLDER>/model' directory exists before saving into it.
os.makedirs(os.path.dirname(model_path), exist_ok = True)

model.save(model_path)

print('Model name: {}'.format(model_name))
print('Model saved to: {}'.format(model_path))