<a href="https://colab.research.google.com/github/williamfrankholland/MMAI-Deep-Learning/blob/main/colab/Waste_Classification_Bill.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#!pip install focal-loss

import pandas as pd
import numpy as np
import glob
import PIL
from PIL import Image

from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

import matplotlib.pyplot as plt

import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Conv2D, Conv3D, Flatten, MaxPooling2D, AveragePooling2D

from sklearn.utils import shuffle


#from focal_loss import SparseCategoricalFocalLoss

# Side length, in pixels, of the square images used throughout the notebook:
# files are resized to (SHAPE_SIZE, SHAPE_SIZE) on load and the model input
# is declared as (SHAPE_SIZE, SHAPE_SIZE, 3).
SHAPE_SIZE = 256


In [None]:
# Colab-only: mount Google Drive at /content/gdrive so the dataset folder
# referenced by the loading cell is reachable from this runtime.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
imdir = '/content/gdrive/MyDrive/MMAI 894 Team Project/data_clean/'
image_classes = ['battery/','biological/','brown-glass/','cardboard_recycle/','clothes_donate/','clothes_garbage/','green_glass_recycle/','metal_recycle/','paper_recycle/','plastic_recycle/','shoes/','trash/','white-glass/']

df_list = []

# Import the images from every class folder. A folder's position in
# image_classes is used as its integer label.
for label, class_dir in enumerate(image_classes):
    # glob returns matches in arbitrary (filesystem) order; sorting makes the
    # row order, and therefore the seeded train/val/test split, reproducible.
    files = sorted(glob.glob(imdir + class_dir + '*'))

    images = []
    for file in files:
        # The context manager releases the file handle as soon as the resized
        # pixel data has been copied into a NumPy array.
        with Image.open(file) as img:
            images.append(np.asarray(img.resize((SHAPE_SIZE, SHAPE_SIZE))))

    # One DataFrame per class: a 'tensor' column of image arrays plus the label.
    # NOTE(review): images are not converted to RGB here, so non-3-channel
    # files keep their own shape and are dropped later by the shape filter.
    class_df = pd.DataFrame(pd.Series(images), columns=['tensor'])
    class_df['label'] = label
    df_list.append(class_df)

In [None]:
#concatenate the per-class DataFrames into one df
df = pd.concat(df_list)

In [None]:
#reset indices after concatenation so every row gets a unique positional index
#(drop=True discards the old per-class index instead of keeping it as a column)
df = df.reset_index(drop=True)

In [None]:
# Preview the last rows to confirm that tensors and labels were loaded.
df.tail()

Unnamed: 0,tensor,label
9879,"[[[209, 197, 185], [209, 197, 185], [209, 197,...",12
9880,"[[[215, 217, 230], [215, 217, 230], [215, 217,...",12
9881,"[[[218, 220, 233], [218, 220, 233], [216, 218,...",12
9882,"[[[227, 217, 207], [227, 217, 207], [227, 217,...",12
9883,"[[[233, 219, 210], [233, 219, 210], [231, 217,...",12


In [None]:
#import raw X and Y data from the concatenated df
# sklearn.utils.shuffle does NOT shuffle in place: it returns a shuffled copy.
# The original call discarded that copy, so the data stayed ordered by class.
# Keep the result, and seed it so the row order is reproducible.
df_shuffled = shuffle(df, random_state=42)
raw_X = df_shuffled['tensor']
raw_Y = df_shuffled['label']

In [None]:
#del df, df_list
#del files
#del images

In [None]:
def clean_data(raw_X, raw_Y, num_classes=None):
    """Filter out badly-shaped images and one-hot encode the labels.

    Parameters
    ----------
    raw_X : pandas.Series
        Image arrays, one per row.
    raw_Y : pandas.Series
        Integer class labels, row-aligned with ``raw_X``.
    num_classes : int, optional
        Width of the one-hot encoding. When ``None`` (the default, matching
        the previous behaviour) it is inferred from the largest label that
        survives filtering, which can yield fewer columns than the model's
        output layer if the highest-numbered class is filtered out entirely.

    Returns
    -------
    (cleaned_X, cleaned_Y) : tuple of numpy.ndarray
        ``cleaned_X`` stacks the images whose shape is exactly
        ``(SHAPE_SIZE, SHAPE_SIZE, 3)``; ``cleaned_Y`` holds the matching
        one-hot encoded labels.
    """
    expected_shape = (SHAPE_SIZE, SHAPE_SIZE, 3)

    # Keep only images with the expected shape, in a single pass. This
    # replaces the earlier collect-indices-then-pop approach, which was
    # quadratic in the number of rows.
    kept_X = []
    kept_Y = []
    for image, label in zip(raw_X.to_list(), raw_Y.to_list()):
        if image.shape == expected_shape:
            kept_X.append(image)
            kept_Y.append(label)

    #one hot encode labels
    cleaned_Y = keras.utils.to_categorical(kept_Y, num_classes=num_classes)
    cleaned_X = np.array(kept_X)
    cleaned_Y = np.array(cleaned_Y)

    return cleaned_X, cleaned_Y

# Fix the encoding width to the number of class folders so the label matrix
# always has one column per class, regardless of which images were dropped.
cleaned_X, cleaned_Y = clean_data(raw_X, raw_Y, num_classes=len(image_classes))

In [None]:
#sanity check: report any image whose shape is not (SHAPE_SIZE, SHAPE_SIZE, 3)
#(prints the offending shape followed by its position; silent when all is well)
for position, image in enumerate(cleaned_X):
    if image.shape != (SHAPE_SIZE, SHAPE_SIZE, 3):
        print(image.shape)
        print(position)

In [None]:
def split_data(cleaned_X, cleaned_Y):
    """Partition the data into 50% train, 20% validation and 30% test.

    Both splits use ``random_state=42`` so the partition is repeatable for a
    given input order.

    Returns
    -------
    tuple
        ``(X_val, X_test, X_train, Y_val, Y_test, Y_train)`` -- note that the
        training arrays come last in each group.
    """
    # First cut: half of the samples for training, the other half held out.
    first_cut = train_test_split(cleaned_X, cleaned_Y, test_size=0.5, random_state=42)
    X_train, X_holdout, Y_train, Y_holdout = first_cut

    # Second cut: 60% of the held-out half becomes the test set (30% overall),
    # the remaining 40% becomes the validation set (20% overall).
    second_cut = train_test_split(X_holdout, Y_holdout, test_size=0.6, random_state=42)
    X_val, X_test, Y_val, Y_test = second_cut

    return X_val, X_test, X_train, Y_val, Y_test, Y_train

X_val, X_test, X_train, Y_val, Y_test, Y_train = split_data(cleaned_X, cleaned_Y)

In [None]:
def build_model():
    """Assemble the (uncompiled) CNN used for 13-class waste classification.

    Layer sequence, in order: input -> rescale -> random translation ->
    Conv(32, 5x5) -> BatchNorm -> ReLU -> MaxPool(3x3) -> Conv(64, 3x3) ->
    MaxPool(4x4) -> Conv(256, 3x3) -> MaxPool(4x4) -> Flatten ->
    Dense(512) -> Dropout(0.25) -> Dense(256) -> Dropout(0.25) ->
    Dense(13, softmax).

    Returns
    -------
    keras.Sequential
        The model, ready to be compiled.
    """
    layers = keras.layers

    # Seeded initializer so the output layer's starting weights are reproducible.
    output_initializer = keras.initializers.GlorotNormal(seed=42)

    layer_stack = [
        # Input: square RGB images of side SHAPE_SIZE.
        keras.Input(shape=(SHAPE_SIZE, SHAPE_SIZE, 3)),

        # Map pixel values through x / 127.5 - 1 (0..255 becomes -1..1).
        layers.Rescaling(scale=1.0/127.5, offset=-1.0),

        # Random shifts of up to 15% of the height/width.
        layers.RandomTranslation(height_factor=(-0.15, 0.15), width_factor=(-0.15, 0.15)),

        # Conv block 1: 32 filters of 5x5 with ReLU, then batch norm and a
        # second ReLU.
        # NOTE(review): ReLU is applied both inside the conv layer and again
        # after BatchNormalization -- confirm this is intentional.
        layers.Conv2D(filters=32, kernel_size=(5, 5), activation="relu", name="Conv2d-1", padding='same'),
        layers.BatchNormalization(),
        layers.Activation("relu"),
        # 3x3 max pooling (no explicit strides are passed).
        layers.MaxPooling2D(pool_size=(3, 3), name="Pool-1", padding="same"),

        # Conv block 2: 64 filters of 3x3 with ReLU, then 4x4 max pooling.
        layers.Conv2D(filters=64, kernel_size=(3, 3), activation="relu", name="Conv2d-2", padding='same'),
        layers.MaxPooling2D(pool_size=(4, 4), name="Pool-2", padding="same"),

        # Conv block 3: 256 filters of 3x3 with ReLU, then 4x4 max pooling.
        layers.Conv2D(filters=256, kernel_size=(3, 3), activation="relu", name="Conv2d-3", padding='same'),
        layers.MaxPooling2D(pool_size=(4, 4), name="Pool-3", padding="same"),

        # Flatten the feature maps before the dense classifier head.
        layers.Flatten(),

        # Dense head: 512 units -> 25% dropout -> 256 units -> 25% dropout.
        layers.Dense(512, activation="relu", name="Denselayer1"),
        layers.Dropout(rate=0.25, name="drop_1_25"),
        layers.Dense(256, activation="relu", name="Denselayer2"),
        layers.Dropout(rate=0.25, name="drop_2_25"),

        # Output layer: softmax over the 13 classes.
        layers.Dense(13,
                     activation='softmax',
                     kernel_initializer=output_initializer,
                     kernel_constraint=None),
    ]

    model = keras.Sequential()
    for layer in layer_stack:
        model.add(layer)

    return model

def compile_model(model):
    """Compile ``model`` in place and return it.

    Uses categorical cross-entropy as the loss, the Adam optimizer
    constructed with ``1e-3``, and accuracy as the only reported metric.
    """
    adam = keras.optimizers.Adam(1e-3)
    cross_entropy = keras.losses.categorical_crossentropy

    model.compile(loss=cross_entropy, optimizer=adam, metrics=['accuracy'])

    return model

# Hand-picked per-class loss weights keyed by integer label. They are NOT
# applied by default; opt in with train_model(..., class_weight=CLASS_WEIGHTS).
# NOTE(review): presumably meant to counter class imbalance (label 5 carries
# by far the largest weight) -- confirm the values before enabling.
CLASS_WEIGHTS = {
    0: 12.0,
    1: 11.0,
    2: 19.0,
    3: 12.0,
    4: 9.0,
    5: 100.0,
    6: 19.0,
    7: 16.0,
    8: 14.0,
    9: 19.0,
    10: 6.0,
    11: 18.0,
    12: 16.0,
}


def train_model(model, X_train, Y_train, X_val, Y_val,
                batch_size=128, epochs=20, class_weight=None):
    """Fit ``model`` on the training split, validating after every epoch.

    Parameters
    ----------
    model
        A compiled model exposing a Keras-style ``fit`` method.
    X_train, Y_train
        Training inputs and targets.
    X_val, Y_val
        Validation inputs and targets, passed as ``validation_data``.
    batch_size : int, default 128
        Number of samples per gradient update.
    epochs : int, default 20
        Number of passes over the training data.
    class_weight : dict, optional
        Mapping from class index to loss weight, forwarded to ``fit``.
        ``None`` (the default) trains without class weighting, as before.

    Returns
    -------
    (model, history)
        The same model object and whatever ``model.fit`` returned.
    """
    #fit model with train data
    history = model.fit(
        x = X_train,
        y = Y_train,
        batch_size = batch_size,
        epochs = epochs,
        class_weight = class_weight,

        #verbose training, including validation data
        verbose = 2,
        validation_data = (X_val, Y_val)
    )
    return model, history


def eval_model(model, X_test, Y_test):
    """Score ``model`` on the test split.

    Calls ``model.evaluate`` with ``verbose=2`` and expects it to yield
    exactly two values, which are returned as ``(test_loss, test_accuracy)``.
    """
    results = model.evaluate(x=X_test, y=Y_test, verbose=2)

    # Unpack explicitly so an unexpected number of metrics raises here.
    test_loss, test_accuracy = results
    return test_loss, test_accuracy



In [None]:
# End-to-end run: build and compile the network, train it with the validation
# split monitored each epoch, then score it on the held-out test split.
model = compile_model(build_model())
model, history = train_model(model, X_train, Y_train, X_val, Y_val)
test_loss, test_accuracy = eval_model(model, X_test, Y_test)

Epoch 1/20
39/39 - 7s - loss: 2.4029 - accuracy: 0.2054 - val_loss: 2.2659 - val_accuracy: 0.2743 - 7s/epoch - 180ms/step
Epoch 2/20
39/39 - 5s - loss: 1.7258 - accuracy: 0.4082 - val_loss: 1.9348 - val_accuracy: 0.3925 - 5s/epoch - 135ms/step
Epoch 3/20
39/39 - 5s - loss: 1.4771 - accuracy: 0.5005 - val_loss: 1.6534 - val_accuracy: 0.4919 - 5s/epoch - 135ms/step
Epoch 4/20
39/39 - 5s - loss: 1.2478 - accuracy: 0.5832 - val_loss: 1.4731 - val_accuracy: 0.5867 - 5s/epoch - 135ms/step
Epoch 5/20
39/39 - 5s - loss: 1.1238 - accuracy: 0.6323 - val_loss: 1.2938 - val_accuracy: 0.6334 - 5s/epoch - 134ms/step
Epoch 6/20
39/39 - 5s - loss: 1.0274 - accuracy: 0.6682 - val_loss: 1.2188 - val_accuracy: 0.5913 - 5s/epoch - 135ms/step
Epoch 7/20
39/39 - 5s - loss: 0.9360 - accuracy: 0.6992 - val_loss: 1.2630 - val_accuracy: 0.5979 - 5s/epoch - 135ms/step
Epoch 8/20
39/39 - 5s - loss: 0.8603 - accuracy: 0.7238 - val_loss: 0.9813 - val_accuracy: 0.6937 - 5s/epoch - 135ms/step
Epoch 9/20
39/39 - 5s - 

In [None]:
# Number of images per class label, counted before the shape filter is applied.
# The counts differ widely between classes, i.e. the dataset is imbalanced.
raw_Y.value_counts()

10    1744
4     1120
1      940
0      894
3      862
8      719
7      653
12     646
11     582
6      547
9      543
2      533
5      101
Name: label, dtype: int64