In [1]:
import tensorflow as tf 
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D
import pickle
from keras.models import model_from_json
from keras.models import load_model
import matplotlib.pyplot as plt
from fastai.vision import *

from sklearn.metrics import confusion_matrix
import pandas as pd
import numpy as np
import os
import shutil
import re
import seaborn as sns



## splits indices for a folder into train, validation, and test indices with random sampling   
def split_indices(folder,seed1,seed2):
    """Randomly partition the 1-based indices of a folder's entries.

    The entries of ``folder`` are counted with ``os.listdir`` and numbered
    ``1..n``. Half of the indices (rounded down) are sampled for training,
    half of what remains (rounded down) for validation, and the rest form
    the test set.

    Args:
        folder: Directory whose entries are counted.
        seed1: Seed applied to ``random`` before drawing the training sample.
        seed2: Seed applied to ``random`` before drawing the validation sample.

    Returns:
        Tuple ``(train, valid, test)`` of disjoint lists of ints that together
        cover ``1..n``.
    """
    ## imported here so the function does not rely on `random` arriving through a star import
    import random

    n = len(os.listdir(folder))
    full_set = list(range(1, n + 1))

    ## train indices
    random.seed(seed1)
    train = random.sample(full_set, n // 2)

    ## remaining indices, kept in ascending order so the next draw
    ## does not depend on set iteration order
    train_lookup = set(train)
    remain = [i for i in full_set if i not in train_lookup]

    ## separate remaining into validation and test
    random.seed(seed2)
    valid = random.sample(remain, len(remain) // 2)
    valid_lookup = set(valid)
    test = [i for i in remain if i not in valid_lookup]

    return (train, valid, test)

## gets file names for a particular type of trash, given indices
def get_names(waste_type,indices):
    """Build the ``<waste_type><index>.jpg`` file name for every index given.

    Args:
        waste_type: Prefix string of the file names.
        indices: Iterable of indices; each one is converted with ``str``.

    Returns:
        List of file names, in the order of ``indices``.
    """
    extension = ".jpg"
    file_names = []
    for idx in indices:
        file_names.append(waste_type + str(idx) + extension)
    return file_names

## moves group of source files to another folder
def move_files(source_files,destination_folder):
    """Move every path in ``source_files`` to ``destination_folder`` with ``shutil.move``.

    Args:
        source_files: Iterable of paths to move.
        destination_folder: Target passed unchanged to ``shutil.move``.
    """
    for src_path in source_files:
        shutil.move(src_path, destination_folder)

Using TensorFlow backend.
Using TensorFlow backend.


In [2]:
DATADIR = "dataset-resized"
SUBSETS = ["train", "valid"]
CATEGORIES = ["cardboard", "glass", "metal", "paper", "plastic", "trash"]
# NOTE(review): DATADIR is not used below; the per-category source folders are
# read from 'data/<category>'. Confirm which location is intended.

## create one destination folder per subset and waste type, plus a flat test folder
for subset in SUBSETS:
    for waste_type in CATEGORIES:
        os.makedirs(os.path.join('data', subset, waste_type), exist_ok=True)
os.makedirs(os.path.join('data', 'test'), exist_ok=True)

## move each category's files into its train / valid / test destination
for waste_type in CATEGORIES:
    source_folder = os.path.join('data', waste_type)
    ## both draws use seed 1, as before
    train_ind, valid_ind, test_ind = split_indices(source_folder,1,1)

    ## test files of every category share the single data/test folder
    destinations = [
        (train_ind, os.path.join('data', 'train', waste_type)),
        (valid_ind, os.path.join('data', 'valid', waste_type)),
        (test_ind, os.path.join('data', 'test')),
    ]
    for indices, destination in destinations:
        names = get_names(waste_type, indices)
        move_files([os.path.join(source_folder, name) for name in names], destination)

FileNotFoundError: [WinError 3] The system cannot find the path specified: 'dataset-resized\\cardboard'

FileNotFoundError: [WinError 3] The system cannot find the path specified: 'dataset-resized\\cardboard'

In [None]:
## "data" folder under the current working directory, joined with the platform's separator
path = os.path.join(os.getcwd(), "data")
path

In [None]:
## transforms requested with flipping enabled, vertical flips included
tfms = get_transforms(do_flip=True, flip_vert=True)

## data bunch built from the folder layout under `path`; the "test" subfolder is the test set
data = ImageDataBunch.from_folder(
    path,
    test="test",
    ds_tfms=tfms,
    bs=16,
    num_workers=0,
)

In [None]:
## show the repr of the `data` object as this cell's output
data

In [None]:
## print the class names stored on the data bunch
print(data.classes)

In [None]:
## display a sample batch from `data` with rows=3 and a 5x5 figure size
data.show_batch(rows=3, figsize=(5,5))

In [None]:
## create a learner on `data` from models.resnet18, tracking the accuracy metric
learn = cnn_learner(data, models.resnet18, metrics=accuracy)
## one-cycle fit with arguments 1 and 1e-2
## (presumably number of epochs and maximum learning rate; confirm against the fastai version in use)
learn.fit_one_cycle(1,1e-2)
## save the learner under the name 'mini_train'
learn.save('mini_train')

In [None]:
## build an interpretation object from the fitted learner
interp = ClassificationInterpretation.from_learner(learn)
## top_losses() yields two values, unpacked here as the losses and their indices
losses,idxs = interp.top_losses()

In [None]:
## plot the top-loss examples (9 requested) in a 15x11 figure
interp.plot_top_losses(9, figsize=(15,11))

In [None]:
## confusion matrix drawn by the interpretation object
interp.plot_confusion_matrix(figsize=(20,20), dpi=60)

In [None]:
## list the most confused class pairs with min_val=2
## (presumably the minimum number of occurrences to be listed; confirm)
interp.most_confused(min_val=2)

In [None]:
## get predictions for the test dataset (DatasetType.Test)
preds = learn.get_preds(ds_type=DatasetType.Test)

In [None]:
## shape of the first element returned by get_preds, then the element itself as cell output
print(preds[0].shape)
preds[0]

In [None]:
## index (0 to 5) of the most likely (max) predicted class for each image,
## taken along axis 1 of the first element of `preds`
class_scores = preds[0]
max_idxs = np.asarray(np.argmax(class_scores, axis=1))

In [None]:
## translate every predicted class index into its class name
yhat = [data.classes[class_idx] for class_idx in max_idxs]
yhat

In [None]:
## show the first component of the first item in the learner's test dataset
learn.data.test_ds[0][0]

In [None]:
## true labels of the test images, recovered from their file names
## (lower-case letters followed by digits, e.g. the name built by get_names)
pattern = re.compile("([a-z]+)[0-9]+")

y = []
for label_path in data.test_ds.items:
    ## only the file name is searched: a parent directory made of letters
    ## followed by digits would otherwise match before the file name does
    file_name = os.path.basename(str(label_path))
    found = pattern.search(file_name)
    if found is None:
        raise ValueError("cannot extract waste type from file name: " + file_name)
    y.append(found.group(1))

In [None]:
## compare the first five predictions with the first five true labels
## predicted values
print(yhat[0:5])
## actual values
print(y[0:5])

In [None]:
## labels=CATEGORIES pins the row/column order and keeps one row and column
## per category even when a class never appears in y or yhat
cm = confusion_matrix(y, yhat, labels=CATEGORIES)
print(cm)

In [None]:
## confusion_matrix(y, yhat) puts the true labels on the rows and the predictions on the columns
df_cm = pd.DataFrame(cm, index=CATEGORIES, columns=CATEGORIES)

fig, ax = plt.subplots(figsize=(10,8))
sns.heatmap(df_cm, annot=True, fmt="d", cmap="YlGnBu", ax=ax)
## trailing semicolon hides the repr of the returned list in the cell output
ax.set(xlabel="Predicted", ylabel="Actual", title="Test-set confusion matrix");

In [None]:
## correctly classified images are the diagonal of the confusion matrix
correct = np.trace(cm)

In [None]:
## overall test accuracy: diagonal count divided by the total of the confusion matrix
## (named test_accuracy so the `accuracy` metric passed to cnn_learner is not overwritten)
test_accuracy = correct/cm.sum()
test_accuracy

In [None]:
## save the learner under the name 'modelweights'
learn.save('modelweights')