In [None]:
# Import necessary libraries
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image
import numpy as np
from tensorflow.keras.optimizers import RMSprop
import os
from os import listdir


## Import Data from Local Files and Create Directories

In [39]:
# O is for Organic Foods 
# R is for Recyclable Foods

# Connect to base directory of Project Data
base_dir = 'C:/Users/umara/Desktop/Waste/DATASET'

# Create a directory for our Training and Validation Images Data
train_dir = os.path.join(base_dir, 'TRAIN')
validation_dir = os.path.join(base_dir, 'TEST')
content_dir = os.path.join(base_dir, 'content')

# Directory with our training Organic/Recyclable pictures
train_org_dir = os.path.join(train_dir, 'O')
train_rec_dir = os.path.join(train_dir, 'R')

# Directory with our validation/test Organic/Recyclable pictures
test_org_dir = os.path.join(validation_dir, 'O')
test_rec_dir = os.path.join(validation_dir, 'R')

In [21]:
# Lets check how much images there are in each test/train image files
# There are a total of 22654 Training images and 2513 Test Images 
# Split up in 85% Train and 15% Test 
print('total training Organic item images :', len(os.listdir( train_org_dir )))
print('total training Recylabe item images :', len(os.listdir( train_rec_dir )))

print('total validation Organic item images :', len(os.listdir( test_org_dir ) ))
print('total validation Recylabe item images :', len(os.listdir( test_rec_dir ) ))


total training Organic item images : 12565
total training Recylabe item images : 9999
total validation Organic item images : 1401
total validation Recylabe item images : 1112


### Create our Neural Network Model
Since we are facing a two-class classification problem, we will end our Neural Network with a 'sigmoid' activation parameter. This will give us a scalar between 0 and 1. Essentially the probability that our image is of Class 1. 

In [31]:
# Create model

model = tf.keras.models.Sequential([
    # First Layer
    tf.keras.layers.Conv2D(16, (3,3), activation = 'relu', input_shape = (150, 150, 3)),
    tf.keras.layers.MaxPool2D(2, 2),
    
    # Second layer
    tf.keras.layers.Conv2D(32, (3,3), activation = 'relu'),
    tf.keras.layers.MaxPool2D(2, 2),
    
    # Third Layer 
    tf.keras.layers.Conv2D(64, (3,3), activation = 'relu'),
    tf.keras.layers.MaxPool2D(2, 2),

    # Fourth Layer 
    tf.keras.layers.Conv2D(128, (3,3), activation = 'relu'),
    tf.keras.layers.MaxPool2D(2, 2),
    
    # Final Layer
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation = 'relu'),
    tf.keras.layers.Dense(1, activation = 'sigmoid')
    
])

We can check the summary of our Neural Network and see how it is broken down by Convolutional Layers

In [32]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_3 (Conv2D)            (None, 148, 148, 16)      448       
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 74, 74, 16)        0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 72, 72, 32)        4640      
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 36, 36, 32)        0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 34, 34, 64)        18496     
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 17, 17, 64)        0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 15, 15, 128)      

####  Now we configure specfications on our Model

We will use __Binary Cross Entropy__ for the loss function since its a Binary Classification Problem at hand and our final activation in our Neural Network is a Sigmoid. We are using the Metrics parameter to measure our accuracy of our model. 

In [33]:
model.compile(optimizer = RMSprop(lr=0.01),
             loss = 'binary_crossentropy',
             metrics = ['accuracy'])

### Preprocessing the Data

This step allows for the images to be of the same width, height. Since data that goes into the CNN should be normalized we will normalize our pixels going from a range of [0, 255] to [0, 1]. 

In [34]:
# Rescaling all images in given Dataset
train_datagen = ImageDataGenerator( rescale = 1.0/255.)
test_datagen = ImageDataGenerator( rescale = 1.0/255. )

# --------------------
# Flow training images in batches of 20 using train_datagen generator
# --------------------
train_generator = train_datagen.flow_from_directory(train_dir,
                                                    batch_size=15,
                                                    class_mode='binary',
                                                    target_size=(150, 150))     
# --------------------
# Flow validation images in batches of 20 using test_datagen generator
# --------------------
validation_generator =  test_datagen.flow_from_directory(validation_dir,
                                                         batch_size=15,
                                                         class_mode  = 'binary',
                                                         target_size = (150, 150))

Found 22564 images belonging to 2 classes.
Found 2513 images belonging to 2 classes.


## Training our Model


In [35]:
history = model.fit(train_generator,
                    validation_data=validation_generator,
                    steps_per_epoch=80,
                    epochs=20,
                    validation_steps=50,
                    verbose=2)

Epoch 1/20
100/100 - 20s - loss: 30.0763 - accuracy: 0.5625 - val_loss: 0.6542 - val_accuracy: 0.5860
Epoch 2/20
100/100 - 21s - loss: 1.7065 - accuracy: 0.5355 - val_loss: 0.6869 - val_accuracy: 0.5570
Epoch 3/20
100/100 - 21s - loss: 0.9216 - accuracy: 0.5580 - val_loss: 0.6859 - val_accuracy: 0.5620
Epoch 4/20
100/100 - 20s - loss: 0.7408 - accuracy: 0.5565 - val_loss: 0.6845 - val_accuracy: 0.5650
Epoch 5/20
100/100 - 20s - loss: 1.1042 - accuracy: 0.5750 - val_loss: 0.6364 - val_accuracy: 0.6420
Epoch 6/20
100/100 - 20s - loss: 1.2157 - accuracy: 0.5735 - val_loss: 0.7351 - val_accuracy: 0.5630
Epoch 7/20
100/100 - 20s - loss: 0.7100 - accuracy: 0.6880 - val_loss: 0.5485 - val_accuracy: 0.7890
Epoch 8/20
100/100 - 20s - loss: 0.6145 - accuracy: 0.7525 - val_loss: 0.5508 - val_accuracy: 0.7250
Epoch 9/20
100/100 - 20s - loss: 0.5745 - accuracy: 0.7435 - val_loss: 0.4126 - val_accuracy: 0.8380
Epoch 10/20
100/100 - 20s - loss: 0.5427 - accuracy: 0.7685 - val_loss: 0.4441 - val_accur

In [47]:
# Save our model so we can integrate it into the Web Application
model.save('model.h5')

## Predicting on New Data
We will ask user for new input and will classify whether it is Organic Waste or Recyclable. 

<font color = 'red'> The Downloaded file should be in same **DIRECTORY** or have to give **FULL PATH**</font> 

In [45]:
file_name = 'O_12792.jpg'

new_content = os.path.join(content_dir, file_name)
img = image.load_img(new_content, target_size=(150, 150))
x = image.img_to_array(img)
x = np.expand_dims(x, axis=0)
images = np.vstack([x])
classes = model.predict(images, batch_size=10)

if classes[0]>0:
    print(file_name + ' is Recyclable')
else:
    print(file_name + ' is Organic Waste')

O_12792.jpg is Organic Waste
