<a href="https://colab.research.google.com/github/vrhughes/DS4002-Project3/blob/main/VGG16.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Importing

In [1]:
# Basic importing
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [14]:
# Importing from example code
import os
from glob import glob
import tensorflow as tf
import keras

from tensorflow.keras.layers import Activation, BatchNormalization, Conv2D, Dense, Dropout, Flatten, MaxPool2D
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam, RMSprop, SGD
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras import layers
from tensorflow.keras import Model
from keras.models import Sequential
from tensorflow.keras.applications import VGG16

from PIL import Image

## Getting Data

In [4]:
# Getting kaggle path
import kagglehub

# Download latest version
path = kagglehub.dataset_download("puneet6060/intel-image-classification")

print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/intel-image-classification


In [5]:
# paths of data
test_path = '/kaggle/input/intel-image-classification/seg_test/seg_test'
train_path = '/kaggle/input/intel-image-classification/seg_train/seg_train'

In [6]:
# Checking what is in the folders
for folder in  os.listdir(train_path):
    files = glob(pathname= str(train_path + '/' + folder + '/*.jpg'))
    print(f'Found {len(files)} in folder {folder}')


Found 2512 in folder mountain
Found 2382 in folder street
Found 2191 in folder buildings
Found 2274 in folder sea
Found 2271 in folder forest
Found 2404 in folder glacier


In [7]:
# Seeing what folders are in the directory
os.listdir(train_path)

['mountain', 'street', 'buildings', 'sea', 'forest', 'glacier']

## Deleting 'street'

In [8]:
# Deleting the street folders from the train and test data

# Define the folders you want to include
folders = ['buildings', 'forest', 'glacier', 'mountain', 'sea']

folder_labels = {folder_name:i for i, folder_name in enumerate(folders)}
print(folder_labels)

numbered_folders = len(folders)

IMAGE_SIZE = (150, 150)



{'buildings': 0, 'forest': 1, 'glacier': 2, 'mountain': 3, 'sea': 4}


## ImageDataGenerator

In [9]:
# Set up ImageDataGenerator (can add augmentation later)
train_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

# train_datagen = ImageDataGenerator(rescale=1./255,
#                                   zoom_range=0.25,
#                                   validation_split=0.2,
#                                   width_shift_range=0.15,
#                                   height_shift_range=0.15,
#                                   horizontal_flip = True,
#                                   vertical_flip = False,
#                                   fill_mode='nearest')

In [10]:
# Training data loader (excluding street)
train_generator = train_datagen.flow_from_directory(
    train_path,
    target_size = IMAGE_SIZE,     # sizing data
    batch_size = 32,              # this is default, can change if needed
    class_mode = 'categorical',
    subset = 'training',
    classes = folders, # <<<<<<< doesn't include street
    shuffle = True
)

# Validation loader
validation_generator = train_datagen.flow_from_directory(
    train_path,
    target_size = IMAGE_SIZE,
    batch_size = 32,              # try 128?
    class_mode = 'categorical',
    subset = 'validation',
    classes = folders, # <<<<<<< doesn't include street
    shuffle = False # ???????????????? why
)



Found 9324 images belonging to 5 classes.
Found 2328 images belonging to 5 classes.


In [11]:
# Test generator
test_datagen = ImageDataGenerator(rescale=1./255)

test_generator = test_datagen.flow_from_directory(
    test_path,
    target_size = IMAGE_SIZE,
    batch_size = 32,
    class_mode = 'categorical',
    classes = folders, # <<<<<<< doesn't include street
    shuffle = False
)

Found 2499 images belonging to 5 classes.


### Building VGG16 Model

https://www.kaggle.com/code/janvichokshi/transfer-learning-cnn-resnet-vgg16-iceptionv3#Preparing-the-dataset

In [17]:
def prep_dataset(path, label):
    x_train = []
    y_train = []
    all_images_path = glob(path+'/*.jpg')
    for img_path in all_images_path:
        img = load_img(img_path, target_size=(150, 150))
        img = img_to_array(img)
        img = img/255.0
        x_train.append(img)
        y_train.append(label)
    return np.array(x_train), np.array(y_train)

In [18]:
trainX_building, trainY_building  = prep_dataset("../input/intel-image-classification/seg_train/seg_train/buildings/",0)
trainX_forest, trainY_forest  = prep_dataset("../input/intel-image-classification/seg_train/seg_train/forest/",1)
trainX_glacier, trainY_glacier  = prep_dataset("../input/intel-image-classification/seg_train/seg_train/glacier/",2)
trainX_mount, trainY_mount  = prep_dataset("../input/intel-image-classification/seg_train/seg_train/mountain/",3)
trainX_sea, trainY_sea  = prep_dataset("../input/intel-image-classification/seg_train/seg_train/sea/",4)

print('train building shape ', trainX_building.shape, trainY_building.shape)
print('train forest shape ', trainX_forest.shape, trainY_forest.shape)
print('train glacier shape ', trainX_glacier.shape, trainY_glacier.shape)
print('train mountain shape ', trainX_mount.shape, trainY_mount.shape)
print('train sea shape ', trainX_sea.shape, trainY_sea.shape)

train building shape  (0,) (0,)
train forest shape  (0,) (0,)
train glacier shape  (0,) (0,)
train mountain shape  (0,) (0,)
train sea shape  (0,) (0,)


In [13]:
# Getting pretrained model
pretrained_model = VGG16(
    input_shape = (150, 150, 3),
    include_top = False,
    weights = 'imagenet'
)

for layer in pretrained_model.layers:
    layer.trainable = False

In [None]:
# pretained_model.summary()
last_layer = pretrained_model.get_layer('block5_pool')
print('last layer of vgg : output shape: ', last_layer.output_shape)
last_output= last_layer.output

x = layers.Flatten()(last_output)
x = layers.Dense(1024, activation='relu')(x)
x = layers.Dropout(0.2)(x)
x = layers.Dense(6, activation='softmax')(x)

model_vgg = Model(pretrained_model.input, x)


model_vgg.compile(optimizer = RMSprop(lr=0.0001),
              loss = 'sparse_categorical_crossentropy',
              metrics = ['acc'])