# AI Assignment 3, part 2

## Import Libraries

In [4]:
%pip install kagglehub
%pip install tensorflow
%pip install pandas
%pip install numpy
%pip install matplotlib
%pip install nltk
%pip install scikit-learn
%pip install opencv-python

--- Logging error ---
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/pip/_internal/utils/logging.py", line 177, in emit
    self.console.print(renderable, overflow="ignore", crop=False, style=style)
  File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/pip/_vendor/rich/console.py", line 1673, in print
    extend(render(renderable, render_options))
  File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/pip/_vendor/rich/console.py", line 1305, in render
    for render_output in iter_render:
  File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/pip/_internal/utils/logging.py", line 134, in __rich_console__
    for line in lines:
  File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/pip/_vendor/rich/segment.py", line 249, in split_lines
    for segment in segments:
  File "/Library/

In [5]:
import os
import kagglehub
import pandas as pd
import numpy as np
import cv2
import tensorflow as tf
from tensorflow.keras import Sequential, Input
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
import matplotlib.pyplot as plt

## Load and Preprocess Data

### Download the data, pull the directories

In [6]:
path = kagglehub.dataset_download("puneet6060/intel-image-classification")
print("Path to dataset files:", path)

train_dir = os.path.join(path, 'seg_train/seg_train')
test_dir = os.path.join(path, 'seg_test/seg_test')

print("Train directory:", train_dir)
print("Test directory:", test_dir)

Downloading from https://www.kaggle.com/api/v1/datasets/download/puneet6060/intel-image-classification?dataset_version_number=2...


100%|██████████| 346M/346M [00:08<00:00, 44.7MB/s] 

Extracting files...





Path to dataset files: /Users/tylerteufel/.cache/kagglehub/datasets/puneet6060/intel-image-classification/versions/2
Train directory: /Users/tylerteufel/.cache/kagglehub/datasets/puneet6060/intel-image-classification/versions/2/seg_train/seg_train
Test directory: /Users/tylerteufel/.cache/kagglehub/datasets/puneet6060/intel-image-classification/versions/2/seg_test/seg_test


### Define load_images function for identifying by class

In [7]:
#
def load_images(folder):

  images = []
  labels = []
  class_names = os.listdir(folder)
  class_indices = {class_name: i for i, class_name in enumerate(class_names)}

  for class_name in class_names:
    class_folder = os.path.join(folder, class_name)
    if os.path.isdir(class_folder):
      for image_name in os.listdir(class_folder):
        image_path = os.path.join(class_folder, image_name)
        image = cv2.imread(image_path)
        if image is not None:
          image = cv2.resize(image, (128, 128))
          images.append(image)
          labels.append(class_indices[class_name])

  return np.array(images), np.array(labels), class_names

X_train, Y_train, train_class_names = load_images(train_dir)

X_val, Y_val, val_class_names = load_images(test_dir)

print('Training data shape:', X_train.shape)
print('Validation data shape:', X_val.shape)
print('Classes:', train_class_names)

print("Shape of Y_train:", Y_train.shape)  # Should be (14034, num_classes)
print("Shape of Y_val:", Y_val.shape)      # Should be (3000, num_classes)


Training data shape: (14034, 128, 128, 3)
Validation data shape: (3000, 128, 128, 3)
Classes: ['forest', 'buildings', 'glacier', 'street', 'mountain', 'sea']
Shape of Y_train: (14034,)
Shape of Y_val: (3000,)


### Normalize image pixel values, use one-hot encoding

In [8]:
X_train = X_train / 255.0
X_val = X_val / 255.0

Y_train = to_categorical(Y_train, num_classes=len(train_class_names))
Y_val = to_categorical(Y_val, num_classes=len(val_class_names))

# Verify the shapes after encoding
print("Shape of Y_train after encoding:", Y_train.shape)  # Should be (14034, 6)
print("Shape of Y_val after encoding:", Y_val.shape)      # Should be (3000, 6)


Shape of Y_train after encoding: (14034, 6)
Shape of Y_val after encoding: (3000, 6)


## Train CNN Models

### Model 1: 3 convolution layers

In [9]:
model_3conv = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)),
    MaxPooling2D(pool_size=(2,2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2,2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2,2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(train_class_names), activation='softmax')

])

model_3conv.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

history_3conv = model_3conv.fit(X_train, Y_train, validation_data=(X_val, Y_val), epochs=10, batch_size=32)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m439/439[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m99s[0m 220ms/step - accuracy: 0.4917 - loss: 1.2632 - val_accuracy: 0.6943 - val_loss: 0.8348
Epoch 2/10
[1m439/439[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m88s[0m 199ms/step - accuracy: 0.6876 - loss: 0.8373 - val_accuracy: 0.7420 - val_loss: 0.6808
Epoch 3/10
[1m439/439[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m97s[0m 220ms/step - accuracy: 0.7744 - loss: 0.6399 - val_accuracy: 0.8003 - val_loss: 0.5624
Epoch 4/10
[1m439/439[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m92s[0m 209ms/step - accuracy: 0.8007 - loss: 0.5511 - val_accuracy: 0.8330 - val_loss: 0.4785
Epoch 5/10
[1m439/439[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m87s[0m 198ms/step - accuracy: 0.8453 - loss: 0.4373 - val_accuracy: 0.8377 - val_loss: 0.4809
Epoch 6/10
[1m439/439[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 191ms/step - accuracy: 0.8684 - loss: 0.3787 - val_accuracy: 0.8237 - val_loss: 0.5461
Epoch 7/10