# Model Analysis:
This file reads in images and trains the training data by running it on ResNet50 to classify images. For each epoch trained, it will identify loss and accuracy throughout, and outputs the information given in the final_training_CNN.csv file.

In [None]:
# import statements
import os
import pandas as pd
import requests
import csv

In [None]:
# import statements cont.
from tqdm import tqdm
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from tensorflow.keras.optimizers import RMSprop

2024-11-18 15:57:39.800047: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-11-18 15:57:39.845777: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-11-18 15:57:41.356416: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-11-18 15:57:41.655143: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1731963461.986267   83796 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1731963462.17

In [3]:
# Load the CSV data
train_df = pd.read_csv('train_updated_book_covers.csv')
test_df = pd.read_csv('test_updated_book_covers.csv')

# Split the training data into train and validation sets
train_df, val_df = train_test_split(train_df, test_size=0.15, random_state=42, stratify=train_df['Genre'])

In [4]:
# Directory to save downloaded images
image_dir = 'book_cover_images'
os.makedirs(image_dir, exist_ok=True)

# Function to download an image and save it locally
def download_image(url, save_path):
    try:
        response = requests.get(url, stream=True)
        if response.status_code == 200:
            with open(save_path, 'wb') as f:
                for chunk in response:
                    f.write(chunk)
        return save_path
    except Exception as e:
        print(f"Failed to download {url}: {e}")
        return None

# Download each image and save the local path in the DataFrame
train_df['Local Image Path'] = [
    download_image(url, os.path.join(image_dir, f"book_cover_{idx}.jpg"))
    for idx, url in tqdm(enumerate(train_df['Book Cover Image URL']))
]

val_df['Local Image Path'] = [
    download_image(url, os.path.join(image_dir, f"val_book_cover_{idx}.jpg"))
    for idx, url in tqdm(enumerate(val_df['Book Cover Image URL']))
]

test_df['Local Image Path'] = [
    download_image(url, os.path.join(image_dir, f"test_book_cover_{idx}.jpg"))
    for idx, url in tqdm(enumerate(test_df['Book Cover Image URL']))
]


1087it [00:54, 19.84it/s]
192it [00:10, 18.52it/s]
260it [00:10, 24.10it/s]


In [5]:
#print(test_df)

In [6]:
# Define image dimensions and batch size
img_height, img_width = 224, 224
batch_size = 32

# Data augmentation for training
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)

# Only rescale validation and test data
val_datagen = ImageDataGenerator(rescale=1.0 / 255)

# Load training data from the DataFrame
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    x_col='Local Image Path',  # Use local paths
    y_col='Genre',
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical'
)

# Load validation data from the DataFrame
val_generator = val_datagen.flow_from_dataframe(
    dataframe=val_df,
    x_col='Local Image Path',
    y_col='Genre',
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical'
)

# Load test data from the DataFrame
test_generator = val_datagen.flow_from_dataframe(
    dataframe=test_df,
    x_col='Local Image Path',
    y_col='Genre',
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical'
)


Found 1087 validated image filenames belonging to 13 classes.
Found 192 validated image filenames belonging to 13 classes.
Found 260 validated image filenames belonging to 13 classes.


In [7]:
# Load pre-trained ResNet50 without the top layer
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(img_height, img_width, 3))
base_model.trainable = False  # Freeze ResNet50 layers


#numclasses manually:
num_classes = len(train_generator.class_indices)

# Add custom classification layers
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
x = Dense(512, activation='relu')(x)
#predictions = Dense(train_generator.num_classes, activation='softmax')(x)  # Output layer for genres
predictions = Dense(num_classes, activation='softmax')(x)
# Build the model
model = Model(inputs=base_model.input, outputs=predictions)


2024-11-18 15:59:21.497381: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected


In [8]:
#compiling the model
model.compile(optimizer=RMSprop(learning_rate=0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [None]:
#training model
epochs = 100  # Set the number of epochs
#100 epochs to ensure sufficient training

history = model.fit(
    train_generator,
    epochs=epochs,
    validation_data=val_generator
)

  self._warn_if_super_not_called()


Epoch 1/100
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m113s[0m 3s/step - accuracy: 0.0761 - loss: 3.2630 - val_accuracy: 0.1094 - val_loss: 2.5763
Epoch 2/100
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m105s[0m 3s/step - accuracy: 0.0910 - loss: 2.5747 - val_accuracy: 0.1042 - val_loss: 2.5697
Epoch 3/100
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m105s[0m 3s/step - accuracy: 0.0950 - loss: 2.5525 - val_accuracy: 0.1198 - val_loss: 2.5729
Epoch 4/100
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m106s[0m 3s/step - accuracy: 0.1018 - loss: 2.5694 - val_accuracy: 0.1042 - val_loss: 2.5561
Epoch 5/100
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m105s[0m 3s/step - accuracy: 0.1116 - loss: 2.5389 - val_accuracy: 0.0990 - val_loss: 2.5594
Epoch 6/100
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m105s[0m 3s/step - accuracy: 0.1018 - loss: 2.5487 - val_accuracy: 0.0938 - val_loss: 2.5433
Epoch 7/100
[1m34/34[0m [

In [None]:
#evaluating and printing accuracy
test_loss, test_accuracy = model.evaluate(test_generator)
print(f"Test Accuracy: {test_accuracy:.2f}")

In [None]:
# accessing the accuracy and loss values, saved in variable 'history'
training_loss = history.history['loss']
training_accuracy = history.history['accuracy']
validation_loss = history.history['val_loss']
validation_accuracy = history.history['val_accuracy']

# print or save these values for analysis
for epoch in range(len(training_loss)):
    print(f"Epoch {epoch+1}: "
          f"Train Loss = {training_loss[epoch]:.4f}, Train Accuracy = {training_accuracy[epoch]:.4f}, "
          f"Validation Loss = {validation_loss[epoch]:.4f}, Validation Accuracy = {validation_accuracy[epoch]:.4f}")


In [None]:
# write data into csv file
filepath = 'training_CNN.csv'
with open(filepath, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['Epoch', 'Train Loss', 'Train Accuracy', 'Validation Loss', 'Validation Accuracy'])
    for epoch in range(len(training_loss)):
        writer.writerow([epoch+1, training_loss[epoch], training_accuracy[epoch], 
                         validation_loss[epoch], validation_accuracy[epoch]])