In [None]:
# Mount Google Drive at /content/drive. The dataset path configured later in
# this notebook lives under that mount point, so this cell has to run first.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG19
from tensorflow.keras import layers, models
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

In [None]:
# Dataset root on the mounted Drive and the mini-batch size shared by all
# data generators in this notebook.
data_path = '/content/drive/MyDrive/data1/100x100_overlap_0percent'
batch_size = 128

# One list file per split, each located directly inside data_path.
train_data_file = '/'.join((data_path, 'train6k.txt'))
val_data_file = '/'.join((data_path, "val6k.txt"))
test_data_file = '/'.join((data_path, "test6k.txt"))

In [None]:
def _load_subset(list_file):
    """Read one split's list file and return a fixed 10% sample of its rows.

    The file is parsed as space-delimited with no header row. Column 0 is
    turned into a full path by prefixing ``data_path``; column 1 (the label)
    is cast to ``str``. The sample is drawn with ``train_test_split`` using
    ``random_state=42``, so repeated runs keep the same rows.
    """
    frame = pd.read_csv(list_file, delimiter=' ', header=None)
    # Column 1 is later used as y_col of the generators; keep it as strings.
    frame[1] = frame[1].astype(str)
    # Column 0 is stored relative to the dataset root; add the pre-path.
    frame[0] = data_path + '/' + frame[0]
    # Keep 10% for the experiment and discard the remaining 90%.
    kept_rows, _ = train_test_split(frame, test_size=0.90, random_state=42)
    return kept_rows


train_df = _load_subset(train_data_file)
val_df = _load_subset(val_data_file)
test_df = _load_subset(test_data_file)

# Create data generators for training, validation, and test datasets.
#
# Two things are deliberately shared by all three generators:
#
# 1. Preprocessing. The model built later in this notebook uses a frozen VGG19
#    with ImageNet weights. Those weights expect inputs prepared by
#    vgg19.preprocess_input (RGB -> BGR and per-channel ImageNet mean
#    subtraction, without 1/255 scaling), so that function replaces the plain
#    `rescale=1.0/255`.
# 2. The label -> index mapping. Each split is a random 10% sample, so a split
#    may miss a class. Without an explicit `classes` list every generator
#    infers its own mapping, which would shift indices (and change the one-hot
#    width) between train, validation and test.
vgg19_preprocess = tf.keras.applications.vgg19.preprocess_input

# Sorted union of the labels seen in any split; sorting keeps the index order
# deterministic.
class_names = sorted(set(train_df[1]) | set(val_df[1]) | set(test_df[1]))


def _make_generator(datagen, dataframe, shuffle):
    """Build a batch iterator over ``dataframe`` with the shared settings.

    Args:
        datagen: the ImageDataGenerator that loads and preprocesses images.
        dataframe: frame with image paths in column 0 and labels in column 1.
        shuffle: whether to reshuffle the rows; kept False for validation and
            test so that predictions line up with ``generator.classes``.

    Returns:
        The iterator returned by ``datagen.flow_from_dataframe``.
    """
    return datagen.flow_from_dataframe(
        dataframe=dataframe,
        x_col=0,
        y_col=1,
        target_size=(224, 224),
        batch_size=batch_size,
        class_mode='categorical',
        classes=class_names,
        shuffle=shuffle,
    )


train_datagen = ImageDataGenerator(preprocessing_function=vgg19_preprocess)
train_generator = _make_generator(train_datagen, train_df, shuffle=True)

val_datagen = ImageDataGenerator(preprocessing_function=vgg19_preprocess)
val_generator = _make_generator(val_datagen, val_df, shuffle=False)

test_datagen = ImageDataGenerator(preprocessing_function=vgg19_preprocess)
test_generator = _make_generator(test_datagen, test_df, shuffle=False)

In [None]:
# Class distribution of the train, test, and val datasets.
#
# value_counts() orders its result by frequency, which is generally a different
# order from unique() and differs from split to split. Plotting the counts
# against an independently ordered label array therefore mislabels the bars
# (and fails outright when a split lacks a class). Every count series is
# reindexed onto one shared, sorted label list so each bar sits on its own
# label and a missing class shows up as 0.
unique_labels = sorted(set(train_df[1]) | set(test_df[1]) | set(val_df[1]))

train_class_counts = train_df[1].value_counts().reindex(unique_labels, fill_value=0)
test_class_counts = test_df[1].value_counts().reindex(unique_labels, fill_value=0)
val_class_counts = val_df[1].value_counts().reindex(unique_labels, fill_value=0)

# One panel per split, left to right: train, test, validation.
panels = [
    ('Train Dataset Class Distribution', train_class_counts),
    ('Test Dataset Class Distribution', test_class_counts),
    ('Validation Dataset Class Distribution', val_class_counts),
]

fig, axes = plt.subplots(1, 3, figsize=(15, 5))
for ax, (panel_title, counts) in zip(axes, panels):
    # Take labels and heights from the same Series so they cannot drift apart.
    ax.bar(counts.index, counts.values)
    ax.set_title(panel_title)
    ax.set_xlabel('Class Label')
    ax.set_ylabel('Count')

fig.tight_layout()
plt.show()

In [None]:
# The softmax layer must have exactly as many units as the one-hot labels the
# training generator produces, so take the class count from the generator
# instead of hard-coding it.
num_classes = len(train_generator.class_indices)

# VGG19 convolutional base with ImageNet weights and no classifier head;
# the input shape matches the generators' target_size of (224, 224) in RGB.
base_model = VGG19(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Freeze the convolutional layers so only the new head is trained
for layer in base_model.layers:
    layer.trainable = False

# New classification head on top of the frozen base.
model = models.Sequential([
    base_model,
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(num_classes, activation='softmax')  # one unit per class
])

In [None]:
# Show model
# Draws the layer graph of `model`; show_shapes=True asks for shape
# information to be included in the drawing.
tf.keras.utils.plot_model(model, show_shapes=True)

In [None]:
# Training setup: Adam optimiser, categorical cross-entropy loss, and accuracy
# reported as the metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Fit on the training generator and evaluate on the validation generator
# after every epoch; `history` keeps the per-epoch curves.
epochs = 5
history = model.fit(
    train_generator,
    epochs=epochs,
    validation_data=val_generator,
)

# Store the trained model next to the dataset on Drive.
model.save('/'.join((data_path, 'savemodel.h5')))

In [None]:
# Loss and accuracy over epochs, read from the record returned by model.fit.
epoch_log = history.history
training_loss = epoch_log['loss']
training_accuracy = epoch_log['accuracy']
validation_loss = epoch_log['val_loss']
validation_accuracy = epoch_log['val_accuracy']

# Left panel: loss curves. Right panel: accuracy curves.
fig, (loss_ax, accuracy_ax) = plt.subplots(1, 2, figsize=(12, 4))

loss_ax.plot(training_loss, label='Training Loss')
loss_ax.plot(validation_loss, label='Validation Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.legend()

accuracy_ax.plot(training_accuracy, label='Training Accuracy')
accuracy_ax.plot(validation_accuracy, label='Validation Accuracy')
accuracy_ax.set_xlabel('Epoch')
accuracy_ax.set_ylabel('Accuracy')
accuracy_ax.legend()

fig.tight_layout()
plt.show()

In [None]:
# Score the trained model on the test generator. The model was compiled with
# one loss and one metric, so evaluate() yields the pair (loss, accuracy).
test_metrics = model.evaluate(test_generator)
test_loss, test_accuracy = test_metrics
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

In [None]:
# Calculate and draw the confusion matrix for the test split.

# Restart the generator so the predictions are produced in the same row order
# as test_generator.classes (the test generator is created with shuffle=False).
test_generator.reset()

y_true = test_generator.classes
y_pred = model.predict(test_generator)

# Predicted class index = position of the largest softmax output.
y_pred_classes = np.argmax(y_pred, axis=1)

# Class names ordered by their integer index, as a plain list, so tick label i
# is guaranteed to describe row/column i of the matrix.
display_names = [
    name
    for name, index in sorted(test_generator.class_indices.items(),
                              key=lambda item: item[1])
]

# Pass the full index range explicitly: otherwise a class that appears in
# neither y_true nor the predictions is dropped from the matrix, and its shape
# no longer matches the number of display labels.
label_ids = np.arange(len(display_names))
conf_matrix = confusion_matrix(y_true, y_pred_classes, labels=label_ids)

disp = ConfusionMatrixDisplay(confusion_matrix=conf_matrix,
                              display_labels=display_names)

# disp.plot() opens its own figure unless it is given an axes, which would
# leave a separately created 8x8 figure empty. Create the axes here and hand
# it over so the requested size is actually used.
fig, ax = plt.subplots(figsize=(8, 8))
disp.plot(ax=ax, cmap='Blues', values_format='d')
ax.set_title('Confusion Matrix')
plt.show()
