In [5]:
import os
import pandas as pd

# Dictionary mapping folder names (relative or absolute) to categories
folder_paths = {
    'cane/': 'dog',
    'cavallo/': 'horse',
    'elefante/': 'elephant',
    'farfalla/': 'butterfly',
    'gallina/': 'chicken',
    'gatto/': 'cat',
    'mucca/': 'cow',
    'pecora/': 'sheep',
    'scoiattolo/': 'squirrel',
    'ragno/': 'spider'
}

# List to store file details
file_data = []

# Loop through each folder and its corresponding category
for folder_name, category in folder_paths.items():
    if os.path.exists(folder_name):  # Check if the directory exists
        for filename in os.listdir(folder_name):
            if filename.endswith(".jpeg"):  # Modify extension if needed
                file_path = os.path.join(folder_name, filename)
                print("Processing file:", file_path)
                # Append data to the list
                file_data.append({'file_name': file_path, 'category': category})
    else:
        print(f"Directory not found: {folder_name}")

# Convert the list of dictionaries to a DataFrame
df_data = pd.DataFrame(file_data)

# Save the DataFrame to a CSV file
output_csv = "file_categories.csv"
df_data.to_csv(output_csv, index=False)
print(f"Data saved to {output_csv}")


Processing file: cane/OIF-e2bexWrojgtQnAPPcUfOWQ.jpeg
Processing file: cane/OIP---A27bIBcUgX1qkbpZOPswHaFS.jpeg
Processing file: cane/OIP---cByAiEbIxIAleGo9AqOQAAAA.jpeg
Processing file: cane/OIP---ZIdwfUcJeVxnh47zppcQHaFj.jpeg
Processing file: cane/OIP---ZRsOF7zsMqhW30WeF8-AHaFj.jpeg
Processing file: cane/OIP---_cJbI6Ei26w5bW1urHewHaCf.jpeg
Processing file: cane/OIP--0C2kq6I68beB3DduLoxMgHaEf.jpeg
Processing file: cane/OIP--0CZnfLuBERVtzRm2QbB3gHaK4.jpeg
Processing file: cane/OIP--1QXriWyOTJg-9fEwbznmgHaI4.jpeg
Processing file: cane/OIP--2uTtzjaszBfEMCROuLN3wHaLH.jpeg
Processing file: cane/OIP--2z_zAuTMzgYM_KynUl9CQHaE7.jpeg
Processing file: cane/OIP--565fkAJy3DoFs81C9uE1QHaEY.jpeg
Processing file: cane/OIP--6mVvk2SFEi7TxqtyMbdvgHaEK.jpeg
Processing file: cane/OIP--6UY_QQC69uZF4CrwEcfUQHaHa.jpeg
Processing file: cane/OIP--7xBAVJkrjDLNCknR2fuKgHaHa.jpeg
Processing file: cane/OIP--8oCad9Nr69Ol4YEkvXJFQHaNK.jpeg
Processing file: cane/OIP--90_fw5Y28UC44KVSPuxdAHaJ3.jpeg
Processing file: c

In [2]:
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [3]:
# Load the dataset from CSV
data = pd.read_csv('file_categories.csv')  # Make sure to update the path to your CSV file

In [4]:
# Split the dataset into train and validation sets
train_df, valid_df = train_test_split(data, test_size=0.2, random_state=42, stratify=data['category'])

# Define data generators for training and validation
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

valid_datagen = ImageDataGenerator(rescale=1./255)

# Create generators to read images from dataframe
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    x_col='file_name',  # Column in dataframe that contains the filenames
    y_col='category',  # Column in dataframe that contains the labels
    target_size=(128, 128),  # Resize images to 128x128
    batch_size=32,
    class_mode='sparse'  # Use 'sparse' for integer type class encoding
)

valid_generator = valid_datagen.flow_from_dataframe(
    dataframe=valid_df,
    x_col='file_name',
    y_col='category',
    target_size=(128, 128),
    batch_size=32,
    class_mode='sparse'
)

Found 19367 validated image filenames belonging to 10 classes.
Found 4842 validated image filenames belonging to 10 classes.


In [8]:
# Build the model
model = tf.keras.Sequential([
    # First convolutional block
    tf.keras.layers.Conv2D(32, (3, 3), padding='same', activation='relu', input_shape=(128, 128, 3)),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Dropout(0.25),  # Dropout for regularization

    # Second convolutional block
    tf.keras.layers.Conv2D(64, (3, 3), padding='same', activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Dropout(0.25),

    # Third convolutional block
    tf.keras.layers.Conv2D(128, (3, 3), padding='same', activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Dropout(0.25),

    # Fourth convolutional block
    tf.keras.layers.Conv2D(256, (3, 3), padding='same', activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Dropout(0.25),

    # Fifth convolutional block
    tf.keras.layers.Conv2D(256, (3, 3), padding='same', activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Dropout(0.25),

    # Flattening the layers
    tf.keras.layers.Flatten(),

    # Dense layers
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dropout(0.5),  # Increased dropout for the fully connected layer
    tf.keras.layers.Dense(10, activation='softmax')  # 10 output nodes for 10 categories
])

# Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Train the model
history = model.fit(
    train_generator,
    steps_per_epoch=train_generator.n // train_generator.batch_size,
    validation_data=valid_generator,
    validation_steps=valid_generator.n // valid_generator.batch_size,
    epochs=20
)

# Save the model
model.save('enhanced_animal_classifier_model.h5')


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m605/605[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m95s[0m 154ms/step - accuracy: 0.2271 - loss: 2.9165 - val_accuracy: 0.3235 - val_loss: 2.0082
Epoch 2/20
[1m  1/605[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:22[0m 136ms/step - accuracy: 0.1562 - loss: 2.1128



[1m605/605[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step - accuracy: 0.1562 - loss: 2.1128 - val_accuracy: 0.3158 - val_loss: 2.0504
Epoch 3/20
[1m605/605[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m93s[0m 153ms/step - accuracy: 0.3414 - loss: 1.8789 - val_accuracy: 0.4396 - val_loss: 1.5899
Epoch 4/20
[1m605/605[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step - accuracy: 0.2812 - loss: 1.8985 - val_accuracy: 0.4416 - val_loss: 1.5822
Epoch 5/20
[1m605/605[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m93s[0m 153ms/step - accuracy: 0.4066 - loss: 1.7175 - val_accuracy: 0.5168 - val_loss: 1.4163
Epoch 6/20
[1m605/605[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step - accuracy: 0.5312 - loss: 1.4074 - val_accuracy: 0.5230 - val_loss: 1.3992
Epoch 7/20
[1m605/605[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m93s[0m 153ms/step - accuracy: 0.4455 - loss: 1.6078 - val_accuracy: 0.5072 - val_loss: 1.3828
Epoch 8/20
[1m605/605[0m [3



In [None]:
# Optionally, display training and validation progress
import matplotlib.pyplot as plt

acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs_range = range(10)

plt.figure(figsize=(8, 8))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()
