# In [None]:
# AquaTerra Trash Detection with AI 🌊🏞
# Week 2 - Model Training & Testing

# --- Import Libraries ---
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os, cv2
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.utils import to_categorical

# --- Load Images and Labels ---
# Read every image listed in the annotations CSV from disk, resize it to
# IMG_SIZE x IMG_SIZE, and collect the pixel arrays (X) and class labels (y).
# Rows whose image is missing or cannot be decoded are skipped and counted.

IMG_SIZE = 128
img_dir = "data/Images"
labels_csv = "data/annotations.csv"

labels = pd.read_csv(labels_csv)

X, y = [], []
skipped = []  # filenames that were missing on disk or could not be decoded
for filename, label in zip(labels["filename"], labels["class"]):
    img_path = os.path.join(img_dir, filename)
    # cv2.imread signals failure by returning None instead of raising, so the
    # result must be checked before it is handed to cv2.resize.
    img = cv2.imread(img_path) if os.path.exists(img_path) else None
    if img is None:
        skipped.append(filename)
        continue
    X.append(cv2.resize(img, (IMG_SIZE, IMG_SIZE)))
    y.append(label)

if skipped:
    print(f"Skipped {len(skipped)} missing or unreadable image(s)")
if not X:
    # Fail here with a clear message rather than further down the pipeline.
    raise ValueError(
        f"No images could be loaded from {img_dir!r} using {labels_csv!r}"
    )

# Scale pixel values to [0, 1]. float32 halves the memory of the default
# float64 result of the division.
X = np.array(X, dtype=np.float32) / 255.0
y = np.array(y)

# Encode labels: class names -> integer ids -> one-hot rows
encoder = LabelEncoder().fit(y)
y_encoded = encoder.transform(y)
y_categorical = to_categorical(y_encoded)

# Train-test split: hold out 20% of the samples, seeded for repeatability
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_categorical,
    random_state=42,
    test_size=0.2,
)

print("Train:", X_train.shape, "Test:", X_test.shape)

# --- Build CNN Model ---
# Three Conv2D + MaxPooling2D stages (32 -> 64 -> 128 filters) feed a
# 128-unit dense layer with dropout, followed by a softmax output layer.

# One output unit per one-hot column, so the model's output width always
# matches the targets it is trained against.
num_classes = y_categorical.shape[1]

model = models.Sequential([
    # Declare the input with an explicit Input layer instead of passing
    # `input_shape` to the first Conv2D; recent Keras versions warn about
    # the latter form in Sequential models.
    layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3)),

    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),

    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),

    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),

    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(num_classes, activation='softmax'),
])

# Compile model: categorical cross-entropy pairs with the one-hot targets
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

model.summary()

# --- Train Model ---
# 10 epochs with mini-batches of 32. The held-out split is passed as
# validation data, so validation metrics are recorded in `history`.
# NOTE(review): the same split is also used for the final evaluation below;
# confirm that a separate validation set is not wanted.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=10,
    validation_data=(X_test, y_test),
)

# --- Evaluate Model ---
# evaluate() yields the loss followed by the compiled metric (accuracy)
loss, acc = model.evaluate(x=X_test, y=y_test)
print(f"Test Accuracy: {acc*100:.2f}%")

# --- Plot Training Results ---
# Two side-by-side panels (accuracy on the left, loss on the right), each
# overlaying the training curve and the validation curve.
plt.figure(figsize=(12, 5))

# Each entry: panel title, then (history key, legend label) per curve
panels = (
    ("Accuracy", ("accuracy", "Train Accuracy"), ("val_accuracy", "Val Accuracy")),
    ("Loss", ("loss", "Train Loss"), ("val_loss", "Val Loss")),
)

for position, (title, *curves) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    for key, label in curves:
        plt.plot(history.history[key], label=label)
    plt.legend()
    plt.title(title)

plt.show()

# --- Sample Prediction ---
# Take the first test image and give it a leading batch axis of size 1,
# since the model is fed batches rather than single images.
sample = X_test[0][np.newaxis, ...]
prediction = model.predict(sample)

# Highest-scoring class index for the single row, mapped back to its label
best_index = np.argmax(prediction, axis=1)
pred_class = encoder.inverse_transform(best_index)
print("Predicted Class:", pred_class[0])