In [None]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True) # mount Google Drive at /content/drive; force_remount=True asks for a fresh mount even if one already exists

Mounted at /content/drive


In [None]:
!mkdir ~/.kaggle

In [None]:
# Copy the Kaggle API token stored on Drive to ~/.kaggle/kaggle.json
# (presumably the location the kaggle CLI reads credentials from; confirm).
!cp /content/drive/MyDrive/KAGGLE_API_CREDENTIALS/kaggle.json ~/.kaggle/kaggle.json

In [None]:
# Restrict the token file to owner read/write only (mode 600).
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the dataset archive with the Kaggle CLI
# (the unzip cell that follows expects it at /content/vegetable-image-dataset.zip).
!kaggle datasets download -d misrakahmed/vegetable-image-dataset

# Dataset page: https://www.kaggle.com/datasets/misrakahmed/vegetable-image-dataset

In [None]:
!unzip '/content/vegetable-image-dataset.zip'

In [19]:
import torch
from torch import nn
from torch.optim import SGD, Adam
from torchvision import datasets, transforms, models
from sklearn.metrics import classification_report, accuracy_score
import matplotlib.pyplot as plt
import numpy as np



In [None]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [27]:
from torchvision.models import resnet50, ResNet50_Weights

# Build a ResNet-50 initialised from torchvision's default pretrained weights
# and move it to the selected device.
weights = ResNet50_Weights.DEFAULT
model = resnet50(weights=weights).to(device)

In [None]:
# Preprocessing transform shipped with the chosen weights; it is passed to every
# ImageFolder dataset so inputs match what the pretrained network expects.
preprocess = weights.transforms()

In [46]:
import os

data_dir = '/content/Vegetable Images'  # adjust to your path
splits = ['train', 'validation', 'test']

# One ImageFolder per split, all using the preprocessing bundled with the pretrained weights.
image_datasets = {
    split: datasets.ImageFolder(os.path.join(data_dir, split), transform=preprocess)
    for split in splits
}

# Never request more worker processes than the runtime has CPUs
# (PyTorch warns when num_workers exceeds the suggested maximum).
num_workers = min(4, os.cpu_count() or 1)

# Only the training split is shuffled; validation and test are read in a fixed
# order so evaluation is repeatable from run to run.
dataloaders = {
    split: torch.utils.data.DataLoader(
        image_datasets[split],
        batch_size=16,
        shuffle=(split == 'train'),
        num_workers=num_workers,
    )
    for split in splits
}

In [59]:
# The classifier needs one output per class. len(dataset) would be the number of
# images; ImageFolder exposes the discovered class names through `.classes`.
num_output_features = len(dataloaders['train'].dataset.classes)

# Replace the last layer (classifier) with a fresh head sized for this dataset.
model.fc = torch.nn.Linear(model.fc.in_features, num_output_features).to(device)

# Freeze the pretrained backbone: only parameters of the new `fc` head stay trainable.
for name, param in model.named_parameters():
    param.requires_grad = name.startswith('fc')



In [62]:
def train_model(model, train_loader, loss_fn, optimizer, device):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: Network to train; put into training mode by this function.
        train_loader: Iterable of ``(inputs, targets)`` batches that also
            exposes ``.dataset`` (its length is the loss denominator).
        loss_fn: Callable mapping ``(outputs, targets)`` to a scalar loss tensor.
        optimizer: Optimizer whose gradients are zeroed and which is stepped
            once per batch.
        device: Device every batch is moved to before the forward pass.

    Returns:
        Tuple ``(epoch_loss, epoch_acc)``. ``epoch_loss`` is the sum over
        batches of ``loss * batch_size`` divided by ``len(train_loader.dataset)``;
        ``epoch_acc`` is the fraction of samples whose highest-scoring output
        index equals the target.
    """
    model.train()

    loss_total = 0.0
    batch_preds, batch_targets = [], []

    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)

        # Forward pass.
        logits = model(inputs)
        batch_loss = loss_fn(logits, targets)

        # Backward pass and parameter update.
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        # Weight the batch loss by its size so the final division yields a per-sample value.
        loss_total += batch_loss.item() * inputs.size(0)

        # Predicted class = index of the largest output along dim 1.
        batch_preds.append(logits.max(dim=1).indices)
        batch_targets.append(targets)

    # Gather the whole epoch on the CPU as numpy arrays for the accuracy computation.
    preds_np = torch.cat(batch_preds).cpu().numpy()
    targets_np = torch.cat(batch_targets).cpu().numpy()

    epoch_loss = loss_total / len(train_loader.dataset)
    n_correct = (preds_np == targets_np).sum()
    epoch_acc = n_correct / targets_np.shape[0]

    return epoch_loss, epoch_acc


In [63]:
def validate_and_test_model(model, data_loader, loss_fn, device):
    """Evaluate ``model`` on ``data_loader`` without updating any weights.

    Args:
        model: Network to evaluate; put into eval mode by this function.
        data_loader: Iterable of ``(inputs, targets)`` batches that also
            exposes ``.dataset`` (its length is the loss denominator).
        loss_fn: Callable mapping ``(outputs, targets)`` to a scalar loss tensor.
        device: Device every batch is moved to before the forward pass.

    Returns:
        Tuple ``(epoch_loss, epoch_acc)``. ``epoch_loss`` is the sum over
        batches of ``loss * batch_size`` divided by ``len(data_loader.dataset)``;
        ``epoch_acc`` is the fraction of samples whose highest-scoring output
        index equals the target.
    """
    model.eval()

    loss_total = 0.0
    batch_preds, batch_targets = [], []

    # inference_mode disables gradient tracking for everything computed inside it.
    with torch.inference_mode():
        for inputs, targets in data_loader:
            inputs, targets = inputs.to(device), targets.to(device)

            logits = model(inputs)
            batch_loss = loss_fn(logits, targets)

            # Weight the batch loss by its size so the final division yields a per-sample value.
            loss_total += batch_loss.item() * inputs.size(0)

            # Predicted class = index of the largest output along dim 1.
            batch_preds.append(logits.max(dim=1).indices)
            batch_targets.append(targets)

        epoch_preds = torch.cat(batch_preds)
        epoch_targets = torch.cat(batch_targets)

    # Gather the whole pass on the CPU as numpy arrays for the accuracy computation.
    preds_np = epoch_preds.cpu().numpy()
    targets_np = epoch_targets.cpu().numpy()

    epoch_loss = loss_total / len(data_loader.dataset)
    n_correct = (preds_np == targets_np).sum()
    epoch_acc = n_correct / targets_np.shape[0]

    return epoch_loss, epoch_acc


In [66]:
def train_and_validate(model, train_loader, data_loader, loss_fn, optimizer, epochs, device, model_save_path, validate_every=5):
    """Train for ``epochs`` epochs, validating periodically and checkpointing the best model.

    Args:
        model: Network to train.
        train_loader: Loader passed to ``train_model`` each epoch.
        data_loader: Validation loader passed to ``validate_and_test_model``.
        loss_fn: Loss callable shared by training and validation.
        optimizer: Optimizer used by ``train_model``.
        epochs: Number of training epochs to run.
        device: Device the batches are moved to.
        model_save_path: File that receives ``model.state_dict()`` (via
            ``torch.save``) whenever validation accuracy improves.
        validate_every: Validate on epochs whose zero-based index is a multiple
            of this value, and always on the final epoch. Defaults to 5.

    Returns:
        The best validation accuracy observed (``0.0`` if no validation pass
        scored above zero).

    Raises:
        ValueError: If ``validate_every`` is smaller than 1.
    """
    if validate_every < 1:
        raise ValueError("validate_every must be a positive integer")

    best_acc = 0.0
    for epoch in range(epochs):
        train_loss, train_acc = train_model(model, train_loader, loss_fn, optimizer, device)
        print(f'Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}')

        # Also validate after the last epoch so the final weights are never left unevaluated.
        is_last_epoch = epoch == epochs - 1
        if epoch % validate_every != 0 and not is_last_epoch:
            continue

        validate_loss, validate_acc = validate_and_test_model(model, data_loader, loss_fn, device)
        print(f'Epoch {epoch+1}/{epochs}, Validation Loss: {validate_loss:.4f}, Validation Acc: {validate_acc:.4f}')

        # Checkpoint only against an accuracy measured in this epoch, never a stale
        # value carried over from an earlier validation pass.
        if validate_acc > best_acc:
            best_acc = validate_acc
            torch.save(model.state_dict(), model_save_path)

    return best_acc

In [None]:
# Training hyper-parameters.
epochs = 50
loss_fn = nn.CrossEntropyLoss()
optimizer = Adam(
    model.parameters(),
    lr=0.003,
    weight_decay=0.007,
)

# Run training with periodic validation; the best validation accuracy returned
# by the call is shown as this cell's output.
train_and_validate(
    model=model,
    train_loader=dataloaders['train'],
    data_loader=dataloaders['validation'],
    loss_fn=loss_fn,
    optimizer=optimizer,
    epochs=epochs,
    device=device,
    model_save_path='VEGATABLE_IMAGE_DETECTION_MODEL_weights.pth',
)

In [None]:
# Restore the best checkpoint written during training. map_location remaps the
# stored tensors onto the current device, so weights saved from a GPU session
# still load on a CPU-only runtime.
model.load_state_dict(torch.load('/content/VEGATABLE_IMAGE_DETECTION_MODEL_weights.pth', map_location=device))

In [None]:
# Testing stage: evaluate the restored model on the held-out test split once.
# validate_and_test_model never updates weights, so looping over "epochs" here
# would only repeat the same measurement.
data_loader = dataloaders['test']
test_loss, test_acc = validate_and_test_model(model, data_loader, loss_fn, device)
print(f'Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.4f}')