### Question 1 - Train a deep learning model which would classify the vegetables based on the images provided. The dataset can be accessed from the given link.

Link-
https://www.kaggle.com/datasets/misrakahmed/vegetable-image-dataset

Note -
1. Use PyTorch as the framework for training model
2. Use Distributed Parallel Training technique to optimize training time.
3. Achieve an accuracy of at least 85% on the validation dataset.
4. Use albumentations library for image transformation
5. Use TensorBoard logging for visualizing training performance
6. Use custom modular Python scripts to train model
7. A submission that consists only of Jupyter notebooks will not be accepted
8. Write code comments wherever needed for understanding 

In [2]:
# Install every third-party package that the cells below import.
# %pip (instead of a bare `pip`) targets the environment of the running kernel.
%pip install torch torchvision albumentations tensorboard onnxruntime flask boto3 pillow

Collecting torchvision
  Downloading torchvision-0.15.2-cp310-cp310-win_amd64.whl (1.2 MB)
                                              0.0/1.2 MB ? eta -:--:--
     ----                                     0.1/1.2 MB 3.5 MB/s eta 0:00:01
     ----------                               0.3/1.2 MB 3.8 MB/s eta 0:00:01
     ----------------                         0.5/1.2 MB 3.8 MB/s eta 0:00:01
     ---------------------                    0.6/1.2 MB 3.7 MB/s eta 0:00:01
     ---------------------------              0.8/1.2 MB 3.6 MB/s eta 0:00:01
     -------------------------------          1.0/1.2 MB 3.5 MB/s eta 0:00:01
     ---------------------------------------  1.2/1.2 MB 4.0 MB/s eta 0:00:01
     ---------------------------------------  1.2/1.2 MB 4.0 MB/s eta 0:00:01
     ---------------------------------------  1.2/1.2 MB 4.0 MB/s eta 0:00:01
     ---------------------------------------- 1.2/1.2 MB 2.7 MB/s eta 0:00:00
Collecting albumentations
  Downloading albumentations-1.3

Error processing line 1 of C:\Users\user\anaconda3\Anaconda3\lib\site-packages\vision-1.0.0-py3.10-nspkg.pth:

  Traceback (most recent call last):
    File "C:\Users\user\anaconda3\Anaconda3\lib\site.py", line 186, in addpackage
      exec(line)
    File "<string>", line 1, in <module>
    File "<frozen importlib._bootstrap>", line 568, in module_from_spec
  AttributeError: 'NoneType' object has no attribute 'loader'

Remainder of file ignored


In [2]:
import os
import zipfile
from torchvision.datasets import ImageFolder

# Location of the downloaded Kaggle archive (or of an already extracted copy).
# Set the VEGETABLE_DATASET_PATH environment variable to override the
# machine-specific default below without editing the notebook.
dataset_source = os.environ.get(
    "VEGETABLE_DATASET_PATH", r"C:\Users\user\Downloads\archive (8).zip"
)

# ImageFolder can only read real directories, so a .zip has to be unpacked
# first. The archive is extracted once, next to itself, and reused afterwards.
if os.path.isdir(dataset_source):
    extracted_root = dataset_source
else:
    extracted_root = os.path.splitext(dataset_source)[0]
    if not os.path.isdir(extracted_root):
        with zipfile.ZipFile(dataset_source) as archive:
            archive.extractall(extracted_root)

# Path to the extracted dataset folder: the first directory that directly
# contains a "train" split (archives often wrap the splits in a parent folder).
dataset_folder = extracted_root
for current_dir, sub_dirs, _ in os.walk(extracted_root):
    if "train" in sub_dirs:
        dataset_folder = current_dir
        break

In [None]:

# Path to the train and validation folders within the dataset folder
train_folder = os.path.join(dataset_folder, "train")
# The Kaggle archive is expected to name its validation split "validation";
# "val" is kept as a fallback for copies that were re-organised by hand.
val_folder = os.path.join(dataset_folder, "validation")
if not os.path.isdir(val_folder):
    val_folder = os.path.join(dataset_folder, "val")

# Define the transforms for data augmentation using Albumentations library
import albumentations as A
import numpy as np
from albumentations.pytorch import ToTensorV2
from torchvision.transforms import ToTensor  # retained: other cells may rely on this name

# ImageNet statistics, matching the Normalize step used by the inference cells
NORMALIZE_MEAN = (0.485, 0.456, 0.406)
NORMALIZE_STD = (0.229, 0.224, 0.225)

# Training pipeline: resize so the crop always fits, then random crop + flip.
# ToTensorV2 (not torchvision's ToTensor) is the Albumentations-native way to
# turn the HWC numpy image into a CHW torch tensor.
transform = A.Compose([
    A.SmallestMaxSize(max_size=256),
    A.RandomCrop(height=224, width=224),
    A.HorizontalFlip(),
    A.Normalize(mean=NORMALIZE_MEAN, std=NORMALIZE_STD),
    ToTensorV2(),
])

# Validation pipeline: deterministic, so accuracy is comparable between epochs
val_transform = A.Compose([
    A.SmallestMaxSize(max_size=256),
    A.CenterCrop(height=224, width=224),
    A.Normalize(mean=NORMALIZE_MEAN, std=NORMALIZE_STD),
    ToTensorV2(),
])


class AlbumentationsAdapter:
    """Let an Albumentations pipeline be used as a torchvision ``transform``.

    ImageFolder calls ``transform(pil_image)`` positionally, whereas
    Albumentations requires ``pipeline(image=<numpy array>)`` and returns a
    dict. A class (rather than a lambda) is used so the object can be pickled.
    """

    def __init__(self, pipeline):
        self.pipeline = pipeline

    def __call__(self, image):
        return self.pipeline(image=np.array(image))["image"]


# Load the train and validation datasets using ImageFolder
train_dataset = ImageFolder(train_folder, transform=AlbumentationsAdapter(transform))
val_dataset = ImageFolder(val_folder, transform=AlbumentationsAdapter(val_transform))


In [None]:
import torch
import torch.nn as nn

class VegetableClassifier(nn.Module):
    """Small three-stage CNN that maps RGB images to ``num_classes`` logits.

    Every backbone stage is Conv(3x3, padding 1) -> ReLU -> MaxPool(2), so the
    spatial size is halved three times while the channels grow 3 -> 16 -> 32 -> 64.
    The feature map is then pooled to a fixed 28x28 grid, flattened and passed
    through two linear layers.

    Args:
        num_classes: Number of output logits produced by the final linear layer.
    """

    # Side length of the feature map the linear head expects (224 / 2**3).
    FEATURE_MAP_SIZE = 28

    def __init__(self, num_classes):
        super().__init__()

        # Define the backbone architecture
        self.backbone = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        # Brings any input resolution to the grid the head was sized for.
        # For 224x224 inputs the backbone already yields 28x28, so this is an
        # identity there. The layer has no parameters, so state_dict keys are
        # unchanged and previously saved checkpoints still load.
        self.pool = nn.AdaptiveAvgPool2d(self.FEATURE_MAP_SIZE)

        # Define the fully connected layers
        self.fc_layers = nn.Sequential(
            nn.Linear(64 * self.FEATURE_MAP_SIZE * self.FEATURE_MAP_SIZE, 128),
            nn.ReLU(),
            nn.Linear(128, num_classes)
        )

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for image batch ``x``."""
        x = self.backbone(x)
        x = self.pool(x)
        # flatten (unlike view) does not require a contiguous tensor
        x = torch.flatten(x, start_dim=1)
        return self.fc_layers(x)

In [None]:
import os

import torch
import torch.nn as nn
import torch.optim as optim
import torch.distributed as dist
import torch.multiprocessing as mp
from torch.utils.data import DataLoader
from torch.utils.data.distributed import DistributedSampler


# Define the training function to be executed on each GPU.
# It is defined BEFORE the launch code below so the name exists when it is used.
def train_fn(rank, model, world_size=1, train_data=None, val_data=None,
             num_epochs=10, batch_size=32):
    """Train and validate ``model`` inside one worker process.

    Args:
        rank: Index of this process; also the CUDA device index when CUDA is
            available.
        model: The network to train. It is moved to this process's device.
        world_size: Total number of worker processes. With the default of 1 no
            process group is created and the model is trained without DDP.
        train_data: Training dataset; defaults to the notebook-level
            ``train_dataset``.
        val_data: Validation dataset; defaults to the notebook-level
            ``val_dataset``.
        num_epochs: Number of passes over the training data.
        batch_size: Batch size used by each process.
    """
    if train_data is None:
        train_data = train_dataset
    if val_data is None:
        val_data = val_dataset

    distributed = world_size > 1
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda", rank) if use_cuda else torch.device("cpu")

    if distributed:
        # env:// rendezvous needs an address and port; default to this machine
        os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
        os.environ.setdefault("MASTER_PORT", "29500")
        # NCCL is not available on every platform; gloo is the portable fallback
        backend = "nccl" if use_cuda and dist.is_nccl_available() else "gloo"
        dist.init_process_group(backend=backend, init_method="env://",
                                rank=rank, world_size=world_size)

    try:
        # Set the device based on the current process rank
        if use_cuda:
            torch.cuda.set_device(device)

        # Create a distributed model
        model = model.to(device)
        if distributed:
            model = nn.parallel.DistributedDataParallel(
                model, device_ids=[rank] if use_cuda else None
            )

        # Each process must see a different shard of the training data
        train_sampler = None
        if distributed:
            train_sampler = DistributedSampler(train_data, num_replicas=world_size, rank=rank)
        train_loader = DataLoader(train_data, batch_size=batch_size,
                                  shuffle=train_sampler is None, sampler=train_sampler)
        val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)

        # Define the optimizer and criterion
        optimizer = optim.Adam(model.parameters(), lr=0.001)
        criterion = nn.CrossEntropyLoss()

        # Start the training loop
        for epoch in range(num_epochs):
            if train_sampler is not None:
                # Re-seeds the sampler so every epoch uses a different shuffle
                train_sampler.set_epoch(epoch)

            # Train the model
            model.train()
            for images, labels in train_loader:
                images = images.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()
                outputs = model(images)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

            # Perform validation (every process evaluates the full validation set)
            model.eval()
            correct = 0
            total = 0
            with torch.no_grad():
                for images, labels in val_loader:
                    images = images.to(device)
                    labels = labels.to(device)

                    predicted = model(images).argmax(dim=1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()

            accuracy = 100 * correct / total if total else 0.0

            # Print the accuracy once, not once per process
            if rank == 0:
                print(f"Epoch [{epoch+1}/{num_epochs}], Accuracy: {accuracy:.2f}%")
    finally:
        # Clean up the distributed training environment, even after an error
        if distributed:
            dist.destroy_process_group()


# Define the number of GPUs available for training
num_gpus = torch.cuda.device_count()

# Initialize the VegetableClassifier model with one output per dataset class
model = VegetableClassifier(num_classes=len(train_dataset.classes))

if __name__ == "__main__":
    # Check if multiple GPUs are available
    if num_gpus > 1:
        # Spawn one process per GPU. mp.spawn pickles train_fn and its
        # arguments, so for multi-GPU runs this code has to live in an
        # importable .py module rather than in a notebook cell. The spawned
        # processes train their own copies; `model` in this process is not updated.
        mp.spawn(train_fn, nprocs=num_gpus,
                 args=(model, num_gpus, train_dataset, val_dataset))
    else:
        # Single GPU (or CPU) training in the current process
        train_fn(0, model)


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter


def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over ``loader`` and return ``(mean_loss, accuracy_percent)``.

    When ``optimizer`` is given the model is put in train mode and updated
    after every batch; otherwise it is put in eval mode and gradients are
    disabled. The loss is averaged per sample, so a smaller final batch does
    not skew the result.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    training = optimizer is not None
    model.train(training)
    loss_sum = 0.0
    correct = 0
    total = 0

    with torch.set_grad_enabled(training):
        for images, labels in loader:
            images = images.to(device)
            labels = labels.to(device)

            if training:
                optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            if training:
                loss.backward()
                optimizer.step()

            batch_count = labels.size(0)
            loss_sum += loss.item() * batch_count
            total += batch_count
            correct += outputs.argmax(dim=1).eq(labels).sum().item()

    if total == 0:
        raise ValueError("loader produced no samples")
    return loss_sum / total, 100.0 * correct / total


# Batch the datasets; only the training data is shuffled
BATCH_SIZE = 32
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Initialize the VegetableClassifier model with one output per dataset class
model = VegetableClassifier(num_classes=len(train_dataset.classes))

# Define the optimizer and criterion
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# Set the device for training
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Define the number of training epochs
num_epochs = 10

# Create a TensorBoard writer for logging
writer = SummaryWriter(log_dir="logs")

try:
    # Start the training loop
    for epoch in range(num_epochs):
        train_loss, train_accuracy = _run_epoch(model, train_loader, criterion, device, optimizer)
        val_loss, val_accuracy = _run_epoch(model, val_loader, criterion, device)

        # Print and log the training performance
        print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.2f}%, "
              f"Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.2f}%")

        # Log the training performance to TensorBoard
        writer.add_scalar("Loss/train", train_loss, epoch)
        writer.add_scalar("Accuracy/train", train_accuracy, epoch)
        writer.add_scalar("Loss/validation", val_loss, epoch)
        writer.add_scalar("Accuracy/validation", val_accuracy, epoch)
finally:
    # Close the TensorBoard writer so buffered events are flushed even on failure
    writer.close()

In [None]:
import torch

CHECKPOINT_PATH = "vegetable_classifier.pt"

# Save the trained model
torch.save(model.state_dict(), CHECKPOINT_PATH)

# Load the saved model for inference. map_location lets a checkpoint written
# on a GPU be restored on a CPU-only machine.
state_dict = torch.load(CHECKPOINT_PATH, map_location="cpu")
# The last linear layer has one output row per class, so the class count can be
# read from the checkpoint instead of being hard-coded.
loaded_model = VegetableClassifier(num_classes=state_dict["fc_layers.2.weight"].shape[0])
loaded_model.load_state_dict(state_dict)
loaded_model.eval()

# Perform inference on a sample image: the first validation item, which the
# dataset has already run through its transform
sample_image, sample_target = val_dataset[0]
input_tensor = torch.unsqueeze(sample_image, 0)  # Add batch dimension
with torch.no_grad():  # no gradients are needed for inference
    output = loaded_model(input_tensor)
predicted_class = torch.argmax(output, dim=1)

# Convert predicted class to label (ImageFolder keeps class names in index order)
class_labels = val_dataset.classes
label = class_labels[predicted_class.item()]
print(f"Predicted label: {label}")

In [None]:
import boto3
import base64
import json

# Initialize AWS clients
lambda_client = boto3.client('lambda')
s3_client = boto3.client('s3')

# Define the AWS Lambda function name
function_name = 'vegetable_classifier_lambda'

# Convert the image to base64 and build the input payload from it
with open('sample_image.jpg', 'rb') as image_file:
    encoded_image = base64.b64encode(image_file.read()).decode('utf-8')
payload = {
    'image': encoded_image
}

# Invoke the AWS Lambda function for inference
response = lambda_client.invoke(
    FunctionName=function_name,
    InvocationType='RequestResponse',
    Payload=json.dumps(payload)
)

# Process the inference response.
# A synchronous invoke reports StatusCode 200 even when the function itself
# raised; in that case the response carries a 'FunctionError' entry and the
# payload holds the error details instead of a prediction.
inference_result = json.loads(response['Payload'].read())
if response['StatusCode'] == 200 and 'FunctionError' not in response:
    predicted_class = inference_result['predicted_class']
    confidence = inference_result['confidence']
    print(f"Predicted class: {predicted_class}, Confidence: {confidence}")
else:
    print("Error occurred during inference")
    print(f"Details: {inference_result}")


### Question 2 - From Question 1, you would get a trained model which would classify the vegetables based on the classes. You need to convert the trained model to ONNX format and achieve faster inference
Note -
1. There is no set inference time, but try to achieve as low an inference time as
possible
2. Create a web app to interact with the model, where the user can upload the
image and get predictions
3. Try to reduce the model size considerably so that inference time can be faster
4. Use modular Python scripts to train and infer the model
5. A submission that consists only of Jupyter notebooks will not be accepted
6. Write code comments whenever needed for understanding

In [None]:
import torch
import torchvision.models as models
import torch.onnx as onnx

# Step 1: Rebuild the model trained in Question 1
# The checkpoint is the file written by the Question 1 save cell; loading it on
# the CPU keeps the export independent of the device used for training.
checkpoint_path = 'vegetable_classifier.pt'
state_dict = torch.load(checkpoint_path, map_location='cpu')
# The last linear layer has one output row per class
model = VegetableClassifier(num_classes=state_dict['fc_layers.2.weight'].shape[0])


# Step 2: Convert the Model to ONNX Format
# Load the trained weights
model.load_state_dict(state_dict)
model.eval()

# Example input tensor
example_input = torch.randn(1, 3, 224, 224)

# Convert the model to ONNX format
onnx_path = 'model.onnx'
torch.onnx.export(
    model,
    example_input,
    onnx_path,
    export_params=True,
    do_constant_folding=True,  # pre-compute constant sub-graphs for faster inference
    input_names=['input'],
    output_names=['logits'],
    # keep the batch dimension symbolic so any number of images can be scored at once
    dynamic_axes={'input': {0: 'batch'}, 'logits': {0: 'batch'}},
)

print(f"Model converted and saved as '{onnx_path}'")


In [None]:
import onnxruntime
import numpy as np
from PIL import Image
import torchvision.transforms as transforms

# Load the image and apply necessary transformations
image_path = 'test_image.jpg'
# Normalize below assumes exactly three channels, so grayscale / RGBA / palette
# files are converted to RGB; the context manager closes the file handle.
with Image.open(image_path) as source_image:
    image = source_image.convert('RGB')
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
input_data = preprocess(image)
input_data = input_data.unsqueeze(0)  # Add batch dimension

# Load the ONNX model
onnx_model = onnxruntime.InferenceSession('model.onnx')

# Run the inference
input_name = onnx_model.get_inputs()[0].name
output_name = onnx_model.get_outputs()[0].name
output = onnx_model.run([output_name], {input_name: input_data.numpy()})

# Get the predicted class as a plain Python int
predicted_class = int(np.argmax(output[0]))

print(f"Predicted class: {predicted_class}")


In [None]:
from flask import Flask, render_template, request
import onnxruntime
import numpy as np
from PIL import Image
import torchvision.transforms as transforms

# Flask application object that the route handlers below are registered on
app = Flask(__name__)


@app.route('/')
def home():
    """Render the ``index.html`` template for the root URL."""
    landing_page = render_template('index.html')
    return landing_page

# Preprocessing is identical for every request, so the pipeline is built once.
_PREPROCESS = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# ONNX Runtime session shared by all requests; created on first use so that
# defining the app does not fail when the model file is not there yet.
_ONNX_SESSION = None


def _get_onnx_session():
    """Return the shared InferenceSession, loading 'model.onnx' on first use."""
    global _ONNX_SESSION
    if _ONNX_SESSION is None:
        _ONNX_SESSION = onnxruntime.InferenceSession('model.onnx')
    return _ONNX_SESSION


@app.route('/predict', methods=['POST'])
def predict():
    """Classify the image uploaded in the ``image`` form field.

    Returns:
        The rendered ``result.html`` template with ``predicted_class`` set to
        the index of the highest-scoring output, or a 400 response when the
        upload cannot be decoded as an image.
    """
    upload = request.files['image']

    # Decode straight from the upload stream. Writing every upload to one fixed
    # filename would let concurrent requests overwrite each other's image.
    try:
        with Image.open(upload.stream) as raw_image:
            # Normalize expects exactly three channels
            image = raw_image.convert('RGB')
    except OSError:
        return "Uploaded file is not a readable image", 400

    # Preprocess the image
    input_data = _PREPROCESS(image)
    input_data = input_data.unsqueeze(0)  # Add batch dimension

    # Reuse the already loaded ONNX model instead of re-reading it per request
    onnx_model = _get_onnx_session()

    # Run the inference
    input_name = onnx_model.get_inputs()[0].name
    output_name = onnx_model.get_outputs()[0].name
    output = onnx_model.run([output_name], {input_name: input_data.numpy()})

    # Get the predicted class as a plain Python int
    predicted_class = int(np.argmax(output[0]))

    return render_template('result.html', predicted_class=predicted_class)

# Start the Flask server only when this code is executed as the main module
if __name__ == '__main__':
    app.run()
