#### Key Steps in a Machine Learning Task

1. **Data Preprocessing**
   - Standardize data formats
   - Remove outliers and invalid data
   - Apply necessary data transformations
   - Split the dataset into training, validation, and test sets
     - Tools: `train_test_split` and `KFold` from scikit-learn

2. **Model Selection**
   - Choose an appropriate model for the task
   - Define the loss function and optimization method
   - Set relevant hyperparameters
   - Many machine learning libraries (e.g., scikit-learn) provide built-in loss functions and optimizers

3. **Model Training and Evaluation**
   - Fit the model to the training data
   - Evaluate model performance on the validation and/or test set


#### Extra Work for Deep Learning
- Build data loaders for batch training
- Construct models layer by layer, often using custom or specialized layers
- Ensure loss functions and optimizers support backpropagation for user-defined architectures

In [None]:
import os 
import numpy as np 
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as optimizer

In [None]:
# Hyperparameters

batch_size = 16   # each batch contains 16 samples
lr = 1e-4         # learning rate
max_epochs = 100
"""Hyperparameters can also be stored in yaml, json, dict or other files"""

In [None]:
# GPU selection

# Expose only GPUs 0 and 1 to PyTorch. The value is written without spaces,
# i.e. as a plain comma-separated list of device indices.
# NOTE: this has to be set before CUDA is initialized to take effect.
os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'

# Prefer GPU 1; fall back to GPU 0 when only one GPU is visible, and to the
# CPU when CUDA is not available at all. Checking the device count avoids
# selecting 'cuda:1' on a single-GPU machine, which would fail on first use.
if torch.cuda.is_available():
    device = torch.device(
        'cuda:1' if torch.cuda.device_count() > 1 else 'cuda:0'
    )
else:
    device = torch.device('cpu')

In [None]:
# Load data
from torchvision import datasets

# Build the training and validation datasets from their image folders.
# Both share data_transform, which enables image cropping, flipping, etc.
train_data = datasets.ImageFolder(root=train_path, transform=data_transform)
val_data = datasets.ImageFolder(root=val_path, transform=data_transform)

# Customize Dataset
import os
import pandas as pd
from torchvision.io import read_image

class MyDataset(Dataset):
    """Image dataset described by a CSV annotations file.

    Each row of the annotations file describes one sample: the first column
    is the image filename (relative to ``img_dir``) and the second column is
    the label of that image.
    """

    def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
        """
        Args:
            annotations_file (string): Path to the csv file with annotations.
            img_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
            target_transform (callable, optional): Optional transform to be applied
                on the target.
        """
        self.img_labels = pd.read_csv(annotations_file)
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        """Return the number of samples in the dataset."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Return a single sample and its label, so that ``dataset[idx]`` works.

        Args:
            idx (int): Index of the row in the annotations table.

        Returns:
            tuple: ``(image, label)`` after the optional transforms have been
            applied.
        """
        # Full image path = image directory + filename taken from the first
        # column of the annotations table at row idx.
        img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
        image = read_image(img_path)
        # The label is stored in the second column of the same row.
        label = self.img_labels.iloc[idx, 1]
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label

In [None]:
# Set up DataLoader
from torch.utils.data import DataLoader

# Loader for the training set
train_loader = DataLoader(
    dataset=train_data,
    batch_size=batch_size,
    shuffle=True,    # normally True for training, False for validation
    num_workers=4,   # 4 or 8 for Linux, 0 for Windows
    drop_last=True,  # drop the last batch if it holds fewer than batch_size samples
)

# Loader for the validation set: no shuffling, the last batch is kept
val_loader = DataLoader(
    dataset=val_data,
    batch_size=batch_size,
    shuffle=False,
    num_workers=4,
)

### Construct Neural Network

In [None]:
from torch import nn

class MLP(nn.Module):
    """Multi-Layer Perceptron with one hidden layer.

    Architecture: Linear(784 -> 256) -> ReLU -> Linear(256 -> 10).
    """

    def __init__(self, **kwargs):
        # Initialize the parent class (nn.Module) first; any extra keyword
        # arguments given at construction time are forwarded to it.
        super().__init__(**kwargs)
        # Declare the layers that hold the model parameters: two fully
        # connected ("Linear") layers with a ReLU activation in between.
        self.hidden = nn.Linear(in_features=784, out_features=256)  # 784 input features -> 256 hidden features
        self.act = nn.ReLU()
        self.output = nn.Linear(in_features=256, out_features=10)   # 256 hidden features -> 10 outputs

    def forward(self, x):
        """Run the forward pass: hidden layer, activation, then output layer."""
        hidden_out = self.hidden(x)
        activated = self.act(hidden_out)
        return self.output(activated)

    # There is no need to define a back-propagation function: PyTorch's
    # autograd records the operations performed in the forward pass and
    # computes the parameter gradients when backward() is called.

In [None]:
X = torch.rand(2,784) # dummy input: 2 samples with 784 random features each
net = MLP()   # instantiate the model, i.e. create a new MLP object
print(net)    # print the model architecture
net(X)        # run the forward pass on X