# NS_03 Group Project

## Part 1

In [37]:
import torch
import os
from torchvision.datasets import ImageFolder
from torchvision import transforms
from torch.utils.data import DataLoader
from PIL import Image
import numpy as np

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import datetime
import sklearn.metrics

In [38]:
# load data_set, and calculate the mean and std of dataset

# transform = transforms.Compose([
#     transforms.Resize((256, 256)),  # resize to 256 x 256
#     transforms.ToTensor()
# ])

# # 0 -> mask  1-> nonmask  2 -> not a person
# train_dataset = ImageFolder('./data/train', transform=transform)

# # calculate mean of imgs in each RGB channel
# imgs = torch.stack([img_t for img_t, _ in train_dataset], dim=3)
# print(imgs.shape)

# imgs.view(3, -1).mean(dim=1)  # mean of (number of imgs) in each channel
# imgs.view(3, -1).std(dim=1) # std of (number of imgs) in each channel

In [39]:
# Load the datasets and normalize them.
# Every image is resized, converted to a tensor and normalized per RGB channel.
transform = transforms.Compose([
    transforms.Resize((256, 256)),  # resize to 256 x 256
    # transforms.CenterCrop(256)   # center-crop, then pad the borders (padding could be added later)
    transforms.ToTensor(),
    # per-channel mean/std; presumably the training-set statistics from the commented-out cell above -- TODO confirm
    transforms.Normalize(mean=[0.4990, 0.4567, 0.4188], std=[0.2913, 0.2778, 0.2836]) 
])

# Class indices: 0 -> mask  1-> nonmask  2 -> not a person
train_dataset = ImageFolder('./data/train', transform=transform)
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# The test split goes through the same preprocessing pipeline as the training split.
test_dataset = ImageFolder('./data/test', transform=transform)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=True)

In [40]:
# cnn model

class CNN(nn.Module):
    """Four conv/ReLU/max-pool stages followed by a three-layer classifier head.

    The first fully connected layer is sized for a 64x16x16 feature map, so
    the network expects 3x256x256 inputs (four 2x2 poolings: 256 -> 16).

    Args:
        num_classes: number of output logits. Defaults to 3, the number of
            classes used in this notebook.

    The forward pass returns raw logits of shape (batch, num_classes); no
    softmax is applied.
    """

    def __init__(self, num_classes=3):
        super().__init__()
        # Shape comments below assume a 3x256x256 input.
        self.conv_layer = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, padding=1),   # in=3x256x256, out=16x256x256
            nn.ReLU(),
            nn.MaxPool2d(2),                              # out=16x128x128
            nn.Conv2d(16, 16, kernel_size=3, padding=1),  # out=16x128x128
            nn.ReLU(),
            nn.MaxPool2d(2),                              # out=16x64x64
            nn.Conv2d(16, 32, kernel_size=3, padding=1),  # out=32x64x64
            nn.ReLU(),
            nn.MaxPool2d(2),                              # out=32x32x32
            nn.Conv2d(32, 64, kernel_size=3, padding=1),  # out=64x32x32
            nn.ReLU(),
            nn.MaxPool2d(2)                               # out=64x16x16
        )

        self.fc_layer = nn.Sequential(
            nn.Dropout(p=0.1),
            nn.Linear(64*16*16, 1024),
            nn.ReLU(),
            nn.Dropout(p=0.1),
            nn.Linear(1024, 32),
            nn.ReLU(),
            nn.Dropout(p=0.1),
            nn.Linear(32, num_classes)
        )

    def forward(self, x):
        """Map a batch of images to class logits."""
        # convolutional feature extractor
        x = self.conv_layer(x)

        # flatten everything except the batch dimension
        x = x.view(x.size(0), -1)

        # fully connected classifier head
        x = self.fc_layer(x)

        return x


In [41]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Training on device {device}.")

Training on device cpu.


In [42]:
# define train_loop function
def train_loop(n_epochs, optimizer, model, loss_fn, train_loader):
    """Train ``model`` for ``n_epochs`` passes over ``train_loader``.

    Args:
        n_epochs: number of full passes over ``train_loader``.
        optimizer: optimizer already bound to ``model``'s parameters.
        model: the ``nn.Module`` to train; it is updated in place.
        loss_fn: callable ``loss_fn(outputs, labels)`` returning a scalar loss.
        train_loader: iterable yielding ``(imgs, labels)`` batches.

    Prints the timestamp, epoch number and mean per-batch loss after every
    epoch. Returns ``None``.
    """
    # Run on whatever device the model already lives on, so the function does
    # not depend on a notebook-level global.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    # Make sure dropout is active: an earlier evaluation may have left the
    # model in eval mode.
    model.train()

    for epoch in range(1, n_epochs + 1):
        loss_train = 0.0
        n_batches = 0
        # Iterate the loader that was passed in, not a notebook global.
        for imgs, labels in train_loader:
            # move the batch to the model's device
            imgs = imgs.to(device=target_device)
            labels = labels.to(device=target_device)

            outputs = model(imgs)            # forward pass
            loss = loss_fn(outputs, labels)  # compute the loss

            optimizer.zero_grad()  # drop gradients from the previous step
            loss.backward()        # compute gradients of all parameters
            optimizer.step()       # update the model

            loss_train += loss.item()  # accumulate the loss over the epoch
            n_batches += 1

        # Mean loss per batch for this epoch; NaN when the loader was empty.
        mean_loss = loss_train / n_batches if n_batches else float('nan')
        print('{} Epoch {}, Training loss {}'.format(
            datetime.datetime.now(), epoch, mean_loss
        ))

In [43]:
model = CNN().to(device=device)  # instantiate the CNN and move it to the selected device

# element count of every parameter tensor, in model.parameters() order
numel_list = [p.numel() for p in model.parameters()]
sum(numel_list), numel_list   # total number of parameters, and the per-tensor element counts

(16837043,
 [432, 16, 2304, 16, 4608, 32, 18432, 64, 16777216, 1024, 32768, 32, 96, 3])

In [44]:
# perform training

learning_rate = 0.001
# optimizer = optim.SGD(model.parameters(), lr=learning_rate)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
loss_fn = nn.CrossEntropyLoss()  # use cross entropy loss function

# Train once and cache the whole pickled model on disk; later runs reuse the
# cached file as-is (delete it to force retraining).
if not os.path.exists('my_model_2.pkl'):
    # call train_loop() function
    train_loop(
        n_epochs = 15,
        optimizer = optimizer,
        model = model,
        loss_fn = loss_fn,
        train_loader = train_dataloader
    )
    torch.save(model, 'my_model_2.pkl')
else:
    # map_location remaps tensors saved on another device, e.g. a checkpoint
    # written on a GPU and opened on a CPU-only machine.
    # NOTE(review): this file is a full pickle of the model, so only load files
    # you created yourself. On PyTorch versions where torch.load defaults to
    # weights_only=True, loading it also requires weights_only=False.
    model = torch.load('my_model_2.pkl', map_location=device)

In [45]:
# define validate function
def validate(model, loaders, names):
    """Evaluate ``model`` on each loader and print its metrics.

    ``loaders`` and ``names`` are paired element-wise. For every pair the
    function prints the accuracy, then scikit-learn's classification report
    and confusion matrix. The model is switched to eval mode and left there.
    """
    model.eval()
    for split_name, split_loader in zip(names, loaders):
        n_correct = 0
        n_seen = 0
        pred_batches = []
        label_batches = []

        # Inference only: no gradients are needed because nothing is updated.
        with torch.no_grad():
            for batch_imgs, batch_labels in split_loader:
                # put the batch on the notebook-level device
                batch_imgs = batch_imgs.to(device=device)
                batch_labels = batch_labels.to(device=device)
                label_batches.append(batch_labels)

                # forward pass, then take the index of the largest logit per sample
                logits = model(batch_imgs)
                batch_pred = torch.max(logits, dim=1).indices
                pred_batches.append(batch_pred)

                # running totals: samples seen and samples predicted correctly
                n_seen += batch_labels.shape[0]
                n_correct += int((batch_pred == batch_labels).sum())

        # Join the per-batch tensors and move them to the CPU for scikit-learn.
        all_pred = torch.hstack(pred_batches).cpu()
        all_labels = torch.hstack(label_batches).cpu()

        print("Accuracy {}: {:.2f}".format(split_name, n_correct / n_seen))
        print(sklearn.metrics.classification_report(all_labels, all_pred))
        print(sklearn.metrics.confusion_matrix(all_labels, all_pred))

In [46]:
# measure accuracy of the first model on the training and test splits
validate(model, [train_dataloader, test_dataloader], ['train', 'test'])

Accuracy train: 1.00
              precision    recall  f1-score   support

           0       1.00      0.99      0.99       284
           1       0.99      1.00      1.00       338
           2       1.00      1.00      1.00       576

    accuracy                           1.00      1198
   macro avg       1.00      1.00      1.00      1198
weighted avg       1.00      1.00      1.00      1198

[[281   1   2]
 [  0 338   0]
 [  0   2 574]]
Accuracy test: 0.84
              precision    recall  f1-score   support

           0       0.93      0.69      0.79       123
           1       0.88      0.94      0.91       124
           2       0.74      0.91      0.82       102

    accuracy                           0.84       349
   macro avg       0.85      0.85      0.84       349
weighted avg       0.86      0.84      0.84       349

[[ 85  11  27]
 [  2 116   6]
 [  4   5  93]]


## Part 2

- Run the model on the female and male test datasets and evaluate its performance on each.
- Compare the two results to check whether our model has a gender bias.

In [47]:
# Re-select the device (same rule as earlier: CUDA when available, else CPU).
device = (torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu'))

# Male-only test images, preprocessed with the same `transform` as the training data.
male_dataset = ImageFolder('./data/test_male', transform=transform)
male_dataloader = DataLoader(male_dataset, batch_size=32, shuffle=True)


# Female-only test images, preprocessed with the same `transform` as the training data.
female_dataset = ImageFolder('./data/test_female', transform=transform)
female_dataloader = DataLoader(female_dataset, batch_size=32, shuffle=True)

# Report the metrics of the first model separately for each gender subset.
validate(model, [male_dataloader, female_dataloader], ['Male', 'Female'])


Accuracy Male: 0.81
              precision    recall  f1-score   support

           0       1.00      0.62      0.77        93
           1       0.84      0.98      0.91       105
           2       0.00      0.00      0.00         0

    accuracy                           0.81       198
   macro avg       0.61      0.53      0.56       198
weighted avg       0.92      0.81      0.84       198

[[ 58  19  16]
 [  0 103   2]
 [  0   0   0]]
Accuracy Female: 0.84
              precision    recall  f1-score   support

           0       0.97      0.73      0.83        91
           1       0.94      0.94      0.94       109
           2       0.00      0.00      0.00         0

    accuracy                           0.84       200
   macro avg       0.64      0.56      0.59       200
weighted avg       0.96      0.84      0.89       200

[[ 66   6  19]
 [  2 103   4]
 [  0   0   0]]


Accuracy on the female test set is 0.84, while accuracy on the male test set is 0.81. Our model is biased: it more often misclassifies men wearing a mask as not wearing one (19 of 93 masked male images, versus 6 of 91 masked female images).

The reason for this gender bias is that our training dataset is not balanced in terms of gender. For example, the Mask folder of the training set contains 164 female images but only 120 male images.

To address this bias, we modified our training dataset to balance the number of male and female images, and then retrained our CNN model on this balanced dataset.

In [49]:
# load balanced dataset
train_balance_dataset = ImageFolder('./data/train_balance', transform=transform)
train_balance_dataloader = DataLoader(train_balance_dataset, batch_size=32, shuffle=True)

# perform training on a fresh model, with the same hyper-parameters as the first model
model_retrained = CNN().to(device=device) 
learning_rate = 0.001
optimizer = optim.Adam(model_retrained.parameters(), lr=learning_rate)
loss_fn = nn.CrossEntropyLoss()  # use cross entropy loss function

# Train once and cache the whole pickled model on disk; later runs reuse the
# cached file as-is (delete it to force retraining).
if not os.path.exists('my_model_balance.pkl'):
    # call train_loop() function
    train_loop(
        n_epochs = 15,
        optimizer = optimizer,
        model = model_retrained,
        loss_fn = loss_fn,
        train_loader = train_balance_dataloader
    )
    torch.save(model_retrained, 'my_model_balance.pkl')
else:
    # map_location remaps tensors saved on another device, e.g. a checkpoint
    # written on a GPU and opened on a CPU-only machine.
    # NOTE(review): this file is a full pickle of the model, so only load files
    # you created yourself. On PyTorch versions where torch.load defaults to
    # weights_only=True, loading it also requires weights_only=False.
    model_retrained = torch.load('my_model_balance.pkl', map_location=device)

In [50]:
# evaluate the retrained model on the balanced training set and the original test set
validate(model_retrained, [train_balance_dataloader, test_dataloader], ['train', 'test'])

Accuracy train: 0.99
              precision    recall  f1-score   support

           0       0.99      0.97      0.98       327
           1       0.99      0.99      0.99       338
           2       0.99      1.00      0.99       576

    accuracy                           0.99      1241
   macro avg       0.99      0.99      0.99      1241
weighted avg       0.99      0.99      0.99      1241

[[316   3   8]
 [  2 336   0]
 [  1   1 574]]
Accuracy test: 0.89
              precision    recall  f1-score   support

           0       0.92      0.84      0.88       123
           1       0.93      0.91      0.92       124
           2       0.81      0.91      0.86       102

    accuracy                           0.89       349
   macro avg       0.88      0.89      0.88       349
weighted avg       0.89      0.89      0.89       349

[[103   6  14]
 [  3 113   8]
 [  6   3  93]]


In [51]:
# evaluate the retrained model separately on the male and female test datasets
validate(model_retrained, [male_dataloader, female_dataloader], ['Male', 'Female'])

Accuracy Male: 0.85
              precision    recall  f1-score   support

           0       0.99      0.75      0.85        93
           1       0.92      0.93      0.93       105
           2       0.00      0.00      0.00         0

    accuracy                           0.85       198
   macro avg       0.64      0.56      0.59       198
weighted avg       0.95      0.85      0.89       198

[[70  8 15]
 [ 1 98  6]
 [ 0  0  0]]
Accuracy Female: 0.91
              precision    recall  f1-score   support

           0       0.97      0.86      0.91        91
           1       0.96      0.95      0.96       109
           2       0.00      0.00      0.00         0

    accuracy                           0.91       200
   macro avg       0.65      0.60      0.62       200
weighted avg       0.97      0.91      0.94       200

[[ 78   4   9]
 [  2 104   3]
 [  0   0   0]]
