In [208]:
import torch
from torchvision.datasets import ImageFolder
from torchvision import transforms
from torch.utils.data import DataLoader
from PIL import Image
import numpy as np

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import datetime
import sklearn.metrics

In [209]:
# Load the dataset and compute its per-channel mean and std

# transform = transforms.Compose([
#     transforms.Resize((256, 256)),  # resize to 256 x 256
#     transforms.ToTensor()
# ])

# # 0 -> mask  1-> nonmask  2 -> not a person
# train_dataset = ImageFolder('./data/train', transform=transform)

# # calculate mean of imgs in each RGB channel
# imgs = torch.stack([img_t for img_t, _ in train_dataset], dim=3)
# print(imgs.shape)

# imgs.view(3, -1).mean(dim=1)  # mean of (number of imgs) in each channel
# imgs.view(3, -1).std(dim=1) # std of (number of imgs) in each channel

In [210]:
# Load the datasets and normalize them.
transform = transforms.Compose([
    transforms.Resize((256, 256)),  # resize every image to 256 x 256
    # transforms.CenterCrop(256)   # alternative: center crop, then pad the borders (padding could be added later)
    transforms.ToTensor(),
    # Per-channel (RGB) mean / std; presumably produced by the commented-out
    # statistics cell above -- TODO confirm they match the current training set.
    transforms.Normalize(mean=[0.4990, 0.4567, 0.4188], std=[0.2913, 0.2778, 0.2836])
])

# Class indices: 0 -> mask, 1 -> nonmask, 2 -> not a person
train_dataset = ImageFolder('./data/train', transform=transform)
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)

test_dataset = ImageFolder('./data/test', transform=transform)
# Evaluation metrics do not depend on sample order, so the test loader is not
# shuffled: this keeps evaluation deterministic and leaves the global RNG alone.
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [211]:
# cnn model

class CNN(nn.Module):
    """Small convolutional classifier for 3x256x256 images.

    Three conv -> ReLU -> 2x2 max-pool stages reduce the input to an
    8x32x32 feature map, which is flattened and passed through a four-layer
    fully connected head with light dropout.

    Args:
        num_classes: Number of output classes (size of the final linear
            layer). Defaults to 3, the value the notebook was written for.

    The forward pass returns raw logits of shape ``(batch, num_classes)``;
    no softmax is applied.
    """

    def __init__(self, num_classes=3):
        super().__init__()
        self.conv_layer = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),   # in=3x256x256;  out=32x256x256
            nn.ReLU(),
            nn.MaxPool2d(2),                              # out=32x128x128
            nn.Conv2d(32, 16, kernel_size=3, padding=1),  # in=32x128x128; out=16x128x128
            nn.ReLU(),
            nn.MaxPool2d(2),                              # out=16x64x64
            nn.Conv2d(16, 8, kernel_size=3, padding=1),   # in=16x64x64;   out=8x64x64
            nn.ReLU(),
            nn.MaxPool2d(2)                               # out=8x32x32
        )

        self.fc_layer = nn.Sequential(
            nn.Dropout(p=0.1),
            nn.Linear(8*32*32, 32*32),
            nn.ReLU(),
            nn.Dropout(p=0.1),
            nn.Linear(32*32, 1024),
            nn.ReLU(),
            nn.Dropout(p=0.1),
            nn.Linear(1024, 128),
            nn.ReLU(),
            nn.Dropout(p=0.1),
            nn.Linear(128, num_classes)
        )

    def forward(self, x):
        """Return class logits for a batch ``x`` of shape (batch, 3, 256, 256)."""
        # conv layer
        x = self.conv_layer(x)

        # flatten everything except the batch dimension; unlike ``view`` this
        # also works when the activations are not contiguous (e.g. channels_last)
        x = torch.flatten(x, start_dim=1)

        # fc layer
        x = self.fc_layer(x)

        return x


In [212]:
# Prefer the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Training on device {device}.")

Training on device cuda.


In [213]:
# define train_loop function
def train_loop(n_epochs, optimizer, model, loss_fn, train_loader):
    """Train ``model`` for ``n_epochs`` passes over ``train_loader``.

    After every epoch the current timestamp, the epoch number and the mean
    loss per batch are printed.

    Args:
        n_epochs: Number of full passes over ``train_loader``.
        optimizer: Optimizer already bound to ``model``'s parameters.
        model: Module to train; batches are moved to the device of its
            first parameter (CPU if it has no parameters).
        loss_fn: Callable ``loss_fn(outputs, labels)`` returning a scalar loss.
        train_loader: Iterable yielding ``(imgs, labels)`` batches.

    Returns:
        None.
    """
    # Follow the model's own device instead of relying on a notebook global.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    for epoch in range(1, n_epochs + 1):
        # An earlier evaluation pass may have left the model in eval mode,
        # which would silently disable dropout during training.
        model.train()

        loss_train = 0.0
        n_batches = 0
        for imgs, labels in train_loader:  # loop over batches in dataset
            # move data to the model's device
            imgs = imgs.to(device=target_device)
            labels = labels.to(device=target_device)

            outputs = model(imgs)  # feed a batch through our model

            loss = loss_fn(outputs, labels)  # computes the loss

            optimizer.zero_grad()  # getting rid of the gradients from the last round

            loss.backward()  # performs backward step, compute the gradients of all parameters

            optimizer.step()  # updates the model

            loss_train += loss.item()  # sums of losses we saw over the epoch
            n_batches += 1

        # Average loss per batch; counting batches avoids both a division by
        # zero on an empty loader and the need for the loader to define len().
        mean_loss = loss_train / n_batches if n_batches else float('nan')
        print('{} Epoch {}, Training loss {}'.format(
                datetime.datetime.now(), epoch, mean_loss
            ))

In [214]:
# Seed the global RNG before building the model so that weight initialisation
# (and any later shuffling / dropout drawn from the same generator) is
# reproducible on a fresh "Restart & Run All".
torch.manual_seed(42)

model = CNN().to(device=device)  # instantiates cnn model

numel_list = [p.numel() for p in model.parameters()]
sum(numel_list), numel_list   # total number of parameters, and the element count of each parameter tensor

(9577499,
 [864,
  32,
  4608,
  16,
  1152,
  8,
  8388608,
  1024,
  1048576,
  1024,
  131072,
  128,
  384,
  3])

In [215]:
# perform training

# Optimisation setup: plain SGD over all model parameters, cross-entropy loss.
learning_rate = 0.02
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

# Run the training loop for 50 epochs on the training loader.
train_loop(n_epochs=50, optimizer=optimizer, model=model,
           loss_fn=loss_fn, train_loader=train_dataloader)

2021-03-22 21:30:11.931694 Epoch 1, Training loss 1.074864489691598
2021-03-22 21:30:26.257691 Epoch 2, Training loss 1.0409454703330994
2021-03-22 21:30:39.946695 Epoch 3, Training loss 1.0242044636181422
2021-03-22 21:30:53.323691 Epoch 4, Training loss 1.0109878046172005
2021-03-22 21:31:06.813691 Epoch 5, Training loss 0.9713035259928022
2021-03-22 21:31:20.031692 Epoch 6, Training loss 0.8964085817337036
2021-03-22 21:31:33.213692 Epoch 7, Training loss 0.8518054434231349
2021-03-22 21:31:46.465693 Epoch 8, Training loss 0.8142822163445609
2021-03-22 21:31:59.948693 Epoch 9, Training loss 0.793664596761976
2021-03-22 21:32:13.066691 Epoch 10, Training loss 0.7271166571549007
2021-03-22 21:32:26.780691 Epoch 11, Training loss 0.6971478249345507
2021-03-22 21:32:40.412691 Epoch 12, Training loss 0.6784423385347639
2021-03-22 21:32:54.736693 Epoch 13, Training loss 0.6349072916167123
2021-03-22 21:33:09.017693 Epoch 14, Training loss 0.6046038210391999
2021-03-22 21:33:22.652208 Epoc

In [216]:
# define validate function
def validate(model, train_loader, test_loader):
    """Print accuracy and a classification report for both data splits.

    For each of ``train_loader`` and ``test_loader`` the model is run without
    gradient tracking, the arg-max class of every sample is collected, and
    the overall accuracy plus ``sklearn.metrics.classification_report`` are
    printed. A split that yields no batches is reported and skipped.

    Args:
        model: Module producing per-class scores of shape (batch, classes).
            Batches are moved to the device of its first parameter (CPU if
            it has no parameters).
        train_loader: Iterable yielding ``(imgs, labels)`` training batches.
        test_loader: Iterable yielding ``(imgs, labels)`` test batches.

    Returns:
        None. The model's train/eval mode is restored on exit.
    """
    # Follow the model's own device instead of relying on a notebook global.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    was_training = model.training
    model.eval()  # disable dropout while measuring
    try:
        # accuracy on training data and test data
        for name, loader in [("train", train_loader), ("test", test_loader)]:
            batch_predictions = []
            batch_labels = []

            with torch.no_grad():  # no gradients needed, parameters are not updated here
                for imgs, labels in loader:
                    imgs = imgs.to(device=target_device)

                    outputs = model(imgs)  # feed input to model

                    # index of the highest score per sample = predicted class
                    _, predicted = torch.max(outputs, dim=1)

                    # keep the per-batch results on the CPU so that evaluation
                    # does not accumulate tensors in accelerator memory
                    batch_predictions.append(predicted.cpu())
                    batch_labels.append(labels.cpu())

            if not batch_labels:
                # nothing to concatenate or divide by
                print("Accuracy {}: n/a (no samples)".format(name))
                continue

            total_predicted = torch.cat(batch_predictions)
            total_labels = torch.cat(batch_labels)

            # element-wise comparison gives a boolean tensor; its sum is the
            # number of samples where prediction and ground truth agree
            correct = int((total_predicted == total_labels).sum())
            total = total_labels.shape[0]

            print("Accuracy {}: {:.2f}".format(name, correct / total))
            print(sklearn.metrics.classification_report(total_labels.numpy(), total_predicted.numpy()))
    finally:
        # do not leave the caller's model silently stuck in eval mode
        model.train(was_training)

In [217]:
# measuring accuracy
validate(model, train_loader=train_dataloader, test_loader=test_dataloader)

35
35
Accuracy train: 1.00
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       270
           1       1.00      1.00      1.00       270
           2       1.00      1.00      1.00       576

    accuracy                           1.00      1116
   macro avg       1.00      1.00      1.00      1116
weighted avg       1.00      1.00      1.00      1116

4
4
Accuracy test: 0.75
              precision    recall  f1-score   support

           0       0.61      0.63      0.62        30
           1       1.00      0.67      0.80        30
           2       0.74      0.85      0.79        60

    accuracy                           0.75       120
   macro avg       0.78      0.72      0.74       120
weighted avg       0.77      0.75      0.75       120



In [192]:
############################# ignore below ##########################################################

# # !pip install skorch
import skorch
from skorch import NeuralNetClassifier
import sklearn.metrics

In [193]:
class SkorchDataset(torch.utils.data.Dataset):
    """Thin wrapper exposing a dataset's items as ``(input, target)`` pairs.

    Args:
        dataset: Any indexable object with ``__len__`` whose items are
            sequences with the input at position 0 and the target at
            position 1.
    """

    def __init__(self, dataset):
        self.d = dataset

    def __getitem__(self, idx):
        """Return ``(input, target)`` for index ``idx``."""
        # Index the wrapped dataset once: indexing it twice would run its
        # item loading (and any transform it applies) twice per sample.
        sample = self.d[idx]
        return sample[0], sample[1]

    def __len__(self):
        """Return the number of items in the wrapped dataset."""
        return len(self.d)

In [196]:
# skorch wrapper around the same CNN, trained with SGD and cross-entropy loss.
nn_classifier = NeuralNetClassifier(
    module=CNN,
    max_epochs=5,
    criterion=nn.CrossEntropyLoss,
    lr=0.01,
    batch_size=32,
    optimizer=optim.SGD,
    # use the GPU only when one is actually available, so that the cell also
    # runs on CPU-only machines instead of failing
    device=torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
    train_split=None  # no internal validation split
)

# call fit(X, y) to train; y is None because the dataset yields the targets itself
train_dataset = ImageFolder('./data/train', transform=transform)
train_dataset_skorch = SkorchDataset(train_dataset)

nn_classifier.fit(train_dataset_skorch, y=None);

  epoch    train_loss      dur
-------  ------------  -------
      1        [36m1.0775[0m  22.5910
      2        [36m1.0611[0m  22.1830
      3        [36m1.0497[0m  22.4300
      4        [36m1.0406[0m  23.2660
      5        [36m1.0350[0m  23.0540


In [197]:
# predict on training data
y_pred_train = nn_classifier.predict(train_dataset_skorch)
print(y_pred_train)
# ImageFolder keeps the class index of every sample in ``targets``; reading it
# avoids loading and transforming every image just to collect the labels.
y_train = np.array(train_dataset.targets)

# print classification report
print(sklearn.metrics.classification_report(y_train, y_pred_train))

[2 2 2 ... 2 2 2]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       270
           1       0.00      0.00      0.00       270
           2       0.52      1.00      0.68       576

    accuracy                           0.52      1116
   macro avg       0.17      0.33      0.23      1116
weighted avg       0.27      0.52      0.35      1116



In [195]:
# predict on test data
y_pred = nn_classifier.predict(SkorchDataset(test_dataset))
print(y_pred)
# ImageFolder keeps the class index of every sample in ``targets``; reading it
# avoids loading and transforming every image just to collect the labels.
y_test = np.array(test_dataset.targets)

# print classification report
print(sklearn.metrics.classification_report(y_test, y_pred))

[2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        30
           1       0.00      0.00      0.00        30
           2       0.50      1.00      0.67        60

    accuracy                           0.50       120
   macro avg       0.17      0.33      0.22       120
weighted avg       0.25      0.50      0.33       120

