In [94]:
import torch
from torchvision.datasets import ImageFolder
from torchvision import transforms
from torch.utils.data import DataLoader
from PIL import Image
import numpy as np

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import datetime

In [95]:
# read img files into a list
# data_dir = './test_data/Mask/'
# # os.listdir(data_dir)

# filenames = [name for name in os.listdir(data_dir)
#             if os.path.splitext(name)[-1] == '.jpg']

# img_arr = imageio.imread(os.path.join(data_dir, filenames[0]))
# img_arr.shape

# batch_size = 5 # number of imgs in a batch
# batch = torch.zeros(batch_size, 3, 256, 256)

# for i, filename in enumerate(filenames):
#     img_arr = imageio.imread(os.path.join(data_dir, filename))
#     img_t = torch.from_numpy(img_arr)
#     img_t = img_t.permute(2, 0, 1)
#     img_t = img_t[:3]
    

In [96]:
# Load the training images without normalisation (only resize + tensor conversion).

transform = transforms.Compose([
    transforms.Resize(size=(256, 256)),  # resize every image to 256 x 256
    # transforms.CenterCrop(256)   # centre-crop, then pad the borders (padding can be added later)
    transforms.ToTensor(),
    # transforms.Normalize(mean=[.5, .5, .5], std=[.5, .5, .5])  # author's note: normal distribution, mean 0 / variance 1
])

# Class indices as noted by the author: 0 -> mask, 1 -> nonmask, 2 -> not a person
train_dataset = ImageFolder(root='./sample_data/train_data', transform=transform)



# for img, label in train_dataset:
#     if (label == 0):
#         print(img.shape)

# # train 每四个为一组
# train_dataloader = DataLoader(train_dataset, batch_size=4, shuffle=True)

# # 显示图片
# to_pil_image = transforms.ToPILImage()
# for image, label in train_dataloader:
#     img = to_pil_image(image[0])
#     img.show()

In [97]:
# Per-channel mean over the whole training set.
# Every image tensor is stacked along a new trailing axis (dim=3).
imgs = torch.stack(tuple(sample for sample, _ in train_dataset), dim=3)
print(imgs.shape)

# Collapse everything except the channel axis, then average inside each channel.
torch.mean(imgs.reshape(3, -1), dim=1)

torch.Size([3, 256, 256, 17])


tensor([0.4793, 0.4304, 0.3820])

In [98]:
# Per-channel standard deviation over all stacked training images.
torch.std(imgs.reshape(3, -1), dim=1)

tensor([0.2940, 0.2931, 0.2957])

In [99]:
# Reload the training set, this time normalising each channel with the
# mean / std values hard-coded below.
transform = transforms.Compose([
    transforms.Resize(size=(256, 256)),  # resize every image to 256 x 256
    # transforms.CenterCrop(256)   # centre-crop, then pad the borders (padding can be added later)
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.4793, 0.4304, 0.3820],
        std=[0.2940, 0.2931, 0.2957],
    ),
])

# Class indices as noted by the author: 0 -> mask, 1 -> nonmask, 2 -> not a person
train_dataset = ImageFolder(root='./sample_data/train_data', transform=transform)

# Mini-batches of 4 images, reshuffled on every pass.
train_dataloader = DataLoader(dataset=train_dataset, batch_size=4, shuffle=True)

In [100]:
# Sanity check: walk the dataset and show each sample's tensor shape and class index.
for sample, target in train_dataset:
    print(sample.shape, target, sep='\n')

torch.Size([3, 256, 256])
0
torch.Size([3, 256, 256])
0
torch.Size([3, 256, 256])
0
torch.Size([3, 256, 256])
0
torch.Size([3, 256, 256])
0
torch.Size([3, 256, 256])
0
torch.Size([3, 256, 256])
1
torch.Size([3, 256, 256])
1
torch.Size([3, 256, 256])
1
torch.Size([3, 256, 256])
1
torch.Size([3, 256, 256])
1
torch.Size([3, 256, 256])
2
torch.Size([3, 256, 256])
2
torch.Size([3, 256, 256])
2
torch.Size([3, 256, 256])
2
torch.Size([3, 256, 256])
2
torch.Size([3, 256, 256])
2


In [101]:
# Sanity check: iterate once over the loader and show the shape of every batch.
# The loop variables are deliberately NOT called `imgs` / `labels`: `imgs` already
# holds the stacked dataset tensor used by the mean/std cells, and rebinding it
# here would make those cells silently compute wrong statistics when re-run.
for batch_imgs, batch_labels in train_dataloader:
    print(batch_imgs.shape)
    print(batch_labels.shape)

torch.Size([4, 3, 256, 256])
torch.Size([4])
torch.Size([4, 3, 256, 256])
torch.Size([4])
torch.Size([4, 3, 256, 256])
torch.Size([4])
torch.Size([4, 3, 256, 256])
torch.Size([4])
torch.Size([1, 3, 256, 256])
torch.Size([1])


In [102]:
# cnn model

class CNN(nn.Module):
    """Small convolutional classifier for 3-channel 256x256 images.

    Three ``conv -> tanh -> 2x2 max-pool`` stages reduce a 3x256x256 input to
    8x32x32 features. The flattened features are fed to a dropout-regularised
    fully connected stack whose last layer has no activation, i.e. it emits
    one raw score per class.

    The first linear layer expects exactly 8*32*32 features, so inputs must
    be 256x256 pixels.

    Args:
        num_classes: Number of output scores. Defaults to 3, the value that
            was previously hard-coded.
    """

    def __init__(self, num_classes=3):
        super().__init__()
        self.conv_layer = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),   # in=3x256x256;  out=32x256x256
            nn.Tanh(),
            nn.MaxPool2d(2),                              # out=32x128x128
            nn.Conv2d(32, 16, kernel_size=3, padding=1),  # in=32x128x128; out=16x128x128
            nn.Tanh(),
            nn.MaxPool2d(2),                              # out=16x64x64
            nn.Conv2d(16, 8, kernel_size=3, padding=1),   # in=16x64x64;   out=8x64x64
            nn.Tanh(),
            nn.MaxPool2d(2)                               # out=8x32x32
        )

        self.fc_layer = nn.Sequential(
            nn.Dropout(p=0.1),
            nn.Linear(8*32*32, 32*32),
            nn.Tanh(),
            nn.Dropout(p=0.1),
            nn.Linear(32*32, 1024),
            nn.Tanh(),
            nn.Dropout(p=0.1),
            nn.Linear(1024, 128),
            nn.Tanh(),
            nn.Dropout(p=0.1),
            nn.Linear(128, num_classes)
        )

    def forward(self, x):
        """Map a batch of images (batch, 3, 256, 256) to scores (batch, num_classes)."""
        # convolutional feature extractor
        x = self.conv_layer(x)

        # Flatten everything except the batch axis. Unlike ``.view`` this also
        # accepts non-contiguous activations (e.g. channels_last inputs).
        x = torch.flatten(x, start_dim=1)

        # fully connected classifier head
        return self.fc_layer(x)


In [103]:
# define train_loop function
def train_loop(n_epochs, optimizer, model, loss_fn, train_loader):
    """Train ``model`` for ``n_epochs`` passes over ``train_loader``.

    Args:
        n_epochs: Number of full passes over the loader.
        optimizer: Optimizer that updates the parameters of ``model``.
        model: Module called as ``model(imgs)`` on each batch of inputs.
        loss_fn: Callable ``loss_fn(outputs, labels)`` returning a scalar loss tensor.
        train_loader: Iterable yielding ``(imgs, labels)`` batches.

    Returns:
        None. The average loss per batch is printed for epoch 1 and for
        every 10th epoch (``nan`` if the loader yielded no batches).
    """
    # Make sure dropout is active even if the model was left in eval mode
    # by an earlier evaluation (e.g. validate()).
    model.train()

    for epoch in range(1, n_epochs + 1):
        loss_train = 0.0
        n_batches = 0
        # Iterate the loader that was passed in -- not a notebook-level global.
        for imgs, labels in train_loader:

            outputs = model(imgs)  # feed a batch through the model

            loss = loss_fn(outputs, labels)  # compute the loss

            optimizer.zero_grad()  # discard gradients from the previous step

            loss.backward()  # back-propagate: compute gradients of all parameters

            optimizer.step()  # update the model parameters

            loss_train += loss.item()  # running sum of the batch losses for this epoch
            n_batches += 1

        # report the average loss per batch on epoch 1 and every 10th epoch
        if epoch == 1 or epoch % 10 == 0:
            avg_loss = loss_train / n_batches if n_batches else float('nan')
            print('{} Epoch {}, Training loss {}'.format(
                datetime.datetime.now(), epoch, avg_loss
            ))
            

In [104]:
# Deliberately overfit: a network with many parameters trained on a tiny sample set.
learning_rate = 0.02
model = CNN()  # fresh, randomly initialised network
loss_fn = nn.CrossEntropyLoss()  # cross-entropy loss
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

# Run 100 epochs over the training loader.
train_loop(100, optimizer, model, loss_fn, train_dataloader)

2021-03-16 16:21:32.602623 Epoch 1, Training loss 1.1146415710449218
2021-03-16 16:21:43.684629 Epoch 10, Training loss 0.2000491663813591
2021-03-16 16:21:53.587626 Epoch 20, Training loss 0.017601943388581277
2021-03-16 16:22:04.575631 Epoch 30, Training loss 0.010226001031696796
2021-03-16 16:22:15.344626 Epoch 40, Training loss 0.005695029115304351
2021-03-16 16:22:24.989631 Epoch 50, Training loss 0.0043560173362493515
2021-03-16 16:22:34.597628 Epoch 60, Training loss 0.0031558499671518804
2021-03-16 16:22:44.253632 Epoch 70, Training loss 0.0036084646359086036
2021-03-16 16:22:53.933620 Epoch 80, Training loss 0.0021841708570718766
2021-03-16 16:23:05.868623 Epoch 90, Training loss 0.002263228758238256
2021-03-16 16:23:16.211620 Epoch 100, Training loss 0.0015468414523638785


In [109]:
# define validate function
def validate(model, train_loader, test_loader):
    """Print classification accuracy on the training and the test loader.

    The model is switched to evaluation mode (and left in that mode). For
    each loader, the predicted class is the index of the highest output
    score; accuracy is the fraction of samples whose prediction equals the
    label.

    Args:
        model: Module mapping a batch of inputs to per-class scores of
            shape (batch, classes).
        train_loader: Iterable yielding ``(imgs, labels)`` training batches.
        test_loader: Iterable yielding ``(imgs, labels)`` test batches.

    Returns:
        None. One line ``Accuracy <name>: <value>`` is printed per loader;
        a loader that yields no samples is reported as ``nan`` instead of
        raising ZeroDivisionError.
    """
    model.eval()
    # accuracy on training data and test data
    for name, loader in (("train", train_loader), ("test", test_loader)):
        correct = 0
        total = 0

        with torch.no_grad():  # no gradients needed: parameters are not updated here
            for imgs, labels in loader:

                # index of the highest score per sample = predicted class
                predicted = model(imgs).argmax(dim=1)

                total += labels.shape[0]  # number of samples seen so far

                # element-wise comparison gives a boolean tensor; its sum is the
                # number of samples in the batch where prediction and label agree
                correct += int((predicted == labels).sum())

        accuracy = correct / total if total else float("nan")
        print("Accuracy {}: {:.2f}".format(name, accuracy))
                

In [110]:
# Measure accuracy on the training data and on the separate test folder.
test_dataset = ImageFolder('./sample_data/test_data', transform=transform)
# Accuracy does not depend on sample order, so the evaluation loader is not
# shuffled; a fixed order keeps the batches reproducible between runs.
test_dataloader = DataLoader(test_dataset, batch_size=4, shuffle=False)

validate(model, train_dataloader, test_dataloader)


Accuracy train: 1.00
Accuracy test: 0.40
