In [1]:
import numpy as np
import pandas as pd
import csv
from PIL import Image
import numpy as np
import random
import os
from tqdm import tqdm
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F

In [2]:
# Use the GPU when PyTorch can see one; otherwise everything runs on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Label vocabulary (lowercase, uppercase, digits) and the lookup table that maps
# each character to its position in that vocabulary, i.e. its class index.
alphabets = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
alphabets2index = dict(zip(alphabets, range(len(alphabets))))

In [3]:
class TaskDataset(Dataset):
    """Map-style dataset that loads one image per annotation row.

    Args:
        data: Sequence of ``(filename, label)`` pairs. ``filename`` is resolved
            relative to ``root``.
        root: Directory the file names are joined onto.
        return_filename: When true, ``__getitem__`` returns the file name in
            place of the label.

    Each item is ``(tensor, label)`` or ``(tensor, filename)``, where ``tensor``
    is the image converted with ``ToTensor`` and normalized per channel.
    """

    def __init__(self, data, root, return_filename=False):
        self.data = data
        self.return_filename = return_filename
        self.root = root
        # The pipeline is stateless, so build it once instead of on every item.
        self.preprocess = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])

    def __getitem__(self, index):
        filename, label = self.data[index]
        path = f"{self.root}/{filename}"
        # The context manager releases the file handle deterministically.
        # convert("RGB") guarantees the three channels Normalize expects, even
        # for grayscale, palette or RGBA files.
        with Image.open(path) as img:
            input_tensor = self.preprocess(img.convert("RGB"))
        # NOTE(review): the tensor is moved to the device here so existing
        # callers keep receiving device tensors. Revisit if DataLoader worker
        # processes are enabled; moving batches in the training loop is the
        # usual pattern.
        input_tensor = input_tensor.to(device)

        if self.return_filename:
            return input_tensor, filename
        return input_tensor, label

    def __len__(self):
        return len(self.data)


# Model
Implement ResNet-34 variants with one, two, and four classification heads (one head per predicted character).

In [4]:
class ResidualBlock(nn.Module):
    """Basic two-convolution residual block.

    The main path (``left``) is conv3x3 -> BN -> ReLU -> conv3x3 -> BN, with
    ``stride`` applied by the first convolution. The skip path (``shortcut``)
    is the identity unless the stride or channel count changes, in which case
    a 1x1 convolution + BN projects the input to the output shape.
    """

    def __init__(self, inchannel, outchannel, stride=1):
        super().__init__()
        main_path = [
            nn.Conv2d(inchannel, outchannel, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(outchannel, track_running_stats=True),
            nn.ReLU(inplace=True),
            nn.Conv2d(outchannel, outchannel, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(outchannel, track_running_stats=True),
        ]
        self.left = nn.Sequential(*main_path)

        shape_preserved = stride == 1 and inchannel == outchannel
        if shape_preserved:
            # An empty Sequential returns its input unchanged.
            self.shortcut = nn.Sequential()
        else:
            self.shortcut = nn.Sequential(
                nn.Conv2d(inchannel, outchannel, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(outchannel, track_running_stats=True),
            )

    def forward(self, x):
        """Return ``relu(left(x) + shortcut(x))``."""
        return F.relu(self.left(x) + self.shortcut(x))

class _ResNet34Base(nn.Module):
    """ResNet-34 trunk shared by the single-, two- and four-head models.

    Layout: a 3x3 stem convolution (3 -> 64 channels, stride 1), four stages of
    3, 4, 6 and 3 residual blocks with 64, 128, 256 and 512 channels, global
    average pooling, dropout (p=0.5), then ``num_heads`` independent linear
    classifiers stored as ``fc1`` ... ``fcN``.

    Attribute names (``conv1``, ``layer1``-``layer4``, ``drop``, ``fcK``) are the
    ones the three model classes used before they shared this base, so
    ``state_dict`` keys are unchanged.

    Args:
        ResidualBlock: Block class called as ``block(in_channels, out_channels, stride)``.
        num_classes: Output size of every head.
        num_heads: Number of linear heads to create.
    """

    def __init__(self, ResidualBlock, num_classes=62, num_heads=1):
        super().__init__()
        # Running input-channel count; make_layer updates it stage by stage.
        self.inchannel = 64
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(64, track_running_stats=True),
            nn.ReLU(),
        )
        # Blocks per stage: ResNet-18 uses 2-2-2-2, ResNet-34 uses 3-4-6-3.
        self.layer1 = self.make_layer(ResidualBlock, 64, 3, stride=1)
        self.layer2 = self.make_layer(ResidualBlock, 128, 4, stride=2)
        self.layer3 = self.make_layer(ResidualBlock, 256, 6, stride=2)
        self.layer4 = self.make_layer(ResidualBlock, 512, 3, stride=2)
        self.drop = nn.Dropout(0.5)
        self._num_heads = num_heads
        for head in range(1, num_heads + 1):
            # Assigning a Module attribute registers it as a sub-module.
            setattr(self, f"fc{head}", nn.Linear(512, num_classes))

    def make_layer(self, block, channels, num_blocks, stride):
        """Build one stage: the first block uses ``stride``, the rest use stride 1."""
        layers = []
        for block_stride in [stride] + [1] * (num_blocks - 1):
            layers.append(block(self.inchannel, channels, block_stride))
            self.inchannel = channels
        return nn.Sequential(*layers)

    def _head_logits(self, x):
        """Run the trunk and return a tuple with the logits of every head."""
        x = self.conv1(x)
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)
        # Functional pooling avoids constructing a new module on every call.
        x = F.adaptive_avg_pool2d(x, 1)
        x = torch.flatten(x, 1)
        x = self.drop(x)
        return tuple(
            getattr(self, f"fc{head}")(x) for head in range(1, self._num_heads + 1)
        )


class ResNet34_1(_ResNet34Base):
    """ResNet-34 with one classification head; ``forward`` returns a single logits tensor."""

    def __init__(self, ResidualBlock, num_classes=62):
        super().__init__(ResidualBlock, num_classes, num_heads=1)

    def forward(self, x):
        (y1,) = self._head_logits(x)
        return y1


class ResNet34_2(_ResNet34Base):
    """ResNet-34 with two classification heads; ``forward`` returns ``(y1, y2)``."""

    def __init__(self, ResidualBlock, num_classes=62):
        super().__init__(ResidualBlock, num_classes, num_heads=2)

    def forward(self, x):
        y1, y2 = self._head_logits(x)
        return y1, y2


class ResNet34_3(_ResNet34Base):
    """ResNet-34 with four classification heads; ``forward`` returns ``(y1, y2, y3, y4)``."""

    def __init__(self, ResidualBlock, num_classes=62):
        super().__init__(ResidualBlock, num_classes, num_heads=4)

    def forward(self, x):
        y1, y2, y3, y4 = self._head_logits(x)
        return y1, y2, y3, y4


In [5]:
# Root folder of the training data; annotation CSV paths and image file names
# used in the cells below are resolved against it.
TRAIN_PATH = "dataset/train"
# Presumably the root folder of the test data; it is not referenced in the
# cells visible here, so verify against the inference code.
TEST_PATH = "dataset/test"
# train_data_t1 = []
# val_data_t1 = []
# train_data_t2 = []
# val_data_t2 = []
# train_data_t3 = []
# val_data_t3 = []

# np.random.seed(0)
# with open(f'{TRAIN_PATH}/annotations.csv', newline='') as csvfile:
#     for row in csv.reader(csvfile, delimiter=','):
#         if row[0].startswith("task1"):
#             if np.random.random() < 0.8:
#                 train_data_t1.append(row)
#             else:
#                 val_data_t1.append(row)
        
#         if row[0].startswith("task2"):
#             if np.random.random() < 0.8:
#                 train_data_t2.append(row)
#             else:
#                 val_data_t2.append(row)
        
#         if row[0].startswith("task3"):
#             if np.random.random() < 0.8:
#                 train_data_t3.append(row)
#             else:
#                 val_data_t3.append(row)

# print(len(train_data_t1))
# print(len(val_data_t1))
# print(len(train_data_t2))
# print(len(val_data_t2))
# print(len(train_data_t3))
# print(len(val_data_t3))

# Task 1: Augmented Data

In [6]:
# Gather [relative_path, label] pairs for task 1 from both augmentation folders.
total_data_t1 = []

# "output" annotations: column 1 holds the file name and column 2 the label.
# Only rows whose file name starts with "task1" are kept.
with open(f'{TRAIN_PATH}/task1/output/annotations.csv', newline='') as csvfile:
    total_data_t1 += [
        ['task1/output/' + row[1], row[2]]
        for row in csv.reader(csvfile, delimiter=',')
        if row[1].startswith("task1")
    ]

print(len(total_data_t1))

# "gen" annotations: column 0 holds the file name and column 1 the label.
# Every row is used.
with open(f'{TRAIN_PATH}/task1/gen/annotations.csv', newline='') as csvfile:
    total_data_t1 += [
        ['task1/gen/' + row[0], row[1]]
        for row in csv.reader(csvfile, delimiter=',')
    ]

print(len(total_data_t1))

21000
41000


# Task 2: Augmented Data

In [7]:
# Gather [relative_path, label] pairs for task 2 from both augmentation folders.
total_data_t2 = []

# "output" annotations: column 1 holds the file name and column 2 the label.
# Only rows whose file name starts with "task2" are kept.
with open(f'{TRAIN_PATH}/task2/output/annotations.csv', newline='') as csvfile:
    total_data_t2 += [
        ['task2/output/' + row[1], row[2]]
        for row in csv.reader(csvfile, delimiter=',')
        if row[1].startswith("task2")
    ]

print(len(total_data_t2))

# "gen" annotations: column 0 holds the file name and column 1 the label.
# Every row is used.
with open(f'{TRAIN_PATH}/task2/gen/annotations.csv', newline='') as csvfile:
    total_data_t2 += [
        ['task2/gen/' + row[0], row[1]]
        for row in csv.reader(csvfile, delimiter=',')
    ]

print(len(total_data_t2))

21000
41000


# Task 3: Augmented Data

In [8]:
# Gather [relative_path, label] pairs for task 3 from both augmentation folders.
total_data_t3 = []

# "output" annotations: column 1 holds the file name and column 2 the label.
# Only rows whose file name starts with "task3" are kept.
with open(f'{TRAIN_PATH}/task3/output/annotations.csv', newline='') as csvfile:
    total_data_t3 += [
        ['task3/output/' + row[1], row[2]]
        for row in csv.reader(csvfile, delimiter=',')
        if row[1].startswith("task3")
    ]

print(len(total_data_t3))

# "gen" annotations: column 0 holds the file name and column 1 the label.
# Every row is used.
with open(f'{TRAIN_PATH}/task3/gen/annotations.csv', newline='') as csvfile:
    total_data_t3 += [
        ['task3/gen/' + row[0], row[1]]
        for row in csv.reader(csvfile, delimiter=',')
    ]

print(len(total_data_t3))

36000
71000


In [9]:
# Sizes of the augmented-only lists, before the original annotations are merged in.
for samples in (total_data_t1, total_data_t2, total_data_t3):
    print(len(samples))
print()

# Route every row of the original annotation file to its task list, keyed by the
# five-character prefix of the file name in column 0.
task_buckets = {"task1": total_data_t1, "task2": total_data_t2, "task3": total_data_t3}
with open(f'{TRAIN_PATH}/annotations.csv', newline='') as csvfile:
    for row in csv.reader(csvfile, delimiter=','):
        bucket = task_buckets.get(row[0][:5])
        if bucket is not None:
            bucket.append(row)

# Sizes after merging.
for samples in (total_data_t1, total_data_t2, total_data_t3):
    print(len(samples))
print()


def _split_train_val(samples, train_fraction=0.8):
    """Send each sample to train with probability ``train_fraction``, else to validation.

    One ``np.random.random()`` draw is consumed per sample, in order.
    """
    train_part, val_part = [], []
    for sample in samples:
        target = train_part if np.random.random() < train_fraction else val_part
        target.append(sample)
    return train_part, val_part


# Fixed seed so the split is reproducible; tasks are split in order 1, 2, 3.
np.random.seed(0)
train_data_t1, val_data_t1 = _split_train_val(total_data_t1)
train_data_t2, val_data_t2 = _split_train_val(total_data_t2)
train_data_t3, val_data_t3 = _split_train_val(total_data_t3)

for part in (train_data_t1, val_data_t1,
             train_data_t2, val_data_t2,
             train_data_t3, val_data_t3):
    print(len(part))

41000
41000
71000

43000
43500
75000

34446
8554
34774
8726
59902
15098


# Task 1

In [10]:
# Mini-batch size used by both task 1 loaders.
bs = 64

# Training split: reshuffled at the start of every epoch.
train_ds = TaskDataset(data=train_data_t1, root=TRAIN_PATH)
train_dl = DataLoader(dataset=train_ds, batch_size=bs, shuffle=True)

# Validation split: iterated in a fixed order.
val_ds = TaskDataset(data=val_data_t1, root=TRAIN_PATH)
val_dl = DataLoader(dataset=val_ds, batch_size=bs, shuffle=False)

In [11]:
model = ResNet34_1(ResidualBlock).to(device)

lr = 1e-3
totalEpoch = 50
# Other optimizers considered here: Adam, AdamW, NAdam.
optimizer = torch.optim.RAdam(model.parameters(), lr=lr, betas=(0.9, 0.999), weight_decay=6.5e-4)
# Halve the learning rate every 10 epochs.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)
loss_fn = nn.CrossEntropyLoss()

# Create the checkpoint folder up front so a missing directory cannot make
# torch.save fail after the whole training run has finished.
os.makedirs('models', exist_ok=True)


def _task1_targets(labels):
    """Convert a batch of single-character labels to a LongTensor of class indices on ``device``."""
    return torch.tensor([alphabets2index[l] for l in labels], dtype=torch.long, device=device)


for epoch in range(totalEpoch):
    print(f"Epoch {epoch+1}:")

    # ---- training pass ----
    total_loss = 0  # sum of per-batch losses over the epoch
    train_sample_count = 0
    train_correct_count = 0
    model.train()
    for image, label in train_dl:
        image = image.to(device)
        label = _task1_targets(label)

        pred = model(image)
        loss = loss_fn(pred, label)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        train_sample_count += len(image)
        train_correct_count += (pred.argmax(dim=1) == label).sum().item()
    scheduler.step()

    # ---- validation pass ----
    sample_count = 0
    correct_count = 0
    model.eval()
    # No gradients are needed for evaluation; skipping them saves memory and time.
    with torch.no_grad():
        for image, label in val_dl:
            image = image.to(device)
            label = _task1_targets(label)

            pred = model(image).argmax(dim=1)

            sample_count += len(image)
            correct_count += (pred == label).sum().item()

    print("loss:", total_loss)
    print("accuracy (train):", train_correct_count / train_sample_count)
    print("accuracy (validation):", correct_count / sample_count)

torch.save(model.state_dict(), 'models/task1.pt')
print('task1 model saved')

Epoch 1:
loss: 1069.8796004652977
accuracy (train): 0.4814492248737154
accuracy (validation): 0.5176525602057517
Epoch 2:
loss: 177.52292348444462
accuracy (train): 0.897549788074087
accuracy (validation): 0.7718026654196867
Epoch 3:
loss: 131.54620141535997
accuracy (train): 0.9231550833188179
accuracy (validation): 0.8265139116202946
Epoch 4:
loss: 116.07230725884438
accuracy (train): 0.9323578935144864
accuracy (validation): 0.7408230067804535
Epoch 5:
loss: 105.79117263481021
accuracy (train): 0.9404575277245544
accuracy (validation): 0.7947159223754968
Epoch 6:
loss: 98.53949984163046
accuracy (train): 0.9451605411368519
accuracy (validation): 0.8235913023147066
Epoch 7:
loss: 90.15432460978627
accuracy (train): 0.9485281309876328
accuracy (validation): 0.787117138180968
Epoch 8:
loss: 86.66740427538753
accuracy (train): 0.9509086686407711
accuracy (validation): 0.8681318681318682
Epoch 9:
loss: 75.03829645551741
accuracy (train): 0.9578760959182488
accuracy (validation): 0.930324

# Task 2

In [12]:
# Mini-batch size used by both task 2 loaders.
bs = 64

# Training split: reshuffled at the start of every epoch.
train_ds = TaskDataset(data=train_data_t2, root=TRAIN_PATH)
train_dl = DataLoader(dataset=train_ds, batch_size=bs, shuffle=True)

# Validation split: iterated in a fixed order.
val_ds = TaskDataset(data=val_data_t2, root=TRAIN_PATH)
val_dl = DataLoader(dataset=val_ds, batch_size=bs, shuffle=False)

In [13]:
model = ResNet34_2(ResidualBlock).to(device)

lr = 1e-3
totalEpoch = 50
# Other optimizers considered here: Adam, AdamW, NAdam.
optimizer = torch.optim.RAdam(model.parameters(), lr=lr, betas=(0.9, 0.999), weight_decay=6.5e-4)
# Halve the learning rate every 10 epochs.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)
loss_fn = nn.CrossEntropyLoss()

# Create the checkpoint folder up front so a missing directory cannot make
# torch.save fail after the whole training run has finished.
os.makedirs('models', exist_ok=True)


def _task2_targets(labels):
    """Encode a batch of two-character labels as two LongTensors on ``device``.

    A one-character label is treated as having lost a leading "0" in the CSV
    and is restored by prepending "0" before encoding.
    """
    fixed = ["0" + text if len(text) == 1 else text for text in labels]
    first = torch.tensor([alphabets2index[text[0]] for text in fixed], dtype=torch.long, device=device)
    second = torch.tensor([alphabets2index[text[1]] for text in fixed], dtype=torch.long, device=device)
    return first, second


def _task2_exact_matches(pred_1, pred_2, label_1, label_2):
    """Count samples whose two predicted characters are both correct (one device sync per batch)."""
    hit = (pred_1.argmax(dim=1) == label_1) & (pred_2.argmax(dim=1) == label_2)
    return hit.sum().item()


for epoch in range(totalEpoch):
    print(f"Epoch {epoch+1}:")

    # ---- training pass ----
    total_loss = 0  # sum of per-batch losses over the epoch
    train_sample_count = 0
    train_correct_count = 0
    model.train()
    for image, label in train_dl:
        image = image.to(device)
        label_1, label_2 = _task2_targets(label)

        pred_1, pred_2 = model(image)
        loss = loss_fn(pred_1, label_1) + loss_fn(pred_2, label_2)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        train_sample_count += len(image)
        train_correct_count += _task2_exact_matches(pred_1, pred_2, label_1, label_2)
    scheduler.step()

    # ---- validation pass ----
    sample_count = 0
    correct_count = 0
    model.eval()
    # No gradients are needed for evaluation; skipping them saves memory and time.
    with torch.no_grad():
        for image, label in val_dl:
            image = image.to(device)
            label_1, label_2 = _task2_targets(label)

            pred_1, pred_2 = model(image)

            sample_count += len(image)
            correct_count += _task2_exact_matches(pred_1, pred_2, label_1, label_2)

    print("loss:", total_loss)
    print("accuracy (train):", train_correct_count / train_sample_count)
    print("accuracy (validation):", correct_count / sample_count)

torch.save(model.state_dict(), 'models/task2.pt')
print('task2 model saved')

Epoch 1:
loss: 4262.9072341918945
accuracy (train): 0.005032495542646805
accuracy (validation): 0.052143020857208344
Epoch 2:
loss: 1159.7389877438545
accuracy (train): 0.49462241904871457
accuracy (validation): 0.7764153105661242
Epoch 3:
loss: 379.31259605288506
accuracy (train): 0.8101167538965894
accuracy (validation): 0.8072427228970892
Epoch 4:
loss: 287.90983855724335
accuracy (train): 0.8544602289066544
accuracy (validation): 0.7094888837955535
Epoch 5:
loss: 241.81379851698875
accuracy (train): 0.8732961407948467
accuracy (validation): 0.8713041485216594
Epoch 6:
loss: 225.64079688489437
accuracy (train): 0.8833898889975269
accuracy (validation): 0.9012147604859042
Epoch 7:
loss: 195.57864293456078
accuracy (train): 0.8973370909300051
accuracy (validation): 0.8788677515471006
Epoch 8:
loss: 191.6329801082611
accuracy (train): 0.9003565882555933
accuracy (validation): 0.9069447627779051
Epoch 9:
loss: 162.67111755907536
accuracy (train): 0.914275033070685
accuracy (validation):

# Task 3

In [14]:
# Mini-batch size used by both task 3 loaders.
bs = 64

# Training split: reshuffled at the start of every epoch.
train_ds = TaskDataset(data=train_data_t3, root=TRAIN_PATH)
train_dl = DataLoader(dataset=train_ds, batch_size=bs, shuffle=True)

# Validation split: iterated in a fixed order.
val_ds = TaskDataset(data=val_data_t3, root=TRAIN_PATH)
val_dl = DataLoader(dataset=val_ds, batch_size=bs, shuffle=False)

In [15]:
model = ResNet34_3(ResidualBlock).to(device)

lr = 1e-3
totalEpoch = 75
# Other optimizers considered here: Adam, AdamW, NAdam.
optimizer = torch.optim.RAdam(model.parameters(), lr=lr, betas=(0.9, 0.999), weight_decay=6.5e-4)
# Halve the learning rate every 15 epochs.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=15, gamma=0.5)
loss_fn = nn.CrossEntropyLoss()

# Create the checkpoint folder up front so a missing directory cannot make
# torch.save fail after the whole training run has finished.
os.makedirs('models', exist_ok=True)


def _prepare_task3_batch(image, label):
    """Encode four-character labels and drop samples whose label cannot be encoded.

    A three-character label is treated as having lost a leading "0" in the CSV
    and is restored by prepending "0" to the whole label. Previously only its
    first character was kept, which left a two-character string and caused the
    sample to be discarded. Labels that are still shorter than four characters,
    or that contain a character outside ``alphabets``, are skipped.

    Returns:
        ``(image, targets)`` with the kept images on ``device`` and ``targets``
        a LongTensor of shape ``(kept, 4)``, or ``(None, None)`` when no sample
        in the batch is usable.
    """
    keep, encoded = [], []
    for i, text in enumerate(label):
        if len(text) == 3:
            text = "0" + text
        if len(text) < 4:
            continue
        try:
            encoded.append([alphabets2index[ch] for ch in text[:4]])
        except KeyError:
            continue
        keep.append(i)

    if not keep:
        return None, None
    if len(keep) < len(label):
        # Select the surviving rows directly instead of round-tripping through NumPy.
        keep_idx = torch.tensor(keep, dtype=torch.long, device=image.device)
        image = image.index_select(0, keep_idx)
    targets = torch.tensor(encoded, dtype=torch.long, device=device)
    return image.to(device), targets


def _task3_exact_matches(preds, targets):
    """Count samples for which all four predicted characters are correct."""
    chars = torch.stack([logits.argmax(dim=1) for logits in preds], dim=1)
    return (chars == targets).all(dim=1).sum().item()


for epoch in range(totalEpoch):
    print(f"Epoch {epoch+1}:")

    # ---- training pass ----
    total_loss = 0  # sum of per-batch losses over the epoch
    train_sample_count = 0
    train_correct_count = 0
    model.train()
    for image, label in train_dl:
        image, targets = _prepare_task3_batch(image, label)
        if image is None:
            # Every label in this batch was unusable; nothing to train on.
            continue

        preds = model(image)
        # Total loss is the sum of the four per-character cross-entropies.
        loss = sum(loss_fn(logits, targets[:, k]) for k, logits in enumerate(preds))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        train_sample_count += len(image)
        train_correct_count += _task3_exact_matches(preds, targets)
    scheduler.step()

    # ---- validation pass ----
    sample_count = 0
    correct_count = 0
    model.eval()
    # No gradients are needed for evaluation; skipping them saves memory and time.
    with torch.no_grad():
        for image, label in val_dl:
            image, targets = _prepare_task3_batch(image, label)
            if image is None:
                continue

            preds = model(image)

            sample_count += len(image)
            correct_count += _task3_exact_matches(preds, targets)

    print("loss:", total_loss)
    print("accuracy (train):", train_correct_count / train_sample_count)
    print("accuracy (validation):", correct_count / sample_count)

torch.save(model.state_dict(), 'models/task3.pt')
print('task3 model saved')

Epoch 1:
loss: 15233.403171539307
accuracy (train): 0.0
accuracy (validation): 0.0
Epoch 2:
loss: 11428.269597530365
accuracy (train): 0.017932278100581046
accuracy (validation): 0.18640699523052465
Epoch 3:
loss: 2637.840287208557
accuracy (train): 0.4230949041608228
accuracy (validation): 0.7082008479067302
Epoch 4:
loss: 1232.0390899181366
accuracy (train): 0.6799572563948441
accuracy (validation): 0.7215156332803392
Epoch 5:
loss: 909.5706135034561
accuracy (train): 0.7597842783677285
accuracy (validation): 0.8436009538950715
Epoch 6:
loss: 752.7179427742958
accuracy (train): 0.8002738262205303
accuracy (validation): 0.8533386327503975
Epoch 7:
loss: 661.1449235379696
accuracy (train): 0.8245675549322113
accuracy (validation): 0.8771860095389508
Epoch 8:
loss: 593.718759894371
accuracy (train): 0.8418820543645228
accuracy (validation): 0.8991123476417594
Epoch 9:
loss: 562.4475064873695
accuracy (train): 0.8513490950377346
accuracy (validation): 0.8856650768415474
Epoch 10:
loss: 5

loss: 63.85357578098774
accuracy (train): 0.9977459426968543
accuracy (validation): 0.9726417594064652
Epoch 75:
loss: 62.062156330794096
accuracy (train): 0.9977960328591464
accuracy (validation): 0.9699920508744038
task3 model saved
