In [2]:
import copy
import os
import random

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import models
from torchvision.transforms import transforms

In [3]:
# Seed every random number generator this notebook relies on (NumPy, the
# stdlib `random` module, and PyTorch on CPU and on all CUDA devices) with the
# same value so that repeated runs start from the same state.
seed = 2023
for _seed_fn in (np.random.seed, random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
    _seed_fn(seed)

# Request deterministic cuDNN kernels.
torch.backends.cudnn.deterministic = True

In [5]:
# Target size passed to Resize for every image.
img_size = (112, 112)
# Per-channel mean / std used by Normalize (these values match the commonly
# used ImageNet statistics).
norm_mean = [0.485, 0.456, 0.406]
norm_std = [0.229, 0.224, 0.225]

# Shared preprocessing pipeline: resize -> tensor -> channel-wise normalisation.
_preprocess_steps = [transforms.Resize(img_size), transforms.ToTensor()]
_preprocess_steps.append(transforms.Normalize(mean=norm_mean, std=norm_std))
trf = transforms.Compose(_preprocess_steps)
class ImgDataset(Dataset):
    """Map-style dataset that yields ``(image_tensor, label_tensor)`` pairs.

    Args:
        img_paths: Indexable sequence of image file paths.
        labels: Indexable sequence of integer class ids, aligned with
            ``img_paths`` position by position.
        trf: Callable applied to the RGB ``PIL.Image`` to produce the sample
            returned to the caller.

    Raises:
        ValueError: If ``img_paths`` and ``labels`` differ in length.
    """

    def __init__(self, img_paths, labels, trf):
        # __len__ is derived from the labels, so a length mismatch would
        # otherwise silently drop images or fail much later with an IndexError.
        if len(img_paths) != len(labels):
            raise ValueError(
                "img_paths and labels must have the same length, got {} and {}".format(
                    len(img_paths), len(labels)
                )
            )
        self.img_paths = img_paths
        self.labels = labels
        self.trf = trf

    def __len__(self):
        """Return the number of samples."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Load, convert to RGB and transform the image at position ``idx``.

        Returns:
            tuple: ``(img, target)`` where ``img`` is the output of ``trf`` and
            ``target`` is a ``torch.long`` scalar tensor holding the label.
        """
        path = self.img_paths[idx]
        # The context manager closes the underlying file right after the
        # pixels have been decoded by convert(), instead of leaving the handle
        # open until garbage collection.
        with Image.open(path) as raw:
            img = raw.convert('RGB')
        img = self.trf(img)
        target = torch.tensor(self.labels[idx], dtype=torch.long)

        return img, target

In [6]:
batch_size = 32
# Validation and test loaders use half of the training batch size.
_eval_batch_size = batch_size // 2

# Each CSV is read for its `img_path` and `target` columns.
train_df, val_df, test_df = [
    pd.read_csv(csv_path)
    for csv_path in (
        './data/train_img_path.csv',
        './data/val_img_path.csv',
        './data/test_img_path.csv',
    )
]


def _as_dataset(frame):
    """Wrap one split's path / target columns in an ImgDataset using the shared transform."""
    return ImgDataset(frame['img_path'].values, frame['target'].values, trf)


train_dataset = _as_dataset(train_df)
val_dataset = _as_dataset(val_df)
test_dataset = _as_dataset(test_df)

# Only the training split is shuffled.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=_eval_batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=_eval_batch_size, shuffle=False)

In [7]:
# Sanity check: fetch a single batch (if the loader yields any) and show the
# image and label tensor shapes.
_first_batch = next(iter(train_loader), None)
if _first_batch is not None:
    img, label = _first_batch
    print(img.shape)
    print(label.shape)

torch.Size([32, 3, 112, 112])
torch.Size([32])


In [8]:
class ImgClassifyModel(nn.Module):
    """EfficientNet-B5 backbone with its final linear layer resized to ``class_num`` outputs.

    Args:
        class_num: Number of output classes for the replacement head.
        pretrained: Optional path to a backbone ``state_dict`` checkpoint that
            is loaded *before* the head is replaced. ``None`` (or any falsy
            value) keeps the random initialisation.
    """

    def __init__(self, class_num, pretrained=None):
        super().__init__()
        # Called without arguments so that the network is randomly initialised
        # under both the legacy `pretrained=` and the newer `weights=`
        # torchvision signatures (the `pretrained` keyword is deprecated).
        self.model = models.efficientnet_b5()
        if pretrained:
            # map_location='cpu' lets a checkpoint that was saved from a GPU be
            # loaded on a CPU-only machine; the caller moves the model to its
            # target device afterwards.
            # NOTE(review): torch.load unpickles the file - only load trusted checkpoints.
            state_dict = torch.load(pretrained, map_location='cpu')
            self.model.load_state_dict(state_dict)
        # Read the head's input width from the existing layer instead of
        # hard-coding it, so a different EfficientNet variant can be swapped in
        # without editing this line.
        head = self.model.classifier[1]
        self.model.classifier[1] = nn.Linear(head.in_features, class_num)

    def forward(self, x):
        """Return the backbone's class logits for the image batch ``x``."""
        return self.model(x)

class_num = 899
model_path = './model/efficientnet_b5_lukemelas-b6417697.pth'
# model_path = None
model = ImgClassifyModel(class_num=class_num, pretrained=model_path)

# Smoke test of the output shape. It runs in eval mode under no_grad so that
# the all-zero dummy batch neither updates the BatchNorm running statistics of
# the freshly loaded backbone nor builds an autograd graph; the previous
# train/eval mode is restored afterwards.
_was_training = model.training
model.eval()
with torch.no_grad():
    inputs = torch.zeros((32, 3, 56, 56))
    outputs = model(inputs)
model.train(_was_training)
print(outputs.shape)

torch.Size([32, 899])


In [8]:
def eval_acc(y, pred_y):
    """Return the fraction of samples whose arg-max prediction equals the label.

    Args:
        y: Tensor of ground-truth class ids.
        pred_y: Tensor of per-class scores; the class is taken along the last
            dimension.

    Returns:
        The mean of the element-wise match mask, as computed by NumPy.
    """
    predicted_classes = torch.argmax(pred_y.detach(), dim=-1)
    hit_mask = torch.eq(y, predicted_classes)
    return hit_mask.cpu().numpy().mean()

def train(epoch, model, iterator, optimizer, loss_fct, scheduler=None, device='cpu'):
    """Run one training epoch and print/return sample-weighted loss and accuracy.

    Args:
        epoch: Epoch number, used only in the printed summary.
        model: Module being optimised; switched to train mode.
        iterator: Iterable yielding ``(img, label)`` tensor batches.
        optimizer: Optimizer stepped once per batch.
        loss_fct: Callable ``loss_fct(pred, label)`` returning a scalar loss.
            The per-sample weighting below assumes it averages over the batch
            (the default reduction of ``nn.CrossEntropyLoss``).
        scheduler: Optional LR scheduler stepped once per *batch*.
        device: Device the batches are moved to.

    Returns:
        tuple: ``(mean_loss, accuracy)`` over all samples seen in the epoch.

    Raises:
        ValueError: If ``iterator`` yields no samples.
    """
    model.train()
    loss_sum = 0.0
    correct = 0
    seen = 0
    for img, label in iterator:
        img = img.to(device)
        label = label.to(device)

        # Clear gradients before the backward pass so that anything left over
        # from code that ran before this epoch cannot leak into the first step.
        optimizer.zero_grad()
        pred = model(img)
        loss = loss_fct(pred, label)
        loss.backward()
        optimizer.step()
        if scheduler is not None:
            scheduler.step()

        # Weight by batch size: averaging per-batch means would over-weight a
        # smaller final batch.
        n = label.size(0)
        loss_sum += loss.item() * n
        correct += (pred.detach().argmax(dim=-1) == label).sum().item()
        seen += n

    if seen == 0:
        raise ValueError("train() received an iterator that yielded no samples")

    mean_loss = loss_sum / seen
    mean_acc = correct / seen
    print("Epoch: {}, Train Loss: {:.4f}, Train Acc: {:.4f}".format(epoch, mean_loss, mean_acc))
    return mean_loss, mean_acc

def validate(epoch, model, iterator, loss_fct, device):
    """Evaluate ``model`` on ``iterator`` without gradients and print/return metrics.

    Args:
        epoch: Epoch number, used only in the printed summary.
        model: Module to evaluate; switched to eval mode.
        iterator: Iterable yielding ``(img, label)`` tensor batches.
        loss_fct: Callable ``loss_fct(pred, label)`` returning a scalar loss.
            The per-sample weighting below assumes it averages over the batch.
        device: Device the batches are moved to.

    Returns:
        tuple: ``(model, mean_loss, accuracy)``. ``model`` is the very same
        object that was passed in (not a copy), so callers that want a
        snapshot must copy it themselves.

    Raises:
        ValueError: If ``iterator`` yields no samples.
    """
    model.eval()
    loss_sum = 0.0
    correct = 0
    seen = 0
    with torch.no_grad():
        for img, label in iterator:
            img = img.to(device)
            label = label.to(device)

            pred = model(img)
            loss = loss_fct(pred, label)

            # Weight by batch size so a smaller final batch is not over-weighted.
            n = label.size(0)
            loss_sum += loss.item() * n
            correct += (pred.argmax(dim=-1) == label).sum().item()
            seen += n

    if seen == 0:
        raise ValueError("validate() received an iterator that yielded no samples")

    mean_loss = loss_sum / seen
    mean_acc = correct / seen
    print("Epoch: {}, Val Loss: {:.4f}, Val Acc: {:.4f}".format(epoch, mean_loss, mean_acc))
    return model, mean_loss, mean_acc

In [9]:
%%time
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
model = model.to(device)
epochs = 30
lr = 0.0005
# opt = torch.optim.Adam(model.parameters(), lr=lr)
opt = torch.optim.RMSprop(model.parameters(), alpha=0.9, momentum=0.9, lr=lr, weight_decay=1e-5)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer=opt, step_size=6, gamma=0.1)
loss_func = nn.CrossEntropyLoss()

best_model = None
best_val_loss = 1e10
best_val_acc = 1e-10

print("start train=============================")
for epoch in range(1, epochs+1):
    train(epoch, model=model, iterator=train_loader, loss_fct=loss_func, optimizer=opt, device=device)
    candidate_model, loss, acc = validate(epoch, model=model, iterator=val_loader, loss_fct=loss_func, device=device)
    scheduler.step()
    
    if loss < best_val_loss and acc > best_val_acc:
        best_model = candidate_model
        best_val_loss, best_val_acc = loss, acc
    
    print("===========================================")
print("train finish=============================")

Epoch: 1, Train Loss: 6.7634, Train Acc: 0.0090
Epoch: 1, Val Loss: 6.9430, Val Acc: 0.0031
Epoch: 2, Train Loss: 6.4127, Train Acc: 0.0380
Epoch: 2, Val Loss: 6.1673, Val Acc: 0.0824
Epoch: 3, Train Loss: 5.6817, Train Acc: 0.1256
Epoch: 3, Val Loss: 4.9236, Val Acc: 0.2132
Epoch: 4, Train Loss: 4.3129, Train Acc: 0.2947
Epoch: 4, Val Loss: 3.0310, Val Acc: 0.4459
Epoch: 5, Train Loss: 2.7447, Train Acc: 0.5046
Epoch: 5, Val Loss: 1.6125, Val Acc: 0.6693
Epoch: 6, Train Loss: 1.6824, Train Acc: 0.6654
Epoch: 6, Val Loss: 0.8928, Val Acc: 0.7970
Epoch: 7, Train Loss: 0.6219, Train Acc: 0.8852
Epoch: 7, Val Loss: 0.2303, Val Acc: 0.9619
Epoch: 8, Train Loss: 0.4058, Train Acc: 0.9410
Epoch: 8, Val Loss: 0.1720, Val Acc: 0.9747
Epoch: 9, Train Loss: 0.3350, Train Acc: 0.9568
Epoch: 9, Val Loss: 0.1581, Val Acc: 0.9765
Epoch: 10, Train Loss: 0.2855, Train Acc: 0.9647
Epoch: 10, Val Loss: 0.1416, Val Acc: 0.9792
Epoch: 11, Train Loss: 0.2618, Train Acc: 0.9713
Epoch: 11, Val Loss: 0.1271, 

In [10]:
# Test-set accuracy of the selected model, computed as correct / total so that
# a smaller final batch is not over-weighted (averaging per-batch accuracies
# would be).
best_model.eval()
test_correct = 0
test_total = 0
with torch.no_grad():
    for img, label in test_loader:
        img = img.to(device)
        label = label.to(device)
        
        pred = best_model(img)
        
        test_correct += (pred.argmax(dim=-1) == label).sum().item()
        test_total += label.size(0)

test_acc = test_correct / test_total
test_acc

0.9920212765957447

In [11]:
# Display the best validation accuracy recorded by the training loop's model-selection step.
best_val_acc

0.9889184397163121

In [12]:
# NOTE(review): scratch arithmetic (1% of 2244); what 2244 counts is not stated
# anywhere in this notebook - confirm its meaning or remove this cell.
2244 * 0.01

22.44

In [14]:
# torch.save(best_model.state_dict(), "./model/model_0802.pt")

In [9]:
# Build the id <-> class-name lookup tables from the entries of ./classes/.
# NOTE(review): os.listdir returns entries in arbitrary order, so these ids
# only line up with the training labels if the label CSVs were produced from
# the same listing order - confirm before relying on the decoded names.
chars = os.listdir('./classes/')
label_to_char = {idx: name for idx, name in enumerate(chars)}
char_to_label = {name: idx for idx, name in label_to_char.items()}

In [10]:
# Load every file in ./test/ into one (N, C, H, W) tensor.
# The directory listing is kept in `test_files` (same order as the rows of
# `test_imgs`) so predictions can be matched back to their files; os.listdir
# order is arbitrary, so re-listing the directory later is not a safe way to
# recover that pairing.
test_files = os.listdir('./test/')
_img_rows = []
for i in test_files:
    # Context manager: release the file handle as soon as the pixels are decoded.
    with Image.open(f'./test/{i}') as raw:
        img = trf(raw.convert('RGB'))
    _img_rows.append(img.unsqueeze(0))

if not _img_rows:
    raise ValueError("no images found in './test/'")

# Concatenate once at the end; growing the tensor with torch.cat inside the
# loop re-copies all previously loaded images on every iteration.
test_imgs = torch.cat(_img_rows, dim=0)

In [11]:
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
test_imgs = test_imgs.to(device)
# Rebuild the architecture without backbone weights, then restore the
# fine-tuned weights from the saved state_dict.
best_model = ImgClassifyModel(class_num=class_num, pretrained=None)
# map_location makes the load work when the checkpoint was written from a GPU
# but this session only has a CPU (or a different device).
# NOTE(review): torch.load unpickles the file - only load trusted checkpoints.
best_model.load_state_dict(torch.load('./model/model_0802.pt', map_location=device))
best_model = best_model.to(device)

In [12]:
best_model.eval()
# Inference only: disable autograd so no graph is kept for the whole test set,
# and run in fixed-size chunks so memory use does not grow with the number of
# test images.
_infer_batch_size = 64
with torch.no_grad():
    pred_y = torch.cat(
        [
            best_model(test_imgs[start:start + _infer_batch_size])
            for start in range(0, len(test_imgs), _infer_batch_size)
        ],
        dim=0,
    )

In [13]:
# Collapse the per-class scores to one predicted class id per image and move
# the result to a NumPy array on the host.
pred_y = torch.argmax(pred_y.detach(), dim=-1).cpu().numpy()

In [15]:
# Translate each predicted class id into its class name.
y = [label_to_char[class_idx] for class_idx in pred_y]