In [1]:
import os
import random
import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
import torch.nn.init as init
import torch.optim as optim
import torch.utils.data
from torch.utils.data import *
import numpy as np
import time
from PIL import Image
import matplotlib.pyplot as plt
from tqdm import tqdm
import easydict
import sys
import pickle
sys.path.append('./Whatiswrong')
sys.path.append('./Scatter')
sys.path.append('./RobustScanner')
sys.path.append('./Nchar_clf')

import re
import six
import math
import torchvision.transforms as transforms

import utils
from utils import *
import augs
import augs2
import BaseModel
import torch.distributed as dist
import en_dataset
import ko_dataset
from albumentations import GaussNoise, IAAAdditiveGaussianNoise, Compose, OneOf
from albumentations.pytorch import ToTensor
import albumentations
import evaluate
import cv2
from efficientnet_pytorch import EfficientNet
import Nchar_utils
import torch.multiprocessing as mp
# os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   
# os.environ["CUDA_VISIBLE_DEVICES"]= '2,3'

In [2]:
import importlib
# importlib.reload(EfficientNet)

TypeError: reload() argument must be a module

In [3]:
# Experiment options, followed by the three label converters built from them.
opt = easydict.EasyDict(dict(
    experiment_name=f'{utils.SaveDir_maker(base_model = "RobustScanner_nchar", base_model_dir = "./models")}',
    # Checkpoint path relative to ./models; use '' to skip loading a checkpoint.
    saved_model='RobustScanner_nchar_1106/1/best_accuracy_83.53.pth',
    manualSeed=1111,
    imgH=64,
    imgW=320,  # 64/4 * 20 (assume 20 is max seq)
    PAD=True,
    batch_size=128,
    # Character sets handed to the top / middle / bottom AttnLabelConverter below.
    top_char=' !"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZㄱㄴㄷㄹㅁㅂㅅㅇㅈㅊㅋㅌㅍㅎㄲㄸㅃㅆㅉ',
    middle_char=' ㅏㅑㅓㅕㅗㅛㅜㅠㅡㅣㅐㅒㅔㅖㅘㅙㅚㅝㅞㅟㅢ',
    bottom_char=' ㄱㄴㄷㄹㅁㅂㅅㅇㅈㅊㅋㅌㅍㅎㄲㄸㅃㅆㅉㄳㄵㄶㄺㄻㄼㄽㄾㄿㅀㅄ',
    batch_max_length=23,
    num_fiducial=20,
    output_channel=512,
    hidden_size=256,
    num_processes=2,
    # lr / rho / eps are the Adadelta arguments used by train().
    lr=1,
    rho=0.95,
    eps=1e-8,
    grad_clip=5,
    valInterval=1500,
    num_epoch=10,
    input_channel=3,
    FT=True,
    extract='resnet',  # alternative previously tried: 'efficientnet-b6'
    pred=' ',
    nchar_name='efficientnet-b0',
    nchar_pretrained_path='./Nchar_clf/models',
))

device = torch.device('cuda')

# One converter per character set, created in top -> middle -> bottom order.
top_converter, middle_converter, bottom_converter = (
    utils.AttnLabelConverter(charset, device)
    for charset in (opt.top_char, opt.middle_char, opt.bottom_char)
)

# Class counts come from each converter's final character list.
opt.top_n_cls = len(top_converter.character)
opt.mid_n_cls = len(middle_converter.character)
opt.bot_n_cls = len(bottom_converter.character)

In [4]:
# Load the pickled sample list and hold out the last 0.2% for validation.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open('./dataset_180', 'rb') as dataset_file:
    data = pickle.load(dataset_file)

# To write a small debugging subset instead:
# with open('./dataset_light', 'wb') as file:
#     pickle.dump(data[:1000], file)

split_idx = int(len(data) * 0.998)
train_data, test_data = data[:split_idx], data[split_idx:]

In [5]:
# Datasets and loaders for the N-char classifier (training and validation).
# NOTE(review): the training loader does not shuffle and its dataset is built
# with is_train=False -- confirm both are intentional.
# NOTE(review): drop_last=True on the validation loader discards the final
# partial batch, so validation metrics skip up to batch_size - 1 samples.
nchar_dataset = Nchar_utils.CustomDataset_clf(
    train_data,
    device=device,
    is_module=True,
    is_train=False,
)
nchar_dataloader = DataLoader(
    nchar_dataset,
    batch_size=opt.batch_size,
    pin_memory=True,
    drop_last=True,
    num_workers=10,
)

valid_dataset = Nchar_utils.CustomDataset_clf(
    test_data,
    device=device,
    is_module=True,
    is_train=False,
)
valid_loader = DataLoader(
    valid_dataset,
    batch_size=opt.batch_size,
    pin_memory=True,
    drop_last=True,
    num_workers=5,
)

In [None]:
# nchar_iterer = iter(nchar_dataloader)

In [None]:
# img, top, mid, bot = next(nchar_iterer)
# plt.imshow(img[0].permute(1,2,0))

In [7]:
def train(opt):
    """Train the top/middle/bottom recognizer and periodically validate it.

    Builds ``BaseModel.model(opt, device)``, initializes its weights, optionally
    restores ``./models/<opt.saved_model>``, wraps the model in ``DataParallel``
    and optimizes the sum of three cross-entropy losses with Adadelta.

    Every ``opt.valInterval`` iterations (within an epoch) it runs
    ``evaluate.validation_jamo`` on ``valid_loader``, steps the LR scheduler on
    the returned accuracy, saves the best checkpoints and appends a report to
    ``./models/<opt.experiment_name>/log_train.txt``.

    Relies on notebook-level names: ``device``, ``nchar_dataloader``,
    ``valid_loader``, ``top_converter``, ``middle_converter`` and
    ``bottom_converter``. The directory ``./models/<opt.experiment_name>`` must
    already exist.

    Args:
        opt: option object (EasyDict). Fields read here: imgH, imgW,
            num_fiducial, input_channel, output_channel, hidden_size,
            batch_max_length, saved_model, lr, rho, eps, grad_clip,
            valInterval, num_epoch, batch_size, experiment_name.

    Returns:
        None. Results are written to checkpoints and log files.
    """
    model = BaseModel.model(opt, device)
    print('model parameters. height {}, width {}, num of fiducial {}, input channel {}, output channel {}, hidden size {}, \
    batch max length {}'.format(opt.imgH, opt.imgW, opt.num_fiducial, opt.input_channel, opt.output_channel, opt.hidden_size, opt.batch_max_length))

    # weight initialization
    for name, param in model.named_parameters():
        if 'localization_fc2' in name:
            print(f'Skip {name} as it is already initializaed')
            continue
        try:
            if 'bias' in name:
                init.constant_(param, 0.0)
            elif 'weight' in name:
                init.kaiming_normal_(param)
        except Exception:
            # Initialization failed for this tensor (presumably a 1-D
            # normalization weight that kaiming init rejects): fill with ones.
            if 'weight' in name:
                param.data.fill_(1)
            continue

    # load pretrained model
    if opt.saved_model != '':
        checkpoint_path = os.path.join('./models', opt.saved_model)
        print(f'looking for pretrained model from {checkpoint_path}')
        try:
            model.load_state_dict(torch.load(checkpoint_path))
            print('loading complete ')
        except Exception as e:
            # Best effort: training continues from the freshly initialized weights.
            print(e)
            print('coud not find model')

    # data parallel for multi GPU
    # NOTE(review): device ids are hard-coded; this needs at least 3 visible GPUs.
    model = torch.nn.DataParallel(model, device_ids=[0, 1, 2]).to(device)
    model.train()

    # loss
    criterion = torch.nn.CrossEntropyLoss(ignore_index=0).to(device)  # ignore [GO] token = ignore index 0
    # Accumulates the training loss between two validation reports. It must live
    # outside the batch loop, otherwise only the last batch would be reported.
    loss_avg = utils.Averager()

    # keep only the parameters that require gradients
    filtered_parameters = [p for p in model.parameters() if p.requires_grad]
    print('Tranable params : ', sum(p.numel() for p in filtered_parameters))

    # optimizer
    optimizer = optim.Adadelta(filtered_parameters, lr=opt.lr, rho=opt.rho, eps=opt.eps)
    # mode='max' because the scheduler is stepped with validation accuracy.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', verbose=True, patience=2, factor=0.5)

    # opt log
    with open(f'./models/{opt.experiment_name}/opt.txt', 'a') as opt_file:
        opt_log = '---------------------Options-----------------\n'
        for k, v in vars(opt).items():
            opt_log += f'{str(k)} : {str(v)}\n'
        opt_log += '---------------------------------------------\n'
        opt_file.write(opt_log)

    # start training
    start_time = time.time()
    best_accuracy = -1
    best_norm_ED = -1

    # Samples actually consumed per epoch (the loader drops the last partial batch).
    samples_per_epoch = len(nchar_dataloader) * opt.batch_size
    # The log timestamp is shifted by +9h; adding the offset before localtime()
    # keeps the hour in 0-23 and rolls the date over correctly.
    time_offset_seconds = 9 * 3600
    n_preview = 5  # number of random validation samples printed per report

    for n_epoch in range(opt.num_epoch):
        for n_iter, (image_tensors, top, mid, bot) in enumerate(nchar_dataloader):
            image = image_tensors.to(device)
            text_top, _ = top_converter.encode(top, batch_max_length=opt.batch_max_length)
            text_mid, _ = middle_converter.encode(mid, batch_max_length=opt.batch_max_length)
            text_bot, _ = bottom_converter.encode(bot, batch_max_length=opt.batch_max_length)

            # Teacher forcing: feed tokens [0, T-1), predict tokens [1, T).
            pred_top, pred_mid, pred_bot = model(image, text_top[:, :-1], text_mid[:, :-1], text_bot[:, :-1])

            cost_top = criterion(pred_top.view(-1, pred_top.shape[-1]), text_top[:, 1:].contiguous().view(-1))
            cost_mid = criterion(pred_mid.view(-1, pred_mid.shape[-1]), text_mid[:, 1:].contiguous().view(-1))
            cost_bot = criterion(pred_bot.view(-1, pred_bot.shape[-1]), text_bot[:, 1:].contiguous().view(-1))
            cost = cost_top + cost_mid + cost_bot

            loss_avg.add(cost)

            model.zero_grad()
            cost.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip)  # gradient clipping with 5
            optimizer.step()

            # validation (skipped on the first iteration of every epoch)
            if n_iter != 0 and n_iter % opt.valInterval == 0:
                elapsed_time = time.time() - start_time
                with open(f'./models/{opt.experiment_name}/log_train.txt', 'a') as log:
                    model.eval()
                    with torch.no_grad():
                        valid_loss, current_accuracy, current_norm_ED, pred_top_str, pred_mid_str, pred_bot_str, label_top, label_mid, label_bot, infer_time, length_of_data = evaluate.validation_jamo(model, criterion, valid_loader, top_converter, middle_converter, bottom_converter, opt)
                    scheduler.step(current_accuracy)
                    model.train()

                    present_time = time.localtime(time.time() + time_offset_seconds)
                    loss_log = (
                        f'[epoch : {n_epoch}/{opt.num_epoch}] [iter : {n_iter*opt.batch_size} / {samples_per_epoch}]\n'
                        f'Train loss : {loss_avg.val():0.5f}, Valid loss : {valid_loss:0.5f}, Elapsed time : {elapsed_time:0.5f}, Present time : {present_time[1]}/{present_time[2]}, {present_time[3]} : {present_time[4]}'
                    )
                    loss_avg.reset()

                    current_model_log = f'{"Current_accuracy":17s}: {current_accuracy:0.3f}, {"current_norm_ED":17s}: {current_norm_ED:0.2f}'

                    # keep the best
                    if current_accuracy > best_accuracy:
                        best_accuracy = current_accuracy
                        torch.save(model.module.state_dict(), f'./models/{opt.experiment_name}/best_accuracy_{round(current_accuracy,2)}.pth')

                    if current_norm_ED > best_norm_ED:
                        best_norm_ED = current_norm_ED
                        torch.save(model.module.state_dict(), f'./models/{opt.experiment_name}/best_norm_ED.pth')

                    best_model_log = f'{"Best accuracy":17s}: {best_accuracy:0.3f}, {"Best_norm_ED":17s}: {best_norm_ED:0.2f}'
                    loss_model_log = f'{loss_log}\n{current_model_log}\n{best_model_log}'
                    print(loss_model_log)
                    log.write(loss_model_log+'\n')

                    dashed_line = '-'*80
                    head = f'{"Ground Truth":25s} | {"Prediction" :25s}| T/F'
                    predicted_result_log = f'{dashed_line}\n{head}\n{dashed_line}\n'

                    # Never request more samples than validation returned;
                    # replace=False would raise otherwise.
                    random_idx = np.random.choice(range(len(label_top)), size=min(n_preview, len(label_top)), replace=False)
                    label_concat = np.concatenate([np.asarray(label_top).reshape(1,-1), np.asarray(label_mid).reshape(1,-1), np.asarray(label_bot).reshape(1,-1)], axis=0).reshape(3,-1)
                    pred_concat = np.concatenate([np.asarray(pred_top_str).reshape(1,-1), np.asarray(pred_mid_str).reshape(1,-1), np.asarray(pred_bot_str).reshape(1,-1)], axis=0).reshape(3,-1)

                    for i in random_idx:
                        label_sample = label_concat[:, i]
                        pred_sample = pred_concat[:, i]

                        gt_str = utils.str_combine(label_sample[0], label_sample[1], label_sample[2])
                        pred_str = utils.str_combine(pred_sample[0], pred_sample[1], pred_sample[2])
                        predicted_result_log += f'{gt_str:25s} | {pred_str:25s} | \t{str(pred_str == gt_str)}\n'
                    predicted_result_log += f'{dashed_line}'
                    print(predicted_result_log)
                    log.write(predicted_result_log+'\n')

        # periodic snapshot (epochs 0, 5, 10, ...)
        if n_epoch % 5 == 0:
            torch.save(model.module.state_dict(), f'./models/{opt.experiment_name}/{n_epoch}.pth')

In [None]:
# Create the experiment directory that train() writes logs and checkpoints to.
os.makedirs(f'./models/{opt.experiment_name}', exist_ok=True)

# set seed
random.seed(opt.manualSeed)
np.random.seed(opt.manualSeed)
torch.manual_seed(opt.manualSeed)
# Training runs on several GPUs, so seed every CUDA device, not only the current one.
torch.cuda.manual_seed_all(opt.manualSeed)

# set GPU
# benchmark=True lets cuDNN choose algorithms by timing them, which can differ
# between runs and defeats the seeding above; keep it off alongside
# deterministic=True. Set benchmark back to True if speed matters more than
# run-to-run reproducibility.
cudnn.benchmark = False
cudnn.deterministic = True
opt.num_gpu = torch.cuda.device_count()

train(opt)

model parameters. height 64, width 320, num of fiducial 20, input channel 3, output channel 512, hidden size 256,     batch max length 23
Skip Trans.LocalizationNetwork.localization_fc2.weight as it is already initializaed
Skip Trans.LocalizationNetwork.localization_fc2.bias as it is already initializaed
looking for pretrained model from ./models/RobustScanner_nchar_1106/1/best_accuracy_83.53.pth
loading complete 
Tranable params :  55712128


-----------

In [5]:
def train(rank, opt, model, device):
    """Placeholder worker entry point for the multi-process training experiment.

    Currently this only prints a debug message; every argument is accepted but
    unused. The intended per-process training loop is kept below as
    commented-out work in progress.

    NOTE(review): this definition reuses the name ``train`` and therefore
    shadows the single-argument ``train(opt)`` defined earlier in the notebook
    once this cell is executed.
    NOTE(review): it is launched through ``mp.Process`` with the 'spawn' start
    method; functions defined inside a notebook may not be importable in the
    spawned child -- confirm, or move this function into a module.

    Args:
        rank: index of the worker process (unused for now).
        opt: option object (unused for now).
        model: model instance passed from the parent process (unused for now).
        device: torch device (unused for now).
    """
#     model, 
#     torch.manual_seed(opt.manualSeed + rank)
    # Debug marker confirming that the worker process actually started.
    print('이거는됨?')
    # --- Work in progress: planned per-process training loop ---
    # NOTE(review): the draft below refers to `dataset`, `dataloader_kwargs`,
    # `batch_idx` and `data_loader`, none of which are parameters or locals of
    # this function; they must be defined before the code is re-enabled.
#     train_loader = DataLoader(dataset, **dataloader_kwargs)
    
#     # filter that only require gradient descent
#     filtered_parameters = []
#     params_num = []
#     for p in filter(lambda p : p.requires_grad, model.parameters()):
#         filtered_parameters.append(p)
#         params_num.append(np.prod(p.size()))

#     # optimizer
#     optimizer = optim.Adadelta(filtered_parameters, lr= opt.lr, rho = opt.rho, eps = opt.eps)
    
#     # loss
#     criterion = torch.nn.CrossEntropyLoss(ignore_index=0).to(device) #ignore [GO] token = ignore index 0
#     log_avg = utils.Averager()
    
#     # label encoder
#     top_converter = utils.AttnLabelConverter(opt.top_char, device)
#     middle_converter = utils.AttnLabelConverter(opt.middle_char, device)
#     bottom_converter = utils.AttnLabelConverter(opt.bottom_char, device)
    
#     for epoch in range(1, opt.num_epoch + 1):
#         model.train()
#         pid = os.getpid()
#         for idx, (image_tensors, top, mid, bot) in enumerate(train_loader):
#             optimizer.zero_grad()
            
#             image = image_tensors.to(device)
#             text_top, length_top = top_converter.encode(top, batch_max_length = opt.batch_max_length)
#             text_mid, length_mid = middle_converter.encode(mid, batch_max_length = opt.batch_max_length)
#             text_bot, length_bot = bottom_converter.encode(bot, batch_max_length = opt.batch_max_length)
#             batch_size = image.size(0)
          
#             pred_top, pred_mid, pred_bot = model(image, text_top[:,:-1], text_mid[:,:-1], text_bot[:,:-1])
            
#             cost_top = criterion(pred_top.view(-1, pred_top.shape[-1]), text_top[:, 1:].contiguous().view(-1))
#             cost_mid = criterion(pred_mid.view(-1, pred_mid.shape[-1]), text_mid[:, 1:].contiguous().view(-1))
#             cost_bot = criterion(pred_bot.view(-1, pred_bot.shape[-1]), text_bot[:, 1:].contiguous().view(-1))
            
#             cost = cost_top + cost_mid + cost_bot

#             loss_avg = utils.Averager()
#             loss_avg.add(cost)
            
#             model.zero_grad()
#             cost.backward()
#             torch.nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip) #gradient clipping with 5
#             optimizer.step()
            
#             if batch_idx % 5 == 0:
#                 print('{}\tTrain Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
#                 pid, epoch, idx * len(image_tensors), len(data_loader.dataset),
#                 100. * idx / len(data_loader), loss_avg.val()))

In [None]:
# def test(opt, model, device, dataset, dataloader_kwargs):
#     torch.manual_seed(opt.manualSeed)
#     test_loader = DataLoader(dataset, **dataloader_kwargs)
    
#     model.eval()
    

In [7]:
# with open('./dataset_180', 'rb') as file:
#     data = pickle.load(file)

# train_data = data[ : int(len(data) * 0.998)]
# test_data = data[ int(len(data) * 0.998): ]

In [6]:
# Cut the training data into one equally sized, contiguous shard per process.
# Samples beyond num_processes * each_length are left out of every shard.
each_length = len(train_data) // opt.num_processes
train_split = [
    train_data[shard * each_length:(shard + 1) * each_length]
    for shard in range(opt.num_processes)
]

In [6]:
# Seed the parent process and prepare the settings shared by the worker processes.
torch.manual_seed(opt.manualSeed)
dataloader_kwargs = {'batch_size' : opt.batch_size, 'pin_memory' : True, 'drop_last':True, 'num_workers' : 1}
# The start method can only be set once per interpreter; without force=True,
# re-running this cell in the same kernel raises
# "RuntimeError: context has already been set".
mp.set_start_method('spawn', force=True)

In [7]:
# Build the model, optionally restore a checkpoint, then move it to the device.
model = BaseModel.model(opt, device)

# load pretrained model
if opt.saved_model != '':
    base_path = './models'
    checkpoint_path = os.path.join(base_path, opt.saved_model)
    print(f'looking for pretrained model from {checkpoint_path}')

    try:
        state_dict = torch.load(checkpoint_path)
        model.load_state_dict(state_dict)
        print('loading complete ')
    except Exception as e:
        # Best effort: on failure the model keeps its current weights.
        print(e)
        print('coud not find model')

# DataParallel is intentionally not used here; worker processes share this model instead.
# model = torch.nn.DataParallel(model, device_ids=[0,1,2,3]).to(device)
model = model.to(device)
# Assign the return value so the notebook does not echo the module repr.
_ = model.share_memory()

looking for pretrained model from ./models/RobustScanner_1105/1/best_accuracy_69.23.pth
loading complete 


In [8]:
# Start one worker process per rank, then block until all of them have exited.
processes = []
for rank in range(opt.num_processes):
    worker = mp.Process(target=train, args=(rank, opt, model, device))
    worker.start()
    print('start!')
    processes.append(worker)

for worker in processes:
    worker.join()

KeyboardInterrupt: 

--------