In [1]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
from models import *
from collections import namedtuple

import tokenization
import models
import optim
import train
from utils import set_seeds, get_device, truncate_tokens_pair
from classify_hardware import dataset_class,Tokenizing,AddSpecialTokensWithTruncation,TokenIndexing,Classifier

In [2]:
# ---------------------------------------------------------------------------
# Run configuration: everything a reader might want to tune lives here.
# ---------------------------------------------------------------------------
task = 'mrpc'


def task_files(task):
    """Return ``(train_data_file, test_data_file, model_file)`` for a GLUE task.

    For ``'mnli'`` the evaluation split is ``dev_mismatched.tsv`` and the
    checkpoint is ``model_steps_36818.pt``; every other task uses ``dev.tsv``
    and ``model_steps_345.pt``. The training split is always ``train.tsv``
    inside ``./GLUE_DATA/<TASK>/``.

    Previously the ``'mnli'`` branch only defined ``data_file``, so the later
    dataset construction failed with ``NameError`` on ``train_data_file``.
    """
    data_dir = f'./GLUE_DATA/{task.upper()}'
    train_file = f'{data_dir}/train.tsv'
    if task == 'mnli':
        return train_file, f'{data_dir}/dev_mismatched.tsv', './SAVE/model_steps_36818.pt'
    return train_file, f'{data_dir}/dev.tsv', './SAVE/model_steps_345.pt'


train_cfg = 'config/train_mrpc.json'
model_cfg = 'config/bert_base.json'

train_data_file, test_data_file, model_file = task_files(task)
if task == 'mnli':
    # Legacy name kept for backward compatibility with the original cell.
    data_file = test_data_file

pretrain_file = '../uncased_L-12_H-768_A-12/bert_model.ckpt'
data_parallel = False
vocab = './PRE_TRAINED_MODEL/vocab.txt'
save_dir = f'../../exp/bert/{task}'
max_len = 128
mode = 'eval'

# The four values below are passed positionally to Classifier(...) after the
# label count; their exact semantics are defined in classify_hardware.
thres = 0.0
prun = True
quant = True
early_stop = False


# Parse the JSON configs. Note that `model_cfg` is rebound here from the
# config path (a string) to the parsed models.Config object.
cfg = train.Config.from_json(train_cfg)
model_cfg = models.Config.from_json(model_cfg)
set_seeds(cfg.seed)

# Tokenizer plus the per-example preprocessing steps, applied in list order.
tokenizer = tokenization.FullTokenizer(vocab_file=vocab, do_lower_case=True)
TaskDataset = dataset_class(task)  # dataset class selected by task name
pipeline = [
    Tokenizing(tokenizer.convert_to_unicode, tokenizer.tokenize),
    AddSpecialTokensWithTruncation(max_len),
    TokenIndexing(tokenizer.convert_tokens_to_ids, TaskDataset.labels, max_len),
]

# Datasets and their batch iterators; both loaders shuffle, as in the original.
train_dataset = TaskDataset(train_data_file, pipeline)
test_dataset = TaskDataset(test_data_file, pipeline)
train_data_iter = DataLoader(dataset=train_dataset,
                             batch_size=cfg.batch_size,
                             shuffle=True)
test_data_iter = DataLoader(dataset=test_dataset,
                            batch_size=cfg.batch_size,
                            shuffle=True)

# Build the classifier with one output per task label and move it to the GPU.
model = Classifier(
    model_cfg,
    len(TaskDataset.labels),
    thres,
    prun,
    quant,
    early_stop,
).cuda()
criterion = nn.CrossEntropyLoss()


def get_loss(model, batch, global_step):
    """Compute the scalar training loss for one batch.

    The loss is the cross-entropy between the model's logits and the labels,
    plus 0.5 times the reciprocal of the mean of the second model output
    (``var``).

    Args:
        model: callable taking ``(input_ids, segment_ids, input_mask)`` and
            returning ``(logits, var)``.
        batch: 4-tuple ``(input_ids, segment_ids, input_mask, label_id)``.
        global_step: accepted for the trainer's callback signature; not used.

    Returns:
        The combined loss tensor.
    """
    input_ids, segment_ids, input_mask, label_id = batch
    logits, var = model(input_ids, segment_ids, input_mask)

    classification_loss = criterion(logits, label_id)
    # Penalty term grows as the mean of `var` shrinks.
    inverse_mean_var = 1 / var.mean()
    return classification_loss + 5e-1 * inverse_mean_var

def evaluate(model, batch):
    """Score one batch and record the model's ``hist`` attribute.

    Runs the forward pass, then appends ``model.hist`` (read after the
    forward call) to the notebook-level ``sparsity`` list, which must be
    defined before this function is invoked.

    Args:
        model: callable taking ``(input_ids, segment_ids, input_mask)`` and
            returning ``(logits, var)``; must expose a ``hist`` attribute.
        batch: 4-tuple ``(input_ids, segment_ids, input_mask, label_id)``.

    Returns:
        ``(accuracy, result)`` where ``result`` is the per-example 0/1 float
        correctness tensor and ``accuracy`` is its mean.
    """
    input_ids, segment_ids, input_mask, label_id = batch
    logits, _unused_var = model(input_ids, segment_ids, input_mask)
    sparsity.append(model.hist)

    # Index of the largest logit along dim 1 is the predicted label.
    label_pred = logits.max(1)[1]
    result = (label_pred == label_id).float()
    return result.mean(), result

In [3]:
# Collected by evaluate(): one `model.hist` entry per evaluated batch.
sparsity = []

trainer = train.Trainer(
    cfg,
    model,
    train_data_iter,
    test_data_iter,
    optim.optim4GPU(cfg, model),
    save_dir,
    get_device(),
)
trained_model = trainer.train(
    get_loss,
    model_file=model_file,
    pretrain_file=None,
    data_parallel=data_parallel,
)

# Persist the trained weights under <save_dir>/best_model/best_model.
best_model_dir = save_dir + '/best_model'
os.makedirs(best_model_dir, exist_ok=True)
print(best_model_dir)
torch.save(trained_model.state_dict(), best_model_dir + '/best_model')

# Evaluate the model that was just trained; the per-batch result tensors are
# concatenated before averaging.
results = trainer.eval(evaluate, model_file, data_parallel, model=trained_model)
total_accuracy = torch.cat(results).mean().item()
print('Accuracy:', total_accuracy)
print('Sparsity:', np.mean(sparsity))

cuda (2 GPUs)
Loading the model from ./SAVE/model_steps_345.pt


FileNotFoundError: [Errno 2] No such file or directory: './SAVE/model_steps_345.pt'