# Train the model
Adapted from the original `main.py`. Intergrated with AWS SageMaker.

## Install dependencies
(actually only tqdm, since other packages are pre-installed in aws pytorch environments)

In [None]:
!pip install -r requirements.txt

In [None]:
from collections import defaultdict
import time
import json
import numpy as np
from random import choice
from tqdm import tqdm
import model
import torch
import torch.nn as nn
from torch.autograd import Variable
#import data_prepare
import os
import torch.utils.data as Data
import torch.nn.functional as F


Define a tensorboard logger

In [11]:
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter(log_dir='./logs')

In [12]:
# os.environ["CUDA_VISIBLE_DEVICES"]="0,1,2,3,4,5,6,7"

In [13]:
# for macOS compatibility
#os.environ['KMP_DUPLICATE_LIB_OK']='True'

torch.backends.cudnn.benchmark = True
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

CHAR_SIZE = 128
SENT_LENGTH = 4
HIDDEN_SIZE = 64
EPOCH_NUM = 400
BATCH_SIZE = 5096

In [14]:
device

device(type='cuda')

Helper functions:

In [15]:
def get_now_time():
    a = time.time()
    return time.ctime(a)


def seq_padding(X):
    L = [len(x) for x in X]
    ML = max(L)
    # print("ML",ML)
    return [x + [0] * (ML - len(x)) for x in X]


def seq_padding_vec(X):
    L = [len(x) for x in X]
    ML = max(L)
    # print("ML",ML)
    return [x + [[1, 0]] * (ML - len(x)) for x in X]

In [16]:
class DataGenerator:
    def __init__(self, data, batch_size=64):
        self.data = data
        self.batch_size = batch_size
        self.steps = len(self.data) // self.batch_size
        if len(self.data) % self.batch_size != 0:
            self.steps += 1

    def __len__(self):
        return self.steps

    def pro_res(self):
        idxs = list(range(len(self.data)))
        # print(idxs)
        np.random.shuffle(idxs)
        T, S1, S2, K1, K2, O1, O2, = [], [], [], [], [], [], []
        for i in idxs:
            d = self.data[i]
            text = d['text']
            items = {}
            items = defaultdict(list)
            for sp in d['spo_list']:
                subjectid = text.find(sp[0])
                objectid = text.find(sp[2])
                if subjectid != -1 and objectid != -1:
                    key = (subjectid, subjectid+len(sp[0])) # key is the span(start, end) of the subject
                    # items is {(S_start, S_end): list of (O_start_pos, O_end_pos, predicate_id)}
                    items[key].append(
                        (objectid, objectid+len(sp[2]), predicate2id[sp[1]]))
            if items:
                # T is list of text tokens(ids)
                T.append([char2id.get(c, 1) for c in text])  # 1是unk，0是padding
         
                # s1: one-hot vector where start of subject is 1
                # s2: one-hot vector where end of subject is 1
                s1, s2 = [0] * len(text), [0] * len(text)
                for j in items:
                    s1[j[0]] = 1
                    s2[j[1]-1] = 1
                # print(items.keys())
                # k1, k2: randomly sampled (S_start, S_end) pair?
                k1, k2 = choice(list(items.keys()))
                # o1: zero vector, the start of each O is marked with its predicate ID
                # o2: zero vector, the end of each O is marked with its predicate ID
                o1, o2 = [0] * len(text), [0] * len(text)  # 0是unk类（共49+1个类）
                for j in items[(k1, k2)]:
                    o1[j[0]] = j[2]
                    o2[j[1]-1] = j[2]
                S1.append(s1)
                S2.append(s2)
                K1.append([k1])
                K2.append([k2-1])
                O1.append(o1)
                O2.append(o2)

        T = np.array(seq_padding(T))
        S1 = np.array(seq_padding(S1))
        S2 = np.array(seq_padding(S2))
        O1 = np.array(seq_padding(O1))
        O2 = np.array(seq_padding(O2))
        K1, K2 = np.array(K1), np.array(K2)
        return [T, S1, S2, K1, K2, O1, O2]


class MyDataset(Data.Dataset):
    """
        下载数据、初始化数据，都可以在这里完成
    """

    def __init__(self, _T, _S1, _S2, _K1, _K2, _O1, _O2):
        # xy = np.loadtxt('../dataSet/diabetes.csv.gz', delimiter=',', dtype=np.float32) # 使用numpy读取数据
        self.x_data = _T
        self.y1_data = _S1
        self.y2_data = _S2
        self.k1_data = _K1
        self.k2_data = _K2
        self.o1_data = _O1
        self.o2_data = _O2
        self.len = len(self.x_data)

    def __getitem__(self, index):
        return self.x_data[index], self.y1_data[index], self.y2_data[index], self.k1_data[index], self.k2_data[index], self.o1_data[index], self.o2_data[index]

    def __len__(self):
        return self.len


def collate_fn(data):
    t = np.array([item[0] for item in data], np.int32)
    s1 = np.array([item[1] for item in data], np.int32)
    s2 = np.array([item[2] for item in data], np.int32)
    k1 = np.array([item[3] for item in data], np.int32)

    k2 = np.array([item[4] for item in data], np.int32)
    o1 = np.array([item[5] for item in data], np.int32)
    o2 = np.array([item[6] for item in data], np.int32)
    return {
        'T': torch.LongTensor(t),  # targets_i
        'S1': torch.FloatTensor(s1),
        'S2': torch.FloatTensor(s2),
        'K1': torch.LongTensor(k1),
        'K2': torch.LongTensor(k2),
        'O1': torch.LongTensor(o1),
        'O2': torch.LongTensor(o2),
    }

In [17]:
def extract_items(text_in, s_m, po_m):
    R = []
    _s = [char2id.get(c, 1) for c in text_in]
    _s = np.array([_s])
    _k1, _k2, t, t_max, mask = s_m(torch.LongTensor(_s).to(device))
    _k1, _k2 = _k1[0, :, 0], _k2[0, :, 0]
    _kk1s = []
    for i, _kk1 in enumerate(_k1):
        if _kk1 > 0.5:
            _subject = ''
            for j, _kk2 in enumerate(_k2[i:]):
                if _kk2 > 0.5:
                    _subject = text_in[i: i+j+1]
                    break
            if _subject:
                _k1, _k2 = torch.LongTensor([[i]]), torch.LongTensor(
                    [[i+j]])  # np.array([i]), np.array([i+j])
                _o1, _o2 = po_m(t.to(device), t_max.to(
                    device), _k1.to(device), _k2.to(device))
                _o1, _o2 = _o1.cpu().data.numpy(), _o2.cpu().data.numpy()

                _o1, _o2 = np.argmax(_o1[0], 1), np.argmax(_o2[0], 1)

                for i, _oo1 in enumerate(_o1):
                    if _oo1 > 0:
                        for j, _oo2 in enumerate(_o2[i:]):
                            if _oo2 == _oo1:
                                _object = text_in[i: i+j+1]
                                _predicate = id2predicate[_oo1]
                                # print((_subject, _predicate, _object))
                                R.append((_subject, _predicate, _object))
                                break
        _kk1s.append(_kk1.data.cpu().numpy())
    _kk1s = np.array(_kk1s)
    return list(set(R))

def para_extract_items(loader_res):
    t_s = loader_res["T"].to(device)
    k1 = loader_res["K1"].to(device)
    k2 = loader_res["K2"].to(device)
    s1 = loader_res["S1"].to(device)
    s2 = loader_res["S2"].to(device)
    o1 = loader_res["O1"].to(device)
    o2 = loader_res["O2"].to(device)

    ps_1, ps_2, t, t_max, mask = s_m(t_s)

    t, t_max, k1, k2 = t.to(device), t_max.to(
        device), k1.to(device), k2.to(device)
    po_1, po_2 = po_m(t, t_max, k1, k2)

    ps_1 = ps_1.to(device)
    ps_2 = ps_2.to(device)
    po_1 = po_1.to(device)
    po_2 = po_2.to(device)

    s1 = torch.unsqueeze(s1, 2)
    s2 = torch.unsqueeze(s2, 2)
    
    
    
            
def para_evaluate():
    A, B, C = 1e-10, 1e-10, 1e-10
    cnt = 0
    s_m.eval()
    po_m.eval()
    with torch.no_grad():
        for step, loader_res in tqdm(iter(enumerate(loader))):
            R = set(para_extract_items(loader_res))
            T = None
            A += len(R & T)
            B += len(R)
            C += len(T)
            cnt += 1
    return 2 * A / (B + C), A / B, A / C
    

def evaluate(s_m, po_m, dev_data):
    A, B, C = 1e-10, 1e-10, 1e-10
    cnt = 0
    s_m.eval()
    po_m.eval()
    for d in tqdm(iter(dev_data)):
        if cnt > 1000:
            break
        R = set(extract_items(d['text'], s_m, po_m))
        T = set([tuple(i) for i in d['spo_list']])
        A += len(R & T)
        B += len(R)
        C += len(T)
        # if cnt % 1000 == 0:
        #     print('iter: %d f1: %.4f, precision: %.4f, recall: %.4f\n' % (cnt, 2 * A / (B + C), A / B, A / C))
        cnt += 1
    return 2 * A / (B + C), A / B, A / C


# Download training data
Skip the downloading step if you have alreay done it.

In [18]:
#!wget https://dataset-bj.cdn.bcebos.com/qianyan/DuIE_2_0.zip

In [19]:
#!unzip -j DuIE_2_0.zip -d data

Transofm raw data to easier usable format

In [20]:
# !mkdir generated
# !python trans.py

## Load training data

In [None]:
train_path = 'generated/train_data_me.json'
dev_path = 'generated/dev_data_me.json'
generated_schema_path =  'generated/schemas_me.json'
generated_char_path = 'generated/all_chars_me.json'
train_data = json.load(open(train_path))
dev_data = json.load(open(dev_path))
id2predicate, predicate2id = json.load(open(generated_schema_path))
id2predicate = {int(i): j for i, j in id2predicate.items()}
id2char, char2id = json.load(open(generated_char_path))
num_classes = len(id2predicate)

In [None]:
dg = DataGenerator(train_data)
T, S1, S2, K1, K2, O1, O2 = dg.pro_res()
# print("len",len(T))

torch_dataset = MyDataset(T, S1, S2, K1, K2, O1, O2)

In [None]:
loader = Data.DataLoader(
    dataset=torch_dataset,      # torch TensorDataset format
    batch_size=BATCH_SIZE,      # mini batch size
    shuffle=True,               # random shuffle for training
    num_workers=64,
    collate_fn=collate_fn,      # subprocesses for loading data
)

In [None]:
dev_dg = DataGenerator(dev_data)
T_dev, S1_dev, S2_dev, K1_dev, K2_dev, O1_dev, O2_dev = dev_dg.pro_res()
dev_dataset = MyDataset(T_dev, S1_dev, S2_dev, K1_dev, K2_dev, O1_dev, O2_dev)
dev_loader = Data.DataLoader(
    dataset=dev_dataset,      # torch TensorDataset format
    batch_size=BATCH_SIZE,      # mini batch size
    shuffle=True,               # random shuffle for training
    num_workers=64,
    collate_fn=collate_fn,      # subprocesses for loading data
)

### Define model and loss
Data are parallimised  to multiple GPUs

In [None]:
s_m = model.s_model(len(char2id)+2, CHAR_SIZE, HIDDEN_SIZE)
po_m = model.po_model(len(char2id)+2, CHAR_SIZE, HIDDEN_SIZE, 49)

if torch.cuda.device_count() > 1:
    print('Using', torch.cuda.device_count(), "GPUs!")
    s_m = nn.DataParallel(s_m)
    po_m = nn.DataParallel(po_m)

s_m = s_m.to(device)
po_m = po_m.to(device)

### Load model if needed
Uncomment lines below to load pre-trained model

In [None]:
def load_model(model_dir, epoch, device):
    s_m = torch.load(os.path.join(model_dir, "s_{}.pkl".format(epoch)), map_location=device)
    po_m = torch.load(os.path.join(model_dir, "po_{}.pkl".format(epoch)), map_location=device)
    # reload the model with DataParallel (this will 
    # be helpful when num of GPUs changes)
    s_m = nn.DataParallel(s_m.module)
    po_m = nn.DataParallel(po_m.module)
    return s_m, po_m

In [None]:
breakpoint_epoch = 210
model_dir = 'models_real'

In [None]:
s_m, po_m = load_model(model_dir, breakpoint_epoch, device)

### Define loss metrics

**Run this after reloading the model and before training**.

In [44]:
params = list(s_m.parameters())
params += list(po_m.parameters())
optimizer = torch.optim.Adam(params, lr=0.001)

loss = torch.nn.CrossEntropyLoss().to(device)
b_loss = torch.nn.BCEWithLogitsLoss().to(device)

## Training

In [None]:
best_f1 = 0
best_epoch = 0

In [None]:
starting_epoch = 210

# try:
#     breakpoint_epoch
# except NameError:
#     print("breakpoint epoch not defined, start training from epoch 0")
# else:
#     print("continue training from epoch", breakpoint_epoch)
#     starting_epoch = breakpoint_epoch



for i in range(starting_epoch, EPOCH_NUM):
    epoch_start_time = time.time()
    s_m.train()
    po_m.train()
    for step, loader_res in tqdm(iter(enumerate(loader))):
        # print(get_now_time())
        t_s = loader_res["T"].to(device)
        k1 = loader_res["K1"].to(device)
        k2 = loader_res["K2"].to(device)
        s1 = loader_res["S1"].to(device)
        s2 = loader_res["S2"].to(device)
        o1 = loader_res["O1"].to(device)
        o2 = loader_res["O2"].to(device)

        ps_1, ps_2, t, t_max, mask = s_m(t_s)

        t, t_max, k1, k2 = t.to(device), t_max.to(
            device), k1.to(device), k2.to(device)
        po_1, po_2 = po_m(t, t_max, k1, k2)

        ps_1 = ps_1.to(device)
        ps_2 = ps_2.to(device)
        po_1 = po_1.to(device)
        po_2 = po_2.to(device)

        s1 = torch.unsqueeze(s1, 2)
        s2 = torch.unsqueeze(s2, 2)

        s1_loss = b_loss(ps_1, s1)
        s1_loss = torch.sum(s1_loss.mul(mask))/torch.sum(mask)
        s2_loss = b_loss(ps_2, s2)
        s2_loss = torch.sum(s2_loss.mul(mask))/torch.sum(mask)

        po_1 = po_1.permute(0, 2, 1)
        po_2 = po_2.permute(0, 2, 1)

        o1_loss = loss(po_1, o1)
        o1_loss = torch.sum(o1_loss.mul(mask[:, :, 0])) / torch.sum(mask)
        o2_loss = loss(po_2, o2)
        o2_loss = torch.sum(o2_loss.mul(mask[:, :, 0])) / torch.sum(mask)

        loss_sum = 2.5 * (s1_loss + s2_loss) + (o1_loss + o2_loss)

        # if step % 500 == 0:
        # 	torch.save(s_m, 'models_real/s_'+str(step)+"epoch_"+str(i)+'.pkl')
        # 	torch.save(po_m, 'models_real/po_'+str(step)+"epoch_"+str(i)+'.pkl')

        optimizer.zero_grad()

        loss_sum.backward()
        optimizer.step()


    torch.save(s_m, 'models_real/s_'+str(i)+'.pkl')
    torch.save(po_m, 'models_real/po_'+str(i)+'.pkl')
    f1, precision, recall = evaluate(s_m, po_m, dev_data)

    print("epoch:", i, "loss:", loss_sum.data)
    epoch_end_time = time.time()
    epoch_time_elapsed = epoch_end_time - epoch_start_time
    print("epoch {} used {} seconds (with bsz={})".format(i, epoch_time_elapsed, BATCH_SIZE))
    writer.add_scalar('Loss/train', loss_sum.data, i)
    writer.add_scalar('f1', f1, i)
    writer.add_scalar('precision', precision, i)
    writer.add_scalar('recall', recall, i)

    if f1 >= best_f1:
        best_f1 = f1
        best_epoch = i

    print('f1: %.4f, precision: %.4f, recall: %.4f, bestf1: %.4f, bestepoch: %d \n ' % (
        f1, precision, recall, best_f1, best_epoch))

writer.flush()

34it [02:15,  3.98s/it]
1001it [00:16, 59.86it/s]

epoch: 210 loss: tensor(0.0265, device='cuda:0')
epoch 210 used 155.96729707717896 seconds (with bsz=5096)
f1: 0.5974, precision: 0.7745, recall: 0.4862, bestf1: 0.5974, bestepoch: 210 
 



34it [02:15,  3.98s/it]
1001it [00:16, 61.88it/s]

epoch: 211 loss: tensor(0.0265, device='cuda:0')
epoch 211 used 155.5171353816986 seconds (with bsz=5096)
f1: 0.5858, precision: 0.7856, recall: 0.4670, bestf1: 0.5974, bestepoch: 210 
 



10it [00:40,  4.03s/it]Traceback (most recent call last):
  File "/home/ec2-user/anaconda3/envs/pytorch_latest_p36/lib/python3.6/multiprocessing/queues.py", line 230, in _feed
    close()
  File "/home/ec2-user/anaconda3/envs/pytorch_latest_p36/lib/python3.6/multiprocessing/connection.py", line 177, in close
    self._close()
  File "/home/ec2-user/anaconda3/envs/pytorch_latest_p36/lib/python3.6/multiprocessing/connection.py", line 361, in _close
    _close(self._handle)
OSError: [Errno 9] Bad file descriptor
34it [02:15,  3.99s/it]
1001it [00:16, 61.54it/s]

epoch: 212 loss: tensor(0.0257, device='cuda:0')
epoch 212 used 155.75757551193237 seconds (with bsz=5096)
f1: 0.5880, precision: 0.7826, recall: 0.4710, bestf1: 0.5974, bestepoch: 210 
 



34it [02:15,  3.98s/it]
1001it [00:16, 61.86it/s]

epoch: 213 loss: tensor(0.0262, device='cuda:0')
epoch 213 used 155.47865533828735 seconds (with bsz=5096)
f1: 0.5910, precision: 0.7761, recall: 0.4772, bestf1: 0.5974, bestepoch: 210 
 



34it [02:15,  3.99s/it]
1001it [00:16, 60.17it/s]

epoch: 214 loss: tensor(0.0254, device='cuda:0')
epoch 214 used 156.18386721611023 seconds (with bsz=5096)
f1: 0.5930, precision: 0.7684, recall: 0.4828, bestf1: 0.5974, bestepoch: 210 
 



34it [02:16,  4.00s/it]
1001it [00:16, 61.04it/s]

epoch: 215 loss: tensor(0.0268, device='cuda:0')
epoch 215 used 156.4516041278839 seconds (with bsz=5096)
f1: 0.5883, precision: 0.7670, recall: 0.4772, bestf1: 0.5974, bestepoch: 210 
 



34it [02:15,  3.99s/it]
1001it [00:16, 60.13it/s]

epoch: 216 loss: tensor(0.0266, device='cuda:0')
epoch 216 used 156.2406644821167 seconds (with bsz=5096)
f1: 0.5758, precision: 0.7671, recall: 0.4608, bestf1: 0.5974, bestepoch: 210 
 



11it [00:44,  4.03s/it]

In [19]:
writer.close()

## Test the trained model on some texts
Extract plain model from Dataparalell

In [None]:
s_m = s_m.module
po_m = po_m.module

In [32]:
to_print = 20
for cnt, d in enumerate(dev_data):
    if cnt > to_print:
        break
    print('Text: ', d['text'])
    print('Predicted SPOs: ', extract_items(d['text'], s_m, po_m))
    print('Ground Truth SPOs: ', d['spo_list'])

Text:  《步步惊心》改编自著名作家桐华的同名清穿小说《甄嬛传》改编自流潋紫所著的同名小说电视剧《何以笙箫默》改编自顾漫同名小说《花千骨》改编自fresh果果同名小说《裸婚时代》是月影兰析创作的一部情感小说《琅琊榜》是根据海宴同名网络小说改编电视剧《宫锁心玉》，又名《宫》《雪豹》，该剧改编自网络小说《特战先驱》《我是特种兵》由红遍网络的小说《最后一颗子弹留给我》改编电视剧《来不及说我爱你》改编自匪我思存同名小说《来不及说我爱你》
Predicted SPOs:  [('步步惊心', '作者', '桐华'), ('步步惊心', '改编自', '来不及说我爱你'), ('步步惊心', '改编自', '步步惊心》改编自著名作家桐华的同名清穿小说《甄嬛传'), ('步步惊心', '改编自', '特战先驱'), ('步步惊心', '改编自', '花千骨'), ('步步惊心', '改编自', '何以笙箫默'), ('步步惊心', '改编自', '最后一颗子弹留给我'), ('步步惊心', '改编自', '裸婚时代'), ('步步惊心', '改编自', '甄嬛传')]
Ground Truth SPOs:  [['何以笙箫默', '作者', '顾漫'], ['我是特种兵', '改编自', '最后一颗子弹留给我'], ['步步惊心', '作者', '桐华'], ['甄嬛传', '作者', '流潋紫'], ['花千骨', '作者', 'fresh果果'], ['裸婚时代', '作者', '月影兰析'], ['琅琊榜', '作者', '海宴'], ['雪豹', '改编自', '特战先驱'], ['来不及说我爱你', '改编自', '来不及说我爱你'], ['来不及说我爱你', '作者', '匪我思存']]
Text:  摩尔多瓦共和国（摩尔多瓦语：Republica Moldova，英语：Republic of Moldova），简称摩尔多瓦，是位于东南欧的内陆国，与罗马尼亚和乌克兰接壤，首都基希讷乌
Predicted SPOs:  []
Ground Truth SPOs:  [['摩尔多瓦', '首都', '基希讷乌']]
Text:  2月19日，96岁的资深演员侯焕玲离世，候婆婆一生未嫁，但一直热爱电影，她曾在《回魂夜》和《喜剧之王》等电影饰演婆婆一角，而临终前候婆婆一直

In [None]:
# visualize the model
writer.add_graph(s_m, torch.from_numpy(x).float())
writer.add_graph(po_m, torch.fr)

# Find the best model saved
First iterate trough 0 to 195

In [36]:
best_epoch = 0
best_f1 = 0
for e in range(0, 200, 5):
    s_m, po_m = load_model('models_real', e, device)
    f1, precision, recall = evaluate(s_m, po_m, dev_data)
    if f1 > best_f1:
        best_f1 = f1
        best_epoch = e
    print('Epoch %d: f1: %.4f, precision: %.4f, recall: %.4f, bestf1: %.4f, bestepoch: %d \n ' % (
        e, f1, precision, recall, best_f1, best_epoch))
    writer.

1001it [00:15, 62.95it/s]


Epoch 0: f1: 0.1442, precision: 0.5526, recall: 0.0829, bestf1: 0.1442, bestepoch: 0 
 


1001it [00:15, 62.82it/s]
0it [00:00, ?it/s]

Epoch 5: f1: 0.1995, precision: 0.6325, recall: 0.1184, bestf1: 0.1995, bestepoch: 5 
 


1001it [00:15, 65.41it/s]


Epoch 10: f1: 0.2649, precision: 0.7066, recall: 0.1630, bestf1: 0.2649, bestepoch: 10 
 


1001it [00:15, 65.30it/s]
6it [00:00, 53.04it/s]

Epoch 15: f1: 0.3178, precision: 0.7458, recall: 0.2019, bestf1: 0.3178, bestepoch: 15 
 


1001it [00:15, 65.18it/s]
6it [00:00, 53.24it/s]

Epoch 20: f1: 0.3289, precision: 0.7396, recall: 0.2115, bestf1: 0.3289, bestepoch: 20 
 


1001it [00:15, 62.86it/s]
0it [00:00, ?it/s]

Epoch 25: f1: 0.3539, precision: 0.7473, recall: 0.2318, bestf1: 0.3539, bestepoch: 25 
 


1001it [00:15, 64.50it/s]
5it [00:00, 48.76it/s]

Epoch 30: f1: 0.3743, precision: 0.7355, recall: 0.2510, bestf1: 0.3743, bestepoch: 30 
 


1001it [00:15, 64.52it/s]
0it [00:00, ?it/s]

Epoch 35: f1: 0.3955, precision: 0.7465, recall: 0.2690, bestf1: 0.3955, bestepoch: 35 
 


1001it [00:15, 64.67it/s]
0it [00:00, ?it/s]

Epoch 40: f1: 0.4160, precision: 0.7754, recall: 0.2843, bestf1: 0.4160, bestepoch: 40 
 


1001it [00:16, 62.46it/s]
0it [00:00, ?it/s]

Epoch 45: f1: 0.4284, precision: 0.7861, recall: 0.2944, bestf1: 0.4284, bestepoch: 45 
 


1001it [00:15, 64.17it/s]
0it [00:00, ?it/s]

Epoch 50: f1: 0.4493, precision: 0.7778, recall: 0.3158, bestf1: 0.4493, bestepoch: 50 
 


1001it [00:15, 63.81it/s]
0it [00:00, ?it/s]

Epoch 55: f1: 0.4614, precision: 0.7956, recall: 0.3249, bestf1: 0.4614, bestepoch: 55 
 


1001it [00:15, 63.40it/s]
0it [00:00, ?it/s]

Epoch 60: f1: 0.4884, precision: 0.8094, recall: 0.3497, bestf1: 0.4884, bestepoch: 60 
 


1001it [00:15, 63.79it/s]
5it [00:00, 49.10it/s]

Epoch 65: f1: 0.5078, precision: 0.8201, recall: 0.3677, bestf1: 0.5078, bestepoch: 65 
 


1001it [00:15, 63.24it/s]


Epoch 70: f1: 0.5264, precision: 0.8032, recall: 0.3914, bestf1: 0.5264, bestepoch: 70 
 


1001it [00:15, 63.77it/s]


Epoch 75: f1: 0.5417, precision: 0.8020, recall: 0.4089, bestf1: 0.5417, bestepoch: 75 
 


1001it [00:15, 62.87it/s]


Epoch 80: f1: 0.5337, precision: 0.8045, recall: 0.3993, bestf1: 0.5417, bestepoch: 75 
 


1001it [00:15, 63.22it/s]


Epoch 85: f1: 0.5639, precision: 0.7765, recall: 0.4428, bestf1: 0.5639, bestepoch: 85 
 


1001it [00:15, 63.11it/s]


Epoch 90: f1: 0.5586, precision: 0.8017, recall: 0.4287, bestf1: 0.5639, bestepoch: 85 
 


1001it [00:15, 63.43it/s]
0it [00:00, ?it/s]

Epoch 95: f1: 0.5663, precision: 0.7734, recall: 0.4467, bestf1: 0.5663, bestepoch: 95 
 


1001it [00:15, 63.45it/s]
0it [00:00, ?it/s]

Epoch 100: f1: 0.5567, precision: 0.7919, recall: 0.4292, bestf1: 0.5663, bestepoch: 95 
 


1001it [00:15, 63.44it/s]


Epoch 105: f1: 0.5590, precision: 0.7806, recall: 0.4354, bestf1: 0.5663, bestepoch: 95 
 


1001it [00:16, 59.83it/s]
0it [00:00, ?it/s]

Epoch 110: f1: 0.5738, precision: 0.7861, recall: 0.4518, bestf1: 0.5738, bestepoch: 110 
 


1001it [00:15, 62.64it/s]
0it [00:00, ?it/s]

Epoch 115: f1: 0.5554, precision: 0.7884, recall: 0.4287, bestf1: 0.5738, bestepoch: 110 
 


1001it [00:16, 62.24it/s]


Epoch 120: f1: 0.5620, precision: 0.7798, recall: 0.4394, bestf1: 0.5738, bestepoch: 110 
 


1001it [00:15, 62.85it/s]
0it [00:00, ?it/s]

Epoch 125: f1: 0.5695, precision: 0.7836, recall: 0.4473, bestf1: 0.5738, bestepoch: 110 
 


1001it [00:15, 62.95it/s]


Epoch 130: f1: 0.5842, precision: 0.7829, recall: 0.4659, bestf1: 0.5842, bestepoch: 130 
 


1001it [00:16, 60.96it/s]
0it [00:00, ?it/s]

Epoch 135: f1: 0.5767, precision: 0.7970, recall: 0.4518, bestf1: 0.5842, bestepoch: 130 
 


1001it [00:15, 62.70it/s]
0it [00:00, ?it/s]

Epoch 140: f1: 0.5844, precision: 0.7837, recall: 0.4659, bestf1: 0.5844, bestepoch: 140 
 


1001it [00:16, 60.32it/s]
0it [00:00, ?it/s]

Epoch 145: f1: 0.5848, precision: 0.7852, recall: 0.4659, bestf1: 0.5848, bestepoch: 145 
 


1001it [00:16, 62.49it/s]
5it [00:00, 46.93it/s]

Epoch 150: f1: 0.5922, precision: 0.7657, recall: 0.4828, bestf1: 0.5922, bestepoch: 150 
 


1001it [00:16, 60.47it/s]
0it [00:00, ?it/s]

Epoch 155: f1: 0.5802, precision: 0.7998, recall: 0.4552, bestf1: 0.5922, bestepoch: 150 
 


1001it [00:16, 62.25it/s]


Epoch 160: f1: 0.5904, precision: 0.7740, recall: 0.4772, bestf1: 0.5922, bestepoch: 150 
 


1001it [00:16, 60.32it/s]
0it [00:00, ?it/s]

Epoch 165: f1: 0.5750, precision: 0.7805, recall: 0.4552, bestf1: 0.5922, bestepoch: 150 
 


1001it [00:16, 62.44it/s]


Epoch 170: f1: 0.5806, precision: 0.7815, recall: 0.4619, bestf1: 0.5922, bestepoch: 150 
 


1001it [00:16, 62.53it/s]
0it [00:00, ?it/s]

Epoch 175: f1: 0.5780, precision: 0.7719, recall: 0.4619, bestf1: 0.5922, bestepoch: 150 
 


1001it [00:16, 62.25it/s]
0it [00:00, ?it/s]

Epoch 180: f1: 0.5848, precision: 0.7742, recall: 0.4698, bestf1: 0.5922, bestepoch: 150 
 


1001it [00:16, 62.19it/s]


Epoch 185: f1: 0.5910, precision: 0.7718, recall: 0.4788, bestf1: 0.5922, bestepoch: 150 
 


1001it [00:16, 61.34it/s]
5it [00:00, 47.15it/s]

Epoch 190: f1: 0.5855, precision: 0.7815, recall: 0.4681, bestf1: 0.5922, bestepoch: 150 
 


1001it [00:16, 62.25it/s]

Epoch 195: f1: 0.5795, precision: 0.7974, recall: 0.4552, bestf1: 0.5922, bestepoch: 150 
 





In [37]:
for e in range(196, 210):
    s_m, po_m = load_model('models_real', e, device)
    f1, precision, recall = evaluate(s_m, po_m, dev_data)
    if f1 > best_f1:
        best_f1 = f1
        best_epoch = e
    print('Epoch %d: f1: %.4f, precision: %.4f, recall: %.4f, bestf1: %.4f, bestepoch: %d \n ' % (
        e, f1, precision, recall, best_f1, best_epoch))
    writer.add_scalar('f1', f1, i)
    writer.add_scalar('precision', precision, i)
    writer.add_scalar('recall', recall, i)

1001it [00:16, 62.30it/s]
5it [00:00, 47.46it/s]

Epoch 196: f1: 0.5732, precision: 0.7907, recall: 0.4495, bestf1: 0.5922, bestepoch: 150 
 


1001it [00:16, 62.18it/s]
5it [00:00, 48.37it/s]

Epoch 197: f1: 0.5837, precision: 0.7861, recall: 0.4642, bestf1: 0.5922, bestepoch: 150 
 


1001it [00:16, 62.22it/s]
5it [00:00, 47.48it/s]

Epoch 198: f1: 0.5924, precision: 0.7824, recall: 0.4766, bestf1: 0.5924, bestepoch: 198 
 


1001it [00:16, 62.13it/s]
5it [00:00, 46.33it/s]

Epoch 199: f1: 0.5868, precision: 0.7738, recall: 0.4726, bestf1: 0.5924, bestepoch: 198 
 


1001it [00:16, 62.11it/s]
5it [00:00, 46.49it/s]

Epoch 200: f1: 0.5899, precision: 0.7738, recall: 0.4766, bestf1: 0.5924, bestepoch: 198 
 


1001it [00:16, 61.74it/s]
5it [00:00, 46.76it/s]

Epoch 201: f1: 0.5974, precision: 0.7788, recall: 0.4845, bestf1: 0.5974, bestepoch: 201 
 


1001it [00:16, 61.80it/s]
5it [00:00, 46.55it/s]

Epoch 202: f1: 0.5943, precision: 0.7757, recall: 0.4817, bestf1: 0.5974, bestepoch: 201 
 


1001it [00:16, 62.49it/s]
5it [00:00, 46.77it/s]

Epoch 203: f1: 0.5843, precision: 0.7804, recall: 0.4670, bestf1: 0.5974, bestepoch: 201 
 


1001it [00:16, 62.25it/s]
5it [00:00, 46.50it/s]

Epoch 204: f1: 0.5846, precision: 0.7766, recall: 0.4687, bestf1: 0.5974, bestepoch: 201 
 


1001it [00:16, 62.56it/s]
5it [00:00, 47.80it/s]

Epoch 205: f1: 0.5941, precision: 0.7793, recall: 0.4800, bestf1: 0.5974, bestepoch: 201 
 


1001it [00:16, 62.25it/s]
5it [00:00, 48.12it/s]

Epoch 206: f1: 0.5898, precision: 0.7707, recall: 0.4777, bestf1: 0.5974, bestepoch: 201 
 


1001it [00:16, 61.95it/s]
5it [00:00, 48.41it/s]

Epoch 207: f1: 0.5771, precision: 0.7868, recall: 0.4557, bestf1: 0.5974, bestepoch: 201 
 


1001it [00:16, 62.29it/s]
5it [00:00, 48.47it/s]

Epoch 208: f1: 0.5860, precision: 0.7830, recall: 0.4681, bestf1: 0.5974, bestepoch: 201 
 


1001it [00:16, 62.22it/s]

Epoch 209: f1: 0.5855, precision: 0.7751, recall: 0.4704, bestf1: 0.5974, bestepoch: 201 
 



