In [1]:
import torch.nn as nn
from torch.optim import AdamW
from torch.utils.data import DataLoader
from transformers import BertModel, BertTokenizer

# Implemented by myself
from config import *
from data_processer import CSCDataset, split_torch_dataset
from models import CombineBertModel, DecoderBaseRNN, DecoderTransformer, Trainer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
tokenizer = BertTokenizer.from_pretrained(checkpoint)

In [3]:
train_dataset = CSCDataset([SIGHAN_train_dir_err, SIGHAN_train_dir_corr], tokenizer)

test_dataset = CSCDataset([SIGHAN_train_dir_err14, SIGHAN_train_dir_corr14], tokenizer)

preprocessing sighan dataset: 2339it [00:00, 900208.94it/s]
preprocessing sighan dataset: 100%|██████████| 2339/2339 [00:00<00:00, 1338948.69it/s]


共2339句，共73264字，最长的句子有171字


preprocessing sighan dataset: 3437it [00:00, 847889.83it/s]
preprocessing sighan dataset: 100%|██████████| 3437/3437 [00:00<00:00, 1356145.14it/s]

共3437句，共170330字，最长的句子有258字





In [4]:
train_data, dev_data = split_torch_dataset(train_dataset, 0.3)

train_data_loader = DataLoader(train_data, num_workers=4, shuffle=True, batch_size=16)

dev_data_loader = DataLoader(dev_data, num_workers=4, shuffle=True, batch_size=16)

test_data_loader = DataLoader(test_dataset, num_workers=4, shuffle=True, batch_size=32)

In [5]:
epochs = 35

# tokenizer = BertTokenizer.from_pretrained(checkpoint)
encoder_model = BertModel.from_pretrained(checkpoint)

# The Hyperparameters can be defined in config.py
hidden_size = 1024
num_layers = 2

decoder_model = DecoderBaseRNN(
    model=nn.LSTM,
    input_size=encoder_model.config.hidden_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
)

model = CombineBertModel(encoder_model=encoder_model, decoder_model=decoder_model)

optimizer = AdamW(model.parameters(), lr=learning_rate)
trainer = Trainer(model=model, tokenizer=tokenizer, optimizer=optimizer)

In [6]:
trainer.train(
    dataloader=train_data_loader, epoch=epochs, test_dataloader=dev_data_loader
)
trainer.test(test_data_loader)

train Epoch:1/35: 100%|██████████| 103/103 [00:28<00:00,  3.62it/s, loss=5.974]

Epoch 1 Loss: 7.091787435476062



dev Epoch:1/1: 100%|██████████| 44/44 [00:07<00:00,  5.95it/s, loss=5.753]

Epoch 1 Loss: 5.790225950154391
5.790225950154391 {'over_corr': 20517, 'total_err': 952, 'true_corr': tensor(0, device='cuda:0')} {'over_corr': 701, 'total_err': 701, 'true_corr': 0}



train Epoch:2/35: 100%|██████████| 103/103 [00:28<00:00,  3.64it/s, loss=5.533]

Epoch 2 Loss: 5.67191012854715



dev Epoch:1/1: 100%|██████████| 44/44 [00:07<00:00,  5.92it/s, loss=5.626]

Epoch 1 Loss: 5.654939033768394
5.654939033768394 {'over_corr': 20473, 'total_err': 952, 'true_corr': tensor(10, device='cuda:0')} {'over_corr': 701, 'total_err': 701, 'true_corr': 0}



train Epoch:3/35: 100%|██████████| 103/103 [00:28<00:00,  3.66it/s, loss=5.667]

Epoch 3 Loss: 5.609221476953007



dev Epoch:1/1: 100%|██████████| 44/44 [00:07<00:00,  5.98it/s, loss=5.765]

Epoch 1 Loss: 5.632788983258334
5.632788983258334 {'over_corr': 20341, 'total_err': 952, 'true_corr': tensor(1, device='cuda:0')} {'over_corr': 701, 'total_err': 701, 'true_corr': 0}



train Epoch:4/35: 100%|██████████| 103/103 [00:28<00:00,  3.66it/s, loss=5.467]

Epoch 4 Loss: 5.5867605024171105



dev Epoch:1/1: 100%|██████████| 44/44 [00:07<00:00,  5.99it/s, loss=5.716]

Epoch 1 Loss: 5.623335859992287
5.623335859992287 {'over_corr': 20341, 'total_err': 952, 'true_corr': tensor(11, device='cuda:0')} {'over_corr': 701, 'total_err': 701, 'true_corr': 0}



train Epoch:5/35: 100%|██████████| 103/103 [00:28<00:00,  3.65it/s, loss=5.467]

Epoch 5 Loss: 5.575716041824193



dev Epoch:1/1: 100%|██████████| 44/44 [00:07<00:00,  5.99it/s, loss=5.562]

Epoch 1 Loss: 5.608856277032332
5.608856277032332 {'over_corr': 20157, 'total_err': 952, 'true_corr': tensor(2, device='cuda:0')} {'over_corr': 701, 'total_err': 701, 'true_corr': 0}



train Epoch:6/35: 100%|██████████| 103/103 [00:28<00:00,  3.65it/s, loss=5.523]

Epoch 6 Loss: 5.552440522943885



dev Epoch:1/1: 100%|██████████| 44/44 [00:07<00:00,  5.99it/s, loss=5.492]

Epoch 1 Loss: 5.568623055111278
5.568623055111278 {'over_corr': 19399, 'total_err': 952, 'true_corr': tensor(14, device='cuda:0')} {'over_corr': 701, 'total_err': 701, 'true_corr': 0}



train Epoch:7/35: 100%|██████████| 103/103 [00:28<00:00,  3.65it/s, loss=5.435]

Epoch 7 Loss: 5.470109860873917



dev Epoch:1/1: 100%|██████████| 44/44 [00:07<00:00,  5.92it/s, loss=5.492]

Epoch 1 Loss: 5.421677123416554
5.421677123416554 {'over_corr': 18940, 'total_err': 952, 'true_corr': tensor(10, device='cuda:0')} {'over_corr': 701, 'total_err': 701, 'true_corr': 0}



train Epoch:8/35: 100%|██████████| 103/103 [00:28<00:00,  3.64it/s, loss=5.367]

Epoch 8 Loss: 5.299611906403476



dev Epoch:1/1: 100%|██████████| 44/44 [00:07<00:00,  5.91it/s, loss=5.447]

Epoch 1 Loss: 5.349788795817982
5.349788795817982 {'over_corr': 19511, 'total_err': 952, 'true_corr': tensor(0, device='cuda:0')} {'over_corr': 701, 'total_err': 701, 'true_corr': 0}



train Epoch:9/35: 100%|██████████| 103/103 [00:28<00:00,  3.64it/s, loss=4.999]

Epoch 9 Loss: 5.176911548503394



dev Epoch:1/1: 100%|██████████| 44/44 [00:07<00:00,  5.87it/s, loss=5.026]

Epoch 1 Loss: 5.127721320499074
5.127721320499074 {'over_corr': 19154, 'total_err': 952, 'true_corr': tensor(12, device='cuda:0')} {'over_corr': 701, 'total_err': 701, 'true_corr': 0}



train Epoch:10/35: 100%|██████████| 103/103 [00:28<00:00,  3.64it/s, loss=4.873]

Epoch 10 Loss: 4.986876534026804



dev Epoch:1/1: 100%|██████████| 44/44 [00:07<00:00,  5.90it/s, loss=4.883]

Epoch 1 Loss: 4.913593996654857
4.913593996654857 {'over_corr': 18402, 'total_err': 952, 'true_corr': tensor(18, device='cuda:0')} {'over_corr': 701, 'total_err': 701, 'true_corr': 0}



train Epoch:11/35: 100%|██████████| 103/103 [00:28<00:00,  3.64it/s, loss=4.797]

Epoch 11 Loss: 4.73960325555894



dev Epoch:1/1: 100%|██████████| 44/44 [00:07<00:00,  5.95it/s, loss=4.606]

Epoch 1 Loss: 4.660015127875588
4.660015127875588 {'over_corr': 17273, 'total_err': 952, 'true_corr': tensor(13, device='cuda:0')} {'over_corr': 701, 'total_err': 701, 'true_corr': 0}



train Epoch:12/35: 100%|██████████| 103/103 [00:28<00:00,  3.63it/s, loss=4.503]

Epoch 12 Loss: 4.507762348767623



dev Epoch:1/1: 100%|██████████| 44/44 [00:07<00:00,  5.88it/s, loss=4.495]

Epoch 1 Loss: 4.436561866240068
4.436561866240068 {'over_corr': 16739, 'total_err': 952, 'true_corr': tensor(23, device='cuda:0')} {'over_corr': 701, 'total_err': 701, 'true_corr': 0}



train Epoch:13/35: 100%|██████████| 103/103 [00:28<00:00,  3.63it/s, loss=4.289]

Epoch 13 Loss: 4.247316578059521



dev Epoch:1/1: 100%|██████████| 44/44 [00:07<00:00,  5.90it/s, loss=4.157]

Epoch 1 Loss: 4.145923630757765
4.145923630757765 {'over_corr': 15722, 'total_err': 952, 'true_corr': tensor(25, device='cuda:0')} {'over_corr': 701, 'total_err': 701, 'true_corr': 0}



train Epoch:14/35: 100%|██████████| 103/103 [00:28<00:00,  3.64it/s, loss=3.804]

Epoch 14 Loss: 3.955876051800922



dev Epoch:1/1: 100%|██████████| 44/44 [00:07<00:00,  5.90it/s, loss=3.711]

Epoch 1 Loss: 3.8640137260610405
3.8640137260610405 {'over_corr': 13986, 'total_err': 952, 'true_corr': tensor(39, device='cuda:0')} {'over_corr': 701, 'total_err': 701, 'true_corr': 0}



train Epoch:15/35: 100%|██████████| 103/103 [00:28<00:00,  3.62it/s, loss=3.849]

Epoch 15 Loss: 3.6614417242772372



dev Epoch:1/1: 100%|██████████| 44/44 [00:07<00:00,  5.85it/s, loss=3.783]

Epoch 1 Loss: 3.5700910850004717
3.5700910850004717 {'over_corr': 12332, 'total_err': 952, 'true_corr': tensor(41, device='cuda:0')} {'over_corr': 700, 'total_err': 701, 'true_corr': 0}



train Epoch:16/35: 100%|██████████| 103/103 [00:28<00:00,  3.64it/s, loss=3.305]

Epoch 16 Loss: 3.3476978899205774



dev Epoch:1/1: 100%|██████████| 44/44 [00:07<00:00,  5.85it/s, loss=2.955]

Epoch 1 Loss: 3.249348456209356
3.249348456209356 {'over_corr': 10833, 'total_err': 952, 'true_corr': tensor(61, device='cuda:0')} {'over_corr': 700, 'total_err': 701, 'true_corr': 0}



train Epoch:17/35: 100%|██████████| 103/103 [00:28<00:00,  3.61it/s, loss=3.310]

Epoch 17 Loss: 3.0431797874784006



dev Epoch:1/1: 100%|██████████| 44/44 [00:07<00:00,  5.88it/s, loss=2.924]

Epoch 1 Loss: 2.956985343586315
2.956985343586315 {'over_corr': 9588, 'total_err': 952, 'true_corr': tensor(84, device='cuda:0')} {'over_corr': 700, 'total_err': 701, 'true_corr': 0}



train Epoch:18/35: 100%|██████████| 103/103 [00:28<00:00,  3.62it/s, loss=2.592]

Epoch 18 Loss: 2.7457571168547696



dev Epoch:1/1: 100%|██████████| 44/44 [00:07<00:00,  5.95it/s, loss=2.774]

Epoch 1 Loss: 2.67686667767438
2.67686667767438 {'over_corr': 8469, 'total_err': 952, 'true_corr': tensor(102, device='cuda:0')} {'over_corr': 700, 'total_err': 701, 'true_corr': 0}



train Epoch:19/35: 100%|██████████| 103/103 [00:28<00:00,  3.64it/s, loss=2.262]

Epoch 19 Loss: 2.477522512084072



dev Epoch:1/1: 100%|██████████| 44/44 [00:07<00:00,  5.90it/s, loss=2.490]

Epoch 1 Loss: 2.427527492696589
2.427527492696589 {'over_corr': 7359, 'total_err': 952, 'true_corr': tensor(116, device='cuda:0')} {'over_corr': 700, 'total_err': 701, 'true_corr': 0}



train Epoch:20/35: 100%|██████████| 103/103 [00:28<00:00,  3.61it/s, loss=1.820]

Epoch 20 Loss: 2.2290879349106723



dev Epoch:1/1: 100%|██████████| 44/44 [00:07<00:00,  5.88it/s, loss=2.355]

Epoch 1 Loss: 2.197992506352338
2.197992506352338 {'over_corr': 6278, 'total_err': 952, 'true_corr': tensor(134, device='cuda:0')} {'over_corr': 695, 'total_err': 701, 'true_corr': 1}



train Epoch:21/35: 100%|██████████| 103/103 [00:28<00:00,  3.63it/s, loss=1.836]

Epoch 21 Loss: 2.009399608500953



dev Epoch:1/1: 100%|██████████| 44/44 [00:07<00:00,  5.81it/s, loss=1.855]

Epoch 1 Loss: 1.9957235130396755
1.9957235130396755 {'over_corr': 5338, 'total_err': 952, 'true_corr': tensor(143, device='cuda:0')} {'over_corr': 688, 'total_err': 701, 'true_corr': 1}



train Epoch:22/35: 100%|██████████| 103/103 [00:28<00:00,  3.63it/s, loss=1.720]

Epoch 22 Loss: 1.8122936302018398



dev Epoch:1/1: 100%|██████████| 44/44 [00:07<00:00,  5.84it/s, loss=1.738]

Epoch 1 Loss: 1.817115599458868
1.817115599458868 {'over_corr': 4692, 'total_err': 952, 'true_corr': tensor(165, device='cuda:0')} {'over_corr': 684, 'total_err': 701, 'true_corr': 2}



train Epoch:23/35: 100%|██████████| 103/103 [00:28<00:00,  3.63it/s, loss=1.233]

Epoch 23 Loss: 1.637212137574131



dev Epoch:1/1: 100%|██████████| 44/44 [00:07<00:00,  5.90it/s, loss=2.146]

Epoch 1 Loss: 1.6610346192663366
1.6610346192663366 {'over_corr': 4085, 'total_err': 952, 'true_corr': tensor(181, device='cuda:0')} {'over_corr': 673, 'total_err': 701, 'true_corr': 3}



train Epoch:24/35: 100%|██████████| 103/103 [00:28<00:00,  3.63it/s, loss=1.842]

Epoch 24 Loss: 1.4817193063717444



dev Epoch:1/1: 100%|██████████| 44/44 [00:07<00:00,  5.82it/s, loss=1.394]

Epoch 1 Loss: 1.5217626040632075
1.5217626040632075 {'over_corr': 3533, 'total_err': 952, 'true_corr': tensor(193, device='cuda:0')} {'over_corr': 660, 'total_err': 701, 'true_corr': 6}



train Epoch:25/35: 100%|██████████| 103/103 [00:28<00:00,  3.62it/s, loss=1.087]

Epoch 25 Loss: 1.339822124508978



dev Epoch:1/1: 100%|██████████| 44/44 [00:07<00:00,  5.80it/s, loss=1.741]

Epoch 1 Loss: 1.401739545843818
1.401739545843818 {'over_corr': 3131, 'total_err': 952, 'true_corr': tensor(195, device='cuda:0')} {'over_corr': 650, 'total_err': 701, 'true_corr': 10}



train Epoch:26/35: 100%|██████████| 103/103 [00:28<00:00,  3.63it/s, loss=1.811]

Epoch 26 Loss: 1.222990721174814



dev Epoch:1/1: 100%|██████████| 44/44 [00:07<00:00,  5.76it/s, loss=1.301]

Epoch 1 Loss: 1.2968732525001874
1.2968732525001874 {'over_corr': 2877, 'total_err': 952, 'true_corr': tensor(218, device='cuda:0')} {'over_corr': 640, 'total_err': 701, 'true_corr': 15}



train Epoch:27/35: 100%|██████████| 103/103 [00:28<00:00,  3.63it/s, loss=1.297]

Epoch 27 Loss: 1.1269217995763983



dev Epoch:1/1: 100%|██████████| 44/44 [00:07<00:00,  5.77it/s, loss=0.981]

Epoch 1 Loss: 1.207281145182523
1.207281145182523 {'over_corr': 2559, 'total_err': 952, 'true_corr': tensor(231, device='cuda:0')} {'over_corr': 618, 'total_err': 701, 'true_corr': 19}



train Epoch:28/35: 100%|██████████| 103/103 [00:28<00:00,  3.63it/s, loss=0.720]

Epoch 28 Loss: 1.014860153776928



dev Epoch:1/1: 100%|██████████| 44/44 [00:07<00:00,  5.75it/s, loss=1.236]

Epoch 1 Loss: 1.1263847283341668
1.1263847283341668 {'over_corr': 2287, 'total_err': 952, 'true_corr': tensor(240, device='cuda:0')} {'over_corr': 596, 'total_err': 701, 'true_corr': 26}



train Epoch:29/35: 100%|██████████| 103/103 [00:28<00:00,  3.62it/s, loss=0.544]

Epoch 29 Loss: 0.9301591012084368



dev Epoch:1/1: 100%|██████████| 44/44 [00:07<00:00,  5.68it/s, loss=1.046]

Epoch 1 Loss: 1.0544014681469311
1.0544014681469311 {'over_corr': 2029, 'total_err': 952, 'true_corr': tensor(251, device='cuda:0')} {'over_corr': 580, 'total_err': 701, 'true_corr': 30}



train Epoch:30/35: 100%|██████████| 103/103 [00:28<00:00,  3.63it/s, loss=0.747]

Epoch 30 Loss: 0.8519263336959394



dev Epoch:1/1: 100%|██████████| 44/44 [00:07<00:00,  5.72it/s, loss=1.212]

Epoch 1 Loss: 0.993639815937389
0.993639815937389 {'over_corr': 1817, 'total_err': 952, 'true_corr': tensor(269, device='cuda:0')} {'over_corr': 551, 'total_err': 701, 'true_corr': 41}



train Epoch:31/35: 100%|██████████| 103/103 [00:28<00:00,  3.63it/s, loss=0.501]

Epoch 31 Loss: 0.7795204705405003



dev Epoch:1/1: 100%|██████████| 44/44 [00:07<00:00,  5.68it/s, loss=0.844]

Epoch 1 Loss: 0.9328018562360243
0.9328018562360243 {'over_corr': 1696, 'total_err': 952, 'true_corr': tensor(274, device='cuda:0')} {'over_corr': 533, 'total_err': 701, 'true_corr': 45}



train Epoch:32/35: 100%|██████████| 103/103 [00:28<00:00,  3.63it/s, loss=0.451]

Epoch 32 Loss: 0.7157967964422356



dev Epoch:1/1: 100%|██████████| 44/44 [00:07<00:00,  5.63it/s, loss=0.963]

Epoch 1 Loss: 0.8869550783525814
0.8869550783525814 {'over_corr': 1557, 'total_err': 952, 'true_corr': tensor(288, device='cuda:0')} {'over_corr': 508, 'total_err': 701, 'true_corr': 60}



train Epoch:33/35: 100%|██████████| 103/103 [00:28<00:00,  3.63it/s, loss=0.545]

Epoch 33 Loss: 0.6573651927767448



dev Epoch:1/1: 100%|██████████| 44/44 [00:07<00:00,  5.63it/s, loss=0.747]

Epoch 1 Loss: 0.8383112007921393
0.8383112007921393 {'over_corr': 1442, 'total_err': 952, 'true_corr': tensor(285, device='cuda:0')} {'over_corr': 493, 'total_err': 701, 'true_corr': 67}



train Epoch:34/35: 100%|██████████| 103/103 [00:28<00:00,  3.63it/s, loss=0.199]

Epoch 34 Loss: 0.6031734813185572



dev Epoch:1/1: 100%|██████████| 44/44 [00:07<00:00,  5.61it/s, loss=1.125]

Epoch 1 Loss: 0.8023549792441454
0.8023549792441454 {'over_corr': 1373, 'total_err': 952, 'true_corr': tensor(302, device='cuda:0')} {'over_corr': 482, 'total_err': 701, 'true_corr': 73}



train Epoch:35/35: 100%|██████████| 103/103 [00:28<00:00,  3.62it/s, loss=0.541]

Epoch 35 Loss: 0.5563845376945237



dev Epoch:1/1: 100%|██████████| 44/44 [00:07<00:00,  5.58it/s, loss=0.568]

Epoch 1 Loss: 0.7631391408768567
0.7631391408768567 {'over_corr': 1289, 'total_err': 952, 'true_corr': tensor(297, device='cuda:0')} {'over_corr': 468, 'total_err': 701, 'true_corr': 73}



dev Epoch:1/1: 100%|██████████| 108/108 [00:37<00:00,  2.91it/s, loss=0.766]

Epoch 1 Loss: 0.9268376799645247
0.9268376799645247 {'over_corr': 14167, 'total_err': 5278, 'true_corr': tensor(1205, device='cuda:0')} {'over_corr': 3004, 'total_err': 3436, 'true_corr': 107}





In [8]:
encoder_model = BertModel.from_pretrained(checkpoint)

nhead = 2
num_encoder_layers = 2
num_decoder_layers = 2

decoder_model = DecoderTransformer(
    input_size=encoder_model.config.hidden_size,
    nhead=nhead,
    num_encoder_layers=num_encoder_layers,
    num_decoder_layers=num_decoder_layers,
)

model = CombineBertModel(encoder_model=encoder_model, decoder_model=decoder_model)

In [9]:
trainer.train(
    dataloader=train_data_loader, epoch=epochs, test_dataloader=dev_data_loader
)
trainer.test(test_data_loader)

train Epoch:1/35: 100%|██████████| 103/103 [00:28<00:00,  3.63it/s, loss=0.392]

Epoch 1 Loss: 0.512317058530826



dev Epoch:1/1: 100%|██████████| 44/44 [00:08<00:00,  5.47it/s, loss=0.735]

Epoch 1 Loss: 0.7329817278818651
0.7329817278818651 {'over_corr': 1204, 'total_err': 952, 'true_corr': tensor(312, device='cuda:0')} {'over_corr': 459, 'total_err': 701, 'true_corr': 84}



train Epoch:2/35: 100%|██████████| 103/103 [00:28<00:00,  3.62it/s, loss=0.454]

Epoch 2 Loss: 0.4734466310264995



dev Epoch:1/1: 100%|██████████| 44/44 [00:07<00:00,  5.51it/s, loss=0.646]

Epoch 1 Loss: 0.7067901539531621
0.7067901539531621 {'over_corr': 1112, 'total_err': 952, 'true_corr': tensor(300, device='cuda:0')} {'over_corr': 442, 'total_err': 701, 'true_corr': 86}



train Epoch:3/35: 100%|██████████| 103/103 [00:28<00:00,  3.62it/s, loss=0.434]

Epoch 3 Loss: 0.4360063301706777



dev Epoch:1/1: 100%|██████████| 44/44 [00:08<00:00,  5.44it/s, loss=0.640]

Epoch 1 Loss: 0.6767369637435133
0.6767369637435133 {'over_corr': 1033, 'total_err': 952, 'true_corr': tensor(310, device='cuda:0')} {'over_corr': 421, 'total_err': 701, 'true_corr': 98}



train Epoch:4/35: 100%|██████████| 103/103 [00:28<00:00,  3.62it/s, loss=0.326]

Epoch 4 Loss: 0.40411875867149205



dev Epoch:1/1: 100%|██████████| 44/44 [00:08<00:00,  5.41it/s, loss=0.468]

Epoch 1 Loss: 0.659624996510419
0.659624996510419 {'over_corr': 1003, 'total_err': 952, 'true_corr': tensor(314, device='cuda:0')} {'over_corr': 405, 'total_err': 701, 'true_corr': 101}



train Epoch:5/35: 100%|██████████| 103/103 [00:28<00:00,  3.63it/s, loss=0.256]

Epoch 5 Loss: 0.3751785225659898



dev Epoch:1/1: 100%|██████████| 44/44 [00:08<00:00,  5.42it/s, loss=0.610]

Epoch 1 Loss: 0.633555830879645
0.633555830879645 {'over_corr': 905, 'total_err': 952, 'true_corr': tensor(314, device='cuda:0')} {'over_corr': 383, 'total_err': 701, 'true_corr': 111}



train Epoch:6/35: 100%|██████████| 103/103 [00:28<00:00,  3.62it/s, loss=0.471]

Epoch 6 Loss: 0.34918231075828515



dev Epoch:1/1: 100%|██████████| 44/44 [00:08<00:00,  5.37it/s, loss=0.827]

Epoch 1 Loss: 0.6147716512734239
0.6147716512734239 {'over_corr': 869, 'total_err': 952, 'true_corr': tensor(319, device='cuda:0')} {'over_corr': 372, 'total_err': 701, 'true_corr': 113}



train Epoch:7/35: 100%|██████████| 103/103 [00:28<00:00,  3.62it/s, loss=0.291]

Epoch 7 Loss: 0.31950213912042597



dev Epoch:1/1: 100%|██████████| 44/44 [00:08<00:00,  5.36it/s, loss=0.507]

Epoch 1 Loss: 0.5991234962235797
0.5991234962235797 {'over_corr': 811, 'total_err': 952, 'true_corr': tensor(319, device='cuda:0')} {'over_corr': 362, 'total_err': 701, 'true_corr': 119}



train Epoch:8/35: 100%|██████████| 103/103 [00:28<00:00,  3.62it/s, loss=0.227]

Epoch 8 Loss: 0.2975013040512511



dev Epoch:1/1: 100%|██████████| 44/44 [00:08<00:00,  5.27it/s, loss=0.472]

Epoch 1 Loss: 0.5883396593007174
0.5883396593007174 {'over_corr': 754, 'total_err': 952, 'true_corr': tensor(311, device='cuda:0')} {'over_corr': 344, 'total_err': 701, 'true_corr': 126}



train Epoch:9/35: 100%|██████████| 103/103 [00:28<00:00,  3.63it/s, loss=0.133]

Epoch 9 Loss: 0.27505633307313454



dev Epoch:1/1: 100%|██████████| 44/44 [00:08<00:00,  5.26it/s, loss=0.528]

Epoch 1 Loss: 0.5746217180382122
0.5746217180382122 {'over_corr': 745, 'total_err': 952, 'true_corr': tensor(313, device='cuda:0')} {'over_corr': 342, 'total_err': 701, 'true_corr': 122}



train Epoch:10/35: 100%|██████████| 103/103 [00:28<00:00,  3.62it/s, loss=0.108]

Epoch 10 Loss: 0.25786475070471904



dev Epoch:1/1: 100%|██████████| 44/44 [00:08<00:00,  5.33it/s, loss=0.852]

Epoch 1 Loss: 0.5628205260092561
0.5628205260092561 {'over_corr': 671, 'total_err': 952, 'true_corr': tensor(298, device='cuda:0')} {'over_corr': 313, 'total_err': 701, 'true_corr': 125}



train Epoch:11/35: 100%|██████████| 103/103 [00:28<00:00,  3.61it/s, loss=0.099]

Epoch 11 Loss: 0.24046391031695802



dev Epoch:1/1: 100%|██████████| 44/44 [00:08<00:00,  5.27it/s, loss=0.755]

Epoch 1 Loss: 0.5451487299393524
0.5451487299393524 {'over_corr': 671, 'total_err': 952, 'true_corr': tensor(309, device='cuda:0')} {'over_corr': 316, 'total_err': 701, 'true_corr': 127}



train Epoch:12/35: 100%|██████████| 103/103 [00:28<00:00,  3.61it/s, loss=0.102]

Epoch 12 Loss: 0.2228630452769474



dev Epoch:1/1: 100%|██████████| 44/44 [00:08<00:00,  5.28it/s, loss=0.687]

Epoch 1 Loss: 0.5381410758603703
0.5381410758603703 {'over_corr': 641, 'total_err': 952, 'true_corr': tensor(313, device='cuda:0')} {'over_corr': 308, 'total_err': 701, 'true_corr': 135}



train Epoch:13/35: 100%|██████████| 103/103 [00:28<00:00,  3.62it/s, loss=0.269]

Epoch 13 Loss: 0.2067081841450293



dev Epoch:1/1: 100%|██████████| 44/44 [00:08<00:00,  5.30it/s, loss=0.470]

Epoch 1 Loss: 0.5248653739690781
0.5248653739690781 {'over_corr': 606, 'total_err': 952, 'true_corr': tensor(319, device='cuda:0')} {'over_corr': 298, 'total_err': 701, 'true_corr': 134}



train Epoch:14/35: 100%|██████████| 103/103 [00:28<00:00,  3.62it/s, loss=0.179]

Epoch 14 Loss: 0.192388812532124



dev Epoch:1/1: 100%|██████████| 44/44 [00:08<00:00,  5.28it/s, loss=0.532]

Epoch 1 Loss: 0.5133155564015562
0.5133155564015562 {'over_corr': 574, 'total_err': 952, 'true_corr': tensor(312, device='cuda:0')} {'over_corr': 291, 'total_err': 701, 'true_corr': 134}



train Epoch:15/35: 100%|██████████| 103/103 [00:28<00:00,  3.62it/s, loss=0.117]

Epoch 15 Loss: 0.1784905308079951



dev Epoch:1/1: 100%|██████████| 44/44 [00:08<00:00,  5.25it/s, loss=0.571]

Epoch 1 Loss: 0.5086383650248701
0.5086383650248701 {'over_corr': 548, 'total_err': 952, 'true_corr': tensor(313, device='cuda:0')} {'over_corr': 285, 'total_err': 701, 'true_corr': 135}



train Epoch:16/35: 100%|██████████| 103/103 [00:28<00:00,  3.60it/s, loss=0.334]

Epoch 16 Loss: 0.16899891732965858



dev Epoch:1/1: 100%|██████████| 44/44 [00:08<00:00,  5.27it/s, loss=0.215]

Epoch 1 Loss: 0.4952132288705219
0.4952132288705219 {'over_corr': 525, 'total_err': 952, 'true_corr': tensor(318, device='cuda:0')} {'over_corr': 270, 'total_err': 701, 'true_corr': 142}



train Epoch:17/35: 100%|██████████| 103/103 [00:28<00:00,  3.63it/s, loss=0.095]

Epoch 17 Loss: 0.15495184186882185



dev Epoch:1/1: 100%|██████████| 44/44 [00:08<00:00,  5.26it/s, loss=0.416]

Epoch 1 Loss: 0.4882486868988384
0.4882486868988384 {'over_corr': 502, 'total_err': 952, 'true_corr': tensor(324, device='cuda:0')} {'over_corr': 261, 'total_err': 701, 'true_corr': 146}



train Epoch:18/35: 100%|██████████| 103/103 [00:28<00:00,  3.63it/s, loss=0.180]

Epoch 18 Loss: 0.1452971991260075



dev Epoch:1/1: 100%|██████████| 44/44 [00:08<00:00,  5.26it/s, loss=0.501]

Epoch 1 Loss: 0.4898818995464932
0.4898818995464932 {'over_corr': 505, 'total_err': 952, 'true_corr': tensor(319, device='cuda:0')} {'over_corr': 265, 'total_err': 701, 'true_corr': 144}



train Epoch:19/35: 100%|██████████| 103/103 [00:28<00:00,  3.62it/s, loss=0.046]

Epoch 19 Loss: 0.13404354919652336



dev Epoch:1/1: 100%|██████████| 44/44 [00:08<00:00,  5.26it/s, loss=0.371]

Epoch 1 Loss: 0.4797569398175586
0.4797569398175586 {'over_corr': 478, 'total_err': 952, 'true_corr': tensor(317, device='cuda:0')} {'over_corr': 248, 'total_err': 701, 'true_corr': 147}



train Epoch:20/35: 100%|██████████| 103/103 [00:28<00:00,  3.62it/s, loss=0.136]

Epoch 20 Loss: 0.12552866361384252



dev Epoch:1/1: 100%|██████████| 44/44 [00:08<00:00,  5.22it/s, loss=0.377]

Epoch 1 Loss: 0.4720387580719861
0.4720387580719861 {'over_corr': 459, 'total_err': 952, 'true_corr': tensor(324, device='cuda:0')} {'over_corr': 241, 'total_err': 701, 'true_corr': 152}



train Epoch:21/35: 100%|██████████| 103/103 [00:28<00:00,  3.64it/s, loss=0.082]

Epoch 21 Loss: 0.11583360215847932



dev Epoch:1/1: 100%|██████████| 44/44 [00:08<00:00,  5.28it/s, loss=0.318]

Epoch 1 Loss: 0.4658196080814708
0.4658196080814708 {'over_corr': 432, 'total_err': 952, 'true_corr': tensor(325, device='cuda:0')} {'over_corr': 233, 'total_err': 701, 'true_corr': 156}



train Epoch:22/35: 100%|██████████| 103/103 [00:28<00:00,  3.64it/s, loss=0.079]

Epoch 22 Loss: 0.10729365050792694



dev Epoch:1/1: 100%|██████████| 44/44 [00:08<00:00,  5.24it/s, loss=0.387]

Epoch 1 Loss: 0.4640892672945153
0.4640892672945153 {'over_corr': 435, 'total_err': 952, 'true_corr': tensor(324, device='cuda:0')} {'over_corr': 233, 'total_err': 701, 'true_corr': 159}



train Epoch:23/35: 100%|██████████| 103/103 [00:28<00:00,  3.64it/s, loss=0.082]

Epoch 23 Loss: 0.10086766460422174



dev Epoch:1/1: 100%|██████████| 44/44 [00:08<00:00,  5.28it/s, loss=0.242]

Epoch 1 Loss: 0.4655061563984914
0.4655061563984914 {'over_corr': 417, 'total_err': 952, 'true_corr': tensor(313, device='cuda:0')} {'over_corr': 225, 'total_err': 701, 'true_corr': 157}



train Epoch:24/35: 100%|██████████| 103/103 [00:28<00:00,  3.63it/s, loss=0.068]

Epoch 24 Loss: 0.09493459073953259



dev Epoch:1/1: 100%|██████████| 44/44 [00:08<00:00,  5.25it/s, loss=0.226]

Epoch 1 Loss: 0.45147641985253856
0.45147641985253856 {'over_corr': 425, 'total_err': 952, 'true_corr': tensor(317, device='cuda:0')} {'over_corr': 232, 'total_err': 701, 'true_corr': 155}



train Epoch:25/35: 100%|██████████| 103/103 [00:28<00:00,  3.63it/s, loss=0.027]

Epoch 25 Loss: 0.08564309873482556



dev Epoch:1/1: 100%|██████████| 44/44 [00:08<00:00,  5.32it/s, loss=0.433]

Epoch 1 Loss: 0.4554080387408083
0.4554080387408083 {'over_corr': 398, 'total_err': 952, 'true_corr': tensor(318, device='cuda:0')} {'over_corr': 220, 'total_err': 701, 'true_corr': 157}



train Epoch:26/35: 100%|██████████| 103/103 [00:28<00:00,  3.63it/s, loss=0.057]

Epoch 26 Loss: 0.0780988953356604



dev Epoch:1/1: 100%|██████████| 44/44 [00:08<00:00,  5.27it/s, loss=0.402]

Epoch 1 Loss: 0.4539190093902024
0.4539190093902024 {'over_corr': 390, 'total_err': 952, 'true_corr': tensor(327, device='cuda:0')} {'over_corr': 217, 'total_err': 701, 'true_corr': 166}



train Epoch:27/35: 100%|██████████| 103/103 [00:28<00:00,  3.63it/s, loss=0.022]

Epoch 27 Loss: 0.0710611923335536



dev Epoch:1/1: 100%|██████████| 44/44 [00:08<00:00,  5.26it/s, loss=0.610]

Epoch 1 Loss: 0.4509815698997541
0.4509815698997541 {'over_corr': 380, 'total_err': 952, 'true_corr': tensor(313, device='cuda:0')} {'over_corr': 214, 'total_err': 701, 'true_corr': 162}



train Epoch:28/35: 100%|██████████| 103/103 [00:28<00:00,  3.63it/s, loss=0.045]

Epoch 28 Loss: 0.06644001756839961



dev Epoch:1/1: 100%|██████████| 44/44 [00:08<00:00,  5.21it/s, loss=0.672]

Epoch 1 Loss: 0.4576866894283078
0.4576866894283078 {'over_corr': 368, 'total_err': 952, 'true_corr': tensor(306, device='cuda:0')} {'over_corr': 204, 'total_err': 701, 'true_corr': 164}



train Epoch:29/35: 100%|██████████| 103/103 [00:28<00:00,  3.64it/s, loss=0.045]

Epoch 29 Loss: 0.06275829759616296



dev Epoch:1/1: 100%|██████████| 44/44 [00:08<00:00,  5.24it/s, loss=0.336]

Epoch 1 Loss: 0.44594311307777057
0.44594311307777057 {'over_corr': 368, 'total_err': 952, 'true_corr': tensor(319, device='cuda:0')} {'over_corr': 208, 'total_err': 701, 'true_corr': 166}



train Epoch:30/35: 100%|██████████| 103/103 [00:28<00:00,  3.64it/s, loss=0.070]

Epoch 30 Loss: 0.05895095958727077



dev Epoch:1/1: 100%|██████████| 44/44 [00:08<00:00,  5.31it/s, loss=0.331]

Epoch 1 Loss: 0.44805323400280694
0.44805323400280694 {'over_corr': 351, 'total_err': 952, 'true_corr': tensor(313, device='cuda:0')} {'over_corr': 206, 'total_err': 701, 'true_corr': 163}



train Epoch:31/35: 100%|██████████| 103/103 [00:28<00:00,  3.64it/s, loss=0.040]

Epoch 31 Loss: 0.05178072961788733



dev Epoch:1/1: 100%|██████████| 44/44 [00:08<00:00,  5.28it/s, loss=0.629]

Epoch 1 Loss: 0.4428539831529964
0.4428539831529964 {'over_corr': 356, 'total_err': 952, 'true_corr': tensor(317, device='cuda:0')} {'over_corr': 208, 'total_err': 701, 'true_corr': 162}



train Epoch:32/35: 100%|██████████| 103/103 [00:28<00:00,  3.63it/s, loss=0.045]

Epoch 32 Loss: 0.04820322122388673



dev Epoch:1/1: 100%|██████████| 44/44 [00:08<00:00,  5.18it/s, loss=0.814]

Epoch 1 Loss: 0.447786728089506
0.447786728089506 {'over_corr': 348, 'total_err': 952, 'true_corr': tensor(319, device='cuda:0')} {'over_corr': 203, 'total_err': 701, 'true_corr': 164}



train Epoch:33/35: 100%|██████████| 103/103 [00:28<00:00,  3.62it/s, loss=0.049]

Epoch 33 Loss: 0.04474195866908842



dev Epoch:1/1: 100%|██████████| 44/44 [00:08<00:00,  5.22it/s, loss=0.409]

Epoch 1 Loss: 0.44494747844609345
0.44494747844609345 {'over_corr': 341, 'total_err': 952, 'true_corr': tensor(301, device='cuda:0')} {'over_corr': 198, 'total_err': 701, 'true_corr': 160}



train Epoch:34/35: 100%|██████████| 103/103 [00:28<00:00,  3.62it/s, loss=0.019]

Epoch 34 Loss: 0.04192122569289601



dev Epoch:1/1: 100%|██████████| 44/44 [00:08<00:00,  5.18it/s, loss=0.332]

Epoch 1 Loss: 0.4453751810572364
0.4453751810572364 {'over_corr': 326, 'total_err': 952, 'true_corr': tensor(313, device='cuda:0')} {'over_corr': 193, 'total_err': 701, 'true_corr': 168}



train Epoch:35/35: 100%|██████████| 103/103 [00:28<00:00,  3.62it/s, loss=0.087]

Epoch 35 Loss: 0.04083291152763425



dev Epoch:1/1: 100%|██████████| 44/44 [00:08<00:00,  5.19it/s, loss=0.480]

Epoch 1 Loss: 0.44386737048625946
0.44386737048625946 {'over_corr': 332, 'total_err': 952, 'true_corr': tensor(309, device='cuda:0')} {'over_corr': 201, 'total_err': 701, 'true_corr': 167}



dev Epoch:1/1: 100%|██████████| 108/108 [00:39<00:00,  2.75it/s, loss=0.707]

Epoch 1 Loss: 0.5030088256354686
0.5030088256354686 {'over_corr': 3781, 'total_err': 5278, 'true_corr': tensor(1238, device='cuda:0')} {'over_corr': 1692, 'total_err': 3436, 'true_corr': 384}



