In [1]:
# common packages
import pandas as pd
import time

# # DL framework
import torch
from torchtext import data

from attractivedata import AttractiveData
from trainer import AttractiveTrainer

## Load and prepare data

In [2]:
# Input CSV locations and the name of the pretrained embedding set handed to AttractiveData.
train_file, test_file = 'data/train.csv', 'data/test.csv'
pretrained_file = 'glove.42B.300d'

# Data-pipeline settings; the training cell further down adds more keys to this same dict.
config = dict(max_size=64, min_freq=5, batch_size=64)

In [3]:
# Build the data wrapper from the CSV paths, the pretrained-embedding name and `config`.
# NOTE(review): this rebinds the imported name `AttractiveData` to the object it returns, so
# re-running this cell without re-running the import cell will presumably fail (the instance
# would be called instead of the class) — consider a distinct variable name.
AttractiveData = AttractiveData(train_file, test_file, pretrained_file, config)



In [4]:
# Sanity check: print the training example at index 3 next to the test example at index 3.
# NOTE(review): the loop walks the whole test set just to reach i == 3, and `i` / `sentence`
# stay bound at module level afterwards (to the last test index / last test example).
for i, sentence in enumerate(AttractiveData.test_data):
    if i == 3:
        print(vars(AttractiveData.train_data[i]), vars(sentence))

{'Headline': ['Sorry', ',', 'i', 'spent', 'it', 'on', 'myself', '!', 'Harvey', 'Nichols', "'", 'hilarious', 'Christmas', 'advert', 'sees', 'people', 'treating', 'themselves', 'instead', 'of', 'others'], 'Category': 'femail', 'Label': '3.333333333333333'} {'Headline': ['Three', 'police', 'officers', 'accused', 'of', 'stealing', '?', '?', '30k', 'during', 'raid', 'on', 'criminal'], 'Category': 'news'}


In [5]:
# Number of entries in the category vocabulary's `freqs` mapping.
len(AttractiveData.CATEGORIES_LABEL.vocab.freqs)

18

In [6]:
# Length (in tokens) of the longest headline in the test set.
# Iterates the dataset directly instead of `range(i)`: the old loop reused an `i` left over
# from an earlier cell (hidden state) and, because range() excludes its end point, never
# looked at the last example. `default=0` keeps the result 0 for an empty dataset.
max_len = max((len(example.Headline) for example in AttractiveData.test_data), default=0)
max_len

31

## Train the model

In [7]:
# NOTE(review): `num_workers` is not read by any cell visible in this notebook.
num_workers = 10

# Training / model hyper-parameters, merged into the shared `config` dict in a single call.
# Keys are listed in the same order the one-by-one assignments used, so the resulting dict
# is identical. `timestr` is captured once, when this cell runs.
config.update({
    'timestr': time.strftime("%Y%m%d-%H%M%S"),
    'save_name': 'AttractiveNet',
    'input_dim': len(AttractiveData.TEXT.vocab),
    'embedding_dim': 300,
    'category_dim': len(AttractiveData.CATEGORIES_LABEL.vocab),
    'category_embedding_dim': 16,
    'hidden_dim': 256,
    'output_dim': 1,
    'log_steps': 10,
    'epochs': 100,
    # Separate learning rates, keyed by model part.
    'lr': {'encoder': 1e-5, 'embedding': 1e-5, 'linear': 1e-4},
    'num_layers': 3,
    'nhead': 4,
    'dropout': 0.1,
})

# Pretrained vectors attached to the text vocabulary; handed to the trainer in a later cell.
pretrained_embeddings = AttractiveData.TEXT.vocab.vectors
print(pretrained_embeddings.shape)

torch.Size([1519, 300])


In [8]:
# max(AttractiveData.df_train.Headline.str.len()), max(AttractiveData.df_test.Headline.str.len())

In [9]:
# Build the trainer from the config, the device and train loader exposed by the data object,
# and the pretrained embedding matrix.
# NOTE(review): this rebinds the imported name `AttractiveTrainer` to the object it returns,
# so re-running this cell without re-running the import cell will presumably fail —
# consider a distinct variable name.
AttractiveTrainer = AttractiveTrainer(config, AttractiveData.device, AttractiveData.trainloader, pretrained_embeddings)

In [10]:
# Display the model held by the trainer (the cell output is its module repr).
AttractiveTrainer.model

TransformerModel(
  (embedding): AttractiveEmbedding(
    (token): TokenEmbedding(1519, 300, padding_idx=1)
    (position): PositionalEmbedding()
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (category_embedding): CategoryEmbedding(18, 16, padding_idx=0)
  (encoder): TransformerEncoder(
    (layers): ModuleList(
      (0): TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): _LinearWithBias(in_features=300, out_features=300, bias=True)
        )
        (linear1): Linear(in_features=300, out_features=256, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
        (linear2): Linear(in_features=256, out_features=300, bias=True)
        (norm1): LayerNorm((300,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((300,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.1, inplace=False)
        (dropout2): Dropout(p=0.1, inplace=False)
      )
      (1): TransformerEncoderLayer(
        (self_attn): MultiheadA

In [11]:
# Run training. The recorded output shows one progress bar and one average training loss
# per epoch, for epochs 0 through 99.
AttractiveTrainer.train()

Epoch 0


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.94it/s]



EP_train | avg_loss: 2.1264512203633785 |
Epoch 1


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.34it/s]



EP_train | avg_loss: 0.716330318711698 |
Epoch 2


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.77it/s]



EP_train | avg_loss: 0.6008199080824852 |
Epoch 3


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.85it/s]



EP_train | avg_loss: 0.6064025377854705 |
Epoch 4


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.90it/s]



EP_train | avg_loss: 0.6161546502262354 |
Epoch 5


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 165.84it/s]



EP_train | avg_loss: 0.6159679424017668 |
Epoch 6


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 163.33it/s]



EP_train | avg_loss: 0.6158104473724961 |
Epoch 7


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.20it/s]



EP_train | avg_loss: 0.6141456998884678 |
Epoch 8


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.97it/s]



EP_train | avg_loss: 0.6091969050467014 |
Epoch 9


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 165.29it/s]



EP_train | avg_loss: 0.6109592830762267 |
Epoch 10


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.93it/s]



EP_train | avg_loss: 0.6045540934428573 |
Epoch 11


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.59it/s]



EP_train | avg_loss: 0.604931777343154 |
Epoch 12


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 160.54it/s]



EP_train | avg_loss: 0.6013987036421895 |
Epoch 13


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.98it/s]



EP_train | avg_loss: 0.600404717028141 |
Epoch 14


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.84it/s]



EP_train | avg_loss: 0.6014129091054201 |
Epoch 15


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.05it/s]



EP_train | avg_loss: 0.599339815787971 |
Epoch 16


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 163.92it/s]



EP_train | avg_loss: 0.595892172306776 |
Epoch 17


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 165.39it/s]



EP_train | avg_loss: 0.5962485279887915 |
Epoch 18


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.79it/s]



EP_train | avg_loss: 0.5950515801087022 |
Epoch 19


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.76it/s]



EP_train | avg_loss: 0.5878775445744395 |
Epoch 20


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.69it/s]



EP_train | avg_loss: 0.5899443635717034 |
Epoch 21


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 163.71it/s]



EP_train | avg_loss: 0.5886870073154569 |
Epoch 22


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.33it/s]



EP_train | avg_loss: 0.5892290715128183 |
Epoch 23


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 163.95it/s]



EP_train | avg_loss: 0.5925715193152428 |
Epoch 24


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.62it/s]



EP_train | avg_loss: 0.5893661519512534 |
Epoch 25


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 165.05it/s]



EP_train | avg_loss: 0.5886210696771741 |
Epoch 26


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.75it/s]



EP_train | avg_loss: 0.5874509206041694 |
Epoch 27


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.83it/s]



EP_train | avg_loss: 0.5891991080716252 |
Epoch 28


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.19it/s]



EP_train | avg_loss: 0.5850382158532739 |
Epoch 29


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 165.01it/s]



EP_train | avg_loss: 0.5855081407353282 |
Epoch 30


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.87it/s]



EP_train | avg_loss: 0.5874772379174829 |
Epoch 31


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.81it/s]



EP_train | avg_loss: 0.5849552983418107 |
Epoch 32


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.33it/s]



EP_train | avg_loss: 0.5845329994335771 |
Epoch 33


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 165.05it/s]



EP_train | avg_loss: 0.585008162073791 |
Epoch 34


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.81it/s]



EP_train | avg_loss: 0.5822856863960624 |
Epoch 35


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.07it/s]



EP_train | avg_loss: 0.5830546990036964 |
Epoch 36


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 165.05it/s]



EP_train | avg_loss: 0.5841121198609471 |
Epoch 37


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 163.55it/s]



EP_train | avg_loss: 0.5845642164349556 |
Epoch 38


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.92it/s]



EP_train | avg_loss: 0.5830715419724584 |
Epoch 39


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.71it/s]



EP_train | avg_loss: 0.5804405333474278 |
Epoch 40


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 163.97it/s]



EP_train | avg_loss: 0.5784589648246765 |
Epoch 41


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.62it/s]



EP_train | avg_loss: 0.5809566462412477 |
Epoch 42


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.58it/s]



EP_train | avg_loss: 0.582093246281147 |
Epoch 43


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.45it/s]



EP_train | avg_loss: 0.5806047413498163 |
Epoch 44


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.96it/s]



EP_train | avg_loss: 0.5791099118068814 |
Epoch 45


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.77it/s]



EP_train | avg_loss: 0.5777712017297745 |
Epoch 46


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.18it/s]



EP_train | avg_loss: 0.5775939058512449 |
Epoch 47


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.60it/s]



EP_train | avg_loss: 0.5782189890742302 |
Epoch 48


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.55it/s]



EP_train | avg_loss: 0.5750260930508375 |
Epoch 49


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 162.79it/s]



EP_train | avg_loss: 0.5771268177777529 |
Epoch 50


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.10it/s]



EP_train | avg_loss: 0.5751514062285423 |
Epoch 51


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.20it/s]



EP_train | avg_loss: 0.5750976456329226 |
Epoch 52


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 163.03it/s]



EP_train | avg_loss: 0.5764242419973016 |
Epoch 53


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 163.70it/s]



EP_train | avg_loss: 0.5793727291747928 |
Epoch 54


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 163.94it/s]



EP_train | avg_loss: 0.5750351464375854 |
Epoch 55


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 163.41it/s]



EP_train | avg_loss: 0.5753482589498162 |
Epoch 56


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 163.81it/s]



EP_train | avg_loss: 0.5778508931398392 |
Epoch 57


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.17it/s]



EP_train | avg_loss: 0.5756341544911265 |
Epoch 58


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 163.68it/s]



EP_train | avg_loss: 0.574397299438715 |
Epoch 59


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 163.91it/s]



EP_train | avg_loss: 0.5739315850660205 |
Epoch 60


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 163.30it/s]



EP_train | avg_loss: 0.5731045100837946 |
Epoch 61


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 163.90it/s]



EP_train | avg_loss: 0.5755252428352833 |
Epoch 62


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 163.88it/s]



EP_train | avg_loss: 0.574599769897759 |
Epoch 63


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 163.57it/s]



EP_train | avg_loss: 0.5766490399837494 |
Epoch 64


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 163.83it/s]



EP_train | avg_loss: 0.5748791797086596 |
Epoch 65


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.06it/s]



EP_train | avg_loss: 0.5761355804279447 |
Epoch 66


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 163.79it/s]



EP_train | avg_loss: 0.5743416184559464 |
Epoch 67


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 163.97it/s]



EP_train | avg_loss: 0.5754066351801157 |
Epoch 68


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.08it/s]



EP_train | avg_loss: 0.5750394565984607 |
Epoch 69


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.01it/s]



EP_train | avg_loss: 0.572807121090591 |
Epoch 70


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.14it/s]



EP_train | avg_loss: 0.5729521056637168 |
Epoch 71


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 163.62it/s]



EP_train | avg_loss: 0.5724949417635798 |
Epoch 72


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 163.69it/s]



EP_train | avg_loss: 0.5724207805469632 |
Epoch 73


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.30it/s]



EP_train | avg_loss: 0.5713195484131575 |
Epoch 74


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.44it/s]



EP_train | avg_loss: 0.5729529215022922 |
Epoch 75


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.29it/s]



EP_train | avg_loss: 0.5732952002435923 |
Epoch 76


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 163.59it/s]



EP_train | avg_loss: 0.5713845044374466 |
Epoch 77


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.15it/s]



EP_train | avg_loss: 0.5730637460947037 |
Epoch 78


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.44it/s]



EP_train | avg_loss: 0.5716070476919413 |
Epoch 79


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 163.64it/s]



EP_train | avg_loss: 0.5734401484951377 |
Epoch 80


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.67it/s]



EP_train | avg_loss: 0.572718258947134 |
Epoch 81


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 163.95it/s]



EP_train | avg_loss: 0.571412930265069 |
Epoch 82


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 163.71it/s]



EP_train | avg_loss: 0.5703428564593196 |
Epoch 83


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.09it/s]



EP_train | avg_loss: 0.5687410542741418 |
Epoch 84


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.22it/s]



EP_train | avg_loss: 0.5716075720265508 |
Epoch 85


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 163.53it/s]



EP_train | avg_loss: 0.5707149710506201 |
Epoch 86


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.41it/s]



EP_train | avg_loss: 0.5691952649503946 |
Epoch 87


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 163.94it/s]



EP_train | avg_loss: 0.5697351787239313 |
Epoch 88


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.05it/s]



EP_train | avg_loss: 0.5716681573539972 |
Epoch 89


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 163.76it/s]



EP_train | avg_loss: 0.5717302160337567 |
Epoch 90


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.34it/s]



EP_train | avg_loss: 0.5686133923009038 |
Epoch 91


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 163.69it/s]



EP_train | avg_loss: 0.5691460128873587 |
Epoch 92


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.17it/s]



EP_train | avg_loss: 0.567839352414012 |
Epoch 93


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.09it/s]



EP_train | avg_loss: 0.5696707936003804 |
Epoch 94


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 163.90it/s]



EP_train | avg_loss: 0.5691296132281423 |
Epoch 95


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.54it/s]



EP_train | avg_loss: 0.5687452368438244 |
Epoch 96


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 163.72it/s]



EP_train | avg_loss: 0.5693738646805286 |
Epoch 97


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.14it/s]



EP_train | avg_loss: 0.5659137992188334 |
Epoch 98


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 164.01it/s]



EP_train | avg_loss: 0.5677553070709109 |
Epoch 99


EP: train | lr: {'encoder': 1e-05, 'embedding': 1e-05, 'linear': 0.0001}: 100%|| 32/32 [00:00<00:00, 162.98it/s]



EP_train | avg_loss: 0.5681747989729047 |


## Classification variant (did not perform better)

In [14]:
# from sklearn.metrics import mean_squared_error
# a = AttractiveTrainer.train_predict
# AttractiveData.LABEL.vocab.itos[int(a[0])], AttractiveTrainer.train_true[0]
# correct = 0
# pred_list = []
# true_list = []
# for i in range(len(a)):
#     pred = AttractiveData.LABEL.vocab.itos[int(a[i])]
#     pred_list.append(float(pred))
#     true = AttractiveData.LABEL.vocab.itos[int(AttractiveTrainer.train_true[i])]
#     true_list.append(float(true))
# mean_squared_error(true_list, pred_list)
# # true_list

0.5601443355119825

## Testing: load the saved model and predict on the test set

In [12]:
from transformermodel import TransformerModel

# Checkpoint file to restore.
# NOTE(review): the name presumably encodes save_name, timestamp, loss and epoch count — confirm.
PATH = './model/AttractiveNet_20201030-145225_0.568.100'

# Rebuild the architecture from `config` on the target device, then restore the weights.
load_model = TransformerModel(config).to(AttractiveData.device)
# map_location remaps the stored tensors onto the current device, so a checkpoint that was
# saved on a GPU can still be loaded on a CPU-only machine.
# NOTE: torch.load unpickles the file — only load checkpoints from a trusted source.
load_model.load_state_dict(torch.load(PATH, map_location=AttractiveData.device))
# eval() switches the module to inference mode (dropout off) and returns it, so the cell
# displays the restored model.
load_model.eval()

TransformerModel(
  (embedding): AttractiveEmbedding(
    (token): TokenEmbedding(1519, 300, padding_idx=1)
    (position): PositionalEmbedding()
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (category_embedding): CategoryEmbedding(18, 16, padding_idx=0)
  (encoder): TransformerEncoder(
    (layers): ModuleList(
      (0): TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): _LinearWithBias(in_features=300, out_features=300, bias=True)
        )
        (linear1): Linear(in_features=300, out_features=256, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
        (linear2): Linear(in_features=256, out_features=300, bias=True)
        (norm1): LayerNorm((300,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((300,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.1, inplace=False)
        (dropout2): Dropout(p=0.1, inplace=False)
      )
      (1): TransformerEncoderLayer(
        (self_attn): MultiheadA

In [13]:
def predict_attractive(sentence, category):
    """Score a single headline with the restored model.

    Args:
        sentence: iterable of token strings (one tokenized headline).
        category: category name, looked up in the category vocabulary.

    Returns:
        Whatever `load_model` returns for this one example; callers in this
        notebook read it with `.item()`.

    Relies on the notebook globals `AttractiveData` (vocabularies and device)
    and `load_model`.
    """
    device = AttractiveData.device
    indexed_sentence = [AttractiveData.TEXT.vocab.stoi[t] for t in sentence]
    indexed_category = [AttractiveData.CATEGORIES_LABEL.vocab.stoi[category]]

    # Shape (seq_len, 1): the token indices as a column, i.e. a batch of one.
    tensor_sentence = torch.LongTensor(indexed_sentence).unsqueeze(1).to(device)
    # Shape (1,): the single category index.
    tensor_category = torch.LongTensor(indexed_category).to(device)

    # Inference only: disable autograd so no computation graph is built per call.
    with torch.no_grad():
        prediction = load_model(tensor_sentence, tensor_category)

    return prediction

In [14]:
# Author's note: the label mean is 3.2 on train and 2.8 on test, so every prediction is
# shifted by that difference before it is written out.
train_mean, test_mean = 3.2, 2.8

predict_list = []
for i, sentence in enumerate(AttractiveData.test_data):
    prediction = predict_attractive(sentence.Headline, sentence.Category)
    # Same left-to-right arithmetic as the literal form: (score - 3.2) + 2.8.
    predict_list.append(prediction.item() - train_mean + test_mean)

# Store the shifted scores on the test frame, then export the ID/Label pair.
AttractiveData.df_test['Label'] = predict_list
submission = AttractiveData.df_test[['ID', 'Label']]
submission.to_csv('transformers.csv', index=False)

In [15]:
# Category names known to the training vocabulary (keys of its `freqs` mapping) and the
# category names present in the test frame (in value_counts order).
train_category = [*AttractiveData.CATEGORIES_LABEL.vocab.freqs]
test_category = AttractiveData.df_test['Category'].value_counts().index.tolist()

# First group: categories that appear in the test data but not in the training vocabulary.
train_lookup = set(train_category)
for each_test in test_category:
    if each_test in train_lookup:
        continue
    print(each_test)

# Blank line separating the two groups.
print()

# Second group: categories in the training vocabulary that never appear in the test data.
test_lookup = set(test_category)
for each_train in train_category:
    if each_train in test_lookup:
        continue
    print(each_train)


golf
beauty


In [17]:
# Display both category lists side by side as a tuple.
train_category, test_category

(['travel',
  'health',
  'femail',
  'sport',
  'gardening',
  'sciencetech',
  'news',
  'food',
  'football',
  'travelnews',
  'cricket',
  'golf',
  'books',
  'rugbyunion',
  'home',
  'boxing',
  'tennis',
  'concussion',
  'othersports',
  'beauty',
  'formulaone',
  'racing'],
 ['health',
  'femail',
  'sciencetech',
  'travel',
  'news',
  'football',
  'food',
  'living',
  'books',
  'boxing',
  'rugbyunion',
  'othersports',
  'formulaone',
  'cricket',
  'us',
  'tennis',
  'sport',
  'middleeast',
  'racing'])

## Just for fun: random-guess baseline

In [17]:
# IDs of the test rows as a plain Python list (used below to size the random-guess list).
a = AttractiveData.df_test['ID'].to_list()

In [13]:
from sklearn.metrics import mean_squared_error

In [9]:
# NOTE(review): `b` is only assigned in a later cell and `a` (above) holds test IDs, so on a
# fresh top-to-bottom run this call fails with a NameError. The recorded result presumably
# came from an earlier session in which `a` / `b` were the true training labels and a
# constant 3.0 prediction — confirm and restore those definitions.
mean_squared_error(a, b)
# Training all 3.0 got mse = 0.5545

0.5545343137254902

In [18]:
# only for fun
import random

# Candidate scores to draw from when guessing blindly.
guess_list = [2.3333333333333335, 3.3333333333333335, 3.6666666666666665, 2.6666666666666665]

# A dedicated, seeded generator makes the guesses (and the CSV exported from them)
# reproducible across kernel restarts without touching the global `random` state.
rng = random.Random(42)

# One independent draw per test ID.
b = [rng.choice(guess_list) for _ in a]

In [19]:
# Replace the 'Label' column of the test frame with the guesses in `b`
# (this overwrites whatever the column held before, in place).
AttractiveData.df_test['Label'] = b

In [20]:
# Export the ID/Label pair as a submission file.
# NOTE(review): the file name says 'all_3', but 'Label' was just filled with random guesses
# from `guess_list` — confirm which baseline this file is meant to contain.
AttractiveData.df_test[['ID', 'Label']].to_csv('all_3.csv', index=False)