# Load Libraries

In [1]:
import pandas as pd
import numpy as np
import pickle
import torch
from torch import nn

from torch.utils.data import Dataset
from torch.utils.data import DataLoader

from tqdm import tqdm
import gc
import random

from datetime import date

# Constants

In [2]:
DATASET_PATH = "avito_cv2vac_with_ranks_clear.pq"
SEED = 42
# DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
DEVICE = 'cuda'

In [3]:
! ls -l {DATASET_PATH}

"ls" ­Ґ пў«пҐвбп ў­гваҐ­­Ґ© Ё«Ё ў­Ґи­Ґ©
Є®¬ ­¤®©, ЁбЇ®«­пҐ¬®© Їа®Ја ¬¬®© Ё«Ё Ї ЄҐв­л¬ д ©«®¬.


# Load Data (make splits)

In [3]:
with open("val_id.pickle", 'rb') as f:
    val_id = pickle.load(f)

with open("test_id.pickle", 'rb') as f:
    test_id = pickle.load(f)

with open("train_id.pickle", 'rb') as f:
    train_id = pickle.load(f)

In [4]:
df = pd.read_parquet(DATASET_PATH)

In [5]:
# сделаем id для резюме, чтобы побиться по ним (чтобы на валидации при подсчете nDCG не было лика)

# vac_to_id = dict(zip(df['vac_des'].unique(), range(df['vac_des'].nunique())))

In [6]:
# df['vac_id'] = df['vac_des'].apply(lambda x: vac_to_id[x])

In [7]:
# df.to_parquet(DATASET_PATH)

# Dataset

In [5]:
class TripletDataset(Dataset):
    def __init__(self, df, vac_column, res_column, label_column):
        """
         Create dataset for Siamese Net training.

         Parameters
         ----------
         df : pd.DataFrame
             the dataframe we create dataset from
         vac_column: str
             name of the column of the vacancy
         res_column: str
             name of the column of the resume
         label_column: str
             name of the column of the vacancy embeddings

             Returns
         -------
         None
         """
        self.df = df[[vac_column, res_column, label_column]]

        self.vac_column = vac_column
        self.res_column = res_column
        self.label_column = label_column

        # предполагаю, что каждое резюме кидается ровно на 1 вакансию, составляя одну пару
        self.nunique_pairs = df[res_column].nunique()



    def __len__(self):
        """
         Return total amount of unique pairs: (vac_embed, res_embed).

         Parameters
         ----------
         None

         Returns
         -------
         int
             total amount of unique pairs: (vac_embed, res_embed) 
         """
        return self.nunique_pairs
  
    def __getitem__(self, idx):
        '''
         Return training object: (vac_embed, res_embed, label, rank, max_rank);
         Return rank and max_rank to penalty most appropriate samples more.

         Parameters
         ----------
         idx: int
             index of the samples we want to get.

         Returns
         -------
         tuple[torch.tensor]
             training object like a tuple: (vac_embed, res_embed, label, rank, max_rank)

        '''
        anchor_row = self.df.iloc[idx, :]
        anchor_vac = anchor_row[self.vac_column]
        anchor_label = anchor_row[self.label_column]
        try:
            if anchor_label == 1:
                positive_res = anchor_row[self.res_column]
                negative_res = self.df.loc[(self.df[self.vac_column] == anchor_vac) & (self.df[self.label_column] == 0), self.res_column].iloc[0]

            else:
                negative_res = anchor_row[self.res_column]
                positive_res = self.df.loc[(self.df[self.vac_column] == anchor_vac) & (self.df[self.label_column] == 1), self.res_column].iloc[0]

            return anchor_vac, positive_res, negative_res
        except:
            return "not found", "not found", "not found"

In [6]:
def collate_fn(data):
    """     
     Make dict samples from tuples (it is easier to use);

     Parameters
     ----------
       data: is a list of tuples with (vac, pos_res, neg_res, label)
      
    """
    vac, pos_res, neg_res = zip(*data)

    dict_data = {'vac': vac, 
                 'pos_res': pos_res,
                 'neg_res': neg_res}
                 
    return dict_data

In [7]:
train_dataset = TripletDataset(df=df[df['vac_id'].isin(train_id)],
                               vac_column='vac_des', 
                               res_column='res_des', 
                               label_column='label')

val_dataset = TripletDataset(df=df[df['vac_id'].isin(val_id)],
                               vac_column='vac_des', 
                               res_column='res_des', 
                               label_column='label')

test_dataset = TripletDataset(df=df[df['vac_id'].isin(test_id)],
                               vac_column='vac_des', 
                               res_column='res_des', 
                               label_column='label')

In [6]:
# with open("train_dataset.pickle", "rb") as f:
#     train_dataset = pickle.load(f)

# with open("val_dataset.pickle", "rb") as f:
#     val_dataset = pickle.load(f)

# with open("test_dataset.pickle", "rb") as f:
#     test_dataset = pickle.load(f)

# Make net



In [8]:
# добавил функцию для создания эмбеддингов внутри

class SiameseCVNet(nn.Module):
    def __init__(self, vac_vocab_size, res_vocab_size,
               embedding_dim, rnn_hidden_dim, 
               hidden_layers, fc1_output=512, fc2_output=128):
    
        super(SiameseCVNet, self).__init__()

        self.vac_vocab_size = vac_vocab_size
        self.res_vocab_size = res_vocab_size

        self.embedding_dim = embedding_dim
        self.rnn_hidden_dim = rnn_hidden_dim
        self.hidden_layers = hidden_layers

        # считаем после конкатенации в forward_one
        self.fc1_input_one = 2 * (self.embedding_dim + (self.hidden_layers + 1) * self.rnn_hidden_dim)

        # но мы конкатенируем 2 сэмпла!
        self.fc1_input = 2 * self.fc1_input_one

        self.fc1_output = fc1_output
        self.fc2_output = fc2_output

        self.vac_embed = nn.Embedding(vac_vocab_size, embedding_dim)
        self.res_embed = nn.Embedding(res_vocab_size, embedding_dim)

        self.rnn = nn.LSTM(input_size=embedding_dim,
                           hidden_size=rnn_hidden_dim,
                           num_layers=hidden_layers,
                           batch_first=True)

        fc1 = nn.Linear(self.fc1_input, self.fc1_output)
        relu = nn.ReLU()
        fc2 = nn.Linear(self.fc1_output, self.fc2_output)
        sigmoid = nn.Sigmoid()

        self.nn_head = nn.Sequential(
            fc1,
            relu,
            fc2,
            sigmoid

          ) 

    def forward(self, vac_text, res_text):
        vac_embeds = self.vac_embed(vac_text)
        res_embeds = self.res_embed(res_text)

        catted_output_vac = self.forward_one(vac_embeds)
        catted_output_res = self.forward_one(res_embeds)

        # конкатенируем и пускаем через dense

        catted_output = torch.cat((catted_output_vac, catted_output_res), dim=-1)
        sigm_output = self.nn_head(catted_output)

        return sigm_output

    def forward_one(self, batch):
        '''
        image there is just a tensor of embeddings
        shit with dims for sure
        '''

        # print('sample:', batch.shape)

        rnn_output, (hidden_states, cell_states) = self.rnn(batch)

        # print('rnn output:', rnn_output.shape)

        embed_max_pool = batch.max(dim=1)[0]
        embed_avg_pool = batch.sum(dim=1) / len(batch)

        rnn_max_pool = rnn_output.max(dim=1)[0]
        rnn_avg_pool = rnn_output.sum(dim=1) / len(rnn_output)  

        # print('embed pool:', embed_max_pool.shape, embed_avg_pool.shape)
        # print('rnn output pool:', rnn_max_pool.shape, rnn_max_pool.shape) 
        # print('hidden and state: ', hidden_states.shape, cell_states.shape)

        # тут 0 ось -- кол-во слоев в rnn-блоке
        hidden_states = torch.cat([hidden_states[i, :, :] for i in range(hidden_states.shape[0])], dim=-1)
        cell_states = torch.cat([cell_states[i, :, :] for i in range(cell_states.shape[0])], dim=-1)

        # print('hidden and state: ', hidden_states.shape, cell_states.shape)

        catted_output = torch.cat((embed_max_pool, embed_avg_pool, rnn_max_pool, 
                                  rnn_avg_pool, hidden_states, cell_states), dim=-1)

        return catted_output 

In [9]:
# c = 0

# for batch in loader:
#   vac = batch["vac"]
#   pos_res = batch["pos_res"]
#   neg_res = batch["neg_res"]

#   vac_ind = make_indexes_from_tuple(vac, vac_vocab).to(DEVICE)
#   pos_res_ind = make_indexes_from_tuple(pos_res, res_vocab).to(DEVICE)

#   neg_res_ind = make_indexes_from_tuple(neg_res, res_vocab).to(DEVICE)

#   pos_sim = model(vac_ind, pos_res_ind)
#   neg_sim = model(vac_ind, neg_res_ind)

#   print(neg_res_ind.shape)

#   c += 1
#   if c == 10:
#     break


#     # 2 * embed + 2 * hidden + 2 * layers * hidden

# Make Vocab

In [22]:
! pip install natasha

Collecting natasha
  Downloading natasha-1.5.0-py3-none-any.whl (34.4 MB)
     ---------------------------------------- 34.4/34.4 MB 9.9 MB/s eta 0:00:00
Collecting yargy>=0.14.0
  Downloading yargy-0.15.1-py3-none-any.whl (33 kB)
Collecting slovnet>=0.6.0
  Downloading slovnet-0.6.0-py3-none-any.whl (46 kB)
     ---------------------------------------- 46.7/46.7 kB 2.3 MB/s eta 0:00:00
Collecting ipymarkup>=0.8.0
  Downloading ipymarkup-0.9.0-py3-none-any.whl (14 kB)
Collecting navec>=0.9.0
  Downloading navec-0.10.0-py3-none-any.whl (23 kB)
Installing collected packages: navec, yargy, slovnet, ipymarkup, natasha
Successfully installed ipymarkup-0.9.0 natasha-1.5.0 navec-0.10.0 slovnet-0.6.0 yargy-0.15.1


In [9]:
from natasha import Segmenter, Doc

In [10]:
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# vac_texts = []
# res_texts = []

segmenter = Segmenter()

In [11]:
# запоминаем vocab на train'е

# for vac_text, res_text, _ in iter(train_dataset):
#     vac_doc = Doc(vac_text)
#     vac_doc.segment(segmenter)
#     vac_tokens = [token.text for token in vac_doc.tokens]
#     vac_texts.extend([token.lower() for token in vac_tokens])

#     res_doc = Doc(res_text)
#     res_doc.segment(segmenter)
#     res_tokens = [token.text for token in res_doc.tokens]
#     res_texts.extend([token.lower() for token in res_tokens])

In [12]:
# len(vac_texts), len(res_texts)

In [13]:
# вокабы

# vac_vocab = {token: ind for ind, token in enumerate(list(set(vac_texts)))}
# res_vocab = {token: ind for ind, token in enumerate(list(set(res_texts)))}

In [14]:
# len(vac_vocab), len(res_vocab)

In [11]:
with open("vac_vocab.pickle", 'rb') as f:
    vac_vocab = pickle.load(f)

with open("res_vocab.pickle", 'rb') as f:
    res_vocab = pickle.load(f)

# First attempts

In [12]:
class TripletLoss(nn.Module):
    def __init__(self, margin=1.0):
        super(TripletLoss, self).__init__()
        self.margin = margin
    
    def forward(self, pos_sim: torch.Tensor, neg_sim: torch.Tensor) -> torch.Tensor:
        losses = torch.relu(neg_sim - pos_sim + self.margin)
        
        return losses.mean()

In [13]:
def make_indexes_from_tuple(t, vocab):
    indexes = []
    for sent in t:
        sent = Doc(sent)
        sent.segment(segmenter) 
        ind = np.array([vocab.get(token.text.lower()) for token in sent.tokens], dtype=np.float16)

        ind = np.nan_to_num(ind, nan=0)
        ind = torch.LongTensor(ind)

        indexes.append(ind)
  
    padded_indexes = nn.utils.rnn.pad_sequence(indexes, padding_value=0, batch_first=True)
    return padded_indexes

In [14]:
1 in set((1, torch.tensor([1]), torch.tensor([0, 0])))

True

In [14]:
def train_epoch(model, optimizer, dataset, batch_size, shuffle, collate_fn, verbose_every_n_batches=300):
    
    torch_dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, collate_fn=collate_fn)
    loss = TripletLoss(margin=1)
    total_train_loss = 0
    running_train_loss = 0
    num_batches = 1
    model = model.to(DEVICE)
    model.train()
    for batch in tqdm(torch_dataloader, desc="Training"):
        vac, pos_res, neg_res = batch["vac"], batch["pos_res"], batch["neg_res"]
        if "not found" in vac:
            continue
        vac_ind = make_indexes_from_tuple(vac, vac_vocab).to(DEVICE)
        pos_res_ind = make_indexes_from_tuple(pos_res, res_vocab).to(DEVICE)
        neg_res_ind = make_indexes_from_tuple(neg_res, res_vocab).to(DEVICE)
        pos_sim = model(vac_ind, pos_res_ind)
        neg_sim = model(vac_ind, neg_res_ind)
        batch_loss = loss(pos_sim, neg_sim)
        batch_loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        total_train_loss += batch_loss 
#         ??? * batch_size
        running_train_loss += batch_loss
        if num_batches == 1:
            print(f"Train loss after first batch: {running_train_loss}", end='\r')    
        
        if num_batches % verbose_every_n_batches == 0:
            print(f"Mean train loss on the last {verbose_every_n_batches} batches: {running_train_loss / verbose_every_n_batches};", end="\r")
            running_train_loss = 0

        num_batches += 1
            
    print(f"Mean train loss after epoch: {total_train_loss / num_batches}")
    print(f"Total train loss after epoch: {total_train_loss}")
  

# по хорошему эту функцию надо будет менять, если хотим считать nDCG
def eval_model(model, dataset, batch_size, collate_fn, 
               verbose_every_n_batches=300):
    torch_dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)
    preds, targets = [], []
    
    total_valid_loss = 0
    num_batches = 0
    
    model.eval()
    with torch.no_grad():
        for batch in tqdm(torch_dataloader, desc="Evaluating"):
            vac, pos_res, neg_res = batch["vac"], batch["pos_res"], batch["neg_res"]
            if "not found" in vac:
                continue
            vac_ind = make_indexes_from_tuple(vac, vac_vocab).to(DEVICE)
            pos_res_ind = make_indexes_from_tuple(pos_res, res_vocab).to(DEVICE)
            neg_res_ind = make_indexes_from_tuple(neg_res, res_vocab).to(DEVICE)
            pos_sim = model(vac_ind, pos_res_ind)
            neg_sim = model(vac_ind, neg_res_ind)
            preds.extend(pos_sim.detach().cpu().numpy())
            targets.extend(np.ones(pos_sim.shape))
            preds.extend(neg_sim.detach().cpu().numpy())
            targets.extend(np.zeros(neg_sim.shape))

    return targets, preds

In [15]:
def clean_cuda_cache():
  start_available, reserved = torch.cuda.mem_get_info()
  torch.cuda.empty_cache()
  gc.collect()
  fin_available, reserved = torch.cuda.mem_get_info()
  print(f"cleaned {(fin_available - start_available) / 2**10} gb")
  print(f"available {fin_available / 2**20} gb")

In [16]:
def train_val_loop(
    model, optimizer, scheduler, 
    dataset_train, dataset_val, dataset_test,
    batch_size_train, batch_size_test, batch_size_val,
    num_epochs, train_shuffle, collate_fn, 
    verbose_every_n_batches, eval_on_train, 
    early_stopping_patience=7
):

    for n_epoch in range(1, num_epochs + 1):
        train_epoch(
                    model=model, 
                    optimizer=optimizer, 
                    dataset=dataset_train, 
                    batch_size=batch_size_train, 
                    shuffle=train_shuffle, 
                    collate_fn=collate_fn, 
                    verbose_every_n_batches=verbose_every_n_batches
                  )
                    
        # clean cache before validation    
        clean_cuda_cache()    
            
        targets_val, preds_val = eval_model(
            model=model,
            dataset=dataset_val,
            batch_size=batch_size_val,
            collate_fn=collate_fn
        )
        
        # for logging while validating
        loss = TripletLoss(margin=1)  
        # count common loss
        val_loss = loss(torch.tensor(np.array(targets_val)), torch.tensor(np.array(preds_val)))
        
        with open(LOGGING_FILE_PATH, 'a') as f:
            f.write(f'epoch {n_epoch}. valid loss: {val_loss}\n')

        # torch.save(model.state_dict(), os.path.join(EXP_CHECKPOINTS_PATH, f"epoch_{n_epoch}_{datetime.now().strftime('%Y-%m-%d')}_{datetime.now().strftime('%H:%M:%S')}_testCC_{round(test_roc_auc_CC, 3)}_testPIL_{round(test_roc_auc_PIL, 3)}.pt"))
                
        targets_test, preds_test = eval_model(
            model=model,
            dataset=dataset_test,
            batch_size=batch_size_test,
            collate_fn=collate_fn
        )
        # count common loss
        test_loss =  loss(torch.tensor(np.array(targets_test)), torch.tensor(np.array(preds_test)))
        
        with open(LOGGING_FILE_PATH, 'a') as f:
            f.write(f'epoch {n_epoch}. test loss: {test_loss}\n')

        scheduler.step(val_loss)
        
#         if eval_on_train:
#                 targets_train, preds_train = eval_model(
#                 model=model,
#                 dataset=dataset_train,
#                 batch_size=batch_size_train,
#                 collate_fn=collate_fn
#             )
#                 # count common loss
#                 train_loss = loss(torch.tensor(np.array(targets_train)), torch.tensor(np.array(preds_train)))
                
#                 with open(LOGGING_FILE_PATH, 'a') as f:
#                     f.write(f'epoch {n_epoch}. train loss: {train_loss}\n')

        print(35 * "=" + "\n")


In [17]:
# EXP_PATH = f"D:\\aaa_final"
LOGGING_FILE_PATH = "D:\\aaa_final\\logs.txt"


In [18]:
model_parameters = {
    "embedding_dim": 128,
    "rnn_hidden_dim": 512,
    "hidden_layers": 1,
    "fc1_output": 256,
    "fc2_output": 128,
    "vac_vocab_size": len(vac_vocab),
    "res_vocab_size": len(res_vocab),
}

# похоже, на 512 уже не вывозит
experement_parameters = {
    "optimizer_lr": 1e-4,
    "optimizer_weight_decay": 1e-4,
    "scheduler_patience": 5,
    "scheduler_factor": 0.8,
    "dataset_train": train_dataset,
    "dataset_test": val_dataset,
    "dataset_val": test_dataset,
    "batch_size_train": 64, 
    "batch_size_val": 64, 
    "batch_size_test": 64,
    "num_epochs": 5,
    "train_shuffle": True,
    "collate_fn": collate_fn,
    "verbose_every_n_batches": 300,
    "eval_on_train": False,
}

# надо будет размерности чинить
model = SiameseCVNet(
    embedding_dim=model_parameters["embedding_dim"],
    vac_vocab_size=model_parameters["vac_vocab_size"],
    res_vocab_size=model_parameters["res_vocab_size"],    
    rnn_hidden_dim=model_parameters["rnn_hidden_dim"],
    hidden_layers=model_parameters["hidden_layers"],
    fc1_output=model_parameters["fc1_output"],
    fc2_output=model_parameters["fc2_output"]
)

# классика
optimizer = torch.optim.AdamW(
    params=model.parameters(),
    lr=experement_parameters["optimizer_lr"],
    weight_decay=experement_parameters["optimizer_weight_decay"]
)

# норм оптимайзер вроде
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer=optimizer,
    patience=experement_parameters["scheduler_patience"],
    factor=experement_parameters["scheduler_factor"]
)

loss = TripletLoss(margin=1)

In [19]:
train_val_loop(
  model=model, optimizer=optimizer, scheduler=scheduler, 
  dataset_train=experement_parameters["dataset_train"], 
  dataset_test=experement_parameters["dataset_test"], 
  dataset_val=experement_parameters["dataset_val"],
  batch_size_train=experement_parameters["batch_size_train"],
  batch_size_val=experement_parameters["batch_size_val"], 
  batch_size_test=experement_parameters["batch_size_test"],
  num_epochs=experement_parameters["num_epochs"], 
  train_shuffle=experement_parameters["train_shuffle"], 
  collate_fn=experement_parameters["collate_fn"], 
  verbose_every_n_batches=experement_parameters["verbose_every_n_batches"],
  eval_on_train=True,
)ы

Training:   0%|                                                                        | 1/952 [00:03<53:01,  3.35s/it]

Train loss after first batch: 0.9999215602874756

Training:  32%|██████████████████████▏                                               | 302/952 [16:14<34:32,  3.19s/it]

Mean train loss on the last 300 batches: 1.0004866123199463;

Training:  64%|████████████████████████████████████████████▋                         | 607/952 [32:36<18:15,  3.18s/it]

Mean train loss on the last 300 batches: 1.000151515007019;

Training:  96%|██████████████████████████████████████████████████████████████████▉   | 911/952 [48:55<02:13,  3.25s/it]

Mean train loss on the last 300 batches: 0.9977630972862244;

Training: 100%|██████████████████████████████████████████████████████████████████████| 952/952 [51:07<00:00,  3.22s/it]


Mean train loss after epoch: 0.9981701374053955
Total train loss after epoch: 940.2762451171875
cleaned 2668544.0 gb
available 4643.0 gb


Evaluating: 100%|████████████████████████████████████████████████████████████████████| 493/493 [04:27<00:00,  1.84it/s]
Evaluating: 100%|████████████████████████████████████████████████████████████████████| 190/190 [00:51<00:00,  3.72it/s]





Training:   0%|                                                                        | 1/952 [00:03<53:11,  3.36s/it]

Train loss after first batch: 1.0294036865234375

Training:  32%|██████████████████████▍                                               | 305/952 [16:48<35:45,  3.32s/it]

Mean train loss on the last 300 batches: 0.999599039554596;

Training:  64%|████████████████████████████████████████████▊                         | 610/952 [33:40<18:59,  3.33s/it]

Mean train loss on the last 300 batches: 0.989678680896759;

Training:  96%|██████████████████████████████████████████████████████████████████▉   | 910/952 [50:16<02:21,  3.36s/it]

Mean train loss on the last 300 batches: 0.9828541278839111;

Training: 100%|██████████████████████████████████████████████████████████████████████| 952/952 [52:34<00:00,  3.31s/it]


Mean train loss after epoch: 0.9893582463264465
Total train loss after epoch: 931.9754028320312
cleaned 4198492.0 gb
available 4643.0 gb


Evaluating: 100%|████████████████████████████████████████████████████████████████████| 493/493 [04:31<00:00,  1.81it/s]
Evaluating: 100%|████████████████████████████████████████████████████████████████████| 190/190 [00:51<00:00,  3.72it/s]





Training:   0%|                                                                        | 1/952 [00:03<51:17,  3.24s/it]

Train loss after first batch: 0.8979332447052002

Training:  32%|██████████████████████▍                                               | 305/952 [16:34<37:07,  3.44s/it]

Mean train loss on the last 300 batches: 0.9669723510742188;

Training:  64%|████████████████████████████████████████████▊                         | 609/952 [34:01<19:11,  3.36s/it]

Mean train loss on the last 300 batches: 0.9624716639518738;

Training:  96%|██████████████████████████████████████████████████████████████████▉   | 910/952 [50:53<02:22,  3.38s/it]

Mean train loss on the last 300 batches: 0.967728316783905;

Training: 100%|██████████████████████████████████████████████████████████████████████| 952/952 [53:13<00:00,  3.35s/it]


Mean train loss after epoch: 0.966425895690918
Total train loss after epoch: 910.3731689453125
cleaned 2859008.0 gb
available 4643.0 gb


Evaluating: 100%|████████████████████████████████████████████████████████████████████| 493/493 [05:41<00:00,  1.44it/s]
Evaluating: 100%|████████████████████████████████████████████████████████████████████| 190/190 [00:55<00:00,  3.42it/s]





Training:   0%|                                                                        | 1/952 [00:03<54:48,  3.46s/it]

Train loss after first batch: 0.9845389723777771

Training:  32%|██████████████████████▎                                               | 304/952 [19:01<34:32,  3.20s/it]

Mean train loss on the last 300 batches: 0.9741309881210327;

Training:  64%|████████████████████████████████████████████▊                         | 609/952 [35:30<18:20,  3.21s/it]

Mean train loss on the last 300 batches: 0.9658050537109375;

Training:  96%|██████████████████████████████████████████████████████████████████▉   | 911/952 [51:46<02:16,  3.32s/it]

Mean train loss on the last 300 batches: 0.9582651257514954;

Training: 100%|██████████████████████████████████████████████████████████████████████| 952/952 [53:58<00:00,  3.40s/it]


Mean train loss after epoch: 0.9643377661705017
Total train loss after epoch: 908.4061279296875
cleaned 3264512.0 gb
available 4643.0 gb


Evaluating: 100%|████████████████████████████████████████████████████████████████████| 493/493 [04:32<00:00,  1.81it/s]
Evaluating: 100%|████████████████████████████████████████████████████████████████████| 190/190 [00:50<00:00,  3.74it/s]





Training:   0%|                                                                        | 1/952 [00:03<50:47,  3.20s/it]

Train loss after first batch: 0.9180682897567749

Training:  32%|██████████████████████▌                                               | 306/952 [16:27<35:26,  3.29s/it]

Mean train loss on the last 300 batches: 0.9735668897628784;

Training:  64%|████████████████████████████████████████████▋                         | 607/952 [33:13<19:01,  3.31s/it]

Mean train loss on the last 300 batches: 0.9510174989700317;

Training:  96%|██████████████████████████████████████████████████████████████████▉   | 910/952 [50:17<02:19,  3.32s/it]

Mean train loss on the last 300 batches: 0.9537065029144287;

Training: 100%|██████████████████████████████████████████████████████████████████████| 952/952 [52:38<00:00,  3.32s/it]


Mean train loss after epoch: 0.9562451839447021
Total train loss after epoch: 900.7828979492188
cleaned 4128872.0 gb
available 4643.0 gb


Evaluating: 100%|████████████████████████████████████████████████████████████████████| 493/493 [06:17<00:00,  1.31it/s]
Evaluating: 100%|████████████████████████████████████████████████████████████████████| 190/190 [00:54<00:00,  3.47it/s]







In [20]:
torch.save(model.state_dict(), "model_weights.pth")