In [1]:
# Install libraries for data_loader
!pip install pandas
!pip install sklearn
!pip install torchvision
!pip install tqdm
!pip install torchmetrics



In [2]:
# Import DataLoader and corresponding libraries
import pandas
import torchvision.transforms as TT
from torch.utils.data import DataLoader, Dataset, sampler
from sklearn.model_selection import StratifiedKFold
from torchvision import utils
from PIL import Image

In [3]:
# Import libraries for tensors
import random
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [4]:
# For fbeta-score
from torchmetrics.functional import fbeta_score

In [5]:
# For model construction
from collections import OrderedDict

In [6]:
# Import tqdm for progress bar construction
import tqdm

In [7]:
# from google.colab import drive
# drive.mount('/content/gdrive')

# Datatypes and Devices (from Assignment 2)
dtype = torch.float
ltype = torch.long

if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

print('using device:', device)

using device: cuda:0


In [8]:
# Hyper-parameters for K-Fold Cross Validation
N = 10
seed = 42

torch.manual_seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
random.seed(seed)

# Directories for Data
FF1010_Path = './data/'
AudioImage_Path = './image/'

In [9]:
# Call StratifiedKFold object
skf = StratifiedKFold(
    n_splits=N, shuffle=True, random_state=seed
)

In [10]:
# Modify dataframe for K-Fold Cross Validation (birdclef2021)
birdclef_csv = pandas.read_csv(AudioImage_Path + 'metadata.csv')
birdclef_csv.loc[birdclef_csv['label_id'] >= 0,'filepath'] = \
    AudioImage_Path + birdclef_csv.query('label_id >= 0')['primary_label'] + '/' + \
    birdclef_csv.query('label_id >= 0')['filename'] + '.npy'

birdclef_csv = birdclef_csv.dropna()
birdclef_csv = birdclef_csv.reset_index(drop=True)

# Add 'fold' attribute for dataset classification
birdclef_dataframe = birdclef_csv.copy()
for n, (_, nth_groups) in enumerate(
    skf.split(birdclef_dataframe, birdclef_dataframe['label_id'])):
    birdclef_dataframe.loc[nth_groups, 'fold'] = int(n)



In [11]:
# Class for Birdclef dataset
class birdclef(Dataset):
    def __init__(self, dataframe, process='train', labels='label_id'):
        self.dataframe = dataframe
        self.filepaths = dataframe['filepath'].values
        self.labels = dataframe[labels].values
        self.process = process
        self.batch_threshold = 64
        
        # Transforms for each train and validation
        self.train_transform = TT.Compose([
            TT.Resize([128, 281]),
            TT.RandomHorizontalFlip(p=0.5),
            TT.RandomVerticalFlip(p=0.5),
            TT.ToTensor(),
            TT.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
        ])
        self.val_transform = TT.Compose([
            TT.Resize([128, 281]),
            TT.ToTensor(),
            TT.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
        ])
    
    def __getitem__(self, idx):
        source = np.load(self.filepaths[idx])
        
        # Rearrange numpy arrays
        source = source.transpose(1, 2, 0)
        
        # Add RGB dimension
        source = np.stack((np.squeeze(source), ) * 3, -1)
        if len(source.shape) == 3:
            source = np.expand_dims(source, axis=2)
        source = source.transpose(2, 0, 1, 3)
        N, H, W, C = source.shape
        label_rp = N
        
        # Apply transform
        if self.process == 'train':
            source = torch.stack([self.train_transform(Image.fromarray(x)) for x in source])
        elif self.process == 'valid':
            source = torch.stack([self.val_transform(Image.fromarray(x)) for x in source])
        
        # Avoid CUDA OOM
        if N > self.batch_threshold:
            unit = (N - self.batch_threshold) // 2
            label_rp = self.batch_threshold
            if N % 2 == 0:
                source = source[unit : -unit]
            else:
                source = source[unit : -(unit + 1)]
        
        return source, torch.tensor(self.labels[idx], dtype=ltype).repeat(label_rp)
    
    def __len__(self):
        return len(self.dataframe)

This function evaluates model.

In [12]:
def evaluate_model(loader, model, beta=1.0):
    print("Checking accuracy score on validation set.")
    # TODO: extend this so that we can print that we evaluate test set.
    num_correct = 0
    num_samples = 0
    log_sum = 0
    fbeta_sum = 0
    model.eval()
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device, dtype=torch.float)
            y = y.to(device=device, dtype=torch.long)
            y = torch.squeeze(y, 0)
            scores = model(torch.squeeze(x, 0))
            # scores means classfication class for each class. It should be the tensor with size of (Input size, Number of classes)

            # Checks naive accuracy.
            _, preds = scores.max(1)
            num_correct += (preds == y).sum()
            num_samples += preds.size(0)

            # Checks Log Loss.
            log_loss = F.cross_entropy(scores, y)
            log_sum += log_loss.sum() * preds.size(0)

            # Checks Fbeta-score.
            fbeta = fbeta_score(preds, y, beta=beta)
            fbeta_sum += fbeta * preds.size(0)
            
            # Erase memory caches
            del x, y, scores, log_loss
        
        acc = float(num_correct) / num_samples
        log_score = log_sum / num_samples
        f_score = fbeta_sum / num_samples
        print('\nAccuracy: %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))
        print('Log Loss score:\t%.2f' % (log_score))
        print('Fbeta-score (beta=%d): \t%.2f' % (beta , f_score))
    return acc, log_score, f_score

Let's train with model 2.

In [13]:
import os

# Print period for accuracy.
print_period = 1000

# Hyper-parameters for training 
birdclef_batch = 1
birdclef_epoch = 30

# Learning Rate.
learning_rate_2 = 0.0005
eta_min = 5e-6
momentum = 0.9

# L2 Regularization Hyperparamter
weight_decay_2 = 0.01

# Beta constant for Fbeta-score.
# If you want to give more weight to precision, use value smaller than 1.0.
# If you want to give more weight to recall, use value larger than 1.0.
beta = 1.0

# Prototype of model 2.
# ResNet50 outputs (Batchsize, 1000) tensor as output, so we reduce them to 397.
class Model2(nn.Module):
    def __init__(self, num_classes=397):
        super().__init__()
        self.resnet50 = torch.hub.load('pytorch/vision:v0.10.0', 'resnet50', pretrained=True).to(device)
        self.relu = nn.ReLU().to(device)
        self.linear = nn.Linear(1000, num_classes, bias=True).to(device)
    
    def forward (self, x):
        x = self.resnet50(x)
        x = self.relu(x)
        x = self.linear(x)
        return x

cur_model = Model2()
print(cur_model)

def checkpoint(model, optimizer, scheduler, train_losses, val_losses, log_scores, fbeta_scores, epoch):
    torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': scheduler.state_dict(),
            'train_losses': train_losses,
            'val_losses': val_losses,
            'log_scores': log_scores,
            'fbeta_scores': fbeta_scores,
            }, './bird_specificator.pt')

def train_model(model, train_loader, val_loader, optimizer, scheduler, beta=beta, epoch=birdclef_epoch, ckpt_enable=True):
    val_losses = []
    train_losses = []
    log_scores = []
    fbeta_scores = []
    
    loss = 0
    log_score = 0
    saved_epoch = -1
    
    # Load checkpoint
    if ckpt_enable and os.path.exists('./bird_specificator.pt'):
        ckpt = torch.load('./bird_specificator.pt')
        model.load_state_dict(ckpt['model_state_dict'])
        optimizer.load_state_dict(ckpt['optimizer_state_dict'])
        scheduler.load_state_dict(ckpt['scheduler_state_dict'])
        train_losses = ckpt['train_losses']
        val_losses = ckpt['val_losses']
        log_scores = ckpt['log_scores']
        fbeta_scores = ckpt['fbeta_scores']
        saved_epoch = ckpt['epoch']
    
    for e in range(epoch):
        if e <= saved_epoch:
            continue
            
        epoch_losses = []
        print(f"Training model 2, epoch {e+1}")
        for index, (source, label) in enumerate(tqdm.tqdm(train_loader)):          
            x = source.to(device=device, dtype=torch.float)  # move to device, e.g. GPU
            y = label.to(device=device, dtype=torch.long)
            y = torch.squeeze(y, 0)

            scores = model(torch.squeeze(x, 0))
            loss = F.cross_entropy(scores, y) # Log loss for our project.

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            epoch_losses.append(loss.item())

            # every print_period, print loss.
            if index % print_period == 0:
                print('Iteration %d, loss = %.4f' % (index, loss.item()))
                
            # Erase memory caches
            del source, label, x, y, scores, loss
            
        # Cosine LR Annealing Scheduler
        scheduler.step()
        
        # Append the score to lists
        acc, log_score, fbeta_score = evaluate_model(val_loader, model, beta=beta)
        val_losses.append(acc)
        train_losses.append(torch.tensor(sum(epoch_losses) / len(epoch_losses), dtype=dtype))
        log_scores.append(log_score)
        fbeta_scores.append(fbeta_score)
        
        # every epoch, save the model
        checkpoint(model, optimizer, scheduler, train_losses, val_losses, log_scores, fbeta_scores, e)

    return model, val_losses, train_losses, log_scores, fbeta_scores

Using cache found in C:\Users\youngrae/.cache\torch\hub\pytorch_vision_v0.10.0


Model2(
  (resnet50): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
          

In [13]:
import os

# Print period for accuracy.
print_period = 1000

# Hyper-parameters for training 
birdclef_batch = 1
birdclef_epoch = 30

# Learning Rate.
learning_rate_2 = 0.0005
eta_min = 5e-6
momentum = 0.9import os

# Print period for accuracy.
print_period = 1000

# Hyper-parameters for training 
birdclef_batch = 1
birdclef_epoch = 30

# Learning Rate.
learning_rate_2 = 0.0005
eta_min = 5e-6
momentum = 0.9

# L2 Regularization Hyperparamter
weight_decay_2 = 0.01

# Beta constant for Fbeta-score.
# If you want to give more weight to precision, use value smaller than 1.0.
# If you want to give more weight to recall, use value larger than 1.0.
beta = 1.0

# Prototype of model 2.
# ResNet50 outputs (Batchsize, 1000) tensor as output, so we reduce them to 397.
class Model2(nn.Module):
    def __init__(self, num_classes=397):
        super().__init__()
        self.resnet50 = torch.hub.load('pytorch/vision:v0.10.0', 'resnet50', pretrained=True).to(device)
        self.relu = nn.ReLU().to(device)
        self.linear = nn.Linear(1000, num_classes, bias=True).to(device)
    
    def forward (self, x):
        x = self.resnet50(x)
        x = self.relu(x)
        x = self.linear(x)
        return x

cur_model = Model2()
print(cur_model)

def checkpoint(model, optimizer, scheduler, train_losses, val_losses, log_scores, fbeta_scores, epoch):
    torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': scheduler.state_dict(),
            'train_losses': train_losses,
            'val_losses': val_losses,
            'log_scores': log_scores,
            'fbeta_scores': fbeta_scores,
            }, './bird_specificator.pt')

def train_model(model, train_loader, val_loader, optimizer, scheduler, beta=beta, epoch=birdclef_epoch, ckpt_enable=True):
    val_losses = []
    train_losses = []
    log_scores = []
    fbeta_scores = []
    
    loss = 0
    log_score = 0
    saved_epoch = -1
    
    # Load checkpoint
    if ckpt_enable and os.path.exists('./bird_specificator.pt'):
        ckpt = torch.load('./bird_specificator.pt')
        model.load_state_dict(ckpt['model_state_dict'])
        optimizer.load_state_dict(ckpt['optimizer_state_dict'])
        scheduler.load_state_dict(ckpt['scheduler_state_dict'])
        train_losses = ckpt['train_losses']
        val_losses = ckpt['val_losses']
        log_scores = ckpt['log_scores']
        fbeta_scores = ckpt['fbeta_scores']
        saved_epoch = ckpt['epoch']
    
    for e in range(epoch):
        if e <= saved_epoch:
            continue
            
        epoch_losses = []
        print(f"Training model 2, epoch {e+1}")
        for index, (source, label) in enumerate(tqdm.tqdm(train_loader)):          
            x = source.to(device=device, dtype=torch.float)  # move to device, e.g. GPU
            y = label.to(device=device, dtype=torch.long)
            y = torch.squeeze(y, 0)

            scores = model(torch.squeeze(x, 0))
            loss = F.cross_entropy(scores, y) # Log loss for our project.

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            epoch_losses.append(loss.item())

            # every print_period, print loss.
            if index % print_period == 0:
                print('Iteration %d, loss = %.4f' % (index, loss.item()))
                
            # Erase memory caches
            del source, label, x, y, scores, loss
            
        # Cosine LR Annealing Scheduler
        scheduler.step()
        
        # Append the score to lists
        acc, log_score, fbeta_score = evaluate_model(val_loader, model, beta=beta)
        val_losses.append(acc)
        train_losses.append(torch.tensor(sum(epoch_losses) / len(epoch_losses), dtype=dtype))
        log_scores.append(log_score)
        fbeta_scores.append(fbeta_score)
        
        # every epoch, save the model
        checkpoint(model, optimizer, scheduler, train_losses, val_losses, log_scores, fbeta_scores, e)

    return model, val_losses, train_losses, log_scores, fbeta_scores

# L2 Regularization Hyperparamter
weight_decay_2 = 0.01

# Beta constant for Fbeta-score.
# If you want to give more weight to precision, use value smaller than 1.0.
# If you want to give more weight to recall, use value larger than 1.0.
beta = 1.0

# Prototype of model 2.
# ResNet50 outputs (Batchsize, 1000) tensor as output, so we reduce them to 397.
class Model2(nn.Module):
    def __init__(self, num_classes=397):
        super().__init__()
        self.resnet50 = torch.hub.load('pytorch/vision:v0.10.0', 'resnet50', pretrained=True).to(device)
        self.relu = nn.ReLU().to(device)
        self.linear = nn.Linear(1000, num_classes, bias=True).to(device)
    
    def forward (self, x):
        x = self.resnet50(x)
        x = self.relu(x)
        x = self.linear(x)
        return x

cur_model = Model2()
print(cur_model)

def checkpoint(model, optimizer, scheduler, train_losses, val_losses, log_scores, fbeta_scores, epoch):
    torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': scheduler.state_dict(),
            'train_losses': train_losses,
            'val_losses': val_losses,
            'log_scores': log_scores,
            'fbeta_scores': fbeta_scores,
            }, './bird_specificator.pt')

def train_model(model, train_loader, val_loader, optimizer, scheduler, beta=beta, epoch=birdclef_epoch, ckpt_enable=True):
    val_losses = []
    train_losses = []
    log_scores = []
    fbeta_scores = []
    
    loss = 0
    log_score = 0
    saved_epoch = -1
    
    # Load checkpoint
    if ckpt_enable and os.path.exists('./bird_specificator.pt'):
        ckpt = torch.load('./bird_specificator.pt')
        model.load_state_dict(ckpt['model_state_dict'])
        optimizer.load_state_dict(ckpt['optimizer_state_dict'])
        scheduler.load_state_dict(ckpt['scheduler_state_dict'])
        train_losses = ckpt['train_losses']
        val_losses = ckpt['val_losses']
        log_scores = ckpt['log_scores']
        fbeta_scores = ckpt['fbeta_scores']
        saved_epoch = ckpt['epoch']
    
    for e in range(epoch):
        if e <= saved_epoch:
            continue
            
        epoch_losses = []
        print(f"Training model 2, epoch {e+1}")
        for index, (source, label) in enumerate(tqdm.tqdm(train_loader)):          
            x = source.to(device=device, dtype=torch.float)  # move to device, e.g. GPU
            y = label.to(device=device, dtype=torch.long)
            y = torch.squeeze(y, 0)

            scores = model(torch.squeeze(x, 0))
            loss = F.cross_entropy(scores, y) # Log loss for our project.

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            epoch_losses.append(loss.item())

            # every print_period, print loss.
            if index % print_period == 0:
                print('Iteration %d, loss = %.4f' % (index, loss.item()))
                
            # Erase memory caches
            del source, label, x, y, scores, loss
            
        # Cosine LR Annealing Scheduler
        scheduler.step()
        
        # Append the score to lists
        acc, log_score, fbeta_score = evaluate_model(val_loader, model, beta=beta)
        val_losses.append(acc)
        train_losses.append(torch.tensor(sum(epoch_losses) / len(epoch_losses), dtype=dtype))
        log_scores.append(log_score)
        fbeta_scores.append(fbeta_score)
        
        # every epoch, save the model
        checkpoint(model, optimizer, scheduler, train_losses, val_losses, log_scores, fbeta_scores, e)

    return model, val_losses, train_losses, log_scores, fbeta_scores

Using cache found in C:\Users\youngrae/.cache\torch\hub\pytorch_vision_v0.10.0


Model2(
  (resnet50): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
          

In [14]:
# Train loop for bird specificator
def bird_train(model, train_dataframe, val_dataframe, test_dataframe):  
    train_data = birdclef(train_dataframe, process='train', labels='label_id')
    val_data = birdclef(val_dataframe, process='valid', labels='label_id')
    test_data = birdclef(test_dataframe, process='valid', labels='label_id')
    
    # Construct data loader for train and validation
    train_loader = DataLoader(train_data, batch_size=birdclef_batch,
                             sampler=sampler.SubsetRandomSampler(range(len(train_dataframe))), 
                             drop_last=True)
    val_loader = DataLoader(val_data, batch_size=birdclef_batch,
                             sampler=sampler.SubsetRandomSampler(range(len(val_dataframe))),
                             drop_last=False)
    test_loader = DataLoader(test_data, batch_size=birdclef_batch,
                              sampler=sampler.SubsetRandomSampler(range(len(test_dataframe))),
                              drop_last=False)
    
    optimizer = optim.SGD(model.parameters(), lr=learning_rate_2, weight_decay=weight_decay_2, momentum=momentum)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=birdclef_epoch, eta_min=eta_min)
    trained_model, val_losses, train_losses, log_scores, fbeta_scores = train_model(
        model, train_loader, val_loader, optimizer, scheduler, beta=beta)
    test_acc, test_log_score, test_fbeta_score = evaluate_model(test_loader, trained_model, beta=beta)
    
    return val_losses, train_losses, log_scores, fbeta_scores, [test_acc, test_log_score, test_fbeta_score]

In [15]:
# Train, Validate and Test for nocall detector
def bird(model, dataframe, val_index, test_index):
    
    # Check that validation fold is not same as test fold
    assert val_index != test_index, \
        'Validation and test should be done on different fold.'
    print('Test set : %d, Validation set : %d' % (test_index, val_index))
    
    train_dataframe = dataframe.query(
        'fold != ' + str(val_index) + ' and fold != ' + str(test_index) 
    ).reset_index(drop=True)
    val_dataframe = dataframe.query(
        'fold == ' + str(val_index) 
    ).reset_index(drop=False)
    test_dataframe = dataframe.query(
        'fold == ' + str(test_index) 
    ).reset_index(drop=False)
    
    val_losses, train_losses, log_scores, fbeta_scores, test_scores = bird_train(
        model, train_dataframe, val_dataframe, test_dataframe)

    return val_losses, train_losses, log_scores, fbeta_scores, test_scores

In [16]:
val_losses, train_losses, log_scores, fbeta_scores, test_scores = \
    bird(cur_model, birdclef_dataframe, 1, 0)
del cur_model

# for i in range(2, 10):
#     cur_model = Model2()
#     cv_losses, ctr_losses, cl_scores, cf_scores, cte_scores = nocall(cur_model, birdclef_dataframe, i, 0)
#     val_losses = torch.mean(torch.stack((torch.tensor(val_losses), torch.tensor(cv_losses))), dim=0)
#     train_losses = torch.mean(torch.stack((torch.tensor(train_losses), torch.tensor(ctr_losses))), dim=0)
#     log_scores = torch.mean(torch.stack((torch.tensor(log_scores), torch.tensor(cl_scores))), dim=0)
#     fbeta_scores = torch.mean(torch.stack((torch.tensor(fbeta_scores), torch.tensor(cf_scores))), dim=0)
#     for j in range(len(test_scores)):
#         test_scores[j] = torch.mean(torch.stack((torch.tensor(test_scores[j]), torch.tensor(cte_scores[j]))), dim=0)

Test set : 0, Validation set : 1
Training model 2, epoch 1


  0%|                                                                             | 3/50298 [00:03<11:36:33,  1.20it/s]

Iteration 0, loss = 5.0807


  2%|█▌                                                                           | 1003/50298 [01:22<56:07, 14.64it/s]

Iteration 1000, loss = 5.0580


  4%|██▉                                                                        | 2003/50298 [02:41<1:08:04, 11.82it/s]

Iteration 2000, loss = 6.1730


  6%|████▌                                                                        | 3002/50298 [03:57<54:47, 14.39it/s]

Iteration 3000, loss = 4.9930


  8%|█████▉                                                                     | 4002/50298 [05:15<1:22:27,  9.36it/s]

Iteration 4000, loss = 6.0236


 10%|███████▋                                                                     | 5004/50298 [06:32<48:01, 15.72it/s]

Iteration 5000, loss = 5.0903


 12%|████████▉                                                                  | 6001/50298 [07:56<1:01:45, 11.95it/s]

Iteration 6000, loss = 5.4952


 14%|██████████▋                                                                  | 7002/50298 [09:17<49:39, 14.53it/s]

Iteration 7000, loss = 5.9457


 16%|████████████▎                                                                | 8003/50298 [10:38<51:43, 13.63it/s]

Iteration 8000, loss = 6.0523


 18%|█████████████▊                                                               | 9003/50298 [11:55<44:42, 15.39it/s]

Iteration 9000, loss = 5.1893


 20%|███████████████                                                             | 10004/50298 [13:11<38:02, 17.65it/s]

Iteration 10000, loss = 5.8120


 22%|████████████████▌                                                           | 11002/50298 [14:29<59:33, 11.00it/s]

Iteration 11000, loss = 5.8312


 24%|██████████████████▏                                                         | 12003/50298 [15:46<44:55, 14.21it/s]

Iteration 12000, loss = 5.9556


 26%|███████████████████▋                                                        | 13001/50298 [17:08<41:56, 14.82it/s]

Iteration 13000, loss = 5.6737


 28%|█████████████████████▏                                                      | 14004/50298 [18:27<39:57, 15.14it/s]

Iteration 14000, loss = 5.4792


 30%|██████████████████████▋                                                     | 15001/50298 [19:44<46:49, 12.56it/s]

Iteration 15000, loss = 6.3066


 32%|████████████████████████▏                                                   | 16002/50298 [21:01<35:42, 16.01it/s]

Iteration 16000, loss = 5.7630


 34%|█████████████████████████▋                                                  | 17002/50298 [22:16<43:40, 12.71it/s]

Iteration 17000, loss = 5.6808


 36%|███████████████████████████▏                                                | 18003/50298 [23:32<32:17, 16.67it/s]

Iteration 18000, loss = 5.6035


 38%|████████████████████████████▋                                               | 19001/50298 [24:50<55:17,  9.43it/s]

Iteration 19000, loss = 4.8548


 40%|██████████████████████████████▏                                             | 20002/50298 [26:09<56:51,  8.88it/s]

Iteration 20000, loss = 5.1444


 42%|███████████████████████████████▋                                            | 21001/50298 [27:26<36:55, 13.23it/s]

Iteration 21000, loss = 5.1617


 44%|█████████████████████████████████▏                                          | 22004/50298 [28:45<29:22, 16.06it/s]

Iteration 22000, loss = 6.4004


 46%|██████████████████████████████████▊                                         | 23002/50298 [30:02<38:40, 11.77it/s]

Iteration 23000, loss = 4.4676


 48%|████████████████████████████████████▎                                       | 24004/50298 [31:20<26:21, 16.62it/s]

Iteration 24000, loss = 5.9733


 50%|█████████████████████████████████████▊                                      | 25003/50298 [32:35<25:09, 16.76it/s]

Iteration 25000, loss = 4.7388


 52%|███████████████████████████████████████▎                                    | 26004/50298 [33:52<25:36, 15.81it/s]

Iteration 26000, loss = 6.7162


 54%|████████████████████████████████████████▊                                   | 27003/50298 [35:10<21:34, 18.00it/s]

Iteration 27000, loss = 4.9246


 56%|██████████████████████████████████████████▎                                 | 28002/50298 [36:29<33:08, 11.21it/s]

Iteration 28000, loss = 5.2906


 58%|███████████████████████████████████████████▊                                | 29004/50298 [37:45<23:24, 15.16it/s]

Iteration 29000, loss = 6.1588


 60%|█████████████████████████████████████████████▎                              | 30002/50298 [39:04<25:13, 13.41it/s]

Iteration 30000, loss = 4.7378


 62%|██████████████████████████████████████████████▊                             | 31003/50298 [40:19<21:23, 15.04it/s]

Iteration 31000, loss = 5.3060


 64%|████████████████████████████████████████████████▎                           | 32002/50298 [41:39<27:33, 11.06it/s]

Iteration 32000, loss = 4.9164


 66%|█████████████████████████████████████████████████▊                          | 32999/50298 [42:52<23:31, 12.26it/s]

Iteration 33000, loss = 6.4699


 68%|███████████████████████████████████████████████████▍                        | 34002/50298 [44:10<21:18, 12.74it/s]

Iteration 34000, loss = 5.1058


 70%|████████████████████████████████████████████████████▉                       | 35001/50298 [45:30<13:38, 18.69it/s]

Iteration 35000, loss = 6.2725


 72%|██████████████████████████████████████████████████████▍                     | 36003/50298 [46:48<16:16, 14.64it/s]

Iteration 36000, loss = 5.5534


 74%|███████████████████████████████████████████████████████▉                    | 37003/50298 [48:05<18:38, 11.89it/s]

Iteration 37000, loss = 5.8408


 76%|█████████████████████████████████████████████████████████▍                  | 38001/50298 [49:20<17:31, 11.69it/s]

Iteration 38000, loss = 5.7665


 78%|██████████████████████████████████████████████████████████▉                 | 39004/50298 [50:42<15:01, 12.52it/s]

Iteration 39000, loss = 5.4451


 80%|████████████████████████████████████████████████████████████▍               | 40001/50298 [52:00<11:12, 15.32it/s]

Iteration 40000, loss = 6.0831


 82%|█████████████████████████████████████████████████████████████▉              | 41001/50298 [53:19<11:41, 13.25it/s]

Iteration 41000, loss = 4.7075


 84%|███████████████████████████████████████████████████████████████▍            | 42004/50298 [54:40<10:59, 12.58it/s]

Iteration 42000, loss = 6.4617


 85%|████████████████████████████████████████████████████████████████▉           | 43003/50298 [55:58<08:56, 13.59it/s]

Iteration 43000, loss = 6.1013


 87%|██████████████████████████████████████████████████████████████████▍         | 44005/50298 [57:15<06:19, 16.58it/s]

Iteration 44000, loss = 4.6825


 89%|███████████████████████████████████████████████████████████████████▉        | 45000/50298 [58:28<04:59, 17.67it/s]

Iteration 45000, loss = 6.1110


 91%|█████████████████████████████████████████████████████████████████████▌      | 46004/50298 [59:43<05:40, 12.62it/s]

Iteration 46000, loss = 6.2475


 93%|█████████████████████████████████████████████████████████████████████▏    | 47003/50298 [1:01:02<03:32, 15.48it/s]

Iteration 47000, loss = 5.0043


 95%|██████████████████████████████████████████████████████████████████████▌   | 48001/50298 [1:02:21<04:37,  8.27it/s]

Iteration 48000, loss = 5.0773


 97%|████████████████████████████████████████████████████████████████████████  | 49004/50298 [1:03:40<01:35, 13.59it/s]

Iteration 49000, loss = 3.7761


 99%|█████████████████████████████████████████████████████████████████████████▌| 50005/50298 [1:04:59<00:17, 16.68it/s]

Iteration 50000, loss = 4.8926


100%|██████████████████████████████████████████████████████████████████████████| 50298/50298 [1:05:21<00:00, 12.83it/s]


Checking accuracy score on validation set.

Accuracy: 1308 / 68541 correct (1.91)
Log Loss score:	5.63
Fbeta-score (beta=1): 	0.02
Training model 2, epoch 2


  0%|                                                                              | 1/50298 [00:00<2:10:46,  6.41it/s]

Iteration 0, loss = 5.7318


  2%|█▍                                                                         | 1003/50298 [01:17<1:03:19, 12.97it/s]

Iteration 1000, loss = 4.9051


  4%|███                                                                          | 2004/50298 [02:33<54:24, 14.79it/s]

Iteration 2000, loss = 4.9494


  6%|████▌                                                                        | 3003/50298 [03:46<47:35, 16.56it/s]

Iteration 3000, loss = 6.0778


  8%|██████▏                                                                      | 4004/50298 [05:03<41:48, 18.46it/s]

Iteration 4000, loss = 5.5069


 10%|███████▍                                                                   | 5003/50298 [06:22<1:01:47, 12.22it/s]

Iteration 5000, loss = 5.7196


 12%|████████▉                                                                  | 6001/50298 [07:37<1:21:04,  9.11it/s]

Iteration 6000, loss = 7.1453


 14%|██████████▋                                                                  | 7003/50298 [08:52<54:23, 13.27it/s]

Iteration 7000, loss = 4.9533


 16%|████████████▏                                                                | 8001/50298 [10:10<38:00, 18.54it/s]

Iteration 8000, loss = 5.7747


 18%|█████████████▊                                                               | 9003/50298 [11:25<38:41, 17.78it/s]

Iteration 9000, loss = 3.2910


 20%|███████████████                                                             | 10003/50298 [12:40<42:05, 15.96it/s]

Iteration 10000, loss = 5.3215


 22%|████████████████▋                                                           | 11003/50298 [13:58<45:00, 14.55it/s]

Iteration 11000, loss = 5.4751


 24%|██████████████████▏                                                         | 12004/50298 [15:14<42:37, 14.97it/s]

Iteration 12000, loss = 5.2197


 26%|███████████████████▋                                                        | 13003/50298 [16:33<31:27, 19.76it/s]

Iteration 13000, loss = 4.9625


 28%|█████████████████████▏                                                      | 14001/50298 [17:55<45:03, 13.42it/s]

Iteration 14000, loss = 3.3230


 30%|██████████████████████▋                                                     | 15000/50298 [19:13<31:23, 18.74it/s]

Iteration 15000, loss = 7.1487


 32%|████████████████████████▏                                                   | 16005/50298 [20:32<34:01, 16.80it/s]

Iteration 16000, loss = 3.7556


 34%|█████████████████████████▋                                                  | 17001/50298 [21:48<33:40, 16.48it/s]

Iteration 17000, loss = 5.7492


 36%|███████████████████████████▏                                                | 18003/50298 [23:07<42:51, 12.56it/s]

Iteration 18000, loss = 5.1749


 38%|████████████████████████████▋                                               | 19005/50298 [24:23<27:38, 18.87it/s]

Iteration 19000, loss = 4.2520


 40%|██████████████████████████████▏                                             | 20002/50298 [25:41<31:32, 16.01it/s]

Iteration 20000, loss = 5.1732


 42%|███████████████████████████████▋                                            | 21003/50298 [26:58<35:02, 13.93it/s]

Iteration 21000, loss = 4.1101


 44%|█████████████████████████████████▏                                          | 22003/50298 [28:16<35:10, 13.40it/s]

Iteration 22000, loss = 2.9235


 46%|██████████████████████████████████▊                                         | 23004/50298 [29:30<27:06, 16.78it/s]

Iteration 23000, loss = 4.8371


 48%|████████████████████████████████████▎                                       | 24003/50298 [30:45<24:38, 17.79it/s]

Iteration 24000, loss = 8.2511


 50%|█████████████████████████████████████▊                                      | 25004/50298 [32:03<25:45, 16.37it/s]

Iteration 25000, loss = 1.5027


 52%|███████████████████████████████████████▎                                    | 26002/50298 [33:20<29:50, 13.57it/s]

Iteration 26000, loss = 6.8796


 54%|████████████████████████████████████████▊                                   | 27000/50298 [34:36<34:43, 11.18it/s]

Iteration 27000, loss = 2.6378


 56%|██████████████████████████████████████████▎                                 | 28004/50298 [35:54<24:47, 14.99it/s]

Iteration 28000, loss = 3.6140


 58%|███████████████████████████████████████████▊                                | 29004/50298 [37:14<26:02, 13.63it/s]

Iteration 29000, loss = 4.3888


 60%|█████████████████████████████████████████████▎                              | 30003/50298 [38:32<22:32, 15.01it/s]

Iteration 30000, loss = 1.8385


 62%|██████████████████████████████████████████████▊                             | 31005/50298 [39:48<19:01, 16.90it/s]

Iteration 31000, loss = 3.5035


 64%|████████████████████████████████████████████████▎                           | 32002/50298 [41:05<24:21, 12.51it/s]

Iteration 32000, loss = 5.6518


 66%|█████████████████████████████████████████████████▊                          | 33002/50298 [42:24<22:57, 12.56it/s]

Iteration 33000, loss = 4.4160


 68%|███████████████████████████████████████████████████▍                        | 34002/50298 [43:42<22:45, 11.94it/s]

Iteration 34000, loss = 4.3373


 70%|████████████████████████████████████████████████████▉                       | 35003/50298 [45:01<23:37, 10.79it/s]

Iteration 35000, loss = 2.8830


 72%|██████████████████████████████████████████████████████▍                     | 36004/50298 [46:17<14:34, 16.35it/s]

Iteration 36000, loss = 5.8582


 74%|███████████████████████████████████████████████████████▉                    | 37002/50298 [47:34<18:43, 11.83it/s]

Iteration 37000, loss = 5.5422


 76%|█████████████████████████████████████████████████████████▍                  | 38004/50298 [48:52<17:49, 11.50it/s]

Iteration 38000, loss = 3.3720


 78%|██████████████████████████████████████████████████████████▉                 | 39004/50298 [50:10<13:38, 13.79it/s]

Iteration 39000, loss = 2.2397


 80%|████████████████████████████████████████████████████████████▍               | 40004/50298 [51:31<11:51, 14.47it/s]

Iteration 40000, loss = 4.8225


 82%|█████████████████████████████████████████████████████████████▉              | 41002/50298 [52:48<14:16, 10.85it/s]

Iteration 41000, loss = 4.5469


 84%|███████████████████████████████████████████████████████████████▍            | 42001/50298 [54:06<14:11,  9.75it/s]

Iteration 42000, loss = 6.2251


 85%|████████████████████████████████████████████████████████████████▉           | 43003/50298 [55:24<08:36, 14.13it/s]

Iteration 43000, loss = 5.2774


 87%|██████████████████████████████████████████████████████████████████▍         | 44002/50298 [56:45<06:09, 17.05it/s]

Iteration 44000, loss = 4.1523


 89%|███████████████████████████████████████████████████████████████████▉        | 45002/50298 [58:00<04:23, 20.11it/s]

Iteration 45000, loss = 5.3460


 91%|█████████████████████████████████████████████████████████████████████▌      | 46002/50298 [59:17<05:55, 12.07it/s]

Iteration 46000, loss = 4.9875


 93%|█████████████████████████████████████████████████████████████████████▏    | 47003/50298 [1:00:39<04:01, 13.65it/s]

Iteration 47000, loss = 2.8700


 95%|██████████████████████████████████████████████████████████████████████▌   | 48004/50298 [1:02:01<02:39, 14.34it/s]

Iteration 48000, loss = 3.0330


 97%|████████████████████████████████████████████████████████████████████████  | 49005/50298 [1:03:19<01:07, 19.25it/s]

Iteration 49000, loss = 3.8522


 99%|█████████████████████████████████████████████████████████████████████████▌| 50003/50298 [1:04:40<00:19, 14.81it/s]

Iteration 50000, loss = 2.3261


100%|██████████████████████████████████████████████████████████████████████████| 50298/50298 [1:05:03<00:00, 12.88it/s]


Checking accuracy score on validation set.

Accuracy: 10523 / 68541 correct (15.35)
Log Loss score:	4.20
Fbeta-score (beta=1): 	0.15
Training model 2, epoch 3


  0%|                                                                              | 1/50298 [00:00<1:44:47,  8.00it/s]

Iteration 0, loss = 5.4943


  2%|█▌                                                                           | 1003/50298 [01:20<50:13, 16.36it/s]

Iteration 1000, loss = 4.7642


  4%|██▉                                                                        | 2002/50298 [02:37<1:02:56, 12.79it/s]

Iteration 2000, loss = 3.2638


  6%|████▌                                                                        | 3003/50298 [03:55<50:17, 15.67it/s]

Iteration 3000, loss = 2.8701


  8%|█████▉                                                                     | 4002/50298 [05:09<1:06:03, 11.68it/s]

Iteration 4000, loss = 2.3819


 10%|███████▍                                                                   | 5005/50298 [06:25<1:01:04, 12.36it/s]

Iteration 5000, loss = 3.3227


 12%|█████████▏                                                                   | 6003/50298 [07:41<49:59, 14.77it/s]

Iteration 6000, loss = 0.9499


 14%|██████████▋                                                                  | 7004/50298 [08:58<43:35, 16.55it/s]

Iteration 7000, loss = 3.3119


 16%|████████████▎                                                                | 8004/50298 [10:14<40:28, 17.42it/s]

Iteration 8000, loss = 3.2864


 18%|█████████████▊                                                               | 9002/50298 [11:28<54:39, 12.59it/s]

Iteration 9000, loss = 4.5732


 20%|███████████████                                                             | 10003/50298 [12:45<39:25, 17.04it/s]

Iteration 10000, loss = 6.4880


 22%|████████████████▋                                                           | 11004/50298 [14:02<42:20, 15.47it/s]

Iteration 11000, loss = 4.2054


 24%|██████████████████▏                                                         | 12003/50298 [15:17<50:08, 12.73it/s]

Iteration 12000, loss = 4.0290


 26%|███████████████████▋                                                        | 13001/50298 [16:32<42:33, 14.61it/s]

Iteration 13000, loss = 3.3019


 28%|█████████████████████▏                                                      | 14002/50298 [17:47<34:57, 17.30it/s]

Iteration 14000, loss = 6.0072


 30%|██████████████████████▋                                                     | 15002/50298 [19:05<42:04, 13.98it/s]

Iteration 15000, loss = 1.7280


 32%|████████████████████████▏                                                   | 16003/50298 [20:22<40:00, 14.29it/s]

Iteration 16000, loss = 3.0453


 34%|█████████████████████████▋                                                  | 17004/50298 [21:43<48:59, 11.33it/s]

Iteration 17000, loss = 1.0744


 36%|███████████████████████████▏                                                | 18002/50298 [23:00<45:13, 11.90it/s]

Iteration 18000, loss = 3.2140


 38%|████████████████████████████▋                                               | 19003/50298 [24:20<33:35, 15.52it/s]

Iteration 19000, loss = 4.7587


 40%|██████████████████████████████▏                                             | 20004/50298 [25:37<34:56, 14.45it/s]

Iteration 20000, loss = 6.2742


 42%|███████████████████████████████▋                                            | 21001/50298 [26:55<27:54, 17.50it/s]

Iteration 21000, loss = 4.2527


 44%|█████████████████████████████████▏                                          | 22003/50298 [28:13<28:19, 16.65it/s]

Iteration 22000, loss = 4.6558


 46%|██████████████████████████████████▊                                         | 23005/50298 [29:29<23:04, 19.72it/s]

Iteration 23000, loss = 1.8017


 48%|████████████████████████████████████▎                                       | 24000/50298 [30:45<42:08, 10.40it/s]

Iteration 24000, loss = 1.5859


 50%|█████████████████████████████████████▊                                      | 25005/50298 [32:03<24:37, 17.12it/s]

Iteration 25000, loss = 5.4696


 52%|███████████████████████████████████████▎                                    | 26003/50298 [33:20<28:23, 14.27it/s]

Iteration 26000, loss = 5.4874


 54%|████████████████████████████████████████▊                                   | 27002/50298 [34:36<26:40, 14.56it/s]

Iteration 27000, loss = 3.5786


 56%|██████████████████████████████████████████▎                                 | 28005/50298 [35:52<23:51, 15.57it/s]

Iteration 28000, loss = 3.4011


 58%|███████████████████████████████████████████▊                                | 29004/50298 [37:08<33:45, 10.51it/s]

Iteration 29000, loss = 4.7162


 60%|█████████████████████████████████████████████▎                              | 30004/50298 [38:21<21:03, 16.06it/s]

Iteration 30000, loss = 2.6812


 62%|██████████████████████████████████████████████▊                             | 31002/50298 [39:37<20:33, 15.65it/s]

Iteration 31000, loss = 3.3103


 64%|████████████████████████████████████████████████▎                           | 32003/50298 [40:57<29:43, 10.26it/s]

Iteration 32000, loss = 4.6047


 66%|█████████████████████████████████████████████████▊                          | 33003/50298 [42:12<16:58, 16.98it/s]

Iteration 33000, loss = 3.1961


 68%|███████████████████████████████████████████████████▍                        | 34002/50298 [43:29<24:35, 11.05it/s]

Iteration 34000, loss = 2.6256


 70%|████████████████████████████████████████████████████▉                       | 35003/50298 [44:40<17:38, 14.45it/s]

Iteration 35000, loss = 7.3188


 72%|██████████████████████████████████████████████████████▍                     | 36004/50298 [45:58<13:48, 17.26it/s]

Iteration 36000, loss = 1.1956


 74%|███████████████████████████████████████████████████████▉                    | 37003/50298 [47:15<13:57, 15.88it/s]

Iteration 37000, loss = 11.0780


 76%|█████████████████████████████████████████████████████████▍                  | 38003/50298 [48:33<16:10, 12.67it/s]

Iteration 38000, loss = 2.9027


 78%|██████████████████████████████████████████████████████████▉                 | 39004/50298 [49:51<11:46, 15.99it/s]

Iteration 39000, loss = 2.4134


 80%|████████████████████████████████████████████████████████████▍               | 40003/50298 [51:09<11:04, 15.49it/s]

Iteration 40000, loss = 2.1047


 82%|█████████████████████████████████████████████████████████████▉              | 41003/50298 [52:28<10:54, 14.21it/s]

Iteration 41000, loss = 0.6353


 84%|███████████████████████████████████████████████████████████████▍            | 42000/50298 [53:40<08:07, 17.03it/s]

Iteration 42000, loss = 2.9234


 85%|████████████████████████████████████████████████████████████████▉           | 43001/50298 [54:58<09:04, 13.41it/s]

Iteration 43000, loss = 4.6268


 87%|██████████████████████████████████████████████████████████████████▍         | 44002/50298 [56:15<06:18, 16.65it/s]

Iteration 44000, loss = 5.0926


 89%|███████████████████████████████████████████████████████████████████▉        | 45001/50298 [57:30<05:23, 16.37it/s]

Iteration 45000, loss = 5.7064


 91%|█████████████████████████████████████████████████████████████████████▌      | 46003/50298 [58:47<04:06, 17.43it/s]

Iteration 46000, loss = 1.7857


 93%|█████████████████████████████████████████████████████████████████████▏    | 47003/50298 [1:00:04<02:55, 18.76it/s]

Iteration 47000, loss = 4.3940


 95%|██████████████████████████████████████████████████████████████████████▌   | 48003/50298 [1:01:25<02:53, 13.26it/s]

Iteration 48000, loss = 2.4561


 97%|████████████████████████████████████████████████████████████████████████  | 49003/50298 [1:02:40<01:22, 15.74it/s]

Iteration 49000, loss = 2.2666


 99%|█████████████████████████████████████████████████████████████████████████▌| 50003/50298 [1:03:57<00:17, 16.74it/s]

Iteration 50000, loss = 5.7076


100%|██████████████████████████████████████████████████████████████████████████| 50298/50298 [1:04:18<00:00, 13.04it/s]


Checking accuracy score on validation set.

Accuracy: 12742 / 68541 correct (18.59)
Log Loss score:	3.98
Fbeta-score (beta=1): 	0.19
Training model 2, epoch 4


  0%|                                                                              | 1/50298 [00:00<1:25:30,  9.80it/s]

Iteration 0, loss = 5.7103


  2%|█▌                                                                           | 1005/50298 [01:18<54:41, 15.02it/s]

Iteration 1000, loss = 5.4276


  4%|███                                                                          | 2004/50298 [02:36<53:41, 14.99it/s]

Iteration 2000, loss = 4.4895


  6%|████▍                                                                      | 3003/50298 [03:53<1:05:22, 12.06it/s]

Iteration 3000, loss = 3.8013


  8%|██████▏                                                                      | 4001/50298 [05:10<51:18, 15.04it/s]

Iteration 4000, loss = 1.5954


 10%|███████▋                                                                     | 5004/50298 [06:24<47:32, 15.88it/s]

Iteration 5000, loss = 3.4143


 12%|█████████▏                                                                   | 6004/50298 [07:40<56:42, 13.02it/s]

Iteration 6000, loss = 3.5727


 14%|██████████▋                                                                  | 7004/50298 [08:56<43:45, 16.49it/s]

Iteration 7000, loss = 3.8788


 16%|████████████▎                                                                | 8003/50298 [10:09<53:19, 13.22it/s]

Iteration 8000, loss = 3.4571


 18%|█████████████▊                                                               | 9003/50298 [11:28<47:37, 14.45it/s]

Iteration 9000, loss = 2.2086


 20%|███████████████                                                             | 10003/50298 [12:44<41:19, 16.25it/s]

Iteration 10000, loss = 4.3344


 22%|████████████████▋                                                           | 11004/50298 [14:00<35:39, 18.36it/s]

Iteration 11000, loss = 2.7558


 24%|██████████████████▏                                                         | 12003/50298 [15:15<49:01, 13.02it/s]

Iteration 12000, loss = 5.8889


 26%|███████████████████▋                                                        | 13003/50298 [16:33<51:40, 12.03it/s]

Iteration 13000, loss = 1.5658


 28%|█████████████████████▏                                                      | 14005/50298 [17:53<43:36, 13.87it/s]

Iteration 14000, loss = 3.3521


 30%|██████████████████████▋                                                     | 15004/50298 [19:05<36:51, 15.96it/s]

Iteration 15000, loss = 1.2985


 32%|████████████████████████▏                                                   | 16004/50298 [20:20<43:34, 13.12it/s]

Iteration 16000, loss = 4.5455


 34%|█████████████████████████▋                                                  | 17002/50298 [21:35<33:45, 16.44it/s]

Iteration 17000, loss = 3.9267


 36%|███████████████████████████▏                                                | 18001/50298 [22:50<33:44, 15.96it/s]

Iteration 18000, loss = 4.7957


 38%|████████████████████████████▋                                               | 19000/50298 [24:01<38:45, 13.46it/s]

Iteration 19000, loss = 0.5776


 40%|██████████████████████████████▏                                             | 20005/50298 [25:13<32:51, 15.37it/s]

Iteration 20000, loss = 3.0721


 42%|███████████████████████████████▋                                            | 21001/50298 [26:32<29:05, 16.78it/s]

Iteration 21000, loss = 3.9997


 44%|█████████████████████████████████▏                                          | 22005/50298 [27:44<24:28, 19.27it/s]

Iteration 22000, loss = 2.1324


 46%|██████████████████████████████████▊                                         | 23001/50298 [29:01<34:21, 13.24it/s]

Iteration 23000, loss = 4.7466


 48%|████████████████████████████████████▎                                       | 24002/50298 [30:20<31:32, 13.90it/s]

Iteration 24000, loss = 4.4031


 50%|█████████████████████████████████████▊                                      | 25003/50298 [31:38<34:46, 12.13it/s]

Iteration 25000, loss = 6.5676


 52%|███████████████████████████████████████▎                                    | 26002/50298 [32:55<26:01, 15.56it/s]

Iteration 26000, loss = 3.8599


 54%|████████████████████████████████████████▊                                   | 27004/50298 [34:12<23:44, 16.35it/s]

Iteration 27000, loss = 0.8329


 56%|██████████████████████████████████████████▎                                 | 28004/50298 [35:25<27:56, 13.30it/s]

Iteration 28000, loss = 0.3381


 58%|███████████████████████████████████████████▊                                | 29001/50298 [36:41<42:13,  8.41it/s]

Iteration 29000, loss = 3.8836


 60%|█████████████████████████████████████████████▎                              | 30002/50298 [37:56<24:58, 13.54it/s]

Iteration 30000, loss = 5.4665


 62%|██████████████████████████████████████████████▊                             | 31002/50298 [39:13<21:02, 15.29it/s]

Iteration 31000, loss = 3.0048


 64%|████████████████████████████████████████████████▎                           | 32002/50298 [40:33<25:10, 12.12it/s]

Iteration 32000, loss = 1.4860


 66%|█████████████████████████████████████████████████▊                          | 33003/50298 [41:48<19:19, 14.92it/s]

Iteration 33000, loss = 3.4610


 68%|███████████████████████████████████████████████████▍                        | 34004/50298 [43:03<17:51, 15.21it/s]

Iteration 34000, loss = 6.2246


 70%|████████████████████████████████████████████████████▉                       | 35002/50298 [44:16<17:54, 14.24it/s]

Iteration 35000, loss = 1.5060


 72%|██████████████████████████████████████████████████████▍                     | 36004/50298 [45:32<16:47, 14.19it/s]

Iteration 36000, loss = 4.5164


 74%|███████████████████████████████████████████████████████▉                    | 37003/50298 [46:47<17:20, 12.78it/s]

Iteration 37000, loss = 1.9380


 76%|█████████████████████████████████████████████████████████▍                  | 38002/50298 [48:01<16:59, 12.07it/s]

Iteration 38000, loss = 2.9373


 78%|██████████████████████████████████████████████████████████▉                 | 39004/50298 [49:17<10:51, 17.33it/s]

Iteration 39000, loss = 4.7636


 80%|████████████████████████████████████████████████████████████▍               | 40005/50298 [50:34<12:57, 13.25it/s]

Iteration 40000, loss = 4.6953


 82%|█████████████████████████████████████████████████████████████▉              | 41001/50298 [51:55<09:59, 15.51it/s]

Iteration 41000, loss = 5.6155


 84%|███████████████████████████████████████████████████████████████▍            | 42003/50298 [53:09<07:37, 18.12it/s]

Iteration 42000, loss = 0.0632


 85%|████████████████████████████████████████████████████████████████▉           | 43002/50298 [54:28<12:20,  9.85it/s]

Iteration 43000, loss = 2.8300


 87%|██████████████████████████████████████████████████████████████████▍         | 44001/50298 [55:44<06:11, 16.96it/s]

Iteration 44000, loss = 3.4977


 89%|███████████████████████████████████████████████████████████████████▉        | 45000/50298 [57:00<06:25, 13.75it/s]

Iteration 45000, loss = 4.0597


 91%|█████████████████████████████████████████████████████████████████████▌      | 46005/50298 [58:19<05:49, 12.30it/s]

Iteration 46000, loss = 3.7578


 93%|███████████████████████████████████████████████████████████████████████     | 47001/50298 [59:34<04:09, 13.20it/s]

Iteration 47000, loss = 3.6448


 95%|██████████████████████████████████████████████████████████████████████▌   | 48002/50298 [1:00:52<03:09, 12.13it/s]

Iteration 48000, loss = 4.1373


 97%|████████████████████████████████████████████████████████████████████████  | 49002/50298 [1:02:11<01:45, 12.24it/s]

Iteration 49000, loss = 9.5886


 99%|█████████████████████████████████████████████████████████████████████████▌| 50002/50298 [1:03:31<00:19, 15.54it/s]

Iteration 50000, loss = 5.2751


100%|██████████████████████████████████████████████████████████████████████████| 50298/50298 [1:03:54<00:00, 13.12it/s]


Checking accuracy score on validation set.

Accuracy: 15010 / 68541 correct (21.90)
Log Loss score:	3.76
Fbeta-score (beta=1): 	0.22
Training model 2, epoch 5


  0%|                                                                              | 4/50298 [00:00<1:22:26, 10.17it/s]

Iteration 0, loss = 2.6340


  2%|█▌                                                                           | 1003/50298 [01:16<44:50, 18.32it/s]

Iteration 1000, loss = 1.7403


  4%|███                                                                          | 2004/50298 [02:31<46:57, 17.14it/s]

Iteration 2000, loss = 4.3664


  6%|████▍                                                                      | 3000/50298 [03:46<1:10:14, 11.22it/s]

Iteration 3000, loss = 2.1575


  8%|██████▏                                                                      | 4004/50298 [05:04<56:01, 13.77it/s]

Iteration 4000, loss = 4.0808


 10%|███████▋                                                                     | 5002/50298 [06:20<47:54, 15.76it/s]

Iteration 5000, loss = 4.1666


 12%|████████▉                                                                  | 6001/50298 [07:34<1:07:02, 11.01it/s]

Iteration 6000, loss = 1.8893


 14%|██████████▋                                                                  | 7002/50298 [08:51<48:13, 14.96it/s]

Iteration 7000, loss = 4.6677


 16%|████████████▎                                                                | 8002/50298 [10:07<50:10, 14.05it/s]

Iteration 8000, loss = 3.2329


 18%|█████████████▊                                                               | 9004/50298 [11:22<43:12, 15.93it/s]

Iteration 9000, loss = 3.7356


 20%|███████████████                                                             | 10005/50298 [12:40<37:53, 17.72it/s]

Iteration 10000, loss = 3.6877


 22%|████████████████▋                                                           | 11005/50298 [13:55<38:59, 16.80it/s]

Iteration 11000, loss = 2.8447


 24%|██████████████████▏                                                         | 12004/50298 [15:08<40:32, 15.74it/s]

Iteration 12000, loss = 4.6953


 26%|███████████████████▋                                                        | 13003/50298 [16:23<48:19, 12.86it/s]

Iteration 13000, loss = 2.4473


 28%|█████████████████████▏                                                      | 14004/50298 [17:41<39:39, 15.25it/s]

Iteration 14000, loss = 4.1516


 30%|██████████████████████▋                                                     | 15001/50298 [19:00<30:18, 19.41it/s]

Iteration 15000, loss = 4.8640


 32%|████████████████████████▏                                                   | 16004/50298 [20:16<30:56, 18.47it/s]

Iteration 16000, loss = 5.6969


 34%|█████████████████████████▋                                                  | 17002/50298 [21:33<47:57, 11.57it/s]

Iteration 17000, loss = 2.5257


 36%|███████████████████████████▏                                                | 18001/50298 [22:48<40:27, 13.31it/s]

Iteration 18000, loss = 3.3238


 38%|████████████████████████████▋                                               | 19003/50298 [24:05<33:35, 15.53it/s]

Iteration 19000, loss = 1.5316


 40%|██████████████████████████████▏                                             | 20002/50298 [25:23<37:15, 13.55it/s]

Iteration 20000, loss = 3.4713


 42%|███████████████████████████████▋                                            | 21001/50298 [26:41<43:31, 11.22it/s]

Iteration 21000, loss = 8.8761


 44%|█████████████████████████████████▏                                          | 22004/50298 [27:54<28:44, 16.41it/s]

Iteration 22000, loss = 3.9348


 46%|██████████████████████████████████▊                                         | 23005/50298 [29:09<31:22, 14.50it/s]

Iteration 23000, loss = 4.1486


 48%|████████████████████████████████████▎                                       | 24003/50298 [30:24<30:58, 14.15it/s]

Iteration 24000, loss = 5.9573


 50%|█████████████████████████████████████▊                                      | 25003/50298 [31:42<29:47, 14.15it/s]

Iteration 25000, loss = 2.9788


 52%|███████████████████████████████████████▎                                    | 26003/50298 [32:58<20:47, 19.47it/s]

Iteration 26000, loss = 2.4494


 54%|████████████████████████████████████████▊                                   | 27003/50298 [34:14<29:46, 13.04it/s]

Iteration 27000, loss = 3.9338


 56%|██████████████████████████████████████████▎                                 | 28006/50298 [35:32<22:14, 16.71it/s]

Iteration 28000, loss = 3.7931


 58%|███████████████████████████████████████████▊                                | 29002/50298 [36:49<33:41, 10.54it/s]

Iteration 29000, loss = 3.2660


 60%|█████████████████████████████████████████████▎                              | 30004/50298 [38:06<19:57, 16.94it/s]

Iteration 30000, loss = 0.6692


 62%|██████████████████████████████████████████████▊                             | 31004/50298 [39:24<24:30, 13.12it/s]

Iteration 31000, loss = 2.5975


 64%|████████████████████████████████████████████████▎                           | 32005/50298 [40:42<18:21, 16.61it/s]

Iteration 32000, loss = 3.9381


 66%|█████████████████████████████████████████████████▊                          | 33002/50298 [41:56<15:32, 18.56it/s]

Iteration 33000, loss = 3.8308


 68%|███████████████████████████████████████████████████▍                        | 34003/50298 [43:14<27:20,  9.93it/s]

Iteration 34000, loss = 5.5586


 70%|████████████████████████████████████████████████████▉                       | 35003/50298 [44:28<17:00, 14.98it/s]

Iteration 35000, loss = 2.6589


 72%|██████████████████████████████████████████████████████▍                     | 36003/50298 [45:43<16:09, 14.75it/s]

Iteration 36000, loss = 4.4213


 74%|███████████████████████████████████████████████████████▉                    | 37005/50298 [46:58<12:30, 17.72it/s]

Iteration 37000, loss = 4.0174


 76%|█████████████████████████████████████████████████████████▍                  | 38002/50298 [48:16<18:59, 10.79it/s]

Iteration 38000, loss = 5.8376


 78%|██████████████████████████████████████████████████████████▉                 | 39003/50298 [49:30<19:46,  9.52it/s]

Iteration 39000, loss = 2.4656


 80%|████████████████████████████████████████████████████████████▍               | 40003/50298 [50:45<10:42, 16.03it/s]

Iteration 40000, loss = 8.0086


 82%|█████████████████████████████████████████████████████████████▉              | 41002/50298 [52:00<10:04, 15.37it/s]

Iteration 41000, loss = 4.4586


 84%|███████████████████████████████████████████████████████████████▍            | 42003/50298 [53:18<09:31, 14.50it/s]

Iteration 42000, loss = 1.6002


 85%|████████████████████████████████████████████████████████████████▉           | 43003/50298 [54:33<07:33, 16.09it/s]

Iteration 43000, loss = 0.6859


 87%|██████████████████████████████████████████████████████████████████▍         | 44003/50298 [55:49<08:45, 11.98it/s]

Iteration 44000, loss = 0.1433


 89%|███████████████████████████████████████████████████████████████████▉        | 45002/50298 [57:08<05:54, 14.95it/s]

Iteration 45000, loss = 5.0357


 91%|█████████████████████████████████████████████████████████████████████▌      | 46004/50298 [58:24<05:11, 13.80it/s]

Iteration 46000, loss = 0.7802


 93%|███████████████████████████████████████████████████████████████████████     | 47004/50298 [59:39<03:59, 13.75it/s]

Iteration 47000, loss = 2.3771


 95%|██████████████████████████████████████████████████████████████████████▌   | 48004/50298 [1:00:53<02:43, 14.01it/s]

Iteration 48000, loss = 5.6697


 97%|████████████████████████████████████████████████████████████████████████  | 49001/50298 [1:02:12<01:34, 13.73it/s]

Iteration 49000, loss = 1.3736


 99%|█████████████████████████████████████████████████████████████████████████▌| 50003/50298 [1:03:30<00:20, 14.71it/s]

Iteration 50000, loss = 3.4783


100%|██████████████████████████████████████████████████████████████████████████| 50298/50298 [1:03:55<00:00, 13.11it/s]


Checking accuracy score on validation set.

Accuracy: 18156 / 68541 correct (26.49)
Log Loss score:	3.57
Fbeta-score (beta=1): 	0.26
Training model 2, epoch 6


  0%|                                                                              | 1/50298 [00:00<1:52:18,  7.46it/s]

Iteration 0, loss = 2.5407


  2%|█▌                                                                           | 1003/50298 [01:16<54:11, 15.16it/s]

Iteration 1000, loss = 4.0164


  4%|███                                                                          | 2002/50298 [02:30<58:53, 13.67it/s]

Iteration 2000, loss = 5.9923


  6%|████▌                                                                        | 3000/50298 [03:48<52:36, 14.98it/s]

Iteration 3000, loss = 3.4079


  8%|██████▏                                                                      | 4005/50298 [05:07<45:18, 17.03it/s]

Iteration 4000, loss = 0.9646


 10%|███████▋                                                                     | 5003/50298 [06:21<47:13, 15.98it/s]

Iteration 5000, loss = 2.5966


 12%|█████████▏                                                                   | 6003/50298 [07:38<48:24, 15.25it/s]

Iteration 6000, loss = 3.1706


 14%|██████████▋                                                                  | 7003/50298 [08:53<57:21, 12.58it/s]

Iteration 7000, loss = 2.6457


 16%|████████████▎                                                                | 8004/50298 [10:08<55:22, 12.73it/s]

Iteration 8000, loss = 6.7203


 18%|█████████████▊                                                               | 9002/50298 [11:24<48:16, 14.26it/s]

Iteration 9000, loss = 4.7688


 20%|██████████████▋                                                           | 10002/50298 [12:40<1:08:41,  9.78it/s]

Iteration 10000, loss = 4.8595


 22%|████████████████▋                                                           | 11004/50298 [13:52<54:09, 12.09it/s]

Iteration 11000, loss = 6.3218


 24%|██████████████████▏                                                         | 12004/50298 [15:07<40:04, 15.93it/s]

Iteration 12000, loss = 6.7935


 26%|███████████████████▏                                                      | 13002/50298 [16:25<1:05:13,  9.53it/s]

Iteration 13000, loss = 6.3407


 28%|█████████████████████▏                                                      | 14002/50298 [17:37<46:46, 12.93it/s]

Iteration 14000, loss = 3.7620


 30%|██████████████████████▋                                                     | 15002/50298 [18:54<48:27, 12.14it/s]

Iteration 15000, loss = 6.1339


 32%|████████████████████████▏                                                   | 16002/50298 [20:13<44:07, 12.95it/s]

Iteration 16000, loss = 7.0383


 34%|█████████████████████████▋                                                  | 17004/50298 [21:29<31:16, 17.74it/s]

Iteration 17000, loss = 3.3828


 36%|███████████████████████████▏                                                | 18001/50298 [22:45<36:06, 14.90it/s]

Iteration 18000, loss = 2.0162


 38%|████████████████████████████▋                                               | 19004/50298 [24:03<42:30, 12.27it/s]

Iteration 19000, loss = 2.8377


 40%|██████████████████████████████▏                                             | 20002/50298 [25:15<33:56, 14.88it/s]

Iteration 20000, loss = 4.1681


 42%|███████████████████████████████▋                                            | 21002/50298 [26:31<58:53,  8.29it/s]

Iteration 21000, loss = 2.9310


 44%|█████████████████████████████████▏                                          | 22003/50298 [27:46<37:09, 12.69it/s]

Iteration 22000, loss = 0.0413


 46%|██████████████████████████████████▊                                         | 23004/50298 [28:58<28:12, 16.12it/s]

Iteration 23000, loss = 3.6943


 48%|████████████████████████████████████▎                                       | 24003/50298 [30:15<25:02, 17.50it/s]

Iteration 24000, loss = 4.3141


 50%|█████████████████████████████████████▊                                      | 25002/50298 [31:30<31:21, 13.44it/s]

Iteration 25000, loss = 4.4782


 52%|███████████████████████████████████████▎                                    | 26001/50298 [32:45<34:54, 11.60it/s]

Iteration 26000, loss = 2.6840


 54%|████████████████████████████████████████▊                                   | 27002/50298 [34:03<23:28, 16.54it/s]

Iteration 27000, loss = 0.7878


 56%|██████████████████████████████████████████▎                                 | 28004/50298 [35:19<23:14, 15.98it/s]

Iteration 28000, loss = 5.2266


 58%|███████████████████████████████████████████▊                                | 29002/50298 [36:36<29:45, 11.93it/s]

Iteration 29000, loss = 2.7201


 60%|█████████████████████████████████████████████▎                              | 30002/50298 [37:54<26:53, 12.58it/s]

Iteration 30000, loss = 4.6797


 62%|██████████████████████████████████████████████▊                             | 31003/50298 [39:12<27:28, 11.70it/s]

Iteration 31000, loss = 0.8367


 64%|████████████████████████████████████████████████▎                           | 32003/50298 [40:28<19:48, 15.40it/s]

Iteration 32000, loss = 2.5342


 66%|█████████████████████████████████████████████████▊                          | 33002/50298 [41:46<18:30, 15.58it/s]

Iteration 33000, loss = 5.2618


 68%|███████████████████████████████████████████████████▍                        | 34005/50298 [43:02<17:55, 15.15it/s]

Iteration 34000, loss = 4.0428


 70%|████████████████████████████████████████████████████▉                       | 35001/50298 [44:18<18:03, 14.12it/s]

Iteration 35000, loss = 1.3681


 72%|██████████████████████████████████████████████████████▍                     | 36002/50298 [45:38<28:04,  8.49it/s]

Iteration 36000, loss = 1.8364


 74%|███████████████████████████████████████████████████████▉                    | 37002/50298 [46:56<16:28, 13.45it/s]

Iteration 37000, loss = 0.3142


 76%|█████████████████████████████████████████████████████████▍                  | 38001/50298 [48:10<19:25, 10.55it/s]

Iteration 38000, loss = 2.9349


 78%|██████████████████████████████████████████████████████████▉                 | 39004/50298 [49:28<16:12, 11.61it/s]

Iteration 39000, loss = 2.4114


 80%|████████████████████████████████████████████████████████████▍               | 40005/50298 [50:44<10:15, 16.72it/s]

Iteration 40000, loss = 2.7340


 82%|█████████████████████████████████████████████████████████████▉              | 41002/50298 [52:00<11:06, 13.94it/s]

Iteration 41000, loss = 3.1848


 84%|███████████████████████████████████████████████████████████████▍            | 42003/50298 [53:18<11:19, 12.20it/s]

Iteration 42000, loss = 1.4949


 85%|████████████████████████████████████████████████████████████████▉           | 43001/50298 [54:37<08:53, 13.67it/s]

Iteration 43000, loss = 4.0795


 87%|██████████████████████████████████████████████████████████████████▍         | 44004/50298 [55:53<05:34, 18.81it/s]

Iteration 44000, loss = 2.8422


 89%|████████████████████████████████████████████████████████████████████        | 45004/50298 [57:10<05:08, 17.13it/s]

Iteration 45000, loss = 5.2635


 91%|█████████████████████████████████████████████████████████████████████▌      | 46004/50298 [58:24<05:33, 12.87it/s]

Iteration 46000, loss = 1.9687


 93%|███████████████████████████████████████████████████████████████████████     | 47002/50298 [59:39<07:00,  7.83it/s]

Iteration 47000, loss = 2.9830


 95%|██████████████████████████████████████████████████████████████████████▌   | 48004/50298 [1:01:01<02:25, 15.74it/s]

Iteration 48000, loss = 3.7020


 97%|████████████████████████████████████████████████████████████████████████  | 49004/50298 [1:02:20<01:35, 13.51it/s]

Iteration 49000, loss = 4.5644


 99%|█████████████████████████████████████████████████████████████████████████▌| 50003/50298 [1:03:38<00:17, 17.34it/s]

Iteration 50000, loss = 2.4324


100%|██████████████████████████████████████████████████████████████████████████| 50298/50298 [1:03:58<00:00, 13.10it/s]


Checking accuracy score on validation set.

Accuracy: 18055 / 68541 correct (26.34)
Log Loss score:	3.51
Fbeta-score (beta=1): 	0.26
Training model 2, epoch 7


  0%|                                                                              | 1/50298 [00:00<1:30:32,  9.26it/s]

Iteration 0, loss = 1.4728


  2%|█▌                                                                           | 1003/50298 [01:14<59:33, 13.79it/s]

Iteration 1000, loss = 3.2380


  4%|███                                                                          | 2002/50298 [02:32<58:28, 13.76it/s]

Iteration 2000, loss = 6.7135


  6%|████▍                                                                      | 3001/50298 [03:47<1:01:07, 12.90it/s]

Iteration 3000, loss = 1.5284


  8%|██████▏                                                                      | 4003/50298 [05:03<44:26, 17.36it/s]

Iteration 4000, loss = 0.1254


 10%|███████▋                                                                     | 5004/50298 [06:18<55:27, 13.61it/s]

Iteration 5000, loss = 0.9843


 12%|█████████▏                                                                   | 6001/50298 [07:35<49:21, 14.96it/s]

Iteration 6000, loss = 2.2214


 14%|██████████▋                                                                  | 7004/50298 [08:53<46:27, 15.53it/s]

Iteration 7000, loss = 1.7763


 16%|████████████▎                                                                | 8004/50298 [10:09<48:16, 14.60it/s]

Iteration 8000, loss = 2.8375


 18%|█████████████▊                                                               | 9004/50298 [11:25<41:54, 16.42it/s]

Iteration 9000, loss = 5.8655


 20%|███████████████                                                             | 10002/50298 [12:38<57:32, 11.67it/s]

Iteration 10000, loss = 4.9669


 22%|████████████████▋                                                           | 11004/50298 [13:56<38:22, 17.06it/s]

Iteration 11000, loss = 3.5650


 24%|██████████████████▏                                                         | 12004/50298 [15:15<38:23, 16.63it/s]

Iteration 12000, loss = 4.5274


 26%|███████████████████▋                                                        | 13002/50298 [16:30<50:49, 12.23it/s]

Iteration 13000, loss = 4.4327


 28%|█████████████████████▏                                                      | 14004/50298 [17:46<32:06, 18.84it/s]

Iteration 14000, loss = 3.7467


 30%|██████████████████████▋                                                     | 15003/50298 [19:03<36:14, 16.23it/s]

Iteration 15000, loss = 2.7925


 32%|████████████████████████▏                                                   | 16004/50298 [20:19<45:31, 12.56it/s]

Iteration 16000, loss = 5.0288


 34%|█████████████████████████▋                                                  | 17002/50298 [21:36<59:43,  9.29it/s]

Iteration 17000, loss = 0.2655


 36%|███████████████████████████▏                                                | 18002/50298 [22:52<44:03, 12.22it/s]

Iteration 18000, loss = 4.2884


 38%|████████████████████████████▋                                               | 19002/50298 [24:10<40:07, 13.00it/s]

Iteration 19000, loss = 2.0315


 40%|██████████████████████████████▏                                             | 20003/50298 [25:26<29:32, 17.09it/s]

Iteration 20000, loss = 4.8078


 42%|███████████████████████████████▋                                            | 21002/50298 [26:44<35:59, 13.57it/s]

Iteration 21000, loss = 0.0213


 44%|█████████████████████████████████▏                                          | 22003/50298 [27:59<38:35, 12.22it/s]

Iteration 22000, loss = 1.9950


 46%|██████████████████████████████████▊                                         | 23003/50298 [29:15<33:47, 13.47it/s]

Iteration 23000, loss = 4.0371


 48%|████████████████████████████████████▎                                       | 24003/50298 [30:30<29:56, 14.63it/s]

Iteration 24000, loss = 5.8458


 50%|█████████████████████████████████████▊                                      | 25004/50298 [31:46<28:56, 14.57it/s]

Iteration 25000, loss = 5.7898


 52%|███████████████████████████████████████▎                                    | 26005/50298 [33:01<22:06, 18.31it/s]

Iteration 26000, loss = 3.2412


 54%|████████████████████████████████████████▊                                   | 27003/50298 [34:16<31:06, 12.48it/s]

Iteration 27000, loss = 0.1331


 56%|██████████████████████████████████████████▎                                 | 28002/50298 [35:36<25:48, 14.39it/s]

Iteration 28000, loss = 3.5891


 58%|███████████████████████████████████████████▊                                | 29004/50298 [36:51<22:46, 15.58it/s]

Iteration 29000, loss = 0.8721


 60%|█████████████████████████████████████████████▎                              | 30004/50298 [38:07<22:05, 15.31it/s]

Iteration 30000, loss = 1.9817


 62%|██████████████████████████████████████████████▊                             | 31002/50298 [39:28<26:21, 12.20it/s]

Iteration 31000, loss = 1.3022


 64%|████████████████████████████████████████████████▎                           | 32004/50298 [40:42<17:17, 17.64it/s]

Iteration 32000, loss = 3.9643


 66%|█████████████████████████████████████████████████▊                          | 33003/50298 [41:56<22:20, 12.90it/s]

Iteration 33000, loss = 4.1605


 68%|███████████████████████████████████████████████████▍                        | 34001/50298 [43:13<25:09, 10.79it/s]

Iteration 34000, loss = 2.7872


 70%|████████████████████████████████████████████████████▉                       | 35001/50298 [44:29<18:26, 13.82it/s]

Iteration 35000, loss = 3.4972


 72%|██████████████████████████████████████████████████████▍                     | 36001/50298 [45:46<17:10, 13.88it/s]

Iteration 36000, loss = 4.1650


 74%|███████████████████████████████████████████████████████▉                    | 37003/50298 [47:04<15:01, 14.75it/s]

Iteration 37000, loss = 1.1765


 76%|█████████████████████████████████████████████████████████▍                  | 38003/50298 [48:19<10:48, 18.95it/s]

Iteration 38000, loss = 5.1903


 78%|██████████████████████████████████████████████████████████▉                 | 38999/50298 [49:36<13:59, 13.45it/s]

Iteration 39000, loss = 0.3934


 80%|████████████████████████████████████████████████████████████▍               | 40004/50298 [50:52<09:33, 17.94it/s]

Iteration 40000, loss = 2.1191


 82%|█████████████████████████████████████████████████████████████▉              | 41003/50298 [52:10<08:44, 17.74it/s]

Iteration 41000, loss = 2.7411


 84%|███████████████████████████████████████████████████████████████▍            | 42002/50298 [53:26<13:04, 10.57it/s]

Iteration 42000, loss = 0.2176


 85%|████████████████████████████████████████████████████████████████▉           | 43004/50298 [54:43<08:00, 15.19it/s]

Iteration 43000, loss = 1.9299


 87%|██████████████████████████████████████████████████████████████████▍         | 44001/50298 [55:59<07:45, 13.53it/s]

Iteration 44000, loss = 1.4715


 89%|███████████████████████████████████████████████████████████████████▉        | 45003/50298 [57:17<05:49, 15.13it/s]

Iteration 45000, loss = 0.9382


 91%|█████████████████████████████████████████████████████████████████████▌      | 46002/50298 [58:32<05:32, 12.93it/s]

Iteration 46000, loss = 3.0204


 93%|███████████████████████████████████████████████████████████████████████     | 47004/50298 [59:47<04:13, 12.98it/s]

Iteration 47000, loss = 4.0872


 95%|██████████████████████████████████████████████████████████████████████▋   | 48005/50298 [1:01:05<02:15, 16.89it/s]

Iteration 48000, loss = 2.3344


 97%|████████████████████████████████████████████████████████████████████████  | 49001/50298 [1:02:20<01:31, 14.22it/s]

Iteration 49000, loss = 2.6626


 99%|█████████████████████████████████████████████████████████████████████████▌| 50002/50298 [1:03:37<00:23, 12.40it/s]

Iteration 50000, loss = 3.1892


100%|██████████████████████████████████████████████████████████████████████████| 50298/50298 [1:04:00<00:00, 13.10it/s]


Checking accuracy score on validation set.

Accuracy: 20717 / 68541 correct (30.23)
Log Loss score:	3.32
Fbeta-score (beta=1): 	0.30
Training model 2, epoch 8


  0%|                                                                              | 1/50298 [00:00<1:29:41,  9.35it/s]

Iteration 0, loss = 0.9821


  2%|█▌                                                                           | 1004/50298 [01:14<52:59, 15.50it/s]

Iteration 1000, loss = 0.2274


  4%|███                                                                          | 2003/50298 [02:29<58:53, 13.67it/s]

Iteration 2000, loss = 2.1502


  6%|████▍                                                                      | 3002/50298 [03:46<1:16:45, 10.27it/s]

Iteration 3000, loss = 3.9319


  8%|██████▏                                                                      | 4004/50298 [05:06<50:47, 15.19it/s]

Iteration 4000, loss = 3.2339


 10%|███████▍                                                                   | 5002/50298 [06:20<1:01:26, 12.29it/s]

Iteration 5000, loss = 0.3466


 12%|████████▉                                                                  | 6003/50298 [07:37<1:00:10, 12.27it/s]

Iteration 6000, loss = 3.7419


 14%|██████████▋                                                                  | 7003/50298 [08:51<55:06, 13.10it/s]

Iteration 7000, loss = 3.9359


 16%|████████████▎                                                                | 8002/50298 [10:06<56:14, 12.53it/s]

Iteration 8000, loss = 1.7839


 18%|█████████████▊                                                               | 9002/50298 [11:24<46:52, 14.68it/s]

Iteration 9000, loss = 0.5078


 20%|███████████████                                                             | 10004/50298 [12:40<45:55, 14.62it/s]

Iteration 10000, loss = 3.9321


 22%|████████████████▋                                                           | 11003/50298 [13:54<41:54, 15.63it/s]

Iteration 11000, loss = 4.7745


 24%|██████████████████▏                                                         | 12002/50298 [15:09<38:08, 16.73it/s]

Iteration 12000, loss = 1.0808


 26%|███████████████████▋                                                        | 13002/50298 [16:25<37:26, 16.60it/s]

Iteration 13000, loss = 4.0401


 28%|█████████████████████▏                                                      | 14000/50298 [17:44<33:30, 18.06it/s]

Iteration 14000, loss = 2.9616


 30%|██████████████████████▋                                                     | 15003/50298 [18:58<44:51, 13.11it/s]

Iteration 15000, loss = 3.2120


 32%|████████████████████████▏                                                   | 16003/50298 [20:16<43:17, 13.20it/s]

Iteration 16000, loss = 1.9657


 34%|█████████████████████████▋                                                  | 17004/50298 [21:33<38:03, 14.58it/s]

Iteration 17000, loss = 4.5947


 36%|███████████████████████████▏                                                | 18004/50298 [22:50<36:56, 14.57it/s]

Iteration 18000, loss = 2.9747


 38%|████████████████████████████▋                                               | 19002/50298 [24:04<45:18, 11.51it/s]

Iteration 19000, loss = 3.0955


 40%|██████████████████████████████▏                                             | 20004/50298 [25:18<37:07, 13.60it/s]

Iteration 20000, loss = 5.1262


 42%|███████████████████████████████▋                                            | 21003/50298 [26:35<28:42, 17.00it/s]

Iteration 21000, loss = 5.7760


 44%|█████████████████████████████████▏                                          | 22004/50298 [27:50<29:43, 15.86it/s]

Iteration 22000, loss = 4.5202


 46%|██████████████████████████████████▊                                         | 23004/50298 [29:05<31:18, 14.53it/s]

Iteration 23000, loss = 4.8303


 48%|████████████████████████████████████▎                                       | 24002/50298 [30:17<34:12, 12.81it/s]

Iteration 24000, loss = 3.3279


 50%|█████████████████████████████████████▊                                      | 25004/50298 [31:34<23:04, 18.27it/s]

Iteration 25000, loss = 1.5470


 52%|███████████████████████████████████████▎                                    | 26003/50298 [32:47<24:17, 16.67it/s]

Iteration 26000, loss = 0.9950


 54%|████████████████████████████████████████▊                                   | 27004/50298 [34:03<22:11, 17.50it/s]

Iteration 27000, loss = 2.1075


 56%|██████████████████████████████████████████▎                                 | 28002/50298 [35:21<31:01, 11.98it/s]

Iteration 28000, loss = 4.6792


 58%|███████████████████████████████████████████▊                                | 29002/50298 [36:39<28:17, 12.55it/s]

Iteration 29000, loss = 5.3481


 60%|█████████████████████████████████████████████▎                              | 30001/50298 [37:55<20:18, 16.65it/s]

Iteration 30000, loss = 3.6715


 62%|██████████████████████████████████████████████▊                             | 31003/50298 [39:10<21:10, 15.19it/s]

Iteration 31000, loss = 6.4072


 64%|████████████████████████████████████████████████▎                           | 32003/50298 [40:28<20:01, 15.23it/s]

Iteration 32000, loss = 4.2906


 66%|█████████████████████████████████████████████████▊                          | 33001/50298 [41:45<37:33,  7.68it/s]

Iteration 33000, loss = 2.6875


 68%|███████████████████████████████████████████████████▍                        | 34002/50298 [43:03<16:11, 16.77it/s]

Iteration 34000, loss = 5.0339


 70%|████████████████████████████████████████████████████▉                       | 35004/50298 [44:19<19:41, 12.94it/s]

Iteration 35000, loss = 3.6383


 72%|██████████████████████████████████████████████████████▍                     | 36002/50298 [45:36<23:23, 10.19it/s]

Iteration 36000, loss = 3.6993


 74%|███████████████████████████████████████████████████████▉                    | 37004/50298 [46:51<12:09, 18.22it/s]

Iteration 37000, loss = 2.3768


 76%|█████████████████████████████████████████████████████████▍                  | 38004/50298 [48:08<16:20, 12.54it/s]

Iteration 38000, loss = 3.7244


 78%|██████████████████████████████████████████████████████████▉                 | 39003/50298 [49:24<11:01, 17.06it/s]

Iteration 39000, loss = 3.0602


 80%|████████████████████████████████████████████████████████████▍               | 40002/50298 [50:39<14:59, 11.45it/s]

Iteration 40000, loss = 1.4142


 82%|█████████████████████████████████████████████████████████████▉              | 41003/50298 [51:57<09:31, 16.26it/s]

Iteration 41000, loss = 1.6648


 84%|███████████████████████████████████████████████████████████████▍            | 42002/50298 [53:16<11:18, 12.23it/s]

Iteration 42000, loss = 3.4461


 85%|████████████████████████████████████████████████████████████████▉           | 43004/50298 [54:32<10:51, 11.19it/s]

Iteration 43000, loss = 2.5370


 87%|██████████████████████████████████████████████████████████████████▍         | 44003/50298 [55:47<06:38, 15.78it/s]

Iteration 44000, loss = 1.0059


 89%|███████████████████████████████████████████████████████████████████▉        | 45002/50298 [57:04<07:28, 11.81it/s]

Iteration 45000, loss = 2.2365


 91%|█████████████████████████████████████████████████████████████████████▌      | 46002/50298 [58:17<05:34, 12.84it/s]

Iteration 46000, loss = 4.0024


 93%|███████████████████████████████████████████████████████████████████████     | 47001/50298 [59:36<02:48, 19.59it/s]

Iteration 47000, loss = 2.5176


 95%|██████████████████████████████████████████████████████████████████████▌   | 48001/50298 [1:00:51<02:22, 16.15it/s]

Iteration 48000, loss = 1.6751


 97%|████████████████████████████████████████████████████████████████████████  | 49003/50298 [1:02:11<01:38, 13.17it/s]

Iteration 49000, loss = 5.4499


 99%|█████████████████████████████████████████████████████████████████████████▌| 50000/50298 [1:03:27<00:19, 15.02it/s]

Iteration 50000, loss = 0.5139


100%|██████████████████████████████████████████████████████████████████████████| 50298/50298 [1:03:50<00:00, 13.13it/s]


Checking accuracy score on validation set.

Accuracy: 22252 / 68541 correct (32.47)
Log Loss score:	3.26
Fbeta-score (beta=1): 	0.32
Training model 2, epoch 9


  0%|                                                                              | 1/50298 [00:00<1:31:22,  9.17it/s]

Iteration 0, loss = 2.8620


  2%|█▍                                                                         | 1004/50298 [01:14<1:04:23, 12.76it/s]

Iteration 1000, loss = 4.5740


  4%|███                                                                          | 2003/50298 [02:29<53:04, 15.16it/s]

Iteration 2000, loss = 3.8643


  6%|████▌                                                                        | 3001/50298 [03:48<44:47, 17.60it/s]

Iteration 3000, loss = 4.9218


  8%|██████▏                                                                      | 4001/50298 [05:01<57:42, 13.37it/s]

Iteration 4000, loss = 4.8716


 10%|███████▋                                                                     | 5004/50298 [06:13<43:04, 17.52it/s]

Iteration 5000, loss = 1.2353


 12%|████████▉                                                                  | 6004/50298 [07:29<1:13:35, 10.03it/s]

Iteration 6000, loss = 4.8493


 14%|██████████▋                                                                  | 7003/50298 [08:51<56:53, 12.68it/s]

Iteration 7000, loss = 3.0371


 16%|████████████▎                                                                | 8004/50298 [10:08<42:30, 16.58it/s]

Iteration 8000, loss = 0.4251


 18%|█████████████▊                                                               | 9002/50298 [11:25<40:06, 17.16it/s]

Iteration 9000, loss = 1.7130


 20%|███████████████                                                             | 10001/50298 [12:42<37:59, 17.68it/s]

Iteration 10000, loss = 1.6562


 22%|████████████████▋                                                           | 11003/50298 [13:58<39:00, 16.79it/s]

Iteration 11000, loss = 0.4018


 24%|██████████████████▏                                                         | 12003/50298 [15:17<44:00, 14.50it/s]

Iteration 12000, loss = 4.6786


 26%|███████████████████▏                                                      | 13002/50298 [16:33<1:05:53,  9.43it/s]

Iteration 13000, loss = 3.2950


 28%|█████████████████████▏                                                      | 14002/50298 [17:51<48:16, 12.53it/s]

Iteration 14000, loss = 4.6934


 30%|██████████████████████▋                                                     | 15000/50298 [19:03<41:32, 14.16it/s]

Iteration 15000, loss = 3.9219


 32%|████████████████████████▏                                                   | 16003/50298 [20:22<41:58, 13.62it/s]

Iteration 16000, loss = 3.2706


 34%|█████████████████████████▋                                                  | 17003/50298 [21:38<52:51, 10.50it/s]

Iteration 17000, loss = 3.8434


 36%|███████████████████████████▏                                                | 18003/50298 [22:55<27:29, 19.58it/s]

Iteration 18000, loss = 7.1435


 38%|████████████████████████████▋                                               | 19004/50298 [24:09<32:12, 16.20it/s]

Iteration 19000, loss = 0.7153


 40%|██████████████████████████████▏                                             | 20003/50298 [25:26<50:35,  9.98it/s]

Iteration 20000, loss = 5.0010


 42%|███████████████████████████████▋                                            | 21003/50298 [26:45<38:34, 12.66it/s]

Iteration 21000, loss = 3.1118


 44%|█████████████████████████████████▏                                          | 22002/50298 [28:01<46:28, 10.15it/s]

Iteration 22000, loss = 3.5692


 46%|██████████████████████████████████▊                                         | 23001/50298 [29:16<33:02, 13.77it/s]

Iteration 23000, loss = 2.5576


 48%|████████████████████████████████████▎                                       | 24002/50298 [30:32<32:50, 13.34it/s]

Iteration 24000, loss = 0.0335


 50%|█████████████████████████████████████▊                                      | 25002/50298 [31:52<36:10, 11.65it/s]

Iteration 25000, loss = 4.5492


 52%|███████████████████████████████████████▎                                    | 26004/50298 [33:11<21:51, 18.52it/s]

Iteration 26000, loss = 3.8395


 54%|████████████████████████████████████████▊                                   | 27004/50298 [34:24<26:33, 14.62it/s]

Iteration 27000, loss = 1.0114


 56%|██████████████████████████████████████████▎                                 | 28003/50298 [35:40<32:12, 11.53it/s]

Iteration 28000, loss = 3.7503


 58%|███████████████████████████████████████████▊                                | 29002/50298 [36:54<22:44, 15.60it/s]

Iteration 29000, loss = 3.6204


 60%|█████████████████████████████████████████████▎                              | 30003/50298 [38:07<25:46, 13.13it/s]

Iteration 30000, loss = 2.6689


 62%|██████████████████████████████████████████████▊                             | 31002/50298 [39:23<22:44, 14.14it/s]

Iteration 31000, loss = 2.4967


 64%|████████████████████████████████████████████████▎                           | 32003/50298 [40:36<20:32, 14.85it/s]

Iteration 32000, loss = 4.6497


 66%|█████████████████████████████████████████████████▊                          | 33003/50298 [41:53<18:26, 15.63it/s]

Iteration 33000, loss = 6.3010


 68%|███████████████████████████████████████████████████▍                        | 34003/50298 [43:10<22:07, 12.27it/s]

Iteration 34000, loss = 1.1910


 70%|████████████████████████████████████████████████████▉                       | 35000/50298 [44:27<30:47,  8.28it/s]

Iteration 35000, loss = 1.2031


 72%|██████████████████████████████████████████████████████▍                     | 36004/50298 [45:43<12:54, 18.46it/s]

Iteration 36000, loss = 5.2549


 74%|███████████████████████████████████████████████████████▉                    | 37003/50298 [46:59<18:05, 12.25it/s]

Iteration 37000, loss = 1.7430


 76%|█████████████████████████████████████████████████████████▍                  | 38005/50298 [48:16<09:54, 20.67it/s]

Iteration 38000, loss = 3.2222


 78%|██████████████████████████████████████████████████████████▉                 | 39003/50298 [49:33<13:09, 14.31it/s]

Iteration 39000, loss = 5.6527


 80%|████████████████████████████████████████████████████████████▍               | 40001/50298 [50:48<09:54, 17.32it/s]

Iteration 40000, loss = 4.0968


 82%|█████████████████████████████████████████████████████████████▉              | 41002/50298 [52:03<11:54, 13.01it/s]

Iteration 41000, loss = 3.6067


 84%|███████████████████████████████████████████████████████████████▍            | 42003/50298 [53:16<10:24, 13.28it/s]

Iteration 42000, loss = 6.0276


 85%|████████████████████████████████████████████████████████████████▉           | 43004/50298 [54:31<07:49, 15.53it/s]

Iteration 43000, loss = 3.9069


 87%|██████████████████████████████████████████████████████████████████▍         | 44000/50298 [55:45<07:06, 14.77it/s]

Iteration 44000, loss = 2.2740


 89%|███████████████████████████████████████████████████████████████████▉        | 45002/50298 [57:02<06:46, 13.04it/s]

Iteration 45000, loss = 1.0099


 91%|█████████████████████████████████████████████████████████████████████▌      | 46001/50298 [58:19<04:22, 16.37it/s]

Iteration 46000, loss = 6.4013


 93%|███████████████████████████████████████████████████████████████████████     | 47004/50298 [59:37<03:21, 16.31it/s]

Iteration 47000, loss = 5.6156


 95%|██████████████████████████████████████████████████████████████████████▋   | 48005/50298 [1:00:54<02:05, 18.31it/s]

Iteration 48000, loss = 3.9415


 97%|████████████████████████████████████████████████████████████████████████  | 49003/50298 [1:02:14<01:29, 14.42it/s]

Iteration 49000, loss = 1.6471


 99%|█████████████████████████████████████████████████████████████████████████▌| 50002/50298 [1:03:29<00:19, 15.05it/s]

Iteration 50000, loss = 0.1814


100%|██████████████████████████████████████████████████████████████████████████| 50298/50298 [1:03:52<00:00, 13.13it/s]


Checking accuracy score on validation set.

Accuracy: 22234 / 68541 correct (32.44)
Log Loss score:	3.24
Fbeta-score (beta=1): 	0.32
Training model 2, epoch 10


  0%|                                                                              | 1/50298 [00:00<1:43:06,  8.13it/s]

Iteration 0, loss = 2.6270


  2%|█▌                                                                           | 1004/50298 [01:11<59:08, 13.89it/s]

Iteration 1000, loss = 1.0958


  4%|███                                                                          | 2002/50298 [02:33<51:59, 15.48it/s]

Iteration 2000, loss = 1.0909


  6%|████▍                                                                      | 3001/50298 [03:54<1:35:22,  8.27it/s]

Iteration 3000, loss = 2.2934


  8%|██████▏                                                                      | 4001/50298 [05:09<57:40, 13.38it/s]

Iteration 4000, loss = 7.1577


 10%|███████▋                                                                     | 5002/50298 [06:27<56:52, 13.27it/s]

Iteration 5000, loss = 6.0158


 12%|████████▉                                                                  | 6002/50298 [07:41<1:17:03,  9.58it/s]

Iteration 6000, loss = 1.4269


 14%|██████████▍                                                                | 7003/50298 [09:01<1:09:52, 10.33it/s]

Iteration 7000, loss = 3.5244


 16%|███████████▉                                                               | 8002/50298 [10:24<1:02:50, 11.22it/s]

Iteration 8000, loss = 2.5647


 18%|█████████████▊                                                               | 9005/50298 [11:37<42:10, 16.32it/s]

Iteration 9000, loss = 2.1125


 20%|██████████████▋                                                           | 10003/50298 [12:53<1:06:40, 10.07it/s]

Iteration 10000, loss = 7.1374


 22%|████████████████▏                                                         | 11002/50298 [14:14<1:02:11, 10.53it/s]

Iteration 11000, loss = 0.0485


 24%|█████████████████▋                                                        | 12003/50298 [15:38<1:10:02,  9.11it/s]

Iteration 12000, loss = 2.1811


 26%|███████████████████▋                                                        | 13003/50298 [17:02<35:40, 17.42it/s]

Iteration 13000, loss = 4.1895


 28%|█████████████████████▏                                                      | 14002/50298 [18:22<37:38, 16.07it/s]

Iteration 14000, loss = 4.4929


 30%|██████████████████████▋                                                     | 15003/50298 [19:45<46:29, 12.65it/s]

Iteration 15000, loss = 4.5328


 32%|████████████████████████▏                                                   | 16002/50298 [21:02<49:19, 11.59it/s]

Iteration 16000, loss = 1.8185


 34%|█████████████████████████▋                                                  | 17003/50298 [22:20<56:05,  9.89it/s]

Iteration 17000, loss = 1.8539


 36%|███████████████████████████▏                                                | 18004/50298 [23:36<36:05, 14.91it/s]

Iteration 18000, loss = 4.0813


 38%|████████████████████████████▋                                               | 19001/50298 [24:53<44:29, 11.72it/s]

Iteration 19000, loss = 0.8877


 40%|██████████████████████████████▏                                             | 20002/50298 [26:06<51:13,  9.86it/s]

Iteration 20000, loss = 2.2644


 42%|███████████████████████████████▋                                            | 21003/50298 [27:23<27:03, 18.04it/s]

Iteration 21000, loss = 2.8658


 44%|█████████████████████████████████▏                                          | 22002/50298 [28:41<32:25, 14.54it/s]

Iteration 22000, loss = 1.3089


 46%|██████████████████████████████████▊                                         | 23003/50298 [30:00<30:30, 14.91it/s]

Iteration 23000, loss = 4.7354


 48%|████████████████████████████████████▎                                       | 24004/50298 [31:19<36:28, 12.01it/s]

Iteration 24000, loss = 3.4747


 50%|█████████████████████████████████████▊                                      | 25003/50298 [32:38<33:29, 12.59it/s]

Iteration 25000, loss = 2.2214


 51%|██████████████████████████████████████▍                                     | 25461/50298 [33:15<32:26, 12.76it/s]


KeyboardInterrupt: 

In [None]:
import matplotlib.pyplot as plt

train_losses = torch.tensor(train_losses, device = 'cpu')
plt.plot(np.arange(len(train_losses)), train_losses)
plt.title('Train Loss Curve of Bird Classificator')

In [None]:
val_losses = torch.tensor(val_losses, device = 'cpu')
plt.plot(np.arange(len(val_losses)), val_losses)
plt.title('Validation Accuracy Curve of Bird Classificator')

In [None]:
log_scores = torch.tensor(log_scores, device = 'cpu')
plt.plot(np.arange(len(log_scores)), log_scores)
plt.title('Log-score Curve of Bird Classificator')

In [None]:
fbeta_scores = torch.tensor(fbeta_scores, device = 'cpu')
plt.plot(np.arange(len(fbeta_scores)), fbeta_scores)
plt.title('F-beta score Curve of Bird Classificator')

In [None]:
print('\nAccuracy: %.2f' % (100 * test_scores[0]))
print('Log Loss score:\t%.2f' % (test_scores[1]))
print('Fbeta-score (beta=1.0): \t%.2f' % (test_scores[2]))