In [2]:
"""
Links
  https://colab.research.google.com/drive/1pTuQhug6Dhl9XalKB0zUGf4FIdYFlpcX
"""
!pip install scikit-learn
!pip install transformers

Collecting transformers
[?25l  Downloading https://files.pythonhosted.org/packages/9c/35/1c3f6e62d81f5f0daff1384e6d5e6c5758682a8357ebc765ece2b9def62b/transformers-3.0.0-py3-none-any.whl (754kB)
[K     |████████████████████████████████| 757kB 2.6MB/s 
Collecting sacremoses
[?25l  Downloading https://files.pythonhosted.org/packages/7d/34/09d19aff26edcc8eb2a01bed8e98f13a1537005d31e95233fd48216eed10/sacremoses-0.0.43.tar.gz (883kB)
[K     |████████████████████████████████| 890kB 13.7MB/s 
Collecting sentencepiece
[?25l  Downloading https://files.pythonhosted.org/packages/d4/a4/d0a884c4300004a78cca907a6ff9a5e9fe4f090f5d95ab341c53d28cbc58/sentencepiece-0.1.91-cp36-cp36m-manylinux1_x86_64.whl (1.1MB)
[K     |████████████████████████████████| 1.1MB 16.2MB/s 
Collecting tokenizers==0.8.0-rc4
[?25l  Downloading https://files.pythonhosted.org/packages/e8/bd/e5abec46af977c8a1375c1dca7cb1e5b3ec392ef279067af7f6bc50491a0/tokenizers-0.8.0rc4-cp36-cp36m-manylinux1_x86_64.whl (3.0MB)
[K     |███

In [3]:
# Libraries and general settings
import datetime
from google.colab import drive
import numpy as np
import pandas as pd
import random
from sklearn.metrics import accuracy_score, precision_score, recall_score, fbeta_score
import time

import torch
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from torch.utils.data import TensorDataset, random_split
from transformers import BertTokenizer
from transformers import BertForSequenceClassification, AdamW, BertConfig
from transformers import get_linear_schedule_with_warmup


# Show floats in DataFrames with 4 decimals. The full option key is used because the
# bare "precision" shorthand is a pattern lookup that newer pandas versions reject
# as ambiguous (it matches more than one option).
pd.set_option("display.precision", 4)

# Mount Google Drive; the annotated reviews are read from below /content/gdrive.
drive.mount("/content/gdrive")

# Choose the device that batches are moved to in the training loop:
# CUDA when a GPU is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
    print("%d GPU(s) available, namely %s" % (torch.cuda.device_count(), torch.cuda.get_device_name(0)))
else:
    device = torch.device("cpu")
    print("No GPU available, using CPU instead.")

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/gdrive
1 GPU(s) available, namely Tesla K80


In [4]:
def get_accuracy_orig(labels, preds):
    """Share of samples whose arg-max prediction matches the label.

    Args:
        labels: array of true class ids; flattened before comparison
        preds: array of per-class scores with the class axis at position 1
    Returns:
        Number of matching positions divided by the number of labels
    """
    predicted = np.argmax(preds, axis=1).flatten()
    expected = labels.flatten()
    n_correct = np.sum(expected == predicted)
    return n_correct / len(expected)


def get_accuracy(labels, preds):
    """Accuracy of arg-max class predictions, scored with `accuracy_score`.

    Args:
        labels: array of true class ids; flattened before scoring
        preds: array of per-class scores with the class axis at position 1
    Returns:
        The value `accuracy_score` yields for the flattened labels and predictions
    """
    predicted_classes = np.argmax(preds, axis=1).flatten()
    return accuracy_score(labels.flatten(), predicted_classes)


def binary_clf_perf_scores(
    labels, preds, metrics: tuple = ("accuracy", "f1", "precision", "recall"), average: str = "binary"
):
    """Convenience function: performance scores suited for a binary classification problem.

    Args:
        labels: True class ids; flattened to a 1D vector before scoring
        preds: Per-class scores (e.g. logits) with the class axis at position 1;
            the arg-max along that axis is used as the predicted class
        metrics: Names of the scores to compute. Recognised names are "accuracy",
            "precision", "recall" and "f1"; any other name is silently ignored.
            Any container supporting `in` works (list, tuple, set).
        average: Averaging scheme passed to the precision/recall/F-beta scorers.
            Default is "binary"
    Returns:
        A dict mapping each requested (and recognised) metric name to its score
    """
    y_true = labels.flatten()
    y_pred = np.argmax(preds, axis=1).flatten()

    perf = {}
    if "accuracy" in metrics:
        perf["accuracy"] = accuracy_score(y_true, y_pred)

    if "precision" in metrics:
        perf["precision"] = precision_score(y_true, y_pred, average=average)

    if "recall" in metrics:
        perf["recall"] = recall_score(y_true, y_pred, average=average)

    if "f1" in metrics:
        # F-beta with beta=1 is the F1 score.
        perf["f1"] = fbeta_score(y_true, y_pred, beta=1, average=average)

    return perf


def format_time(elapsed):
    """Render a duration in seconds as the string form of a `timedelta` (h:mm:ss).

    The duration is rounded to a whole number of seconds first.
    """
    whole_seconds = int(round(elapsed))
    duration = datetime.timedelta(seconds=whole_seconds)
    return str(duration)

In [5]:
# Tokenizer of the uncased BERT base checkpoint, with lower-casing switched on.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased", do_lower_case=True)

# The annotated customer reviews are stored as a tab-separated file on the mounted Drive.
rawdata = pd.read_csv(
    "/content/gdrive/My Drive/data team/AI/projects/sensus/data/labels/deepsentiment - annotated customer reviews - punchh - 2018-master.tsv",
    sep="\t",
)
print("raw data has {:d} rows".format(len(rawdata)))

# Eyeball a random handful of rows.
rawdata.sample(10)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=231508.0, style=ProgressStyle(descripti…


raw data has 2700 rows


Unnamed: 0,source,message,guest_rating,feedback_id,business_id,created_at,customer_service,food_quality,app_program,ambience,waiting_time,polarity,conflict
797,2018-09,"Good so far, no rewards yet but works",,9987906.0,,,,,,,,,1.0
1381,2018-08,Awesome.,,8605778.0,396.0,2018-08-10 20:33:31.000,,1.0,,,,1.0,0.0
1475,2018-08,best place ever,5.0,8551876.0,680.0,2018-08-07 20:15:14.000,,,,1.0,,1.0,0.0
0,2018-09,Love you guys,5.0,8986609.0,452.0,2018-09-01 00:27:39.000,1.0,,,,,1.0,0.0
618,2018-09,Clean,,9799237.0,,,,,,1.0,,,0.0
367,2018-09,Great and outstanding!,,9345994.0,628.0,2018-09-20 20:40:40.000,1.0,,,,,1.0,0.0
778,2018-09,Store does t take debit cards,,9881900.0,,,0.0,,,,,,0.0
393,2018-09,Great lunch buffet for the price,1.0,9371069.0,435.0,2018-09-21 23:25:50.000,1.0,1.0,,,,1.0,0.0
573,2018-09,I love this place. The burnt ends is the best...,,4283073.0,,,1.0,1.0,,,,,
1680,2018-08,Thank you. Great food!,5.0,8840812.0,628.0,2018-08-24 03:09:57.000,,1.0,,,,1.0,0.0


In [35]:
# Get the lists of messages and their labels.
metric = "polarity"

# Keep only rows labelled 0 or 1 for the chosen column. `isin` is False for
# missing values, so unlabelled rows are dropped by the same test.
data = rawdata.loc[rawdata[metric].isin([0, 1])].copy(deep=True)
print("Unique values: %s" % (set(data[metric])))
print("data has %d rows" % (len(data)))
print("data has %d one's and %d zero's" % ((data[metric] == 1).sum(), (data[metric] == 0).sum()))

sentences = data["message"].values
labels = data[metric].astype(int).values

# Length of the longest message after tokenization, counting the `[CLS]` and `[SEP]`
# tokens; useful when choosing a truncation length. Computed with a generator so no
# temporary leaks into the notebook namespace (in particular not `input_ids`, which
# the tokenization cell uses for the padded batch).
max_len = max((len(tokenizer.encode(sent, add_special_tokens=True)) for sent in sentences), default=0)
print("Max sentence length: ", max_len)

data.sample(10)

Unique values: {0.0, 1.0}
data has 135 rows
data has 61 one's and 74 zero's
Max sentence length:  325


Unnamed: 0,source,message,guest_rating,feedback_id,business_id,created_at,customer_service,food_quality,app_program,ambience,waiting_time,polarity,conflict
1483,2018-08,Service is fast and food is excellent!,5.0,8565800.0,628.0,2018-08-08 17:41:05.000,,1.0,,,1.0,1.0,0.0
301,2018-09,I am always surprised by how poor the service ...,2.0,9287300.0,561.0,2018-09-17 21:10:28.000,0.0,,,,0.0,0.0,0.0
1948,2018-07,Good food and a bit too long of a wait.,4.0,8098500.0,628.0,2018-07-11 19:29:24.000,,1.0,,,0.0,0.0,1.0
1968,2018-07,Customer Service is seriously lacking. Had to ...,1.0,8116000.0,622.0,2018-07-12 21:10:23.000,0.0,0.0,,,0.0,0.0,0.0
1240,2018-09,I love the food and order was late。,,,,,,1.0,,,0.0,0.0,1.0
1821,2018-07,Great staff. No wait!,5.0,7953500.0,628.0,2018-07-02 02:21:00.000,1.0,,,,1.0,1.0,0.0
2574,2018-06,Quick and tastefully.,5.0,7808100.0,611.0,2018-06-22 17:44:51.000,1.0,1.0,,,1.0,1.0,0.0
868,2018-09,Chloe is awesome. I wish I could get my team ...,,9898300.0,,,1.0,,,,0.0,,1.0
2314,2018-06,Very slow service but the food was good!,,7550000.0,637.0,2018-06-05 15:32:25.000,0.0,1.0,,,0.0,0.0,0.0
877,2018-09,They left us waiting for a good 45min. Forgett...,,10005000.0,,,1.0,,,,0.0,,1.0


In [36]:
"""
Tokenize all of the sentences
"""
# Every sentence is padded or truncated to this many tokens.
max_length = 128

# Tokenize all of the sentences and map the tokens to their word ids
input_ids = []
attention_masks = []
for sent in sentences:
    # `encode_plus` will:
    #   (1) Tokenize the sentence.
    #   (2) Prepend the `[CLS]` token to the start.
    #   (3) Append the `[SEP]` token to the end.
    #   (4) Map tokens to their IDs.
    #   (5) Pad or truncate the sentence to `max_length`
    #   (6) Create attention masks for [PAD] tokens.
    encoded_dict = tokenizer.encode_plus(
        sent,
        add_special_tokens=True,  # Add '[CLS]' and '[SEP]'
        max_length=max_length,  # Pad & truncate all sentences
        truncation=True,
        padding="max_length",  # replaces the deprecated `pad_to_max_length=True`
        return_attention_mask=True,
        return_tensors="pt",  # Return pytorch tensors
    )
    input_ids.append(encoded_dict["input_ids"])  # Add the encoded sentence to the list.
    attention_masks.append(encoded_dict["attention_mask"])  # And attn mask (differentiates padding from non-padding)

# Convert the lists into tensors
input_ids = torch.cat(input_ids, dim=0)
attention_masks = torch.cat(attention_masks, dim=0)
labels = torch.tensor(labels)

# Print a few sentences next to their token ids. Indices beyond the end of the
# data are skipped, so the cell also runs on smaller label sets.
for idx in [0, 10, 100]:
    if idx >= len(sentences):
        continue
    print("Original: ", sentences[idx])
    print("Token IDs:", input_ids[idx])

Original:  The service was fast and very friendly
Token IDs: tensor([ 101, 1996, 2326, 2001, 3435, 1998, 2200, 5379,  102,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0])
Original:  Quick, friendly and easy to order
Token IDs: tensor([ 101, 4248, 1010, 

In [37]:
# Create train and validation sets
dataset = TensorDataset(input_ids, attention_masks, labels)
train_size = int(0.9 * len(dataset))  # 90-10 train-validation split
val_size = len(dataset) - train_size

# `random_split` shuffles with torch's global random generator, which this notebook
# otherwise seeds only in the training cell, i.e. after the split has been made.
# Seed it here as well (same value as the training seed) so every run gets the same split.
torch.manual_seed(40)
train_set, val_set = random_split(dataset, [train_size, val_size])

print("There are {:>5,} training samples".format(train_size))
print("There are {:>5,} validation samples".format(val_size))

There are   121 training samples
There are    14 validation samples


In [38]:
# Mini-batch size shared by both loaders; for fine-tuning BERT on a specific task
# the authors recommend 16 or 32.
batch_size = 16

# Validation batches are served in dataset order.
validation_dataloader = DataLoader(
    val_set,
    batch_size=batch_size,
    sampler=SequentialSampler(val_set),
)

# Training batches are drawn in random order.
train_dataloader = DataLoader(
    train_set,
    batch_size=batch_size,
    sampler=RandomSampler(train_set),
)

In [39]:
# Load BertForSequenceClassification, the pretrained BERT model with a single linear classification layer on top.
model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=2,  # 2 for binary classification.
    output_attentions=False,  # Whether to return attention weights.
    output_hidden_states=False,  # Whether to return all hidden states.
)
# Move the model to the device chosen in the setup cell (GPU when available, else CPU).
# Unlike `model.cuda()`, this also works on the CPU fallback and keeps the model on
# the same device the training loop sends its batches to.
model.to(device)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, element

In [40]:
# Materialise the model's (name, tensor) parameter pairs so they can be sliced.
params = list(model.named_parameters())

print("The BERT model has {:} different named parameters.\n".format(len(params)))

# Each section is a heading plus the slice of parameters listed under it.
sections = [
    ("==== Embedding Layer ====\n", params[0:5]),
    ("\n==== First Transformer ====\n", params[5:21]),
    ("\n==== Output Layer ====\n", params[-4:]),
]
for heading, group in sections:
    print(heading)
    for name, tensor in group:
        # Left-aligned parameter name followed by its right-aligned shape.
        print("{:<55} {:>12}".format(name, str(tuple(tensor.size()))))

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (30522, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

In [41]:
# Number of passes over the training set; the BERT authors recommend between 2 and 4.
epochs = 4

# The scheduler counts optimizer steps, i.e. batches, not samples:
# total steps = [number of batches] x [number of epochs].
total_steps = epochs * len(train_dataloader)

# Note: this AdamW is the class from the huggingface library (as opposed to pytorch);
# the 'W' presumably stands for the "Weight Decay fix".
optimizer = AdamW(
    model.parameters(),
    eps=1e-8,  # args.adam_epsilon, default is 1e-8.
    lr=2e-5,  # args.learning_rate, default is 5e-5, our notebook had 2e-5
)

# Learning rate scheduler built on that optimizer; zero warmup steps is the
# default value in run_glue.py.
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)

In [42]:
# Fine-tune the model for `epochs` epochs, validating after each one.
# This training code is based on the `run_glue.py` script here:
# https://github.com/huggingface/transformers/blob/5bfcd0485ece086ebcbed2d008813037968a9e58/examples/run_glue.py#L128

# Set the seed value all over the place to make this reproducible.
seed_val = 40  # 42 is the original value

random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)

# One dict per epoch: training loss, validation loss and validation metrics.
training_stats = []

# Measure the total training time for the whole run.
total_t0 = time.time()

for epoch_i in range(0, epochs):
    # ========================================
    #               Training
    # ========================================
    # Perform one full pass over the training set.
    print("")
    print("======== Epoch {:} / {:} ========".format(epoch_i + 1, epochs))
    print("Training...")

    t0 = time.time()
    total_train_loss = 0

    # `train()` only switches the *mode* (e.g. dropout becomes active); it does not train.
    model.train()

    for step, batch in enumerate(train_dataloader):
        # Progress update every 40 batches (never on the very first one).
        if step and step % 40 == 0:
            elapsed = format_time(time.time() - t0)
            print("  Batch {:>5,}  of  {:>5,}.    Elapsed: {:}.".format(step, len(train_dataloader), elapsed))

        # `batch` holds three tensors: [0] input ids, [1] attention masks, [2] labels.
        # Copy each one to the device the model lives on.
        b_input_ids = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        b_labels = batch[2].to(device)

        # Gradients accumulate across backward passes unless cleared explicitly.
        model.zero_grad()

        # Forward pass. Because labels are supplied, the first output is the loss.
        # Indexing (rather than tuple-unpacking) works for both tuple-style and
        # dict-style model outputs.
        outputs = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask, labels=b_labels)
        loss = outputs[0]

        # `.item()` extracts the Python number from the single-value loss tensor.
        total_train_loss += loss.item()

        loss.backward()

        # Clip the gradient norm to 1.0 to guard against exploding gradients.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        optimizer.step()  # update the parameters from the gradients
        scheduler.step()  # update the learning rate

    # Average of the per-batch training losses.
    avg_train_loss = total_train_loss / len(train_dataloader)
    training_time = format_time(time.time() - t0)

    print("")
    print("  Average training loss: {0:.2f}".format(avg_train_loss))
    print("  Training epcoh took: {:}".format(training_time))

    # ========================================
    #               Validation
    # ========================================
    # After each training epoch, measure performance on the validation set.
    print("")
    print("Running Validation...")

    t0 = time.time()

    # Evaluation mode: dropout layers behave differently than during training.
    model.eval()

    total_eval_loss = 0
    # Logits and labels of every validation batch, pooled so that the metrics are
    # computed once over the whole validation set. Averaging per-batch precision,
    # recall or F1 does not give the set-level value and is ill-defined for
    # batches that lack one of the classes.
    all_logits = []
    all_label_ids = []

    for batch in validation_dataloader:
        b_input_ids = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        b_labels = batch[2].to(device)

        # No compute graph is needed for evaluation (it only serves backprop).
        with torch.no_grad():
            # token_type_ids are the "segment ids" that differentiate sentence 1
            # and 2 in 2-sentence tasks; not used here.
            outputs = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask, labels=b_labels)
        # The "logits" are the output values prior to an activation such as softmax.
        loss, logits = outputs[0], outputs[1]

        total_eval_loss += loss.item()

        # Move logits and labels to CPU for scoring.
        all_logits.append(logits.detach().cpu().numpy())
        all_label_ids.append(b_labels.to("cpu").numpy())

    # Score all validation predictions at once.
    scores = binary_clf_perf_scores(np.concatenate(all_label_ids), np.concatenate(all_logits))
    avg_val_accuracy = scores["accuracy"]
    avg_val_precision = scores["precision"]
    avg_val_recall = scores["recall"]
    avg_val_f1_score = scores["f1"]

    # Average of the per-batch validation losses.
    avg_val_loss = total_eval_loss / len(validation_dataloader)
    validation_time = format_time(time.time() - t0)

    print("  Validation Accuracy: {0:.4f}".format(avg_val_accuracy))
    print("  Validation Loss: {0:.2f}".format(avg_val_loss))
    print("  Validation took: {:}".format(validation_time))
    training_stats.append(
        {
            "epoch": epoch_i + 1,
            "train_loss": avg_train_loss,
            "val_loss": avg_val_loss,
            "val_accuracy": avg_val_accuracy,
            "val_precision": avg_val_precision,
            "val_recall": avg_val_recall,
            "val_f1_score": avg_val_f1_score,
        }
    )
print("\nSuccessfully finished training")
print("Total training took {:} (h:mm:ss)".format(format_time(time.time() - total_t0)))


Training...

  Average training loss: 0.74
  Training epcoh took: 0:00:05

Running Validation...
  Validation Accuracy: 0.3571
  Validation Loss: 0.75
  Validation took: 0:00:00

Training...

  Average training loss: 0.55
  Training epcoh took: 0:00:05

Running Validation...
  Validation Accuracy: 0.6429
  Validation Loss: 0.71
  Validation took: 0:00:00

Training...

  Average training loss: 0.50
  Training epcoh took: 0:00:05

Running Validation...
  Validation Accuracy: 0.6429
  Validation Loss: 0.67
  Validation took: 0:00:00

Training...

  Average training loss: 0.41
  Training epcoh took: 0:00:05

Running Validation...
  Validation Accuracy: 0.7143
  Validation Loss: 0.66
  Validation took: 0:00:00

Successfully finished training
Total training took 0:00:21 (h:mm:ss)


In [43]:
# One row per epoch, keyed by the epoch number, built from the collected stats dicts.
stats = pd.DataFrame(training_stats)
stats = stats.set_index("epoch")
stats

Unnamed: 0_level_0,train_loss,val_loss,val_accuracy,val_precision,val_recall,val_f1_score
epoch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,0.7362,0.7501,0.3571,0.0,0.0,0.0
2,0.5477,0.713,0.6429,0.5,0.6,0.5455
3,0.4978,0.6717,0.6429,0.5,0.6,0.5455
4,0.4102,0.6587,0.7143,0.6,0.6,0.6
