In [None]:
# Fetch the project sources. Later cells load modules and data from
# /content/atla-generator, so this is expected to run with /content as the
# working directory (the Colab default — confirm if running elsewhere).
!git clone https://github.com/wtaisner/atla-generator.git

Cloning into 'atla-generator'...
remote: Enumerating objects: 209, done.[K
remote: Counting objects: 100% (209/209), done.[K
remote: Compressing objects: 100% (133/133), done.[K
remote: Total 209 (delta 83), reused 155 (delta 47), pack-reused 0[K
Receiving objects: 100% (209/209), 3.43 MiB | 19.19 MiB/s, done.
Resolving deltas: 100% (83/83), done.


In [None]:
# Uncomment this cell to mount Google Drive; SAVE_DIR points under /content/drive, so the mount is needed before saving the model.
# from google.colab import drive
# drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.20.0-py3-none-any.whl (4.4 MB)
[K     |████████████████████████████████| 4.4 MB 3.1 MB/s 
Collecting tokenizers!=0.11.3,<0.13,>=0.11.1
  Downloading tokenizers-0.12.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.6 MB)
[K     |████████████████████████████████| 6.6 MB 55.4 MB/s 
Collecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 61.3 MB/s 
Collecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.8.1-py3-none-any.whl (101 kB)
[K     |████████████████████████████████| 101 kB 14.0 MB/s 
Installing collected packages: pyyaml, tokenizers, huggingface-hub, transformers
  Attempting uninstall: pyyaml
    Found existing installation: PyYAML 3.13
    Uninstalling P

In [None]:
import importlib.util
import sys

# Directory of the cloned repository that holds the helper modules.
SRC_DIR = "/content/atla-generator/src"


def load_source_module(name, path):
    """Import the Python file at ``path`` as a module called ``name``.

    Follows the "importing a source file directly" recipe from the importlib
    documentation: build a spec, create the module, register it in
    ``sys.modules`` and execute it.

    Args:
        name: Module name to assign (becomes ``module.__name__``). Each file
            gets its own name so the modules stay distinguishable.
        path: Filesystem path of the ``.py`` file to load.

    Returns:
        The executed module object.

    Raises:
        ImportError: If no spec/loader can be created for ``path``.
        Exception: Whatever the module's own top-level code raises; the
            half-initialised module is removed from ``sys.modules`` first.
    """
    spec = importlib.util.spec_from_file_location(name, path)
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot load module {name!r} from {path!r}")
    module = importlib.util.module_from_spec(spec)
    # Register before executing so code that looks the module up by name
    # (pickling, dataclasses, inspect) can find it.
    sys.modules[name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        sys.modules.pop(name, None)
        raise
    return module


# Prefixed names avoid clashing with unrelated packages called e.g. "data".
data = load_source_module("atla_generator_data", f"{SRC_DIR}/data.py")
dialoGPT = load_source_module("atla_generator_dialogpt", f"{SRC_DIR}/DialoGPT.py")
blenderbot = load_source_module("atla_generator_blenderbot", f"{SRC_DIR}/blenderbot.py")

In [None]:
import math

import torch
from torch import cuda
from transformers import BlenderbotSmallTokenizer, BlenderbotSmallForConditionalGeneration, TrainingArguments, Trainer

# Set parameters

In [None]:
# --- Reproducibility and base checkpoint ---------------------------------
SEED = 42
MODEL = "facebook/blenderbot_small-90M"

# --- Character / context --------------------------------------------------
# Both values are forwarded to create_context(name=..., n=...).
# NOTE(review): N presumably is the number of context utterances — confirm
# against src/DialoGPT.py.
NAME = "Aang"
N = 1

# --- Sequence lengths passed to CustomDataset (SRC_LEN also to the chat) --
SRC_LEN = 512
RESP_LEN = 256

# --- Data split -----------------------------------------------------------
# Fraction of rows sampled into the training set; the rest is held out.
TRAIN_SIZE = 0.8

# --- Trainer hyper-parameters ---------------------------------------------
TRAIN_BATCH = EVAL_BATCH = 8
TRAIN_EPOCHS = 5
VAL_EPOCHS = 1  # not referenced by the cells visible in this notebook
LEARNING_RATE = 5e-5
EVAL_STRATEGY = "epoch"  # used for both evaluation and checkpoint saving
LOAD_BEST_MODEL_AT_THE_END = True
PREDICTION_LOSS_ONLY = True

# --- Output locations -----------------------------------------------------
OUTPUT_DIR = "/content/atla-generator/outputs/BlenderBot"
OVERWRITE_OUTPUT_DIR = True
SAVE_DIR = f"/content/drive/MyDrive/outputs/blenderbot/{NAME}"

# --- Interactive chat -----------------------------------------------------
STEPS = 5  # forwarded to chat_with_me(steps=...)

In [None]:
# Seed PyTorch's RNG, then choose the GPU when one is available and the CPU otherwise.
torch.manual_seed(SEED)
if cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [None]:
# Load the tokenizer and the pretrained weights for MODEL, placing the model on `device`.
tokenizer = BlenderbotSmallTokenizer.from_pretrained(MODEL)
model = BlenderbotSmallForConditionalGeneration.from_pretrained(MODEL).to(device)

Downloading:   0%|          | 0.00/941k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/337k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/205 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.47k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/334M [00:00<?, ?B/s]

In [None]:
# Read the raw CSV and turn it into context/response rows for NAME.
df = dialoGPT.create_context(
    data.read_dataframe('/content/atla-generator/data/avatar.csv'),
    name=NAME,
    n=N,
)

# Seeded random split: TRAIN_SIZE of the rows for training, the remainder held out.
sampled_rows = df.sample(frac=TRAIN_SIZE, random_state=SEED)
eval_dataset = df.drop(sampled_rows.index).reset_index(drop=True)
train_dataset = sampled_rows.reset_index(drop=True)

# Report the shape of each frame.
for template, frame in (
    ("FULL Dataset: {}", df),
    ("TRAIN Dataset: {}", train_dataset),
    ("TEST Dataset: {}", eval_dataset),
):
    print(template.format(frame.shape))

FULL Dataset: (1797, 2)
TRAIN Dataset: (1438, 2)
TEST Dataset: (359, 2)


In [None]:
# Wrap both splits in the project's CustomDataset, using the same tokenizer and length limits.
training_set, val_set = (
    blenderbot.CustomDataset(frame, tokenizer, SRC_LEN, RESP_LEN)
    for frame in (train_dataset, eval_dataset)
)

In [None]:
# Collect the Trainer settings from the configuration constants.
# Evaluation and checkpoint saving deliberately share EVAL_STRATEGY.
training_config = {
    "output_dir": OUTPUT_DIR,
    "overwrite_output_dir": OVERWRITE_OUTPUT_DIR,
    "num_train_epochs": TRAIN_EPOCHS,
    "per_device_train_batch_size": TRAIN_BATCH,
    "per_device_eval_batch_size": EVAL_BATCH,
    "prediction_loss_only": PREDICTION_LOSS_ONLY,
    "evaluation_strategy": EVAL_STRATEGY,
    "save_strategy": EVAL_STRATEGY,
    "seed": SEED,
    "learning_rate": LEARNING_RATE,
    "load_best_model_at_end": LOAD_BEST_MODEL_AT_THE_END,
}
args = TrainingArguments(**training_config)

In [None]:
# Bind the model, the training arguments and both dataset splits into a Trainer.
trainer_inputs = {
    "model": model,
    "args": args,
    "train_dataset": training_set,
    "eval_dataset": val_set,
}
trainer = Trainer(**trainer_inputs)

In [None]:
# Run fine-tuning; the returned TrainOutput is displayed as the cell result.
trainer.train()

***** Running training *****
  Num examples = 1438
  Num Epochs = 5
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 900


Epoch,Training Loss,Validation Loss
1,No log,3.252786
2,No log,3.266816
3,2.694900,3.365611
4,2.694900,3.515687
5,2.694900,3.595901


***** Running Evaluation *****
  Num examples = 359
  Batch size = 8
Saving model checkpoint to /content/atla-generator/outputs/BlenderBot/checkpoint-180
Configuration saved in /content/atla-generator/outputs/BlenderBot/checkpoint-180/config.json
Model weights saved in /content/atla-generator/outputs/BlenderBot/checkpoint-180/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 359
  Batch size = 8
Saving model checkpoint to /content/atla-generator/outputs/BlenderBot/checkpoint-360
Configuration saved in /content/atla-generator/outputs/BlenderBot/checkpoint-360/config.json
Model weights saved in /content/atla-generator/outputs/BlenderBot/checkpoint-360/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 359
  Batch size = 8
Saving model checkpoint to /content/atla-generator/outputs/BlenderBot/checkpoint-540
Configuration saved in /content/atla-generator/outputs/BlenderBot/checkpoint-540/config.json
Model weights saved in /content/atla-generator/outputs/Blende

TrainOutput(global_step=900, training_loss=2.1385196940104167, metrics={'train_runtime': 657.1343, 'train_samples_per_second': 10.941, 'train_steps_per_second': 1.37, 'total_flos': 1299934536007680.0, 'train_loss': 2.1385196940104167, 'epoch': 5.0})

In [None]:
# Write the trained model to SAVE_DIR, which lives under /content/drive,
# so Google Drive has to be mounted before this cell runs.
trainer.save_model(SAVE_DIR) # "remember to download the model" ~ Anna Przybyłowska

Saving model checkpoint to /content/drive/MyDrive/outputs/blenderbot/Aang
Configuration saved in /content/drive/MyDrive/outputs/blenderbot/Aang/config.json
Model weights saved in /content/drive/MyDrive/outputs/blenderbot/Aang/pytorch_model.bin


# Perplexity

In [None]:
# Evaluate on the held-out split and derive perplexity from the mean loss.
eval_results = trainer.evaluate(eval_dataset=val_set)
# eval_loss is a cross-entropy measured in nats (natural log), so perplexity
# is e ** loss; using 2 ** loss would understate it.
perplexity = math.exp(eval_results['eval_loss'])
print(f"perplexity: {perplexity}")
eval_results

***** Running Evaluation *****
  Num examples = 359
  Batch size = 8


perplexity: 9.53204762385927


{'epoch': 5.0,
 'eval_loss': 3.252786159515381,
 'eval_runtime': 10.7848,
 'eval_samples_per_second': 33.288,
 'eval_steps_per_second': 4.173}

# Load saved model and chat

In [None]:
# Reload the fine-tuned weights from SAVE_DIR. The tokenizer is still taken from
# the base checkpoint MODEL; the save log above lists only the config and the
# weights being written to SAVE_DIR.
model = BlenderbotSmallForConditionalGeneration.from_pretrained(SAVE_DIR)
tokenizer = BlenderbotSmallTokenizer.from_pretrained(MODEL)

loading configuration file /content/drive/MyDrive/outputs/blenderbot/Aang/config.json
Model config BlenderbotSmallConfig {
  "_name_or_path": "facebook/blenderbot_small-90M",
  "activation_dropout": 0.0,
  "activation_function": "gelu",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "BlenderbotSmallForConditionalGeneration"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "classif_dropout": 0.0,
  "classifier_dropout": 0.0,
  "d_model": 512,
  "decoder_attention_heads": 16,
  "decoder_ffn_dim": 2048,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 8,
  "decoder_start_token_id": 1,
  "do_blenderbot_90_layernorm": true,
  "dropout": 0.1,
  "encoder_attention_heads": 16,
  "encoder_ffn_dim": 2048,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 8,
  "eos_token_id": 2,
  "extra_pos_embeddings": 0,
  "force_bos_token_to_be_generated": false,
  "forced_eos_token_id": 2,
  "gradient_checkpointing": false,
  "id2label": {
    "0": "LABEL_0",
    "1

In [None]:
# Start an interactive chat with the reloaded model. STEPS and SRC_LEN are
# forwarded as `steps` and `src_len` (presumably the number of user turns and
# the maximum input length — confirm in src/blenderbot.py).
blenderbot.chat_with_me(model, tokenizer, steps=STEPS, src_len=SRC_LEN)

>>User:how are you Aang?
Bot: , i'm doing great. it's the only way to be happy. what about you? how are you doing?
>>User:I'm trying to learn water bending
Bot: , i'm trying to master it. but i don't know how to do it. i've never been able to master any other elements.
>>User:but you are a great air bender
Bot: , i'm not. i don't know what to do. i've never been able to master the spirit world. i have no idea how to master it. but i'd like to learn it.
>>User:but you are the Avatar
Bot: , i'm not. i've never been in the avatar state before. but i'd like to see it. what about you? what do you mean by that?
>>User:you are our only hope, only you can save us from fire nation
Bot: , i don't know what to do. i'm not even sure if i'll be able to get out of the fire lord's palace. it's too dangerous. i can't see myself doing it. i need to find a way around it. but i've never been able to figure out how to do it. so, i guess i have no idea what i should do. maybe i just need to try it. and tha