In [1]:
import torch
from torch import cuda
from transformers import AutoModelForCausalLM, AutoTokenizer, DataCollatorForLanguageModeling, Trainer, TrainingArguments

from src import data
from src.DialoGPT.prepare_dataset import  create_context, ConversationDataset
from src.DialoGPT.conversation import chat_with_me

# Set parameters

In [2]:
# --- Reproducibility -------------------------------------------------------
# Shared seed: used for torch, the pandas train/eval sampling and the Trainer.
SEED = 42

# --- Model -----------------------------------------------------------------
# Hugging Face Hub id of the pretrained checkpoint that gets fine-tuned.
MODEL = "microsoft/DialoGPT-small"

# --- Dataset construction ----------------------------------------------------
# Passed to create_context(name=..., n=...).
# NOTE(review): presumably the character to imitate and the number of previous
# utterances kept as context -- confirm against src.DialoGPT.prepare_dataset.
NAME = "Iroh"
N = 7
# Fraction of the rows sampled into the training split; the rest is eval.
TRAIN_SIZE = 0.8

# --- Training (forwarded to TrainingArguments) -------------------------------
OUTPUT_DIR = "../outputs/DialoGPT"
OVERWRITE_OUTPUT_DIR = True
EPOCHS = 10
TRAIN_BATCH = EVAL_BATCH = 4  # per-device batch sizes
LEARNING_RATE = 5e-5
# Used for both the evaluation and the checkpoint-saving schedule.
EVAL_STRATEGY = "epoch"
LOAD_BEST_MODEL_AT_THE_END = True
PREDICTION_LOSS_ONLY = True

# --- Chat ----------------------------------------------------------------------
# Passed to chat_with_me(steps=...).
STEPS = 7

In [3]:
# Seed PyTorch's random number generator so runs are repeatable.
torch.manual_seed(SEED)

# Use the GPU when PyTorch reports one as available; otherwise stay on the CPU.
if cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load model and tokenizer

In [None]:
# Load the tokenizer that matches the pretrained checkpoint named by MODEL.
tokenizer = AutoTokenizer.from_pretrained(MODEL)

# A padding token is required so that variable-length examples can be batched.
# `pad_token` has to be a token *string*; the integer 0 used previously is not a
# valid value (recent transformers releases reject it, older ones stringify it and
# end up padding with the vocabulary token "0"). Reuse the end-of-sequence token,
# the customary choice for GPT-2-style tokenizers that ship without a pad token.
# NOTE(review): DataCollatorForLanguageModeling(mlm=False) excludes positions equal
# to the pad id from the loss, so EOS targets are ignored as well -- confirm this
# is acceptable for the turn separators produced by ConversationDataset.
tokenizer.pad_token = tokenizer.eos_token

# Load the pretrained causal language model and move it to the selected device.
# Assigning the result of `.to()` keeps the cell from echoing the full module repr.
model = AutoModelForCausalLM.from_pretrained(MODEL)
model = model.to(device)

# Read and reformat data, create train and eval split

In [None]:
train_size = TRAIN_SIZE

# Read the raw dataframe and reshape it with create_context.
# NOTE(review): presumably this yields one row per response of NAME together with
# the N preceding utterances -- confirm against src.DialoGPT.prepare_dataset.
df = create_context(data.read_dataframe(), name=NAME, n=N)

# Draw the training rows at random (seeded), then use every row that was not
# drawn as the evaluation split. Both splits get a fresh 0..n-1 index.
train_rows = df.sample(frac=train_size, random_state=SEED)
eval_dataset = df.drop(index=train_rows.index).reset_index(drop=True)
train_dataset = train_rows.reset_index(drop=True)

# Report the sizes of the full frame and of both splits.
for label, frame in (("df", df), ("train", train_dataset), ("eval", eval_dataset)):
    print(f"{label} shape: {frame.shape}")

In [7]:
# Wrap each dataframe split in a ConversationDataset built with the tokenizer
# (train first, then eval).
train_set, eval_set = (
    ConversationDataset(split_frame, tokenizer)
    for split_frame in (train_dataset, eval_dataset)
)

# Collator for causal language modelling: masked-LM mode is switched off.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Fine-tune model (training)

In [8]:
# Collect every Trainer option in one place, then build TrainingArguments from it.
# Evaluation and checkpoint saving deliberately share the same schedule
# (EVAL_STRATEGY) because the best checkpoint is reloaded at the end of training.
# NOTE(review): newer transformers releases appear to rename `evaluation_strategy`
# to `eval_strategy` -- confirm against the version pinned for this project.
training_options = {
    # where and how checkpoints are written
    "output_dir": OUTPUT_DIR,
    "overwrite_output_dir": OVERWRITE_OUTPUT_DIR,
    "save_strategy": EVAL_STRATEGY,
    "load_best_model_at_end": LOAD_BEST_MODEL_AT_THE_END,
    # optimisation schedule
    "num_train_epochs": EPOCHS,
    "learning_rate": LEARNING_RATE,
    "per_device_train_batch_size": TRAIN_BATCH,
    "seed": SEED,
    # evaluation
    "evaluation_strategy": EVAL_STRATEGY,
    "per_device_eval_batch_size": EVAL_BATCH,
    "prediction_loss_only": PREDICTION_LOSS_ONLY,
}
args = TrainingArguments(**training_options)

In [9]:
# Assemble the Trainer from the pieces prepared in the previous cells:
# the model to fine-tune, the training configuration, the batching collator
# and the train/eval datasets.
trainer = Trainer(
    args=args,
    model=model,
    data_collator=data_collator,
    train_dataset=train_set,
    eval_dataset=eval_set,
)

In [None]:
# Run fine-tuning with the configuration held in `args` (EPOCHS epochs, with
# evaluation and checkpointing on the EVAL_STRATEGY schedule). The value returned
# by train() is the cell's output.
trainer.train()

In [None]:
# Write the trainer's current model to OUTPUT_DIR so it can be reloaded below.
# NOTE(review): with load_best_model_at_end enabled this should be the best
# checkpoint rather than the last one -- confirm. No tokenizer was handed to the
# Trainer, so presumably only the model files are written here.
trainer.save_model(OUTPUT_DIR)

# Load saved model and chat

In [None]:
# Reload the fine-tuned weights from the directory written by save_model.
model = AutoModelForCausalLM.from_pretrained(OUTPUT_DIR)
# The tokenizer is loaded again from the original pretrained checkpoint (MODEL),
# not from OUTPUT_DIR; this replaces the tokenizer object configured earlier.
tokenizer = AutoTokenizer.from_pretrained(MODEL)

In [None]:
# Start a chat session with the reloaded model and tokenizer.
# NOTE(review): `steps` presumably limits the number of conversation turns --
# confirm against src.DialoGPT.conversation.chat_with_me.
chat_with_me(model, tokenizer, steps=STEPS)