In [None]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer, GPT2Config
from transformers import TextDataset, DataCollatorForLanguageModeling
from transformers import Trainer, TrainingArguments

In [None]:
# Fetch the pre-trained DialoGPT checkpoint: the tokenizer first, then the
# language-model weights, both resolved from the same checkpoint identifier.
model_name = "microsoft/DialoGPT-large"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)

In [None]:
# Build the training set from a plain-text file.
# Replace 'your_dataset.txt' with the path to your dataset.
train_dataset = TextDataset(
    tokenizer=tokenizer,
    file_path='your_dataset.txt',
    block_size=128,  # block length, in tokens, handed to TextDataset
)

# Fail here, next to the cause, instead of later inside trainer.train():
# TextDataset keeps only complete blocks, so a file that tokenizes to fewer
# than `block_size` tokens yields a dataset with no examples at all.
if len(train_dataset) == 0:
    raise ValueError(
        "TextDataset produced no training examples; the input file must "
        "tokenize to at least block_size (128) tokens."
    )

In [None]:
# Batch builder handed to the Trainer. mlm=False disables masked-language-model
# masking, which is the setting used for causal LMs such as GPT-2.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Settings for the fine-tuning run, grouped by concern.
training_args = TrainingArguments(
    # Schedule and batch size
    num_train_epochs=1,
    per_device_train_batch_size=4,
    # Checkpointing
    save_steps=10_000,
    save_total_limit=2,
    # Output location
    output_dir="./dialoGPT-finetuned",
    overwrite_output_dir=True,
)

In [None]:
# Wire the run settings, model, dataset and collator into a single Trainer.
trainer = Trainer(
    args=training_args,
    model=model,
    train_dataset=train_dataset,
    data_collator=data_collator,
)


In [None]:
# Fine-tune the model
# Runs training on the `trainer` built in the previous cell. This is kept as a
# bare expression on the cell's last line, so whatever train() returns is shown
# as the cell's output.
trainer.train()

In [None]:
# Save the fine-tuned model and its tokenizer.
# The destination is read from training_args.output_dir rather than repeated as
# a literal, so the final model always lands in the same directory the run was
# configured to write to. The tokenizer call stays last so its return value
# remains the cell's displayed output.
model.save_pretrained(training_args.output_dir)
tokenizer.save_pretrained(training_args.output_dir)