# In [41]:
# Text generation using GPT-2 model
# Load Libraries
from transformers import pipeline

# Build a Hugging Face text-generation pipeline on top of the "gpt2" checkpoint
generator = pipeline(task="text-generation", model="gpt2")

# Seed text that the model is asked to continue
prompt = "BERT model is used for Classification"

# Request exactly one continuation, bounded by max_length=60
output = generator(
    prompt,
    num_return_sequences=1,
    max_length=60,
)

# `output` is indexed as a list of dicts; show the text of its first entry
generated_text = output[0]["generated_text"]
print("Generated Text:\n", generated_text)

# Fine-tuning BERT for binary sentiment classification
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
import torch
from torch.utils.data import Dataset

class SimpleTextDataset(Dataset):
    """Map-style dataset that tokenizes one text per item, on access.

    Every item is a dict with ``input_ids``, ``attention_mask`` and
    ``labels`` entries. Texts are truncated/padded to ``max_len`` by the
    tokenizer, so the first two entries have a fixed length per item.

    Args:
        texts: Indexable, sized collection of raw strings.
        labels: Indexable, sized collection of integer class ids, parallel
            to ``texts``.
        tokenizer: Callable invoked as ``tokenizer(text, truncation=...,
            padding=..., max_length=..., return_tensors='pt')`` that returns
            a mapping holding ``input_ids`` and ``attention_mask`` tensors
            with a leading batch axis of size 1.
        max_len: Length every encoded text is truncated/padded to.

    Raises:
        ValueError: If ``texts`` and ``labels`` differ in length.
    """

    def __init__(self, texts, labels, tokenizer, max_len: int = 64):
        # Fail fast: a mismatch would otherwise only surface as an
        # IndexError in the middle of training.
        if len(texts) != len(labels):
            raise ValueError(
                f"texts and labels must have the same length "
                f"(got {len(texts)} and {len(labels)})"
            )
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self) -> int:
        """Return the number of (text, label) examples."""
        return len(self.texts)

    def __getitem__(self, idx) -> dict:
        """Tokenize example ``idx`` and return its tensors with the label."""
        encoding = self.tokenizer(
            self.texts[idx],
            truncation=True,
            padding='max_length',
            max_length=self.max_len,
            return_tensors='pt',
        )
        return {
            # squeeze(0) drops only the batch axis. A bare squeeze() would
            # also collapse the sequence axis when its length is 1, yielding
            # a 0-d tensor that cannot be batched with other items.
            'input_ids': encoding['input_ids'].squeeze(0),
            'attention_mask': encoding['attention_mask'].squeeze(0),
            'labels': torch.tensor(self.labels[idx], dtype=torch.long),
        }

# Toy sentiment corpus as (text, label) pairs; label 1 = positive, 0 = negative
samples = [
    ("I like this movie!", 1),
    ("This film was bad", 0),
]
texts = [text for text, _ in samples]
labels = [label for _, label in samples]

# Tokenizer and classifier are both loaded from the same pretrained checkpoint
checkpoint = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(checkpoint)
model = BertForSequenceClassification.from_pretrained(checkpoint, num_labels=2)

# Wrap the raw examples so each item is tokenized when it is accessed
dataset = SimpleTextDataset(texts=texts, labels=labels, tokenizer=tokenizer)

# Training configuration: two epochs, batch size of 2 per device, with
# logging_steps=1 and output/log directories under the working directory
training_config = {
    "output_dir": "./bert-model",
    "overwrite_output_dir": True,
    "num_train_epochs": 2,
    "per_device_train_batch_size": 2,
    "logging_dir": "./logs",
    "logging_steps": 1,
}
training_args = TrainingArguments(**training_config)

# Tie the model, the configuration and the training data together
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
)

# Run the fine-tuning loop
trainer.train()