In [None]:
# Install the packages named below with pip, using IPython's `!` shell escape.
# NOTE(review): pandas is not imported in the visible cells — confirm it is needed.
!pip install torch
!pip install transformers
!pip install pandas



In [None]:
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel
from transformers import TextDataset, DataCollatorForLanguageModeling
from torch.utils.data import DataLoader
from transformers import AdamW
from torch.cuda.amp import autocast, GradScaler

In [None]:

# Run on the GPU when PyTorch can see one; fall back to the CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")


Using device: cuda


In [None]:

# Tokenizer and language model are both loaded from the same pretrained
# checkpoint name; the model is then moved onto the selected device.
model_name = "gpt2-medium"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)
model = model.to(device)

In [None]:
# Training text is read from this file and tokenized into blocks of 128 tokens.
file_path = "LLM-Sample-Input-File.csv"
train_data = TextDataset(tokenizer=tokenizer, file_path=file_path, block_size=128)

# Collator configured for causal language modelling (mlm=False).
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Shuffled mini-batches of four blocks each.
train_dataloader = DataLoader(dataset=train_data, batch_size=4, shuffle=True)



In [None]:
# Fine-tune `model` on `train_dataloader` with gradient accumulation and,
# when running on CUDA, automatic mixed precision.
num_epochs = 1
accumulation_steps = 8  # micro-batches whose gradients are summed per optimizer step

# torch.cuda.amp only does something on CUDA; disabling both helpers on CPU
# keeps the cell runnable there as ordinary full-precision training.
use_amp = device.type == "cuda"

optimizer = AdamW(model.parameters(), lr=5e-5)
scaler = GradScaler(enabled=use_amp)

# from_pretrained() hands the model back in eval mode, so switch it to
# training mode before fine-tuning.
model.train()
# Start from clean gradients even if this cell is re-run on the same model.
optimizer.zero_grad()

num_batches = len(train_dataloader)
# Size of the final, incomplete accumulation window (0 when the batches
# divide evenly into windows).
tail_batches = num_batches % accumulation_steps

for epoch in range(num_epochs):
    epoch_loss = 0.0

    for step, inputs in enumerate(train_dataloader):
        inputs = inputs.to(device)
        # The token ids are passed as labels too (causal LM objective).
        labels = inputs.clone()

        is_last_batch = step + 1 == num_batches
        in_tail = tail_batches > 0 and step >= num_batches - tail_batches
        # Divide by the number of micro-batches that actually contribute to
        # the upcoming optimizer step so the accumulated gradient is a mean.
        window = tail_batches if in_tail else accumulation_steps

        with autocast(enabled=use_amp):
            outputs = model(inputs, labels=labels)
            loss = outputs.loss / window

        scaler.scale(loss).backward()
        epoch_loss += outputs.loss.item()

        # Step after every full window AND after the last batch; otherwise
        # the gradients of a trailing partial window are never applied.
        if (step + 1) % accumulation_steps == 0 or is_last_batch:
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()

    print(f"Epoch {epoch + 1}/{num_epochs} - mean loss: {epoch_loss / max(num_batches, 1):.4f}")




torch.Size([4, 128])
torch.Size([4, 128])
torch.Size([4, 128])
torch.Size([4, 128])
torch.Size([4, 128])
torch.Size([4, 128])
torch.Size([4, 128])
torch.Size([4, 128])
torch.Size([4, 128])
torch.Size([4, 128])
torch.Size([4, 128])
torch.Size([4, 128])
torch.Size([4, 128])
torch.Size([4, 128])
torch.Size([1, 128])


In [None]:

# Persist the model and the tokenizer side by side in the same directory.
save_dir = "./my_finetuned_model"
model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)


('./my_finetuned_model/tokenizer_config.json',
 './my_finetuned_model/special_tokens_map.json',
 './my_finetuned_model/vocab.json',
 './my_finetuned_model/merges.txt',
 './my_finetuned_model/added_tokens.json')

In [None]:
# Reload the saved tokenizer and model from disk for inference.
model_path = "./my_finetuned_model"

# Load the tokenizer first: the model's pad_token_id is read from it, and in
# this order the cell no longer depends on a `tokenizer` variable left over
# from an earlier cell (which raised NameError on a fresh kernel).
tokenizer = GPT2Tokenizer.from_pretrained(model_path)
model = GPT2LMHeadModel.from_pretrained(model_path, pad_token_id=tokenizer.eos_token_id)



In [None]:
# Choose the device (GPU if available, else CPU), then put the model in
# evaluation mode and move it there.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = model.eval().to(device)

In [None]:
import warnings
warnings.filterwarnings("ignore", message="`do_sample` is set to `False`. However, `top_p` is set to `0.95` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `top_p`.")


In [None]:
def get_model_response(user_input):
    """Generate a reply to ``user_input`` with the loaded model.

    The prompt is tokenized, moved to ``device`` and extended by
    ``model.generate`` using beam search (5 beams, no repeated 2-grams,
    ``max_length=100``).

    Args:
        user_input: Prompt text typed by the user.

    Returns:
        The first returned sequence decoded to text with special tokens
        removed. As the recorded chat output shows, that text begins with
        the prompt itself. Blank input (``None``, empty or whitespace-only)
        returns ``""`` without calling the model.
    """
    # A blank prompt would reach generate() with no tokens to condition on,
    # so answer it with an empty string instead.
    if not user_input or not user_input.strip():
        return ""

    input_ids = tokenizer.encode(user_input, return_tensors="pt").to(device)
    # Single unpadded sequence: every position is a real token.
    attention_mask = torch.ones_like(input_ids)

    with torch.no_grad():
        # top_k / top_p were removed: they only apply when do_sample=True, so
        # this beam-search call ignored them and emitted a warning.
        output = model.generate(
            input_ids,
            attention_mask=attention_mask,
            max_length=100,
            num_beams=5,
            no_repeat_ngram_size=2,
        )

    return tokenizer.decode(output[0], skip_special_tokens=True)

In [None]:
# Minimal console chat: read a line, answer with the model, stop on "exit".
print("Chatbot: Hi! I'm your chatbot. Type 'exit' to end the conversation.")

while True:
    try:
        # Strip surrounding whitespace so "exit " still ends the chat and the
        # prompt sent to the model carries no stray leading/trailing spaces.
        user_input = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or an interrupt while waiting for input ends the chat
        # cleanly instead of leaving a traceback.
        print("\nChatbot: Goodbye!")
        break

    # Nothing typed: ask again rather than sending an empty prompt to the model.
    if not user_input:
        continue

    if user_input.lower() == 'exit':
        print("Chatbot: Goodbye!")
        break

    model_response = get_model_response(user_input)
    print("Chatbot:", model_response)


Chatbot: Hi! I'm your chatbot. Type 'exit' to end the conversation.
You: How much revenue does Potato Inc. make from selling Smartphones
Chatbot: How much revenue does Potato Inc. make from selling Smartphones?

Potato Inc.'s revenue is based on the number of Smartphone units sold in the U.S. during the first quarter of 2013.
The company's revenue from the sale of smartphones is estimated to be between $1.5 million and $2 million per quarter, depending on how many units are sold and how long it takes for them to arrive in customers' hands. The company does not disclose how much it
You: How much revenue does Potato Inc. make from Japan
Chatbot: How much revenue does Potato Inc. make from Japan?

Potato Inc.'s total revenue in Japan is estimated to be around $1.5 billion. This figure is based on the assumption that the company will be able to sell its potato products in the country for at least one year, and that it will generate enough revenue to cover its operating expenses for the nex