In [15]:
import os
import torch
import numpy as np
from transformers import GPT2LMHeadModel, GPT2Tokenizer, AdamW
from torch.utils.data import DataLoader, Dataset



In [16]:
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"  # Enable more detailed stack trace for debugging




In [17]:
# GPT-2 tokenizer download
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token  # Setting a custom token for padding
# Dataset (example structure)
class CustomDataset(Dataset):
    def __init__(self, texts, tokenizer, max_length=512):
        self.texts = texts
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        encodings = self.tokenizer(
            self.texts[idx],
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
            return_tensors='pt'
        )
        return {
            'input_ids': encodings['input_ids'].flatten(),
            'attention_mask': encodings['attention_mask'].flatten(),
            'labels': encodings['input_ids'].flatten()
        }

# Examples of texts
texts = ["This is a test", "Another test sentence"]
dataset = CustomDataset(texts, tokenizer)
train_loader = DataLoader(dataset, batch_size=2)



In [23]:
# Loading the GPT-2 model
model = GPT2LMHeadModel.from_pretrained("gpt2")

# Moving the model to the GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Update embed size if special tokens are added
model.resize_token_embeddings(len(tokenizer))

# Optimizer settings
optimizer = AdamW(model.parameters(), lr=1e-5)


In [27]:
from transformers import AdamW
from torch.utils.data import DataLoader

# Function for training epoch
def train_epoch(model, dataloader, optimizer):
    model.train()
    total_loss = 0
    for batch in dataloader:
        optimizer.zero_grad()

        # Moving data to the GPU
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)

        # Foresight
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss

        # Backpropagation and optimization
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    return total_loss / len(dataloader)

# A function to evaluate the model
def evaluate(model, dataloader):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for batch in dataloader:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["labels"].to(device)

            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss

            total_loss += loss.item()
    return total_loss / len(dataloader)



In [28]:
# Function for training the model
def train_epoch(model, data_loader, optimizer):
    model = model.train()  # We transfer the model to the learning mode
    losses = []

    for batch in data_loader:
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)

        outputs = model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            labels=labels
        )

        loss = outputs.loss
        losses.append(loss.item())

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    return np.mean(losses)



In [29]:
# Model learning process
for epoch in range(3):  # You can increase the number of epochs for better results
    train_loss = train_epoch(model, train_loader, optimizer)
    print(f'Epoch {epoch + 1}, Loss: {train_loss}')




Epoch 1, Loss: 8.059285163879395
Epoch 2, Loss: 8.004773139953613
Epoch 3, Loss: 6.78363561630249


In [31]:
model.eval()  # Transferring the model to evaluation mode

# Text prompt
input_text = "The future of AI is"
input_ids = tokenizer.encode(input_text, return_tensors='pt').to(device)
attention_mask = torch.ones(input_ids.shape, device=device)  # Creating attention mask

# Text generation
output = model.generate(
    input_ids,
    attention_mask=attention_mask,
    max_length=100,  # Increasing the length of the generated text
    num_return_sequences=1,
    do_sample=True,  # We use the exemplary method for generation
    top_k=50,  # We consider only the 50 most likely tokens
    top_p=0.95,  # We use top-p sampling (Nucleus Sampling)
    temperature=0.7  # "Temperature" for diversity management
)
generated_text = tokenizer.decode(output[0], skip_special_tokens=True)

print(generated_text)



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


The future of AI is not in the future, but in the future it will be.

The future of AI is not in the future, but in the future it will be.

The future of AI is not in the future, but in the future it will be.

The future of AI is not in the future, but in the future it will be.

The future of AI is not in the future, but in the future it will be.




In [32]:
input_texts = [
    "The future of AI is",
    "Artificial intelligence will",
    "In the next decade, AI",
]

for input_text in input_texts:
    input_ids = tokenizer.encode(input_text, return_tensors='pt').to(device)
    attention_mask = torch.ones(input_ids.shape, device=device)
    output = model.generate(
        input_ids,
        attention_mask=attention_mask,
        max_length=100,
        num_return_sequences=1,
        do_sample=True,
        top_k=50,
        top_p=0.95,
        temperature=0.7
    )
    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
    print(f"Input: {input_text}\nGenerated: {generated_text}\n")


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Input: The future of AI is
Generated: The future of AI is unknown.

The Future of AI is Alive.

The Future of AI is Alive.

The Future of AI is Alive.

The Future of AI is Alive.

The Future of AI is Alive.

The Future of AI is Alive.

The Future of AI is Alive.

The Future of AI is Alive.

The Future of AI is Alive.

The Future of AI is Alive.

The



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Input: Artificial intelligence will
Generated: Artificial intelligence will become an integral part of our lives, but I believe that it is not the future and will not be the future. The future will be technology that will be able to tell us what is real, what is in our dreams, what is in our dreams, what we want to do and what we need to do, and what we need to learn.

The future will be the creation of a world with a human population that is capable of doing anything and everything in a

Input: In the next decade, AI
Generated: In the next decade, AI will become a powerful tool to drive innovation, develop new technologies and improve our lives. In our view, the next generation of AI will be a technological revolution in the field of AI research.

AI is the future of human-centered decision making. It will transform the way we think and act. It will transform the way we live, work and think. AI is the future of personal choice. It is the future of our lives.

The future of



In [34]:
model.eval()  # Transferring the model to evaluation mode

# Text prompts
input_texts = [
    "The future of AI is",
    "Artificial intelligence will",
    "In the next decade, AI",
]

# Text generation with various parameters
for input_text in input_texts:
    input_ids = tokenizer.encode(input_text, return_tensors='pt').to(device)
    attention_mask = torch.ones(input_ids.shape, device=device)
    output = model.generate(
        input_ids,
        attention_mask=attention_mask,
        max_length=100,  # Increasing the length of the generated text
        num_return_sequences=1,
        do_sample=True,
        top_k=50,  # We consider the 50 most likely tokens
        top_p=0.95,  # Використовуємо top-p sampling
        temperature=0.7  # "Temperature" for diversity management
    )
    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
    print(f"Input: {input_text}\nGenerated: {generated_text}\n")


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Input: The future of AI is
Generated: The future of AI is very bright. In our next post, we'll look at some of the potential applications of AI in the future.

The future of AI is very bright. In our next post, we'll look at some of the potential applications of AI in the future. How will AI evolve in the coming years?

A major shift in the world of AI will be the introduction of artificial intelligence (AI). This is a new technology that will enable AI to better understand and



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Input: Artificial intelligence will
Generated: Artificial intelligence will be used to create all sorts of ways to improve our lives and our lives will never be the same. We will never be able to solve any real world problem, because we will never be able to solve any real problem.

So if you want to know what is going on right now, you're going to want to read about the technology.

The technology is very different from the technology that we're currently using. We're using the technology to create the worlds

Input: In the next decade, AI
Generated: In the next decade, AI will be the most advanced and powerful tool in the computing world.

"It's going to be very important to get the world to embrace this," said Todashek. "But we need to do it with a very high degree of confidence."



In [35]:
model.eval()  # Transferring the model to evaluation mode

# Text prompts
input_texts = [
    "The future of AI is",
    "Artificial intelligence will",
    "In the next decade, AI",
]

# Text generation with various parameters
for input_text in input_texts:
    input_ids = tokenizer.encode(input_text, return_tensors='pt').to(device)
    attention_mask = torch.ones(input_ids.shape, device=device)
    output = model.generate(
        input_ids,
        attention_mask=attention_mask,
        max_length=100,  # Increasing the length of the generated text
        num_return_sequences=1,
        do_sample=True,
        top_k=50,  # We consider the 50 most likely tokens
        top_p=0.95,  # We use top-p sampling
        temperature=0.7  # "Temperature" for diversity management
    )
    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
    print(f"Input: {input_text}\nGenerated: {generated_text}\n")


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Input: The future of AI is
Generated: The future of AI is uncertain. There is no guarantee that the technology will continue to evolve, but the potential of AI is still great. This is why I am confident that AI will continue to revolutionize the world.



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Input: Artificial intelligence will
Generated: Artificial intelligence will probably improve in the next decade as the technology progresses. But it's going to take a lot more than just the advent of AI to push humanity to the edge.

"It's going to take more than just the advent of AI to push humanity to the edge," said Scott Fitch, a professor of computer science at the University of California, Berkeley. "It's going to take more than just the advent of AI to push humanity to the edge."

We

Input: In the next decade, AI
Generated: In the next decade, AI and other advanced technologies will continue to make their way to the computer, but we will also see that many people will be completely different from us. The next generation of AI will be more complicated than humans were ever before, and many of the concepts we have developed will be the same as those we have developed for humans.

The most important thing is to be as open and tolerant as possible about the way you approach AI. Th