In [4]:
import yfinance as yf
import numpy as np
import matplotlib.pyplot as plt
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import DataPreprocessor as dp
import pandas as pd
import time
import importlib

In [5]:
# Re-execute the local DataPreprocessor module so edits made to it on disk
# take effect without restarting the kernel.
importlib.reload(dp)

# Instrument and time window requested from the data helper.
symbol = "BTCUSDT"
interval = '1h'
start_date = "01 Jan, 2020"
end_date = "10 Jul, 2024"

# Presumably the closing-price series for `symbol` (per the helper's name);
# later cells iterate over it and read `prices.index`.
prices = dp.get_close_price(symbol, interval, start_date, end_date)

[[1577836800000, '7195.24000000', '7196.25000000', '7175.46000000', '7177.02000000', '511.81490100', 1577840399999, '3675856.57948543', 7640, '226.15312600', '1624289.18965037', '0'], [1577840400000, '7176.47000000', '7230.00000000', '7175.71000000', '7216.27000000', '883.05260300', 1577843999999, '6365952.54111276', 9033, '570.72141900', '4114804.04969093', '0'], [1577844000000, '7215.52000000', '7244.87000000', '7211.41000000', '7242.85000000', '655.15680900', 1577847599999, '4736719.38819138', 7466, '357.18026300', '2582513.82508776', '0'], [1577847600000, '7242.66000000', '7245.00000000', '7220.00000000', '7225.01000000', '783.72486700', 1577851199999, '5667367.29300603', 8337, '394.81081100', '2854987.35529137', '0'], [1577851200000, '7225.00000000', '7230.00000000', '7215.03000000', '7217.27000000', '467.81257800', 1577854799999, '3379093.84979077', 5896, '243.51470600', '1759053.44836794', '0'], [1577854800000, '7217.26000000', '7229.76000000', '7216.65000000', '7224.21000000', 

In [None]:
# Load the pretrained GPT-2 tokenizer and language-model head.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")

# Maximum sequence length, taken from the model configuration.
max_seq_len = model.config.n_positions

# Serialize every price as text, separated by single spaces, and tokenize the
# whole series in one call; the result is a (1, total_tokens) tensor.
price_text = " ".join(str(price) for price in prices)
encoded_prices = tokenizer.encode(price_text, return_tensors="pt")

# Cut the token sequence into consecutive windows of at most `max_seq_len`
# tokens along the sequence dimension.
chunks = []
for offset in range(0, encoded_prices.size(1), max_seq_len):
    chunks.append(encoded_prices[:, offset:offset + max_seq_len])

# Remove the final (possibly shorter) window from the list and keep it separately.
last_chunk = chunks.pop()

In [None]:
# Fine-tune GPT-2 on the tokenized price chunks and save the resulting weights.

# Make the embedding matrix match the tokenizer BEFORE the optimizer is built:
# if a resize ever creates new embedding parameters, an optimizer constructed
# earlier would keep updating the discarded ones.
model.resize_token_embeddings(len(tokenizer))

# Move the model to the GPU when CUDA is available, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Put the model in training mode and create the optimizer over its parameters.
model.train()
optimizer = torch.optim.Adam(model.parameters(), lr=5e-5)

num_epochs = 3
for epoch in range(num_epochs):
    start_time = time.time()
    epoch_loss = 0.0
    steps_done = 0  # number of chunks that actually contributed to the loss
    for chunk in chunks:
        # Skip anything longer than the model's maximum sequence length.
        if chunk.size(1) > max_seq_len:
            continue
        chunk = chunk.to(device)
        optimizer.zero_grad()
        # Causal-LM objective: the input ids double as the labels.
        outputs = model(chunk, labels=chunk)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        steps_done += 1
    end_time = time.time()
    # Average over the chunks really used; report NaN instead of raising
    # ZeroDivisionError when there was nothing to train on.
    mean_loss = epoch_loss / steps_done if steps_done else float("nan")
    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {mean_loss}, Time: {end_time - start_time:.2f} seconds")

torch.save(model.state_dict(), "llm_model.pth")
print("Model saved as llm_model.pth")

In [None]:
# Predict upcoming prices by letting the model continue the final chunk,
# then plot them after the end of the historical series.
num_predictions = 10

# Switch off dropout for inference; the model may still be in training mode.
model.eval()

# A single price spans several tokens, so estimate tokens-per-price from the
# final chunk and request enough new tokens for `num_predictions` prices plus
# one spare (the trailing piece may be cut off mid-number).
context_price_count = max(1, len(tokenizer.decode(last_chunk[0]).split()))
tokens_per_price = -(-last_chunk.size(1) // context_price_count)  # ceiling division
max_new_tokens = min(tokens_per_price * (num_predictions + 1), max_seq_len - 1)

# Keep prompt + continuation within the model's maximum sequence length by
# trimming the prompt from the left when necessary.
context = last_chunk[:, -(max_seq_len - max_new_tokens):].to(device)
context_len = context.size(1)

generated = model.generate(
    context,
    max_length=context_len + max_new_tokens,
    temperature=1.0,
    num_return_sequences=1,
    pad_token_id=tokenizer.eos_token_id,  # GPT-2 defines no pad token
)

# Decode ONLY the newly generated tokens; decoding the whole sequence would
# mix the historical prompt prices into the "predictions".
generated_text = tokenizer.decode(generated[0, context_len:], skip_special_tokens=True)
pieces = generated_text.split()
if generated_text and not generated_text[0].isspace():
    # The continuation starts mid-number (it extends the prompt's last price).
    pieces = pieces[1:]
# The final piece may have been truncated by the token budget.
pieces = pieces[:-1]

# Keep only pieces that parse as numbers, up to the requested count.
generated_prices = []
predicted_values = []
for piece in pieces:
    try:
        value = float(piece)
    except ValueError:
        continue
    generated_prices.append(piece)
    predicted_values.append(value)
    if len(predicted_values) == num_predictions:
        break

# Plotting
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(prices.index, prices, label="Historical Prices")
if predicted_values:
    # Place predictions after the last observation, extrapolating the spacing
    # of the final two index entries.
    # NOTE(review): assumes a regularly spaced index that supports arithmetic
    # (numeric or datetime) with at least two entries - confirm.
    index_step = prices.index[-1] - prices.index[-2]
    future_index = [prices.index[-1] + index_step * k for k in range(1, len(predicted_values) + 1)]
    ax.plot(future_index, predicted_values, "g^", label="Predicted Prices")
ax.set_xlabel("Date")
ax.set_ylabel("Stock Price")
ax.set_title(f"{symbol} - Historical and Predicted Stock Prices (GPT)")
ax.legend()
ax.tick_params(axis="x", labelrotation=45)
fig.tight_layout()
plt.show()