In [2]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import mean_squared_error, r2_score

In [3]:
# Train ve Test veri setlerini okuma
train_data = pd.read_csv('DailyDelhiClimateTrain.csv')
test_data = pd.read_csv('DailyDelhiClimateTest.csv')

In [4]:
# Veriyi numpy dizilerine dönüştürme
train_np = train_data[['meantemp', 'humidity',
                       'wind_speed', 'meanpressure']].values.astype(np.float32)
test_np = test_data[['meantemp', 'humidity', 'wind_speed',
                     'meanpressure']].values.astype(np.float32)

array([[  10.       ,   84.5      ,    0.       , 1015.6667   ],
       [   7.4      ,   92.       ,    2.98     , 1017.8      ],
       [   7.1666665,   87.       ,    4.633333 , 1018.6667   ],
       ...,
       [  14.095238 ,   89.666664 ,    6.266667 , 1017.9048   ],
       [  15.052631 ,   87.       ,    7.325    , 1016.1      ],
       [  10.       ,  100.       ,    0.       , 1016.       ]],
      dtype=float32)

In [5]:
# Veriyi zaman serisi penceresi haline getirme fonksiyonu
def create_sequences(input_data, window_size):
    sequences = []
    for i in range(len(input_data) - window_size):
        sequence = input_data[i:i+window_size]
        sequences.append(sequence)
    return np.array(sequences)

In [6]:
# Zaman serisi penceresi boyutunu belirleme
window_size = 7  # Örneğin, bir haftalık pencere

In [7]:
# Train ve Test veri setlerini zaman serisi penceresi haline getirme
train_sequences = create_sequences(train_np, window_size)
test_sequences = create_sequences(test_np, window_size)

In [8]:
# Numpy dizilerini PyTorch Tensor'larına dönüştürme
train_sequences_tensor = torch.from_numpy(train_sequences)
test_sequences_tensor = torch.from_numpy(test_sequences)

In [9]:
# Tahmin edilecek hedef değişkeni oluşturma (meantemp)
train_targets_tensor = torch.from_numpy(
    train_data['meantemp'][window_size:].values.astype(np.float32))
test_targets_tensor = torch.from_numpy(
    test_data['meantemp'][window_size:].values.astype(np.float32))

In [10]:
# Veri yükleyici oluşturma
batch_size = 64
train_dataset = TensorDataset(train_sequences_tensor, train_targets_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = TensorDataset(test_sequences_tensor, test_targets_tensor)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [11]:
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size,
                            num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(
            0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(
            0), self.hidden_size).to(x.device)

        out, _ = self.lstm(x, (h0, c0))

        out = self.fc(out[:, -1, :])  # Son zaman adımı için sadece çıktıyı al
        return out

In [12]:
# Modelin parametrelerini tanımlama
# Veri setindeki özellik sayısı (meantemp, humidity, wind_speed, meanpressure)
input_size = 4
hidden_size = 64  # LSTM katmanının gizli birim sayısı
num_layers = 2  # LSTM katman sayısı
output_size = 1  # Tahmin edilecek hedef değişken (meantemp)

In [13]:
# LSTM modelini oluşturma
model = LSTM(input_size, hidden_size, num_layers, output_size)

In [14]:
# Eğitim için hata fonksiyonu ve optimizasyon algoritmasını tanımlama
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [15]:
# Eğitim döngüsü
num_epochs = 200
model.train()

LSTM(
  (lstm): LSTM(4, 64, num_layers=2, batch_first=True)
  (fc): Linear(in_features=64, out_features=1, bias=True)
)

In [16]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
for epoch in range(num_epochs):
    # FILEPATH: /c:/Users/seyfullahkorkmaz/VsCodeProjects/climate-predict-lstm/main.ipynb
    for batch_inputs, batch_targets in train_loader:
        # Veriyi uygun cihaza taşıma
        batch_inputs, batch_targets = batch_inputs.to(
            device), batch_targets.to(device)

        # Modeli eğitme
        outputs = model(batch_inputs)
        loss = criterion(outputs, batch_targets.unsqueeze(1))

        # Geriye yayılım ve optimizasyon adımı
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Her epoch sonunda eğitim kaybını yazdırma
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [1/200], Loss: 535.2468
Epoch [2/200], Loss: 465.5289
Epoch [3/200], Loss: 338.2459
Epoch [4/200], Loss: 323.5674
Epoch [5/200], Loss: 227.4790
Epoch [6/200], Loss: 200.0331
Epoch [7/200], Loss: 200.6122
Epoch [8/200], Loss: 147.8798
Epoch [9/200], Loss: 132.5266
Epoch [10/200], Loss: 133.3691
Epoch [11/200], Loss: 120.7035
Epoch [12/200], Loss: 70.7717
Epoch [13/200], Loss: 73.3401
Epoch [14/200], Loss: 105.6875
Epoch [15/200], Loss: 66.7869
Epoch [16/200], Loss: 75.5688
Epoch [17/200], Loss: 62.8279
Epoch [18/200], Loss: 57.8318
Epoch [19/200], Loss: 62.4211
Epoch [20/200], Loss: 55.7328
Epoch [21/200], Loss: 54.9285
Epoch [22/200], Loss: 56.6852
Epoch [23/200], Loss: 57.3633
Epoch [24/200], Loss: 62.0987
Epoch [25/200], Loss: 56.8657
Epoch [26/200], Loss: 55.7242
Epoch [27/200], Loss: 47.4290
Epoch [28/200], Loss: 58.5065
Epoch [29/200], Loss: 35.5683
Epoch [30/200], Loss: 23.2174
Epoch [31/200], Loss: 32.5303
Epoch [32/200], Loss: 14.0134
Epoch [33/200], Loss: 12.8828
Epoch [

In [17]:
# Modeli değerlendirme (test veri seti üzerinde)
model.eval()
test_loss = 0
all_targets = []
all_predictions = []

with torch.no_grad():
    for batch_inputs, batch_targets in test_loader:
        batch_inputs, batch_targets = batch_inputs.to(
            device), batch_targets.to(device)
        outputs = model(batch_inputs)
        test_loss += criterion(outputs, batch_targets.unsqueeze(1)).item()
        all_targets.extend(batch_targets.cpu().numpy())
        all_predictions.extend(outputs.cpu().numpy())

In [18]:
# Test veri seti üzerindeki ortalama kaybı hesaplama
mean_test_loss = test_loss / len(test_loader)
print(f'Test Loss: {mean_test_loss:.4f}')

# Mean Squared Error ve R^2 Score hesaplama
mse = mean_squared_error(all_targets, all_predictions)
r2 = r2_score(all_targets, all_predictions)

print(f'Mean Squared Error: {mse:.4f}')
print(f'R^2 Score: {r2:.4f}')

Test Loss: 4.4735
Mean Squared Error: 4.7441
R^2 Score: 0.8852


In [19]:
print(all_targets[:5])
print(all_predictions[:5])

[15.684211, 14.571428, 12.111111, 11.0, 11.789474]
[array([28.027569], dtype=float32), array([15.272582], dtype=float32), array([14.947316], dtype=float32), array([14.326501], dtype=float32), array([13.310998], dtype=float32)]
