In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/weatherww2/Summary of Weather.csv
/kaggle/input/weatherww2/Weather Station Locations.csv


In [4]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import Dataset, Subset, DataLoader
from torch.utils.tensorboard import SummaryWriter

class WeatherRNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.rnn = nn.RNN(
            input_size=1,
            hidden_size=64,
            num_layers=2,
            batch_first=True,
            dropout=0.2
        )
        self.fc = nn.Sequential(
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 5)  
        )

    def forward(self, x):
        out, _ = self.rnn(x)
        return self.fc(out[:, -1, :])

if __name__ == "__main__":
    df = pd.read_csv('/kaggle/input/weatherww2/Summary of Weather.csv',
                    usecols=['Date', 'MaxTemp'],
                    parse_dates=['Date'])

    df['MaxTemp'] = df['MaxTemp'].ffill().bfill()
    scaler = MinMaxScaler()
    data = scaler.fit_transform(df[['MaxTemp']])
    data = data.astype(np.float32)

    seq_len = 30
    pred_days = 5
    X, y = [], []
    for i in range(len(data)-seq_len-pred_days):
        X.append(data[i:i+seq_len])
        y.append(data[i+seq_len:i+seq_len+pred_days].flatten())
    X, y = np.array(X), np.array(y)

    X_tensor = torch.tensor(X, dtype=torch.float32)
    y_tensor = torch.tensor(y, dtype=torch.float32)
    dataset = torch.utils.data.TensorDataset(X_tensor, y_tensor)
    split = int(0.8 * len(X))
    train_set, test_set = torch.utils.data.random_split(dataset, [split, len(X) - split])
    train_loader = DataLoader(train_set, batch_size=64, shuffle=True)
    test_loader = DataLoader(test_set, batch_size=64)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = WeatherRNN().to(device)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    writer = SummaryWriter()

    num_epochs = 20
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        for batch_idx, (inputs, targets) in enumerate(train_loader):
            inputs, targets = inputs.to(device), targets.to(device)  

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()

            total_loss += loss.item()
            writer.add_scalar('train loss', loss.item(), epoch * len(train_loader) + batch_idx)
            avg_train_loss = total_loss / len(train_loader)
            if batch_idx % 200 == 0:
                print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')
                writer.add_scalar('avg train loss', avg_train_loss, epoch)
        model.eval()
        with torch.no_grad():
            val_loss = 0
            for inputs, targets in test_loader:
                inputs, targets = inputs.to(device), targets.to(device)
                outputs = model(inputs)
                val_loss += criterion(outputs, targets).item()
            avg_val = val_loss / len(test_loader)
            print(f'Epoch [{epoch + 1}/{num_epochs}], Val Loss: {avg_val:.4f}')  
            writer.add_scalar('val loss', avg_val, epoch)

    torch.save(model.state_dict(), 'weather_rnn.pth')
    writer.close()



Epoch [1/20], Loss: 0.5031
Epoch [1/20], Loss: 0.0129
Epoch [1/20], Loss: 0.0010
Epoch [1/20], Loss: 0.0010
Epoch [1/20], Loss: 0.0009
Epoch [1/20], Loss: 0.0020
Epoch [1/20], Loss: 0.0013
Epoch [1/20], Loss: 0.0034
Epoch [1/20], Val Loss: 0.0013
Epoch [2/20], Loss: 0.0008
Epoch [2/20], Loss: 0.0007
Epoch [2/20], Loss: 0.0028
Epoch [2/20], Loss: 0.0020
Epoch [2/20], Loss: 0.0008
Epoch [2/20], Loss: 0.0010
Epoch [2/20], Loss: 0.0014
Epoch [2/20], Loss: 0.0010
Epoch [2/20], Val Loss: 0.0013
Epoch [3/20], Loss: 0.0010
Epoch [3/20], Loss: 0.0038
Epoch [3/20], Loss: 0.0012
Epoch [3/20], Loss: 0.0009
Epoch [3/20], Loss: 0.0018
Epoch [3/20], Loss: 0.0015
Epoch [3/20], Loss: 0.0014
Epoch [3/20], Loss: 0.0020
Epoch [3/20], Val Loss: 0.0013
Epoch [4/20], Loss: 0.0010
Epoch [4/20], Loss: 0.0008
Epoch [4/20], Loss: 0.0007
Epoch [4/20], Loss: 0.0007
Epoch [4/20], Loss: 0.0010
Epoch [4/20], Loss: 0.0008
Epoch [4/20], Loss: 0.0009
Epoch [4/20], Loss: 0.0005
Epoch [4/20], Val Loss: 0.0014
Epoch [5/20]

In [10]:
from tensorboard import notebook
import os
print(os.listdir("/kaggle/working/runs"))

['Apr03_08-04-45_e51fdfc42c67', 'Apr03_08-05-59_e51fdfc42c67', 'Apr03_08-11-08_e51fdfc42c67']


In [11]:
%load_ext tensorboard


The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [18]:
%reload_ext tensorboard

In [20]:
%tensorboard --logdir /kaggle/working/runs

Reusing TensorBoard on port 6006 (pid 91), started 0:10:21 ago. (Use '!kill 91' to kill it.)

<IPython.core.display.Javascript object>