# LSTM-Based Time Series with PyTorch
* https://www.youtube.com/watch?v=hIQLy5zCgH4&list=PLjy4p-07OYzuy_lHcRW8lPTLPTTOmUpmi&index=49

In [4]:
# Make use of a GPU or MPS (Apple) if one is available.  (see module 3.2)
import torch
has_mps = torch.backends.mps.is_built()
device = "mps" if has_mps else "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

Using device: cpu


# Simple LSTM Example
The following code creates the LSTM network, an example of an RNN for classification. The following code trains on a data set (x) with a max sequence size of 6 (columns) and six training elements (rows)

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

# Data
max_features = 4
x_data = [
    [[0], [1], [1], [0], [0], [0]],
    [[0], [0], [0], [2], [2], [0]],
    [[0], [0], [0], [0], [3], [3]],
    [[0], [2], [2], [0], [0], [0]],
    [[0], [0], [3], [3], [0], [0]],
    [[0], [0], [0], [0], [1], [1]]
]

x = torch.tensor(x_data, dtype=torch.float32)
y = torch.tensor([1, 2, 3, 2, 3, 1], dtype=torch.int64) # yはラベル

In [None]:
print(x.shape)
print(y.shape)

torch.Size([6, 6, 1])
torch.Size([6])


In [None]:
x

tensor([[[0.],
         [1.],
         [1.],
         [0.],
         [0.],
         [0.]],

        [[0.],
         [0.],
         [0.],
         [2.],
         [2.],
         [0.]],

        [[0.],
         [0.],
         [0.],
         [0.],
         [3.],
         [3.]],

        [[0.],
         [2.],
         [2.],
         [0.],
         [0.],
         [0.]],

        [[0.],
         [0.],
         [3.],
         [3.],
         [0.],
         [0.]],

        [[0.],
         [0.],
         [0.],
         [0.],
         [1.],
         [1.]]])

In [2]:
# Convert labels to one-hot encoding
y2 = torch.nn.functional.one_hot(y, num_classes=max_features).to(torch.float32)
print(y2)

tensor([[0., 1., 0., 0.],
        [0., 0., 1., 0.],
        [0., 0., 0., 1.],
        [0., 0., 1., 0.],
        [0., 0., 0., 1.],
        [0., 1., 0., 0.]])


In [None]:
print(y2.shape)

torch.Size([6, 4])


In [5]:
# Model using a sequence
class LSTMLayer(nn.Module):
  def __init__(self, input_size, hidden_size):
    super(LSTMLayer, self).__init__()
    self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)

  def forward(self, x):
    out, _ = self.lstm(x)
    return out

model = nn.Sequential(
    LSTMLayer(input_size=1, hidden_size=128),
    nn.Dropout(p=0.2),
    nn.Flatten(),
    nn.Linear(128*6, 4),
    nn.Sigmoid()
)

# Check for GPU availability
model.to(device)
x, y2 = x.to(device), y2.to(device)

# Loss and optimizer
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters())

In [6]:
# Train the model
print('Training...')
for epoch in range(100):
  optimizer.zero_grad()
  outputs = model(x)
  loss = criterion(outputs, y2)
  loss.backward()
  optimizer.step()
  if (epoch+1) % 10 == 0:
    print(f'Epoch [{epoch+1}/{100}], Loss: {loss.item():.4f}')

Training...
Epoch [10/100], Loss: 0.6274
Epoch [20/100], Loss: 0.5098
Epoch [30/100], Loss: 0.4709
Epoch [40/100], Loss: 0.4419
Epoch [50/100], Loss: 0.4007
Epoch [60/100], Loss: 0.3599
Epoch [70/100], Loss: 0.2869
Epoch [80/100], Loss: 0.2653
Epoch [90/100], Loss: 0.2108
Epoch [100/100], Loss: 0.1715


In [7]:
# Prediction
with torch.no_grad():
  outputs = model(x)
  predicted_classes = torch.argmax(outputs, dim=1)
  print(f"Predicted classes: {predicted_classes.cpu().numpy()}")
  print(f"Expected classes: {y.cpu().numpy()}")

Predicted classes: [1 3 3 2 3 1]
Expected classes: [1 2 3 2 3 1]


In [8]:
# outputsは、yの4つのラベルそれぞれの出現確率が各y_iごとに出力される
# それをargmax()で、それぞれのy_iの中で一番確率が高いラベルのインデックスを取り出している
outputs

tensor([[9.1494e-04, 9.7081e-01, 2.2289e-01, 3.3682e-03],
        [3.5775e-04, 3.2562e-02, 3.1259e-01, 3.6237e-01],
        [8.1840e-04, 1.3511e-01, 1.4179e-01, 6.3435e-01],
        [5.3967e-05, 3.6495e-02, 6.9720e-01, 1.9378e-02],
        [3.3712e-05, 1.5734e-04, 3.4892e-01, 8.7253e-01],
        [3.0585e-03, 8.7291e-01, 1.1263e-01, 7.5980e-02]])

In [9]:
outputs.shape

torch.Size([6, 4])

We can now present a sequence directly to the model for classification

In [None]:
def runit(model, inp):
  inp = torch.tensor(inp, dtype=torch.float32)
  inp = inp.to(device)
  with torch.no_grad():
    out = model(inp)
    return torch.argmax(out, dim=1).cpu().numpy()

print(runit(model, [[[0], [2], [0], [2], [0], [1]]]))

[2]


In [None]:
tmp = torch.tensor([[[0], [2], [0], [2], [0], [1]]], dtype=torch.float32)
tmp.shape

torch.Size([1, 6, 1])

# Sun spot Example
This section shows an example of RNN regression to predict sunspots.
<br><br>
We begin by loading and preparing data for the LSTM model. Next, we define a list of the column headers for a dataset. Following that, we read a CSV file from the given URL using the pd.read_csv. This dataset is sunspot activity. The CSV is provided by the USA government and has certain specifications:
* It uses a semicolon (;) as a separator.
* The dataset doesn't have a header, so the header=None argument ensures pandas doesn't mistakenly take the first row as column names. Instead, the predefined names list is used as the columns' header.
* Any value of '-1' in the dataset is considered as a missing value (na_values=['-1']).
* The dataset is read without setting an index column (index_col=False), meaning the default integer index will be used.
* Once executed, this code will load the specified dataset into a pandas DataFrame named df.

In [None]:
# Data Loading
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler
from torch.optim.lr_scheduler import ReduceLROnPlateau

names = ['year', 'month', 'day', 'dec_year', 'sn_value',
         'sn_error', 'obs_num', 'unused1']
df = pd.read_csv(
    "https://data.heatonresearch.com/data/t81-558/SN_d_tot_V2.0.csv",
    sep=';', header=None, names=names,
    na_values=['-1'], index_col=False)

In [None]:
df.head()

Unnamed: 0,year,month,day,dec_year,sn_value,sn_error,obs_num,unused1
0,1818,1,1,1818.001,-1,,0,1
1,1818,1,2,1818.004,-1,,0,1
2,1818,1,3,1818.007,-1,,0,1
3,1818,1,4,1818.01,-1,,0,1
4,1818,1,5,1818.012,-1,,0,1


Next we perform data preprocessing tasks for the sunspot dataset. Initially, the code identifies the last occurrence where the 'obs_num' column has a value, this is to strip off incomplete data near the beginning of the file. The <b>sn_value</b> column of the dataframe is then converted into floating-point numbers, ensuring numerical computations are consistent. After this, the dataframe is split into two parts based on the 'year' column: any data prior to the year 2000 is assigned to df_train, and data from the year 2000 onwards is assigned to df_test. Subsequently, the sn_value column from both the training and testing dataframes is extracted, <b>transformed into numpy arrays, and reshaped to form 2D arrays with a single column</b>.
<br><br>
This restructuring is done to meet the input requirements of PyTorch. The final segment of the code focuses on data normalization. A StandardScaler is initialized, which is a tool to standardize data to have a mean of 0 and a standard deviation of 1. This scaler is trained on the training data and then used to normalize both the training and testing data. After normalization, both datasets are flattened and converted to lists, resulting in one-dimensional lists of normalized sn_value data for both training and testing purposes.

## Data Preprocessing

In [None]:
start_id = max(df[df['obs_num'] == 0].index.tolist()) + 1
df = df[start_id:].copy()
df['sn_value'] = df['sn_value'].astype(float)
df_train = df[df['year'] < 2000].copy()
df_test = df[df['year'] >= 2000].copy()

df_train.tail()

Unnamed: 0,year,month,day,dec_year,sn_value,sn_error,obs_num,unused1
66469,1999,12,27,1999.988,95.0,6.9,15,1
66470,1999,12,28,1999.99,85.0,7.0,9,1
66471,1999,12,29,1999.993,103.0,5.6,13,1
66472,1999,12,30,1999.996,66.0,6.0,11,1
66473,1999,12,31,1999.999,79.0,5.8,13,1


In [None]:
# sn_value列をxとして利用するので取り出して、次元をPytorch用に2次元に変換する
spots_train = df_train['sn_value'].to_numpy().reshape(-1, 1)
spots_test = df_test['sn_value'].to_numpy().reshape(-1, 1)

spots_train.shape

(55160, 1)

In [None]:
spots_train[:10]

array([[353.],
       [240.],
       [275.],
       [352.],
       [268.],
       [285.],
       [343.],
       [340.],
       [238.],
       [287.]])

In [None]:
# 標準化
scaler = StandardScaler()
spots_train = scaler.fit_transform(spots_train).flatten().tolist()
spots_test = scaler.transform(spots_test).flatten().tolist()

ここで、flaten()で1次元に変えている意味がわからない？？？ -> 次のステップで、過去10過分のデータをlag特徴量として設定する処理をするため。

In [None]:
spots_train[:10]

[3.370924724576519,
 1.9389109571846703,
 2.3824550444299333,
 3.3582520363695116,
 2.293746226980881,
 2.509181926500008,
 3.244197842506444,
 3.2061797778854215,
 1.9135655807706553,
 2.5345273029140234]

The following code prepares the sequence data. This must be done in tasks like time series prediction or sequential data processing. The primary goal is to transform a list of observations into overlapping sequences of a specified length.
<br><br>
The constant <b>SEQUENCE_SIZE</b> is set to 10, meaning that each sequence (or window) will consist of 10 observations.
<br><br>
The function <b>to_sequnces</b> is defined to facilitate this transformation. This function takes in two arguments: the size of each sequence (seq_size) and the list of observations (obs). Within the function, two empty lists, <b>x</b> and <b>y</b>, are initialized. Iterating over the <b>obs</b> list, for every index <b>i</b>, a window of size <b>seq_size</b> is extracted from <b>obs</b> and appended to the <b>x</b> list. The observation immediately following this window, or the (i + seq_size)-th observation, is appended to the <b>y</b> list. Essentially, <b>x</b> contains the sequences, and <b>y</b> contains the observations immediately following each sequnce that are to be predicted. Once the lists are filled, they're converted into PyTorch tensors with the appropriate shapes and data type (torch.float32).
<br><br>
Using this <b>to_sequences</b> function, the previously prepared <b>spots_train</b> and <b>spots_test</b> lists are transformed into their correspondinig sequence datasets: <b>x_train</b>, <b>y_train</b>, <b>x_test</b>, and <b>y_test</b>. In this setup, if we consider <b>x_train</b> and <b>x_test</b>, each entry will represent a sequence of 10 observations, and the corresponding entry in <b>y_train</b> or <b>y_test</b> will represent the observation immediately following that sequence. This structure is particularly useful for tasks like predicting the next value in a time series based on a sequence of previous values.
<br><br>
要は、過去10日分のデータをxとして、その翌日のデータをyとして用意している

In [None]:
# Sequence Data Preparation
SEQUENCE_SIZE = 10

def to_sequences(seq_size, obs):
  x = []
  y = []
  for i in range(len(obs) - seq_size):
    window = obs[i:(i+seq_size)]
    y_after_window = obs[i+seq_size]
    x.append(window)
    y.append(y_after_window)
  return torch.tensor(x, dtype=torch.float32).view(-1, seq_size, 1), torch.tensor(y, dtype=torch.float32).view(-1, 1)

x_train, y_train = to_sequences(SEQUENCE_SIZE, spots_train)
x_test, y_test = to_sequences(SEQUENCE_SIZE, spots_test)

In [None]:
x_train

tensor([[[ 3.3709],
         [ 1.9389],
         [ 2.3825],
         ...,
         [ 3.2062],
         [ 1.9136],
         [ 2.5345]],

        [[ 1.9389],
         [ 2.3825],
         [ 3.3583],
         ...,
         [ 1.9136],
         [ 2.5345],
         [ 2.6232]],

        [[ 2.3825],
         [ 3.3583],
         [ 2.2937],
         ...,
         [ 2.5345],
         [ 2.6232],
         [ 3.2315]],

        ...,

        [[ 0.6463],
         [ 0.6843],
         [ 0.5196],
         ...,
         [ 0.3675],
         [ 0.1014],
         [-0.0254]],

        [[ 0.6843],
         [ 0.5196],
         [ 0.5449],
         ...,
         [ 0.1014],
         [-0.0254],
         [ 0.2028]],

        [[ 0.5196],
         [ 0.5449],
         [ 0.4562],
         ...,
         [-0.0254],
         [ 0.2028],
         [-0.2661]]])

In [None]:
x_train.shape

torch.Size([55150, 10, 1])

In [None]:
y_train

tensor([[ 2.6232],
        [ 3.2315],
        [ 2.5345],
        ...,
        [ 0.2028],
        [-0.2661],
        [-0.1014]])

In [None]:
y_train.shape

torch.Size([55150, 1])

## Dataset & DataLoader

Next we set up data loaders for PyTroch, a crucial step when training neural networks in batches. First, the training dta (<b>x_train</b> and <b>y_train</b>) is encapsulated into a <b>TesorDataset</b>. This structure pair input data and its corresponding target, making it easier to manage. Similarly, the testing data (<b>x_test</b> and <b>y_test</b>) is also wrapped into a <b>TensorDataset</b>. Once the datasets are structured, they are passed to the <b>DataLoader</b> function.
<br><br>
<u>For the training data, a <b>DataLoader</b> is created with a batch size of 32, and shuffle parameter is set to `True`, which means the during each epoch of training, the training data will be divided into batches of 32 samples, and these batches will randomly shuffled. This shuffling is to ensure the model ins't exposed to any inherent order in the data during training, promoting better generalization.</u>
<br><br>
Conversely, for the testing data, while the batch size remains 32, the shuffle parameter is set to `False`, indicating that the order of the test data remains unchanged. This is typical as shuffling the test data isn't necessary and can often make evaluation metrics easier to interpret. By the end of this code, two data loaders (train_loader and test_loader) are established, ready to feed data in batches to a neural network during both training and evaluation phases.

In [None]:
# Set data loaders for batch
train_dataset = TensorDataset(x_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

test_dataset = TensorDataset(x_test, y_test)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

## Modeling

We define an <b>LSTMModel</b> class, which is a neural network architecture using PyTorch that provides more precise control than a sequence. This model is stuructured with an LSTM layer followed by a dropout layer for regularization and two subsequent fully connected layers. The LSTM layer processes input sequences, producing a series of hidden states. The model then utilizes only the last hidden state of this series, passing it through the dropout layer and the two linear layers in sequence. This architecture is geared towards processing sequential data, where the LSTM can capture temporal dependencies and the fully connected layers further refine the representation for the final output.


In [None]:
# Model definition
class LSTMModel(nn.Module):
  def __init__(self):
    super(LSTMModel, self).__init__()
    self.lstm = nn.LSTM(input_size=1, hidden_size=64, batch_first=True)
    self.dropout = nn.Dropout(0.2)
    self.fc1 = nn.Linear(64, 32)
    self.fc2 = nn.Linear(32, 1)

  def forward(self, x):
    x, _ = self.lstm(x)
    x = self.dropout(x[:, -1, :])
    x = self.fc1(x)
    x = self.fc2(x)
    return x

model = LSTMModel().to(device)

The code outlines the training process for a neural network model. It sets up a mean squared error(MSE) loss function and uses the Adam optimizer with a learning rate of 0.001. The learning rate scheduler, <b>ReduceLROnPlateau</b>, adjusts the learning rate when the validation loss plateaus, decreasing it by a factor of 0.5 if there's no improvement for three epochs. The model trains for a maximum of 1000 epochs but incorporates early stopping; if the validation loss doesn't improve for five consecutive epochs, the training halts prematurely. During each epoch, the model's weights are updated using the training data. Subsequently, the model's performance is evaluated on the validation data, and the average validation loss is computed. The progress of the training, including the current epoch and corresponding validation loss, is preinted to the console.

In [None]:
# Train the model
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3, verbose=True)

epochs = 100
early_stop_count = 0
min_val_loss = float('inf')

for epoch in range(epochs):
  model.train()
  for batch in train_loader:
    x_batch, y_batch = batch
    x_batch = x_batch.to(device)
    y_batch = y_batch.to(device)

    optimizer.zero_grad()
    outputs = model(x_batch)
    loss = criterion(outputs, y_batch)
    loss.backward()
    optimizer.step()

  # Validation
  model.eval()
  val_losses = []
  with torch.no_grad():
    for batch in test_loader:
      x_batch, y_batch = batch
      x_batch = x_batch.to(device)
      y_batch = y_batch.to(device)

      outputs = model(x_batch)
      loss = criterion(outputs, y_batch)
      val_losses.append(loss.item())

  val_loss = np.mean(val_losses)
  scheduler.step(val_loss)

  if val_loss < min_val_loss:
    min_val_loss = val_loss
    early_stop_count = 0
  else:
    early_stop_count += 1

  if early_stop_count >= 5:
    print(f"Early stopping at epoch {epoch}")
    break

  print(f"Epoch {epoch+1}/{epochs}, Validation Loss: {val_loss:.4f}")



Epoch 1/100, Validation Loss: 0.0345
Epoch 2/100, Validation Loss: 0.0339
Epoch 3/100, Validation Loss: 0.0414
Epoch 4/100, Validation Loss: 0.0373
Epoch 5/100, Validation Loss: 0.0358
Epoch 6/100, Validation Loss: 0.0334
Epoch 7/100, Validation Loss: 0.0367
Epoch 8/100, Validation Loss: 0.0347
Epoch 9/100, Validation Loss: 0.0338
Epoch 10/100, Validation Loss: 0.0340
Early stopping at epoch 10


Finally, we evaluate the PyTorch neural network model on test data. It initiates by setting the model to evaluation mode using <b>model.eval</b>, which ensures that specific layers like dropout are fixed during inference. the predictions list is initialized to store the model's predictions on the test data. The <b>torch.no_grad</b> context is used to disable gradient calculations, optimizing memory usage and speed during evaluation. Inside this context, the code iterates over the <b>test_loader</b> to fetch batches of test data.
<br><br>
Each data batch is then transferred to the computing device, and the model is subsequently used to generate predictions on this batch. These predictions are added to the predictions list. After prodessing all test batches, the code calculates the Root Mean Square Error (RMSE) between the predicted values and the actual targets (<b>y_test</b>). Notably, the predictions and targets are inverse-transformed using scaler to revert the normalization and compute the RMSE in the original data scale. The computed RMSE, a measure of prediction error, is then printed to the console.

In [None]:
# Evaluation
model.eval()
predictions = []
with torch.no_grad():
  for batch in test_loader:
    x_batch, y_batch = batch
    x_batch = x_batch.to(device)
    y_batch = y_batch.to(device)

    outputs = model(x_batch)
    predictions.extend(outputs.squeeze().tolist())

# Calculate RMSE
rmse = np.sqrt(np.mean((scaler.inverse_transform(np.array(predictions).reshape(-1, 1)) \
                        - scaler.inverse_transform(y_test.numpy().reshape(-1, 1)))**2))
print(f"Score (RMSE): {rmse:.4f}")

Score (RMSE): 14.5082


In [None]:
np.array(predictions).reshape(-1, 1)

array([[ 0.1050934 ],
       [ 0.52086931],
       [ 1.34994817],
       ...,
       [-0.83832437],
       [-0.86051649],
       [-0.9202171 ]])

In [None]:
y_test.numpy().reshape(-1, 1)

array([[ 0.5829333 ],
       [ 1.406658  ],
       [ 1.7614933 ],
       ...,
       [-0.8870985 ],
       [-0.950462  ],
       [-0.96313465]], dtype=float32)

## 1期のみ予測

In [None]:
batch = next(iter(test_loader))
x_batch, y_batch = batch

In [None]:
x_batch.shape

torch.Size([32, 10, 1])

In [None]:
x_batch[0,:]
# 1バッチにつき32個のデータセットが渡される。
# 以下は、最初のバッチの最初のxのデータセット(ラグ1~ラグ10までの10個がシーケンスとしてモデルに渡される)
# この１０個をシーケンスとして渡した最終出力値が、このxにおける予測値

tensor([[-0.2028],
        [-0.1521],
        [-0.0887],
        [ 0.1014],
        [ 0.2661],
        [ 0.4942],
        [ 0.4942],
        [ 0.3041],
        [ 0.3168],
        [ 0.1140]])

In [None]:
y_batch.shape

torch.Size([32, 1])

In [None]:
# 予測テスト
test_x = x_batch[0,:]
print(test_x.shape)
tst_x = test_x.unsqueeze(0) # batch_sizeの次元も入れるので、次元を一つ増やす
print(tst_x.shape)
tst_x = tst_x.to(device)
with torch.no_grad():
  tst_y = model(tst_x)
print(tst_y)
tst_y = tst_y.squeeze().item()
print(tst_y)

torch.Size([10, 1])
torch.Size([1, 10, 1])
tensor([[0.1051]])
0.10509340465068817
