### LSTM model pipeline for time series forecasting using TensorFlow/Keras

> pip install tensorflow numpy pandas matplotlib scikit-learn

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt

In [None]:
# Helper function for evaluation
def evaluate_model(y_true, y_pred):
    """Report the error of a set of predictions.

    Computes the mean absolute error (MAE) and the root mean squared
    error (RMSE) between ``y_true`` and ``y_pred``, prints both rounded
    to three decimals, and returns them.

    Args:
        y_true: Ground-truth values.
        y_pred: Predicted values, aligned with ``y_true``.

    Returns:
        A ``(mae, rmse)`` tuple.
    """
    # RMSE is the square root of scikit-learn's mean squared error.
    squared_error = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(squared_error)
    mae = mean_absolute_error(y_true, y_pred)

    print(f"MAE: {mae:.3f}, RMSE: {rmse:.3f}")

    metrics = (mae, rmse)
    return metrics

In [None]:
# Function to prepare data for LSTM
def prepare_data(data, n_steps):
    """Slice a sequence into sliding windows and their next-step targets.

    For every start position ``s`` in ``range(len(data) - n_steps)`` the
    window is ``data[s:s + n_steps]`` and its target is the element that
    immediately follows it, ``data[s + n_steps]``.

    Args:
        data: Indexable, sliceable sequence of observations.
        n_steps: Number of consecutive observations per input window.

    Returns:
        A ``(X, y)`` tuple of NumPy arrays holding the windows and the
        matching targets. Both are empty when ``len(data) <= n_steps``.
    """
    window_starts = range(len(data) - n_steps)
    windows = [data[start:start + n_steps] for start in window_starts]
    targets = [data[start + n_steps] for start in window_starts]
    return np.array(windows), np.array(targets)

In [None]:
# Main LSTM pipeline
def lstm_forecast(data, n_steps=30, n_epochs=50, batch_size=16, test_size=15):
    """Train a stacked LSTM on a univariate series, evaluate it and forecast one step.

    The series is cut into sliding windows of ``n_steps`` observations, the
    last ``test_size`` windows are held out for testing, and a two-layer
    LSTM is trained on the rest. The hold-out error is printed (MAE/RMSE),
    actual vs. predicted values are plotted, and a one-step-ahead forecast
    based on the final ``n_steps`` observations is printed.

    Args:
        data: One-dimensional numeric series (e.g. a ``pandas.Series``) in
            chronological order.
        n_steps: Number of past observations fed to the model per sample.
        n_epochs: Number of training epochs.
        batch_size: Mini-batch size used during training.
        test_size: Number of trailing windows held out for evaluation.

    Returns:
        The inverse-scaled predictions for the held-out windows.

    Raises:
        ValueError: If ``n_steps`` or ``test_size`` is smaller than 1, or if
            the series is too short to leave at least one training window.
    """
    if n_steps < 1 or test_size < 1:
        raise ValueError("n_steps and test_size must both be at least 1")

    values = np.asarray(data, dtype=float).reshape(-1, 1)
    n_windows = len(values) - n_steps
    if n_windows <= test_size:
        raise ValueError(
            f"Need more than {n_steps + test_size} observations for "
            f"n_steps={n_steps} and test_size={test_size}; got {len(values)}"
        )

    # Fit the scaler on the training portion only. The last `test_size`
    # observations are exactly the hold-out targets; letting them influence
    # the min/max would leak test information into training.
    scaler = MinMaxScaler()
    scaler.fit(values[:-test_size])
    data_scaled = scaler.transform(values)

    # Prepare training data
    X, y = prepare_data(data_scaled, n_steps)
    X_train, y_train = X[:-test_size], y[:-test_size]
    X_test, y_test = X[-test_size:], y[-test_size:]

    # Reshape data for LSTM (samples, timesteps, features)
    X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
    X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

    # Build the LSTM model: the first layer returns the full sequence so the
    # second LSTM layer receives one input per timestep.
    # NOTE(review): activation='relu' overrides the Keras LSTM default; kept
    # as-is to preserve the original architecture - confirm it is intended.
    model = Sequential()
    model.add(LSTM(50, activation='relu', return_sequences=True, input_shape=(n_steps, 1)))
    model.add(Dropout(0.2))
    model.add(LSTM(50, activation='relu', return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(1))

    # Compile the model
    model.compile(optimizer='adam', loss='mse')

    # Fit the model, holding out 10% of the training samples for validation
    model.fit(X_train, y_train, epochs=n_epochs, batch_size=batch_size, validation_split=0.1, verbose=1)

    # Predict the hold-out windows and map everything back to original units
    predictions = model.predict(X_test)
    predictions = scaler.inverse_transform(predictions)  # Inverse scale
    y_test = scaler.inverse_transform(y_test.reshape(-1, 1))  # Inverse scale

    # Evaluate the model
    evaluate_model(y_test, predictions)

    # Plot results
    plt.figure(figsize=(10, 6))
    plt.plot(y_test, label="Actual", color="blue")
    plt.plot(predictions, label="Predicted", color="orange")
    plt.title("LSTM Forecast vs Actual")
    plt.legend()
    plt.show()

    # Forecast the next step from the most recent n_steps observations
    future_input = data_scaled[-n_steps:]
    future_input = future_input.reshape((1, n_steps, 1))
    future_forecast = model.predict(future_input)
    future_forecast = scaler.inverse_transform(future_forecast)  # Inverse scale

    print(f"Future forecast (next step): {future_forecast.flatten()[0]:.3f}")
    return predictions

In [None]:
# Main function
def main():
    """Load the CPU utilisation sample data and run the LSTM pipeline on it.

    Reads ``cpu_utilization_sample_data.tsv`` (tab-separated), indexes the
    rows by their parsed ``date`` column, keeps only the ``cpu_used`` values
    of the ``test-1`` namespace and hands that series to ``lstm_forecast``.
    """
    # Read the raw table and turn the date strings into timestamps
    frame = pd.read_csv("cpu_utilization_sample_data.tsv", sep="\t")
    frame["date"] = pd.to_datetime(frame["date"])
    indexed = frame.set_index("date")

    # Keep a single namespace so the model sees one univariate series
    in_namespace = indexed["namespace"] == "test-1"
    cpu_series = indexed.loc[in_namespace, "cpu_used"]

    # Train, evaluate and forecast
    lstm_forecast(cpu_series)

In [None]:
# Run the pipeline only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()

### Explanation of the Code
1. Data Preparation:

    * Scaling: The data is rescaled with MinMaxScaler (default range [0, 1]) so that the LSTM trains on small, comparably sized inputs.
    * Sequence Creation: The prepare_data function creates sequences of size n_steps for LSTM training.

2. Model Architecture:

    * The LSTM model has:
        * Two LSTM layers of 50 units each; the first uses return_sequences=True so that it passes its full output sequence to the second.
        * Two Dropout layers to prevent overfitting.
        * A Dense layer to output the next time step value.

3. Training:

    * The model is trained with a specified number of epochs and batch size.
    * During training, 10% of the training samples are held out (validation_split=0.1) to monitor the validation loss.

4. Evaluation:

    * Predictions are evaluated using MAE and RMSE.
    * A plot compares actual and predicted values.

5. Forecasting:

    * The model predicts the next time step based on the last n_steps of data.

### Key Notes
* Parameter Tuning: You can adjust n_steps, n_epochs, and batch_size to optimize model performance for your dataset.

* Future Forecasting: To forecast several steps ahead, append each prediction to the input window (dropping the oldest value) and call the model again, repeating once per step.

* Scaling: The model is trained and predicts on scaled values, so predictions must be passed through scaler.inverse_transform before they are compared with the original data.