In [1]:
!pip install bayesian-optimization



Collecting bayesian-optimization
  Downloading bayesian_optimization-1.4.3-py3-none-any.whl (18 kB)
Collecting colorama>=0.4.6 (from bayesian-optimization)
  Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)
Installing collected packages: colorama, bayesian-optimization
Successfully installed bayesian-optimization-1.4.3 colorama-0.4.6


In [2]:
import requests
import pandas as pd
def get_fred_series_observations(series_id, api_key):
    # Endpoint for series observations
    base_url = "https://api.stlouisfed.org/fred/series/observations"
    params = {
        "series_id": series_id,
        "api_key": api_key,
        "file_type": "json"
    }
    response = requests.get(base_url, params=params)
    return response.json()

api_key = 'ef8c3af7f7bebd62ffff5b460d66375a'
series_id = 'CPIAUCSL'

# Fetch the data points for the series
data = get_fred_series_observations(series_id, api_key)

# Check if observations are in the response and create a DataFrame
if 'observations' in data:
    df = pd.DataFrame(data['observations'])
    df = df[['date', 'value']]  # Select only the 'date' and 'value' columns
# Convert 'value' column to float64
df['value'] = df['value'].astype('float64')
df['value'] = (df['value'].pct_change(periods=12) * 100).round(2)  # Calculate percent change
df = df.dropna(subset=['value'])
# Set the 'year' column as the index
df.set_index('date', inplace=True)
df

Unnamed: 0_level_0,value
date,Unnamed: 1_level_1
1948-01-01,10.24
1948-02-01,9.48
1948-03-01,6.82
1948-04-01,8.27
1948-05-01,9.38
...,...
2023-07-01,3.30
2023-08-01,3.71
2023-09-01,3.69
2023-10-01,3.23


In [3]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import LSTM, Dense
# Normalize the data
scaler = MinMaxScaler()
data_scaled = scaler.fit_transform(df)

In [4]:
import numpy as np
# Split data into train and test sets
train_size = int(len(data_scaled) * 0.8)
train_data, test_data = data_scaled[:train_size], data_scaled[train_size:]

# Create sequences for LSTM training
def create_sequences(df, seq_length):
    X, y = [], []
    for i in range(len(df) - seq_length):
        X.append(df[i:i+seq_length])
        y.append(df[i+seq_length])
    return np.array(X), np.array(y)

seq_length = 1  # Length of sequences for LSTM
X_train, y_train = create_sequences(train_data, seq_length)
X_test, y_test = create_sequences(test_data, seq_length)

In [5]:
from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.callbacks import EarlyStopping
from bayes_opt import BayesianOptimization
import numpy as np

# Define your data: X_train, y_train, X_test, y_test

def create_model(lstm_units, optimizer):
    model = Sequential()
    model.add(LSTM(units=int(lstm_units), input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(Dense(units=1))
    model.compile(optimizer=optimizer, loss='mse')
    return model

def fit_evaluate_model(lstm_units, epochs, batch_size, optimizer_index):
    optimizer_list = ['adam', 'rmsprop', 'sgd']
    optimizer = optimizer_list[int(optimizer_index)]

    model = create_model(lstm_units, optimizer)
    early_stopping = EarlyStopping(monitor='val_loss', patience=10)

    # Use a fraction of the data for quicker evaluation
    model.fit(X_train, y_train, epochs=int(epochs), batch_size=int(batch_size), callbacks=[early_stopping], validation_split=0.2)

    # Evaluate the model
    mse = model.evaluate(X_test, y_test)
    return -mse  # Negative MSE because Bayesian optimization maximizes the function

# Define bounds for hyperparameters
pbounds = {
    'lstm_units': (20, 100),  # Example range for LSTM units
    'epochs': (10, 100),      # Example range for epochs
    'batch_size': (10, 64),   # Example range for batch size
    'optimizer_index': (0, 2) # Index to choose optimizer
}

optimizer = BayesianOptimization(
    f=fit_evaluate_model,
    pbounds=pbounds,
    random_state=1,
)

optimizer.maximize(init_points=2, n_iter=10)

print("Best hyperparameters found: ", optimizer.max['params'])


|   iter    |  target   | batch_... |  epochs   | lstm_u... | optimi... |
-------------------------------------------------------------------------
Epoch 1/74
Epoch 2/74
Epoch 3/74
Epoch 4/74
Epoch 5/74
Epoch 6/74
Epoch 7/74
Epoch 8/74
Epoch 9/74
Epoch 10/74
Epoch 11/74
Epoch 12/74
Epoch 13/74
Epoch 14/74
Epoch 15/74
Epoch 16/74
Epoch 17/74
| [0m1        [0m | [0m-0.01131 [0m | [0m32.52    [0m | [0m74.83    [0m | [0m20.01    [0m | [0m0.6047   [0m |
Epoch 1/18
Epoch 2/18
Epoch 3/18
Epoch 4/18
Epoch 5/18
Epoch 6/18
Epoch 7/18
Epoch 8/18
Epoch 9/18
Epoch 10/18
Epoch 11/18
Epoch 12/18
Epoch 13/18
Epoch 14/18
Epoch 15/18
Epoch 16/18
Epoch 17/18
Epoch 18/18
| [95m2        [0m | [95m-0.000970[0m | [95m17.92    [0m | [95m18.31    [0m | [95m34.9     [0m | [95m0.6911   [0m |
Epoch 1/19
Epoch 2/19
Epoch 3/19
Epoch 4/19
Epoch 5/19
Epoch 6/19
Epoch 7/19
Epoch 8/19
Epoch 9/19
Epoch 10/19
Epoch 11/19
Epoch 12/19
Epoch 13/19
Epoch 14/19
Epoch 15/19
Epoch 16/19
Epoch 17/19
Epoch

In [None]:
def fit_evaluate_model(lstm_units, epochs, batch_size, optimizer_index):
    print("Starting model training with parameters:", lstm_units, epochs, batch_size, optimizer_index)

    # Define and compile your LSTM model
    model = Sequential()
    model.add(LSTM(units=lstm_units, input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(Dense(units=1))

    # Select optimizer
    optimizer_list = ['adam', 'rmsprop', 'sgd']
    optimizer = optimizer_list[optimizer_index]
    model.compile(optimizer=optimizer, loss='mse')

    # Early stopping
    early_stopping = EarlyStopping(monitor='val_loss', patience=10)

    # Train the model
    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, callbacks=[early_stopping], validation_split=0.2)

    # Evaluate the model to get the mean squared error (mse)
    mse = model.evaluate(X_test, y_test)
    print(f"Current MSE: {mse}")  # Print the MSE for this model configuration

    return -mse  # Return negative MSE for the optimizer

fit_evaluate_model(64, 36, 10, 0)

In [11]:
# Build and train the LSTM model
# Best hyperparameters found:  {'batch_size': 10.0, 'epochs': 36.26537885024413, 'lstm_units': 64.58503730855179}
model = Sequential()
model.add(LSTM(units=64, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dense(units=1))
model.compile(optimizer='adam', loss='mse')
model.fit(X_train, y_train, epochs=36, batch_size=10)

# Predictions
train_predict = model.predict(X_train)
test_predict = model.predict(X_test)

# Inverse transform predictions
train_predict = scaler.inverse_transform(train_predict)
y_train = scaler.inverse_transform(y_train)
test_predict = scaler.inverse_transform(test_predict)
y_test = scaler.inverse_transform(y_test)

Epoch 1/36
Epoch 2/36
Epoch 3/36
Epoch 4/36
Epoch 5/36
Epoch 6/36
Epoch 7/36
Epoch 8/36
Epoch 9/36
Epoch 10/36
Epoch 11/36
Epoch 12/36
Epoch 13/36
Epoch 14/36
Epoch 15/36
Epoch 16/36
Epoch 17/36
Epoch 18/36
Epoch 19/36
Epoch 20/36
Epoch 21/36
Epoch 22/36
Epoch 23/36
Epoch 24/36
Epoch 25/36
Epoch 26/36
Epoch 27/36
Epoch 28/36
Epoch 29/36
Epoch 30/36
Epoch 31/36
Epoch 32/36
Epoch 33/36
Epoch 34/36
Epoch 35/36
Epoch 36/36


In [13]:
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import plotly.express as px
# Create a DataFrame for plotting
plot_data = pd.DataFrame({
    'Date': np.concatenate((df.index[seq_length:seq_length+len(train_predict)],
                            df.index[seq_length+len(train_predict):seq_length+len(train_predict)+len(test_predict)])),
    'Predicted CPI': np.concatenate((train_predict.flatten(), test_predict.flatten())),
    'Actual CPI': np.concatenate((y_train.flatten(), y_test.flatten()))
})

# Create an interactive line plot using Plotly Express
fig = px.line(plot_data, x='Date', y=['Predicted CPI', 'Actual CPI'], title='CPI Forecasting with LSTM')
fig.update_layout(xaxis_title='Date', yaxis_title='CPI', legend_title='Data')
fig.show()

In [14]:
# For the training data
train_years = df.index[seq_length:len(y_train) + seq_length]
# For the testing data
test_years = df.index[len(y_train) + seq_length:len(y_train) + seq_length + len(y_test)]

# Combine years
combined_years = np.concatenate((train_years, test_years))

# Creating a DataFrame for the actual and predicted values
results = pd.DataFrame({
    'Year': combined_years,
    'Actual': np.concatenate((y_train.flatten(), y_test.flatten())),
    'Predicted': np.concatenate((train_predict.flatten(), test_predict.flatten()))
})

# Display the DataFrame
print(results.tail(13))  # Shows the first few rows

           Year  Actual  Predicted
896  2022-10-01    7.14   7.689881
897  2022-11-01    6.44   7.061095
898  2022-12-01    6.35   6.357295
899  2023-01-01    5.99   6.267285
900  2023-02-01    4.99   5.908371
901  2023-03-01    4.96   4.920951
902  2023-04-01    4.13   4.891550
903  2023-05-01    3.09   4.083366
904  2023-06-01    3.30   3.085349
905  2023-07-01    3.71   3.285532
906  2023-08-01    3.69   3.678330
907  2023-09-01    3.23   3.659109
908  2023-10-01    3.12   3.218727


In [21]:
# Assuming you have already trained your model and have the scaler
# Forecasted value for the new month

# Prepare new data (here, we are using the last available data point)
new_data = df.iloc[-seq_length:]['value'].values.reshape(-1, 1)

# Scale the new data
new_data_scaled = scaler.transform(new_data)

# Reshape for LSTM input
new_data_scaled = new_data_scaled.reshape(1, seq_length, 1)

# Forecast
new_month_prediction_scaled = model.predict(new_data_scaled)

# Inverse transform the prediction
new_month_prediction = scaler.inverse_transform(new_month_prediction_scaled)

print(f"Forecasted value for the new month: {new_month_prediction[0][0]}")





Forecasted value for the new month: 3.1139039993286133



X does not have valid feature names, but MinMaxScaler was fitted with feature names



In [22]:
# Assuming your model and scaler are already defined and trained
# Forecasted value for the new 3 months:

# Number of months to forecast
months_to_forecast = 3

# Start with the last available sequence
new_data = df.iloc[-seq_length:]['value'].values.reshape(-1, 1)

# Scale the initial sequence
new_data_scaled = scaler.transform(new_data)

# Iteratively forecast the next months
for _ in range(months_to_forecast):
    # Reshape for LSTM input
    lstm_input = new_data_scaled.reshape(1, seq_length, 1)

    # Forecast the next month
    next_month_prediction_scaled = model.predict(lstm_input)

    # Append the prediction for the next iteration
    new_data_scaled = np.append(new_data_scaled, next_month_prediction_scaled)[-seq_length:]

    # Inverse transform the prediction
    next_month_prediction = scaler.inverse_transform(next_month_prediction_scaled.reshape(-1, 1))

    print(f"Forecasted value for the next month: {next_month_prediction[0][0]}")


Forecasted value for the next month: 3.1139039993286133



X does not have valid feature names, but MinMaxScaler was fitted with feature names



Forecasted value for the next month: 3.108100414276123
Forecasted value for the next month: 3.1025750637054443


In [23]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import plotly.express as px

# Assuming 'model' and 'scaler' are your trained LSTM model and MinMaxScaler, respectively

# Function to forecast the next month
def forecast_next_month(model, scaler, last_data_scaled, seq_length):
    lstm_input = last_data_scaled.reshape(1, seq_length, 1)
    next_month_prediction_scaled = model.predict(lstm_input)
    next_month_prediction = scaler.inverse_transform(next_month_prediction_scaled.reshape(-1, 1))
    return next_month_prediction[0][0]

# Start with the last available sequence for forecasting
last_data = df.iloc[-seq_length:]['value'].values.reshape(-1, 1)
last_data_scaled = scaler.transform(last_data)

# Forecast the next three months
forecasted_values = []
for _ in range(3):
    next_month_prediction = forecast_next_month(model, scaler, last_data_scaled, seq_length)
    forecasted_values.append(next_month_prediction)
    # Update last_data_scaled for the next iteration
    next_month_scaled = scaler.transform(np.array([[next_month_prediction]]))
    last_data_scaled = np.append(last_data_scaled, next_month_scaled)[-seq_length:]

# Get the last date from your DataFrame and generate new dates for the forecasted months
last_date = pd.to_datetime(df.index[-1])
new_dates = [last_date + pd.DateOffset(months=i) for i in range(1, 4)]

# Append the forecasted values and new dates to the plot_data DataFrame
for date, value in zip(new_dates, forecasted_values):
    plot_data = plot_data.append({'Date': date, 'Predicted CPI': value, 'Actual CPI': np.nan}, ignore_index=True)

# Plot the extended data
fig = px.line(plot_data, x='Date', y=['Predicted CPI', 'Actual CPI'], title='Extended CPI Forecast with LSTM')
fig.update_layout(xaxis_title='Date', yaxis_title='CPI', legend_title='Data')
fig.show()


X does not have valid feature names, but MinMaxScaler was fitted with feature names






X does not have valid feature names, but MinMaxScaler was fitted with feature names


X does not have valid feature names, but MinMaxScaler was fitted with feature names






X does not have valid feature names, but MinMaxScaler was fitted with feature names


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.

