In [1]:
pip install bayesian-optimization

Collecting bayesian-optimization
  Downloading bayesian_optimization-1.4.3-py3-none-any.whl (18 kB)
Collecting colorama>=0.4.6 (from bayesian-optimization)
  Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)
Installing collected packages: colorama, bayesian-optimization
Successfully installed bayesian-optimization-1.4.3 colorama-0.4.6
Note: you may need to restart the kernel to use updated packages.


In [2]:
import requests
import pandas as pd
def get_fred_series_observations(series_id, api_key):
    # Endpoint for series observations
    base_url = "https://api.stlouisfed.org/fred/series/observations"
    params = {
        "series_id": series_id,
        "api_key": api_key,
        "file_type": "json"
    }
    response = requests.get(base_url, params=params)
    return response.json()

api_key = 'ef8c3af7f7bebd62ffff5b460d66375a'
series_id = 'CPIAUCSL'

# Fetch the data points for the series
data = get_fred_series_observations(series_id, api_key)

# Check if observations are in the response and create a DataFrame
if 'observations' in data:
    df = pd.DataFrame(data['observations'])
    df = df[['date', 'value']]  # Select only the 'date' and 'value' columns
# Convert 'value' column to float64
df['value'] = df['value'].astype('float64')
df['value'] = (df['value'].pct_change(periods=12) * 100).round(2)  # Calculate percent change
df = df.dropna(subset=['value'])
# Set the 'year' column as the index
df.set_index('date', inplace=True)

In [3]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import LSTM, Dense
# Normalize the data
scaler = MinMaxScaler()
data_scaled = scaler.fit_transform(df)
     

2024-01-10 19:20:47.252689: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
import numpy as np
# Split data into train and test sets
train_size = int(len(data_scaled) * 0.8)
train_data, test_data = data_scaled[:train_size], data_scaled[train_size:]

# Create sequences for LSTM training
def create_sequences(df, seq_length):
    X, y = [], []
    for i in range(len(df) - seq_length):
        X.append(df[i:i+seq_length])
        y.append(df[i+seq_length])
    return np.array(X), np.array(y)

seq_length = 1  # Length of sequences for LSTM
X_train, y_train = create_sequences(train_data, seq_length)
X_test, y_test = create_sequences(test_data, seq_length)

In [5]:
from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.callbacks import EarlyStopping
from bayes_opt import BayesianOptimization
import numpy as np

# Define your data: X_train, y_train, X_test, y_test

def create_model(lstm_units, optimizer):
    model = Sequential()
    model.add(LSTM(units=int(lstm_units), input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(Dense(units=1))
    model.compile(optimizer=optimizer, loss='mse')
    return model

def fit_evaluate_model(lstm_units, epochs, batch_size, optimizer_index):
    optimizer_list = ['adam', 'rmsprop', 'sgd']
    optimizer = optimizer_list[int(optimizer_index)]

    model = create_model(lstm_units, optimizer)
    early_stopping = EarlyStopping(monitor='val_loss', patience=10)

    # Use a fraction of the data for quicker evaluation
    model.fit(X_train, y_train, epochs=int(epochs), batch_size=int(batch_size), callbacks=[early_stopping], validation_split=0.2)

    # Evaluate the model
    mse = model.evaluate(X_test, y_test)
    return -mse  # Negative MSE because Bayesian optimization maximizes the function

# Define bounds for hyperparameters
pbounds = {
    'lstm_units': (20, 100),  # Example range for LSTM units
    'epochs': (10, 100),      # Example range for epochs
    'batch_size': (10, 64),   # Example range for batch size
    'optimizer_index': (0, 2) # Index to choose optimizer
}

optimizer = BayesianOptimization(
    f=fit_evaluate_model,
    pbounds=pbounds,
    random_state=1,
)

optimizer.maximize(init_points=2, n_iter=10)

print("Best hyperparameters found: ", optimizer.max['params'])

|   iter    |  target   | batch_... |  epochs   | lstm_u... | optimi... |
-------------------------------------------------------------------------
Epoch 1/74
Epoch 2/74
Epoch 3/74
Epoch 4/74
Epoch 5/74
Epoch 6/74
Epoch 7/74
Epoch 8/74
Epoch 9/74
Epoch 10/74
Epoch 11/74
Epoch 12/74
Epoch 13/74
Epoch 14/74
Epoch 15/74
| [0m1        [0m | [0m-0.006339[0m | [0m32.52    [0m | [0m74.83    [0m | [0m20.01    [0m | [0m0.6047   [0m |
Epoch 1/18
Epoch 2/18
Epoch 3/18
Epoch 4/18
Epoch 5/18
Epoch 6/18
Epoch 7/18
Epoch 8/18
Epoch 9/18
Epoch 10/18
Epoch 11/18
Epoch 12/18
Epoch 13/18
Epoch 14/18
Epoch 15/18
Epoch 16/18
Epoch 17/18
Epoch 18/18
| [95m2        [0m | [95m-0.001278[0m | [95m17.92    [0m | [95m18.31    [0m | [95m34.9     [0m | [95m0.6911   [0m |
Epoch 1/19
Epoch 2/19
Epoch 3/19
Epoch 4/19
Epoch 5/19
Epoch 6/19
Epoch 7/19
Epoch 8/19
Epoch 9/19
Epoch 10/19
Epoch 11/19
Epoch 12/19
Epoch 13/19
Epoch 14/19
Epoch 15/19
Epoch 16/19
Epoch 17/19
Epoch 18/19
Epoch 19/19
| [9

In [7]:
# Build and train the LSTM model
# Best hyperparameters found:  {'batch_size': 10.0, 'epochs': 37.479764513462875, 'lstm_units': 40.76236351251674, 'optimizer_index': 0.0}
model = Sequential()
model.add(LSTM(units=40, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dense(units=1))
model.compile(optimizer='adam', loss='mse')
model.fit(X_train, y_train, epochs=37, batch_size=10)

# Predictions
train_predict = model.predict(X_train)
test_predict = model.predict(X_test)

# Inverse transform predictions
train_predict = scaler.inverse_transform(train_predict)
y_train = scaler.inverse_transform(y_train)
test_predict = scaler.inverse_transform(test_predict)
y_test = scaler.inverse_transform(y_test)

Epoch 1/37
Epoch 2/37
Epoch 3/37
Epoch 4/37
Epoch 5/37
Epoch 6/37
Epoch 7/37
Epoch 8/37
Epoch 9/37
Epoch 10/37
Epoch 11/37
Epoch 12/37
Epoch 13/37
Epoch 14/37
Epoch 15/37
Epoch 16/37
Epoch 17/37
Epoch 18/37
Epoch 19/37
Epoch 20/37
Epoch 21/37
Epoch 22/37
Epoch 23/37
Epoch 24/37
Epoch 25/37
Epoch 26/37
Epoch 27/37
Epoch 28/37
Epoch 29/37
Epoch 30/37
Epoch 31/37
Epoch 32/37
Epoch 33/37
Epoch 34/37
Epoch 35/37
Epoch 36/37
Epoch 37/37


In [8]:
# Create a DataFrame for plotting
plot_data = pd.DataFrame({
    'Date': np.concatenate((df.index[seq_length:seq_length+len(train_predict)],
                            df.index[seq_length+len(train_predict):seq_length+len(train_predict)+len(test_predict)])),
    'Predicted CPI': np.concatenate((train_predict.flatten(), test_predict.flatten())),
    'Actual CPI': np.concatenate((y_train.flatten(), y_test.flatten()))
})
# Ensure 'Date' is in datetime format
plot_data['Date'] = pd.to_datetime(plot_data['Date'])

# Shift the dates for the test predictions forward by one month
# This is done for the part of the DataFrame that contains test data
plot_data.loc[len(train_predict):, 'Date'] += pd.DateOffset(months=1)

In [10]:
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import plotly.express as px
import plotly.graph_objs as go

# Ensure 'Date' is in datetime format
plot_data['Date'] = pd.to_datetime(plot_data['Date'])

# Create traces for Actual and Predicted CPI
trace1 = go.Scatter(x=plot_data['Date'], y=plot_data['Actual CPI'], mode='lines', name='Actual CPI')
trace2 = go.Scatter(x=plot_data['Date'], y=plot_data['Predicted CPI'], mode='lines', name='Predicted CPI')

# Layout with range selector buttons and range slider
layout = go.Layout(
    title='CPI Forecasting with LSTM',
    xaxis=dict(
        title='Date',
        rangeselector=dict(
            buttons=list([
                dict(count=1, label='1m', step='month', stepmode='backward'),
                dict(count=6, label='6m', step='month', stepmode='backward'),
                dict(count=1, label='YTD', step='year', stepmode='todate'),
                dict(count=1, label='1y', step='year', stepmode='backward'),
                dict(step='all')
            ])
        ),
        rangeslider=dict(visible=True),
        type='date'
    ),
    yaxis=dict(
        title='CPI'
    )
)

# Create figure and add traces
fig = go.Figure(data=[trace1, trace2], layout=layout)

# Show plot
fig.show()


In [11]:
# Display the last 10 rows of the DataFrame
display_data = plot_data.tail(10)

# Optional: Round the CPI values for better readability
display_data['Predicted CPI'] = display_data['Predicted CPI'].round(2)
display_data['Actual CPI'] = display_data['Actual CPI'].round(2)

# Show the DataFrame
display_data



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,Date,Predicted CPI,Actual CPI
899,2023-02-01,6.28,5.99
900,2023-03-01,5.92,4.99
901,2023-04-01,4.93,4.96
902,2023-05-01,4.9,4.13
903,2023-06-01,4.09,3.09
904,2023-07-01,3.09,3.3
905,2023-08-01,3.29,3.71
906,2023-09-01,3.68,3.69
907,2023-10-01,3.66,3.23
908,2023-11-01,3.22,3.12


In [12]:
import pandas as pd
import numpy as np
import plotly.graph_objs as go

# Assuming df, model, and scaler are already defined

# Number of months to forecast
months_to_forecast = 5

# Start with the last available sequence
new_data = df.iloc[-seq_length:]['value'].values.reshape(-1, 1)

# Scale the initial sequence
new_data_scaled = scaler.transform(new_data)

# Initialize arrays for storing forecasts and dates
forecast_values = []
forecast_dates = []
last_known_date = pd.to_datetime(plot_data['Date'].iloc[-1])

for i in range(months_to_forecast):
    # Reshape for LSTM input
    lstm_input = new_data_scaled.reshape(1, seq_length, 1)

    # Forecast the next month
    next_month_prediction_scaled = model.predict(lstm_input)

    # Inverse transform the prediction
    next_month_prediction = scaler.inverse_transform(next_month_prediction_scaled.reshape(-1, 1))

    # Store forecasted value
    forecast_values.append(next_month_prediction[0][0])

    # Calculate the forecast date
    forecast_date = last_known_date + pd.DateOffset(months=i + 1)
    forecast_dates.append(forecast_date)

    # Append the prediction for the next iteration
    new_data_scaled = np.append(new_data_scaled, next_month_prediction_scaled)[-seq_length:]

# Creating a DataFrame for forecasted data
forecast_df = pd.DataFrame({
    'Date': forecast_dates,
    'Forecasted CPI': forecast_values
})

# Plotting the data
fig = go.Figure()

# Add actual data trace
fig.add_trace(go.Scatter(x=plot_data['Date'], y=df['value'], mode='lines', name='Actual CPI'))

# Add forecasted data trace
fig.add_trace(go.Scatter(x=forecast_df['Date'], y=forecast_df['Forecasted CPI'], mode='lines', name='Forecasted CPI'))

# Update layout
fig.update_layout(
    title='CPI Forecasting with LSTM',
    xaxis_title='Date',
    yaxis_title='CPI',
    xaxis_rangeslider_visible=True
)

# Show the figure
fig.show()




X does not have valid feature names, but MinMaxScaler was fitted with feature names





In [13]:
# Display the forecasted values along with dates in a table format
print(forecast_df)

        Date  Forecasted CPI
0 2023-12-01        3.117328
1 2024-01-01        3.114771
2 2024-02-01        3.112324
3 2024-03-01        3.109984
4 2024-04-01        3.107744


In [14]:
import numpy as np
import pandas as pd
import plotly.graph_objs as go

# Assuming df, model, and scaler are already defined

# Number of months to forecast
months_to_forecast = 5

# Initialize arrays for storing forecasts, dates, and bounds
forecast_values = []
forecast_dates = []
lower_bounds = []  # Initialize the lower bounds list
upper_bounds = []  # Initialize the upper bounds list

last_known_date = pd.to_datetime(plot_data['Date'].iloc[-1])

# Calculate the residuals on the training data
residuals = y_train.flatten() - model.predict(X_train).flatten()

# Calculate the standard deviation of the residuals
std_residuals = np.std(residuals)

# Assuming a normal distribution, calculate the 95% confidence interval
z_score = 1.28  # Corresponds to 95% confidence

# Start with the last available sequence
new_data = df.iloc[-seq_length:]['value'].values.reshape(-1, 1)

# Scale the initial sequence
new_data_scaled = scaler.transform(new_data)

for i in range(months_to_forecast):
    # Reshape for LSTM input
    lstm_input = new_data_scaled.reshape(1, seq_length, 1)

    # Forecast the next month
    next_month_prediction_scaled = model.predict(lstm_input)

    # Inverse transform the prediction
    next_month_prediction = scaler.inverse_transform(next_month_prediction_scaled.reshape(-1, 1))

    # Store forecasted value
    forecast_value = next_month_prediction[0][0]
    forecast_values.append(forecast_value)

    # Calculate the forecast date
    forecast_date = last_known_date + pd.DateOffset(months=i + 1)
    forecast_dates.append(forecast_date)

    # Append the prediction for the next iteration
    new_data_scaled = np.append(new_data_scaled, next_month_prediction_scaled)[-seq_length:]

    # Calculate confidence intervals
    lower_bound = forecast_value - z_score * std_residuals
    upper_bound = forecast_value + z_score * std_residuals
    lower_bounds.append(lower_bound)
    upper_bounds.append(upper_bound)

# Creating a DataFrame for forecasted data
forecast_df = pd.DataFrame({
    'Date': forecast_dates,
    'Forecasted CPI': forecast_values,
    'Lower Bound CI': lower_bounds,
    'Upper Bound CI': upper_bounds
})

# Plotting the data
fig = go.Figure()

# Add actual data trace
fig.add_trace(go.Scatter(x=plot_data['Date'], y=plot_data['Actual CPI'], mode='lines', name='Actual CPI'))

# Add forecasted data trace
fig.add_trace(go.Scatter(x=forecast_df['Date'], y=forecast_df['Forecasted CPI'], mode='lines', name='Forecasted CPI'))

# Add confidence interval traces
fig.add_trace(go.Scatter(x=forecast_df['Date'], y=forecast_df['Lower Bound CI'], mode='lines', name='Lower Bound CI', line=dict(color='rgba(173, 216, 230, 0.4)')))
fig.add_trace(go.Scatter(x=forecast_df['Date'], y=forecast_df['Upper Bound CI'], mode='lines', name='Upper Bound CI', line=dict(color='rgba(173, 216, 230, 0.4)'), fill='tonexty'))

# Update layout
fig.update_layout(
    title='CPI Forecasting with LSTM',
    xaxis_title='Date',
    yaxis_title='CPI',
    xaxis_rangeslider_visible=True
)

# Show the figure
fig.show()





X does not have valid feature names, but MinMaxScaler was fitted with feature names





In [15]:
# Check the end of the actual data
print(plot_data.tail())

# Check the beginning of the forecasted data
print(forecast_df.head())

          Date  Predicted CPI  Actual CPI
904 2023-07-01       3.088629        3.30
905 2023-08-01       3.289778        3.71
906 2023-09-01       3.684228        3.69
907 2023-10-01       3.664934        3.23
908 2023-11-01       3.222660        3.12
        Date  Forecasted CPI  Lower Bound CI  Upper Bound CI
0 2023-12-01        3.117328       -0.502122        6.736778
1 2024-01-01        3.114771       -0.504679        6.734221
2 2024-02-01        3.112324       -0.507126        6.731774
3 2024-03-01        3.109984       -0.509466        6.729434
4 2024-04-01        3.107744       -0.511706        6.727194


In [16]:
import plotly.graph_objs as go

# Assuming plot_data and forecast_df are already defined and contain the necessary data

# Combine actual and forecasted data into a single DataFrame for plotting
combined_df = pd.concat([plot_data[['Date', 'Predicted CPI']], forecast_df])

# Plotting the data
fig = go.Figure()

# Trace for the actual and previously predicted CPI
fig.add_trace(go.Scatter(
    x=combined_df['Date'],
    y=combined_df['Predicted CPI'],
    mode='lines+markers',  # Change to 'lines+markers' if you want to highlight individual points
    name='Actual & Predicted CPI'
))

# Traces for the forecasted CPI with confidence intervals
fig.add_trace(go.Scatter(
    x=forecast_df['Date'],
    y=forecast_df['Forecasted CPI'],
    mode='lines+markers',
    name='Forecasted CPI'
))
fig.add_trace(go.Scatter(
    x=forecast_df['Date'],
    y=forecast_df['Lower Bound CI'],
    mode='lines',
    line=dict(width=0),
    showlegend=False
))
fig.add_trace(go.Scatter(
    x=forecast_df['Date'],
    y=forecast_df['Upper Bound CI'],
    mode='lines',
    fill='tonexty',
    fillcolor='rgba(173, 216, 230, 0.5)',  # Adjust opacity for visibility
    line=dict(width=0),
    showlegend=False
))

# Update layout
fig.update_layout(
    title='CPI Forecasting with LSTM',
    xaxis_title='Date',
    yaxis_title='CPI',
    xaxis_rangeslider_visible=True
)

# Show the figure
fig.show()