In [2]:
import requests
import pandas as pd
def get_fred_series_observations(series_id, api_key):
    # Endpoint for series observations
    base_url = "https://api.stlouisfed.org/fred/series/observations"
    params = {
        "series_id": series_id,
        "api_key": api_key,
        "file_type": "json"
    }
    response = requests.get(base_url, params=params)
    return response.json()

api_key = 'ef8c3af7f7bebd62ffff5b460d66375a'
series_id = 'CPIAUCSL'

# Fetch the data points for the series
data = get_fred_series_observations(series_id, api_key)

# Check if observations are in the response and create a DataFrame
if 'observations' in data:
    df = pd.DataFrame(data['observations'])
    df = df[['date', 'value']]  # Select only the 'date' and 'value' columns
# Convert 'value' column to float64
df['value'] = df['value'].astype('float64')
df['value'] = (df['value'].pct_change(periods=12) * 100).round(2)  # Calculate percent change
df = df.dropna(subset=['value'])
# Set the 'year' column as the index
df.set_index('date', inplace=True)

In [22]:
df.tail()

Unnamed: 0_level_0,value
date,Unnamed: 1_level_1
2023-12-01,3.32
2024-01-01,3.11
2024-02-01,3.17
2024-03-01,3.48
2024-04-01,3.36


In [3]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import LSTM, Dense
# Normalize the data
scaler = MinMaxScaler()
data_scaled = scaler.fit_transform(df)

In [4]:
import numpy as np
# Split data into train and test sets
train_size = int(len(data_scaled) * 0.8)
train_data, test_data = data_scaled[:train_size], data_scaled[train_size:]

# Create sequences for LSTM training
def create_sequences(df, seq_length):
    X, y = [], []
    for i in range(len(df) - seq_length):
        X.append(df[i:i+seq_length])
        y.append(df[i+seq_length])
    return np.array(X), np.array(y)

seq_length = 1  # Length of sequences for LSTM
X_train, y_train = create_sequences(train_data, seq_length)
X_test, y_test = create_sequences(test_data, seq_length)

In [5]:
import tensorflow as tf
import keras

print("TensorFlow version:", tf.__version__)
print("Keras version:", keras.__version__)

TensorFlow version: 2.15.0
Keras version: 2.15.0


In [6]:
from sklearn.model_selection import ParameterGrid, KFold
from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.callbacks import EarlyStopping
import numpy as np

# Define your data: X_train, y_train, X_test, y_test

# Define a parameter grid
param_grid = {
    'lstm_units': [20, 50, 100],
    'epochs': [50, 100, 150],
    'batch_size': [16, 32, 64]
}

# Initialize K-Fold Cross-Validation
k = 5  # Number of folds
kf = KFold(n_splits=k)

# Initialize early stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=10)

best_mse = float('inf')
best_params = {}

# Iterate over all combinations of parameters
for params in ParameterGrid(param_grid):
    fold_mse = []
    for train_index, val_index in kf.split(X_train):
        # Split data into training and validation sets for this fold
        X_train_fold, X_val_fold = X_train[train_index], X_train[val_index]
        y_train_fold, y_val_fold = y_train[train_index], y_train[val_index]

        # Build the LSTM model
        model = Sequential()
        model.add(LSTM(units=params['lstm_units'], input_shape=(X_train_fold.shape[1], X_train_fold.shape[2])))
        model.add(Dense(units=1))
        model.compile(optimizer='adam', loss='mse')

        # Train the model with early stopping
        model.fit(X_train_fold, y_train_fold, epochs=params['epochs'], batch_size=params['batch_size'], callbacks=[early_stopping], validation_data=(X_val_fold, y_val_fold))

        # Evaluate the model on the validation set
        mse = model.evaluate(X_val_fold, y_val_fold)
        fold_mse.append(mse)

    # Calculate the average MSE over all folds
    avg_mse = np.mean(fold_mse)
    if avg_mse < best_mse:
        best_mse = avg_mse
        best_params = params

# Print the best Mean Squared Error and corresponding parameters
print(f"Best MSE: {best_mse}")
print(f"Best Params: {best_params}")

# Finally, you might want to retrain your model with the best parameters on the entire training set and then evaluate it on your test set.

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100


In [8]:
# Build and train the LSTM model
# Best MSE: 0.522433403134346
# Best Params: {'batch_size': 16, 'epochs': 100, 'lstm_units': 100}

model = Sequential()
model.add(LSTM(units=100, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dense(units=1))
model.compile(optimizer='adam', loss='mse')
model.fit(X_train, y_train, epochs=100, batch_size=16)

# Predictions
train_predict = model.predict(X_train)
test_predict = model.predict(X_test)

# Inverse transform predictions
train_predict = scaler.inverse_transform(train_predict)
y_train = scaler.inverse_transform(y_train)
test_predict = scaler.inverse_transform(test_predict)
y_test = scaler.inverse_transform(y_test)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [9]:
# Create a DataFrame for plotting
plot_data = pd.DataFrame({
    'Date': np.concatenate((df.index[seq_length:seq_length+len(train_predict)],
                            df.index[seq_length+len(train_predict):seq_length+len(train_predict)+len(test_predict)])),
    'Predicted CPI': np.concatenate((train_predict.flatten(), test_predict.flatten())),
    'Actual CPI': np.concatenate((y_train.flatten(), y_test.flatten()))
})
# Ensure 'Date' is in datetime format
plot_data['Date'] = pd.to_datetime(plot_data['Date'])

# Shift the dates for the test predictions forward by one month
# This is done for the part of the DataFrame that contains test data
plot_data.loc[len(train_predict):, 'Date'] += pd.DateOffset(months=1)

In [10]:
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import plotly.express as px
import plotly.graph_objs as go

# Ensure 'Date' is in datetime format
plot_data['Date'] = pd.to_datetime(plot_data['Date'])

# Create traces for Actual and Predicted CPI
trace1 = go.Scatter(x=plot_data['Date'], y=plot_data['Actual CPI'], mode='lines', name='Actual CPI')
trace2 = go.Scatter(x=plot_data['Date'], y=plot_data['Predicted CPI'], mode='lines', name='Predicted CPI')

# Layout with range selector buttons and range slider
layout = go.Layout(
    title='CPI Forecasting with LSTM',
    xaxis=dict(
        title='Date',
        rangeselector=dict(
            buttons=list([
                dict(count=1, label='1m', step='month', stepmode='backward'),
                dict(count=6, label='6m', step='month', stepmode='backward'),
                dict(count=1, label='YTD', step='year', stepmode='todate'),
                dict(count=1, label='1y', step='year', stepmode='backward'),
                dict(step='all')
            ])
        ),
        rangeslider=dict(visible=True),
        type='date'
    ),
    yaxis=dict(
        title='CPI'
    )
)

# Create figure and add traces
fig = go.Figure(data=[trace1, trace2], layout=layout)

# Show plot
fig.show()

In [19]:
# Display the last 10 rows of the DataFrame
display_data = plot_data.tail(10)

# Optional: Round the CPI values for better readability
display_data['Predicted CPI'] = display_data['Predicted CPI'].round(2)
display_data['Actual CPI'] = display_data['Actual CPI'].round(2)

# Show the DataFrame
display_data



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,Date,Predicted CPI,Actual CPI
904,2023-07-01,3.04,3.27
905,2023-08-01,3.26,3.72
906,2023-09-01,3.7,3.69
907,2023-10-01,3.67,3.25
908,2023-11-01,3.24,3.14
909,2023-12-01,3.13,3.32
910,2024-01-01,3.31,3.11
911,2024-02-01,3.1,3.17
912,2024-03-01,3.16,3.48
913,2024-04-01,3.46,3.36


In [13]:
# Display the forecasted values along with dates in a table format
print(forecast_df)

        Date  Forecasted CPI
0 2024-05-01        3.345053
1 2024-06-01        3.330450
2 2024-07-01        3.316188
3 2024-08-01        3.302257
4 2024-09-01        3.288653


In [14]:
import numpy as np
import pandas as pd
import plotly.graph_objs as go

# Assuming df, model, and scaler are already defined

# Number of months to forecast
months_to_forecast = 5

# Initialize arrays for storing forecasts, dates, and bounds
forecast_values = []
forecast_dates = []
lower_bounds = []  # Initialize the lower bounds list
upper_bounds = []  # Initialize the upper bounds list

last_known_date = pd.to_datetime(plot_data['Date'].iloc[-1])

# Calculate the residuals on the training data
residuals = y_train.flatten() - model.predict(X_train).flatten()

# Calculate the standard deviation of the residuals
std_residuals = np.std(residuals)

# Assuming a normal distribution, calculate the 95% confidence interval
z_score = 1.28  # Corresponds to 95% confidence

# Start with the last available sequence
new_data = df.iloc[-seq_length:]['value'].values.reshape(-1, 1)

# Scale the initial sequence
new_data_scaled = scaler.transform(new_data)

for i in range(months_to_forecast):
    # Reshape for LSTM input
    lstm_input = new_data_scaled.reshape(1, seq_length, 1)

    # Forecast the next month
    next_month_prediction_scaled = model.predict(lstm_input)

    # Inverse transform the prediction
    next_month_prediction = scaler.inverse_transform(next_month_prediction_scaled.reshape(-1, 1))

    # Store forecasted value
    forecast_value = next_month_prediction[0][0]
    forecast_values.append(forecast_value)

    # Calculate the forecast date
    forecast_date = last_known_date + pd.DateOffset(months=i + 1)
    forecast_dates.append(forecast_date)

    # Append the prediction for the next iteration
    new_data_scaled = np.append(new_data_scaled, next_month_prediction_scaled)[-seq_length:]

    # Calculate confidence intervals
    lower_bound = forecast_value - z_score * std_residuals
    upper_bound = forecast_value + z_score * std_residuals
    lower_bounds.append(lower_bound)
    upper_bounds.append(upper_bound)

# Creating a DataFrame for forecasted data
forecast_df = pd.DataFrame({
    'Date': forecast_dates,
    'Forecasted CPI': forecast_values,
    'Lower Bound CI': lower_bounds,
    'Upper Bound CI': upper_bounds
})

# Plotting the data
fig = go.Figure()

# Add actual data trace
fig.add_trace(go.Scatter(x=plot_data['Date'], y=plot_data['Actual CPI'], mode='lines', name='Actual CPI'))

# Add forecasted data trace
fig.add_trace(go.Scatter(x=forecast_df['Date'], y=forecast_df['Forecasted CPI'], mode='lines', name='Forecasted CPI'))

# Add confidence interval traces
fig.add_trace(go.Scatter(x=forecast_df['Date'], y=forecast_df['Lower Bound CI'], mode='lines', name='Lower Bound CI', line=dict(color='rgba(173, 216, 230, 0.4)')))
fig.add_trace(go.Scatter(x=forecast_df['Date'], y=forecast_df['Upper Bound CI'], mode='lines', name='Upper Bound CI', line=dict(color='rgba(173, 216, 230, 0.4)'), fill='tonexty'))

# Update layout
fig.update_layout(
    title='CPI Forecasting with LSTM',
    xaxis_title='Date',
    yaxis_title='CPI',
    xaxis_rangeslider_visible=True
)

# Show the figure
fig.show()




X does not have valid feature names, but MinMaxScaler was fitted with feature names





In [15]:
# Check the end of the actual data
print(plot_data.tail())

# Check the beginning of the forecasted data
print(forecast_df.head())

          Date  Predicted CPI  Actual CPI
909 2023-12-01       3.130285        3.32
910 2024-01-01       3.305982        3.11
911 2024-02-01       3.101021        3.17
912 2024-03-01       3.159553        3.48
913 2024-04-01       3.462328        3.36
        Date  Forecasted CPI  Lower Bound CI  Upper Bound CI
0 2024-05-01        3.345053       -0.270975        6.961081
1 2024-06-01        3.330450       -0.285578        6.946478
2 2024-07-01        3.316188       -0.299841        6.932216
3 2024-08-01        3.302257       -0.313772        6.918285
4 2024-09-01        3.288653       -0.327375        6.904682


In [16]:
import plotly.graph_objs as go

# Assuming plot_data and forecast_df are already defined and contain the necessary data

# Combine actual and forecasted data into a single DataFrame for plotting
combined_df = pd.concat([plot_data[['Date', 'Predicted CPI']], forecast_df])

# Plotting the data
fig = go.Figure()

# Trace for the actual and previously predicted CPI
fig.add_trace(go.Scatter(
    x=combined_df['Date'],
    y=combined_df['Predicted CPI'],
    mode='lines+markers',  # Change to 'lines+markers' if you want to highlight individual points
    name='Actual & Predicted CPI'
))

# Traces for the forecasted CPI with confidence intervals
fig.add_trace(go.Scatter(
    x=forecast_df['Date'],
    y=forecast_df['Forecasted CPI'],
    mode='lines+markers',
    name='Forecasted CPI'
))
fig.add_trace(go.Scatter(
    x=forecast_df['Date'],
    y=forecast_df['Lower Bound CI'],
    mode='lines',
    line=dict(width=0),
    showlegend=False
))
fig.add_trace(go.Scatter(
    x=forecast_df['Date'],
    y=forecast_df['Upper Bound CI'],
    mode='lines',
    fill='tonexty',
    fillcolor='rgba(173, 216, 230, 0.5)',  # Adjust opacity for visibility
    line=dict(width=0),
    showlegend=False
))

# Update layout
fig.update_layout(
    title='CPI Forecasting with LSTM',
    xaxis_title='Date',
    yaxis_title='CPI',
    xaxis_rangeslider_visible=True
)

# Show the figure
fig.show()