In [83]:
import requests
import pandas as pd
def get_fred_series_observations(series_id, api_key):
    # Endpoint for series observations
    base_url = "https://api.stlouisfed.org/fred/series/observations"
    params = {
        "series_id": series_id,
        "api_key": api_key,
        "file_type": "json"
    }
    response = requests.get(base_url, params=params)
    return response.json()

api_key = 'ef8c3af7f7bebd62ffff5b460d66375a'
series_id = 'CPIAUCSL'

# Fetch the data points for the series
data = get_fred_series_observations(series_id, api_key)

# Check if observations are in the response and create a DataFrame
if 'observations' in data:
    df = pd.DataFrame(data['observations'])
    df = df[['date', 'value']]  # Select only the 'date' and 'value' columns
# Convert 'value' column to float64
df['value'] = df['value'].astype('float64')
df['value'] = (df['value'].pct_change(periods=12) * 100).round(2)  # Calculate percent change
df = df.dropna(subset=['value'])

In [84]:
# Set the 'year' column as the index
df.set_index('date', inplace=True)
df

Unnamed: 0_level_0,value
date,Unnamed: 1_level_1
1948-01-01,10.24
1948-02-01,9.48
1948-03-01,6.82
1948-04-01,8.27
1948-05-01,9.38
...,...
2023-07-01,3.30
2023-08-01,3.71
2023-09-01,3.69
2023-10-01,3.23


In [85]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import LSTM, Dense

# Normalize the data: MinMaxScaler
# data_scaled= ( df- df.min()  /( df.max()- df.min())
scaler = MinMaxScaler()
data_scaled = scaler.fit_transform(df)

In [86]:
import numpy as np
# Split data into train and test sets
train_size = int(len(data_scaled) * 0.8)
train_data, test_data = data_scaled[:train_size], data_scaled[train_size:]

# Create sequences for LSTM training
def create_sequences(df, seq_length):
    X, y = [], []
    for i in range(len(df) - seq_length):
        X.append(df[i:i+seq_length])
        y.append(df[i+seq_length])
    return np.array(X), np.array(y)

seq_length = 1  # Length of sequences for LSTM
X_train, y_train = create_sequences(train_data, seq_length)
X_test, y_test = create_sequences(test_data, seq_length)

In [None]:
'''
# Print the first few elements of each array
print("X_train:", X_train[:5])
print("y_train:", y_train[:5])
print("X_test:", X_test[:5])
print("y_test:", y_test[:5])
# y=x(1)
# Print the shapes of the arrays
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)
print("X_test shape:", X_test.shape)
print("y_test shape:", y_test.shape)
'''

In [59]:
from sklearn.model_selection import ParameterGrid
from keras.callbacks import EarlyStopping
from keras.models import Sequential
from keras.layers import LSTM, Dense

# Define a parameter grid
param_grid = {
    'lstm_units': [20, 50, 100],
    'epochs': [50, 100, 150],
    'batch_size': [16, 32, 64]
}

# Initialize early stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=10)

best_mse = float('inf')
best_params = {}

# Iterate over all combinations of parameters
for params in ParameterGrid(param_grid):
    # Build the LSTM model
    model = Sequential()
    model.add(LSTM(units=params['lstm_units'], input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(Dense(units=1))
    model.compile(optimizer='adam', loss='mse')

    # Train the model with early stopping and validation split
    model.fit(X_train, y_train, epochs=params['epochs'], batch_size=params['batch_size'], callbacks=[early_stopping], validation_split=0.2)

    # Evaluate the model on the test set
    mse = model.evaluate(X_test, y_test)

    if mse < best_mse:
        best_mse = mse
        best_params = params

# Print the best Mean Squared Error and corresponding parameters
print(f"Best MSE: {best_mse}")
print(f"Best Params: {best_params}")

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 3

In [87]:
# Build and train the LSTM model
# Best MSE: 0.0007103609386831522
# Best params from batch : 16 epochs :150 and units:100
model = Sequential()
model.add(LSTM(units=100, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dense(units=1))
model.compile(optimizer='adam', loss='mse')
model.fit(X_train, y_train, epochs=150, batch_size=16)

# Predictions
train_predict = model.predict(X_train)
test_predict = model.predict(X_test)

# Inverse transform predictions
train_predict = scaler.inverse_transform(train_predict)
y_train = scaler.inverse_transform(y_train)
test_predict = scaler.inverse_transform(test_predict)
y_test = scaler.inverse_transform(y_test)

Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78

In [88]:
# Create a DataFrame for plotting
plot_data = pd.DataFrame({
    'Date': np.concatenate((df.index[seq_length:seq_length+len(train_predict)],
                            df.index[seq_length+len(train_predict):seq_length+len(train_predict)+len(test_predict)])),
    'Predicted CPI': np.concatenate((train_predict.flatten(), test_predict.flatten())),
    'Actual CPI': np.concatenate((y_train.flatten(), y_test.flatten()))
})
# Ensure 'Date' is in datetime format
plot_data['Date'] = pd.to_datetime(plot_data['Date'])

# Shift the dates for the test predictions forward by one month
# This is done for the part of the DataFrame that contains test data
plot_data.loc[len(train_predict):, 'Date'] += pd.DateOffset(months=1)


In [89]:
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import plotly.express as px
import plotly.graph_objs as go

# Ensure 'Date' is in datetime format
plot_data['Date'] = pd.to_datetime(plot_data['Date'])

# Create traces for Actual and Predicted CPI
trace1 = go.Scatter(x=plot_data['Date'], y=plot_data['Actual CPI'], mode='lines', name='Actual CPI')
trace2 = go.Scatter(x=plot_data['Date'], y=plot_data['Predicted CPI'], mode='lines', name='Predicted CPI')

# Layout with range selector buttons and range slider
layout = go.Layout(
    title='CPI Forecasting with LSTM',
    xaxis=dict(
        title='Date',
        rangeselector=dict(
            buttons=list([
                dict(count=1, label='1m', step='month', stepmode='backward'),
                dict(count=6, label='6m', step='month', stepmode='backward'),
                dict(count=1, label='YTD', step='year', stepmode='todate'),
                dict(count=1, label='1y', step='year', stepmode='backward'),
                dict(step='all')
            ])
        ),
        rangeslider=dict(visible=True),
        type='date'
    ),
    yaxis=dict(
        title='CPI'
    )
)

# Create figure and add traces
fig = go.Figure(data=[trace1, trace2], layout=layout)

# Show plot
fig.show()




In [90]:
# Display the last 10 rows of the DataFrame
display_data = plot_data.tail(10)

# Optional: Round the CPI values for better readability
display_data['Predicted CPI'] = display_data['Predicted CPI'].round(2)
display_data['Actual CPI'] = display_data['Actual CPI'].round(2)

# Show the DataFrame
display_data




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,Date,Predicted CPI,Actual CPI
899,2023-02-01,6.2,5.99
900,2023-03-01,5.85,4.99
901,2023-04-01,4.86,4.96
902,2023-05-01,4.83,4.13
903,2023-06-01,4.01,3.09
904,2023-07-01,2.98,3.3
905,2023-08-01,3.19,3.71
906,2023-09-01,3.59,3.69
907,2023-10-01,3.57,3.23
908,2023-11-01,3.12,3.12


In [91]:
# Assuming you have already trained your model and have the scaler
# Forecasted value for the new month

# Prepare new data (here, we are using the last available data point)
new_data = df.iloc[-seq_length:]['value'].values.reshape(-1, 1)

# Scale the new data
new_data_scaled = scaler.transform(new_data)

# Reshape for LSTM input
new_data_scaled = new_data_scaled.reshape(1, seq_length, 1)

# Forecast
new_month_prediction_scaled = model.predict(new_data_scaled)

# Inverse transform the prediction
new_month_prediction = scaler.inverse_transform(new_month_prediction_scaled)

print(f"Forecasted value for the new month: {new_month_prediction[0][0]}")

Forecasted value for the new month: 3.0112686157226562



X does not have valid feature names, but MinMaxScaler was fitted with feature names



In [92]:
import pandas as pd
import plotly.graph_objs as go

# Assuming df is your original DataFrame used for LSTM prediction with 'Date' and 'value' columns
# Assuming plot_data is your DataFrame that contains the 'Date', 'Actual CPI', and 'Predicted CPI'

# Add the forecasted value to the plot_data DataFrame
last_date = plot_data['Date'].iloc[-1]  # Get the last date from the plot data
forecast_date = last_date + pd.DateOffset(months=1)  # Forecast for the next month
forecasted_value = new_month_prediction[0][0]  # This is your forecasted value for the new month

# Append the forecasted data point to the plot_data DataFrame
forecast_data = pd.DataFrame({
    'Date': [forecast_date],
    'Predicted CPI': [forecasted_value],
    'Actual CPI': [None]  # No actual value for the forecasted date
})

# Combine with the existing plot_data DataFrame
plot_data = pd.concat([plot_data, forecast_data], ignore_index=True)

# Create traces for Actual and Predicted CPI along with the Forecasted value
trace_actual = go.Scatter(x=plot_data['Date'], y=plot_data['Actual CPI'], mode='lines', name='Actual CPI')
trace_predicted = go.Scatter(x=plot_data['Date'], y=plot_data['Predicted CPI'], mode='lines', name='Predicted CPI')
trace_forecasted = go.Scatter(x=[forecast_date], y=[forecasted_value], mode='markers', name='Forecasted CPI', marker=dict(size=10, color='red'))

# Define the layout with range selector buttons
layout = go.Layout(
    title='CPI Forecasting with LSTM',
    xaxis=dict(
        title='Date',
        rangeselector=dict(
            buttons=list([
                dict(count=1, label='1m', step='month', stepmode='backward'),
                dict(count=6, label='6m', step='month', stepmode='backward'),
                dict(count=1, label='YTD', step='year', stepmode='todate'),
                dict(count=1, label='1y', step='year', stepmode='backward'),
                dict(step='all')
            ])
        ),
        rangeslider=dict(visible=True),
        type='date'
    ),
    yaxis=dict(
        title='CPI'
    )
)

# Combine the traces and layout into a figure
fig = go.Figure(data=[trace_actual, trace_predicted, trace_forecasted], layout=layout)

# Show the figure
fig.show()


In [96]:
# Assuming your model and scaler are already defined and trained
# Forecasted value for the new 3 months:

# Number of months to forecast
months_to_forecast = 5

# Start with the last available sequence
new_data = df.iloc[-seq_length:]['value'].values.reshape(-1, 1)

# Scale the initial sequence
new_data_scaled = scaler.transform(new_data)

# Iteratively forecast the next months
for _ in range(months_to_forecast):
    # Reshape for LSTM input
    lstm_input = new_data_scaled.reshape(1, seq_length, 1)

    # Forecast the next month
    next_month_prediction_scaled = model.predict(lstm_input)

    # Append the prediction for the next iteration
    new_data_scaled = np.append(new_data_scaled, next_month_prediction_scaled)[-seq_length:]

    # Inverse transform the prediction
    next_month_prediction = scaler.inverse_transform(next_month_prediction_scaled.reshape(-1, 1))

    print(f"Forecasted value for the next month: {next_month_prediction[0][0]}")





X does not have valid feature names, but MinMaxScaler was fitted with feature names



Forecasted value for the next month: 3.0112686157226562
Forecasted value for the next month: 2.9044153690338135
Forecasted value for the next month: 2.799450635910034
Forecasted value for the next month: 2.696382522583008
Forecasted value for the next month: 2.595219373703003


In [98]:
import numpy as np
import pandas as pd
import plotly.graph_objs as go

# Assume model, scaler, and df are predefined

# Number of months to forecast
months_to_forecast = 5

# Start with the last available sequence
new_data = df.iloc[-seq_length:]['value'].values.reshape(-1, 1)
new_data_scaled = scaler.transform(new_data)

# Store forecasted values and their corresponding dates and confidence intervals
forecast_dates = []
forecast_values = []
lower_bounds = []
upper_bounds = []

last_known_date = pd.to_datetime(df['Date'].iloc[-1])


for i in range(months_to_forecast):
    # Reshape for LSTM input
    lstm_input = new_data_scaled.reshape(1, seq_length, 1)

    # Forecast the next month
    next_month_prediction_scaled = model.predict(lstm_input)

    # Append the prediction for the next iteration
    new_data_scaled = np.append(new_data_scaled, next_month_prediction_scaled)[-seq_length:]

    # Inverse transform the prediction
    next_month_prediction = scaler.inverse_transform(next_month_prediction_scaled.reshape(-1, 1))

    # Calculate the date for the forecasted value
    forecast_date = last_known_date + pd.DateOffset(months=i + 1)
    forecast_dates.append(forecast_date)
    forecast_values.append(next_month_prediction[0][0])

    # Calculate confidence intervals
    lower_bound = next_month_prediction[0][0] - z_score * std_residuals
    upper_bound = next_month_prediction[0][0] + z_score * std_residuals
    lower_bounds.append(lower_bound)
    upper_bounds.append(upper_bound)

# Append the forecasted data to the plot_data DataFrame
forecast_df = pd.DataFrame({
    'Date': forecast_dates,
    'Predicted CPI': forecast_values,
    'Actual CPI': [None] * months_to_forecast,
    'Lower Bound CI': lower_bounds,
    'Upper Bound CI': upper_bounds
})

# Combine with the existing plot_data DataFrame
plot_data_extended = pd.concat([plot_data, forecast_df], ignore_index=True)

# Ensure 'Date' is in datetime format
plot_data_extended['Date'] = pd.to_datetime(plot_data_extended['Date'])

# Create traces for Actual CPI, Predicted CPI, and Confidence Intervals
trace_actual = go.Scatter(x=plot_data_extended['Date'], y=plot_data_extended['Actual CPI'], mode='lines', name='Actual CPI')
trace_predicted = go.Scatter(x=plot_data_extended['Date'], y=plot_data_extended['Predicted CPI'], mode='lines', name='Predicted CPI')
trace_ci_lower = go.Scatter(x=forecast_df['Date'], y=forecast_df['Lower Bound CI'], mode='lines', line=dict(width=0), showlegend=False)
trace_ci_upper = go.Scatter(x=forecast_df['Date'], y=forecast_df['Upper Bound CI'], mode='lines', fill='tonexty', fillcolor='rgba(173, 216, 230, 0.5)', line=dict(width=0), showlegend=False)

# Define the layout with range selector buttons
layout = go.Layout(
    title='CPI Forecasting with LSTM',
    xaxis=dict(
        title='Date',
        rangeselector=dict(
            buttons=list([
                dict(count=1, label='1m', step='month', stepmode='backward'),
                dict(count=6, label='6m', step='month', stepmode='backward'),
                dict(count=1, label='YTD', step='year', stepmode='todate'),
                dict(count=1, label='1y', step='year', stepmode='backward'),
                dict(step='all')
            ])
        ),
        rangeslider=dict(visible=True),
        type='date'
    ),
    yaxis=dict(title='CPI')
)

# Combine the traces and layout into a figure
fig = go.Figure(data=[trace_actual, trace_predicted, trace_ci_lower, trace_ci_upper], layout=layout)

# Show the figure
fig.show()



X does not have valid feature names, but MinMaxScaler was fitted with feature names



KeyError: 'Date'