In [41]:
import requests
import pandas as pd
def get_fred_series_observations(series_id, api_key):
    # Endpoint for series observations
    base_url = "https://api.stlouisfed.org/fred/series/observations"
    params = {
        "series_id": series_id,
        "api_key": api_key,
        "file_type": "json"
    }
    response = requests.get(base_url, params=params)
    return response.json()

api_key = 'ef8c3af7f7bebd62ffff5b460d66375a'
series_id = 'CPIAUCSL'

# Fetch the data points for the series
data = get_fred_series_observations(series_id, api_key)

# Check if observations are in the response and create a DataFrame
if 'observations' in data:
    df = pd.DataFrame(data['observations'])
    df = df[['date', 'value']]  # Select only the 'date' and 'value' columns
# Convert 'value' column to float64
df['value'] = df['value'].astype('float64')
df['value'] = (df['value'].pct_change(periods=12) * 100).round(2)  # Calculate percent change
df = df.dropna(subset=['value'])

In [42]:
# Set the 'year' column as the index
df.set_index('date', inplace=True)


In [43]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import LSTM, Dense

# Normalize the data: MinMaxScaler
# data_scaled= ( df- df.min()  /( df.max()- df.min())
scaler = MinMaxScaler()
data_scaled = scaler.fit_transform(df)

In [44]:
import numpy as np
# Split data into train and test sets
train_size = int(len(data_scaled) * 0.8)
train_data, test_data = data_scaled[:train_size], data_scaled[train_size:]

# Create sequences for LSTM training
def create_sequences(df, seq_length):
    X, y = [], []
    for i in range(len(df) - seq_length):
        X.append(df[i:i+seq_length])
        y.append(df[i+seq_length])
    return np.array(X), np.array(y)

seq_length = 1  # Length of sequences for LSTM
X_train, y_train = create_sequences(train_data, seq_length)
X_test, y_test = create_sequences(test_data, seq_length)

In [None]:
'''
# Print the first few elements of each array
print("X_train:", X_train[:5])
print("y_train:", y_train[:5])
print("X_test:", X_test[:5])
print("y_test:", y_test[:5])
# y=x(1)
# Print the shapes of the arrays
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)
print("X_test shape:", X_test.shape)
print("y_test shape:", y_test.shape)
'''

In [None]:
from sklearn.model_selection import ParameterGrid
from keras.callbacks import EarlyStopping
from keras.models import Sequential
from keras.layers import LSTM, Dense

# Define a parameter grid
param_grid = {
    'lstm_units': [20, 50, 100],
    'epochs': [50, 100, 150],
    'batch_size': [16, 32, 64]
}

# Initialize early stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=10)

best_mse = float('inf')
best_params = {}

# Iterate over all combinations of parameters
for params in ParameterGrid(param_grid):
    # Build the LSTM model
    model = Sequential()
    model.add(LSTM(units=params['lstm_units'], input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(Dense(units=1))
    model.compile(optimizer='adam', loss='mse')

    # Train the model with early stopping and validation split
    model.fit(X_train, y_train, epochs=params['epochs'], batch_size=params['batch_size'], callbacks=[early_stopping], validation_split=0.2)

    # Evaluate the model on the test set
    mse = model.evaluate(X_test, y_test)

    if mse < best_mse:
        best_mse = mse
        best_params = params

# Print the best Mean Squared Error and corresponding parameters
print(f"Best MSE: {best_mse}")
print(f"Best Params: {best_params}")

In [45]:
# Build and train the LSTM model
# Best MSE: 0.0007103609386831522
# Best params from batch : 16 epochs :150 and units:100
model = Sequential()
model.add(LSTM(units=100, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dense(units=1))
model.compile(optimizer='adam', loss='mse')
model.fit(X_train, y_train, epochs=150, batch_size=16)

# Predictions
train_predict = model.predict(X_train)
test_predict = model.predict(X_test)

# Inverse transform predictions
train_predict = scaler.inverse_transform(train_predict)
y_train = scaler.inverse_transform(y_train)
test_predict = scaler.inverse_transform(test_predict)
y_test = scaler.inverse_transform(y_test)

Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78

In [46]:
# Create a DataFrame for plotting
plot_data = pd.DataFrame({
    'Date': np.concatenate((df.index[seq_length:seq_length+len(train_predict)],
                            df.index[seq_length+len(train_predict):seq_length+len(train_predict)+len(test_predict)])),
    'Predicted CPI': np.concatenate((train_predict.flatten(), test_predict.flatten())),
    'Actual CPI': np.concatenate((y_train.flatten(), y_test.flatten()))
})
# Ensure 'Date' is in datetime format
plot_data['Date'] = pd.to_datetime(plot_data['Date'])

# Shift the dates for the test predictions forward by one month
# This is done for the part of the DataFrame that contains test data
plot_data.loc[len(train_predict):, 'Date'] += pd.DateOffset(months=1)


In [47]:
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import plotly.express as px
import plotly.graph_objs as go

# Ensure 'Date' is in datetime format
plot_data['Date'] = pd.to_datetime(plot_data['Date'])

# Create traces for Actual and Predicted CPI
trace1 = go.Scatter(x=plot_data['Date'], y=plot_data['Actual CPI'], mode='lines', name='Actual CPI')
trace2 = go.Scatter(x=plot_data['Date'], y=plot_data['Predicted CPI'], mode='lines', name='Predicted CPI')

# Layout with range selector buttons and range slider
layout = go.Layout(
    title='CPI Forecasting with LSTM',
    xaxis=dict(
        title='Date',
        rangeselector=dict(
            buttons=list([
                dict(count=1, label='1m', step='month', stepmode='backward'),
                dict(count=6, label='6m', step='month', stepmode='backward'),
                dict(count=1, label='YTD', step='year', stepmode='todate'),
                dict(count=1, label='1y', step='year', stepmode='backward'),
                dict(step='all')
            ])
        ),
        rangeslider=dict(visible=True),
        type='date'
    ),
    yaxis=dict(
        title='CPI'
    )
)

# Create figure and add traces
fig = go.Figure(data=[trace1, trace2], layout=layout)

# Show plot
fig.show()




In [48]:
# Display the last 10 rows of the DataFrame
display_data = plot_data.tail(10)

# Optional: Round the CPI values for better readability
display_data['Predicted CPI'] = display_data['Predicted CPI'].round(2)
display_data['Actual CPI'] = display_data['Actual CPI'].round(2)

# Show the DataFrame
display_data



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,Date,Predicted CPI,Actual CPI
899,2023-02-01,6.12,5.99
900,2023-03-01,5.76,4.99
901,2023-04-01,4.77,4.96
902,2023-05-01,4.75,4.13
903,2023-06-01,3.93,3.09
904,2023-07-01,2.91,3.3
905,2023-08-01,3.11,3.71
906,2023-09-01,3.52,3.69
907,2023-10-01,3.5,3.23
908,2023-11-01,3.05,3.12


In [49]:
import pandas as pd
import numpy as np
import plotly.graph_objs as go

# Assuming df, model, and scaler are already defined

# Number of months to forecast
months_to_forecast = 12

# Start with the last available sequence
new_data = df.iloc[-seq_length:]['value'].values.reshape(-1, 1)

# Scale the initial sequence
new_data_scaled = scaler.transform(new_data)

# Initialize arrays for storing forecasts and dates
forecast_values = []
forecast_dates = []
last_known_date = pd.to_datetime(plot_data['Date'].iloc[-1])

for i in range(months_to_forecast):
    # Reshape for LSTM input
    lstm_input = new_data_scaled.reshape(1, seq_length, 1)

    # Forecast the next month
    next_month_prediction_scaled = model.predict(lstm_input)

    # Inverse transform the prediction
    next_month_prediction = scaler.inverse_transform(next_month_prediction_scaled.reshape(-1, 1))

    # Store forecasted value
    forecast_values.append(next_month_prediction[0][0])

    # Calculate the forecast date
    forecast_date = last_known_date + pd.DateOffset(months=i + 1)
    forecast_dates.append(forecast_date)

    # Append the prediction for the next iteration
    new_data_scaled = np.append(new_data_scaled, next_month_prediction_scaled)[-seq_length:]

# Creating a DataFrame for forecasted data
forecast_df = pd.DataFrame({
    'Date': forecast_dates,
    'Forecasted CPI': forecast_values
})

# Plotting the data
fig = go.Figure()

# Add actual data trace
fig.add_trace(go.Scatter(x=plot_data['Date'], y=df['value'], mode='lines', name='Actual CPI'))

# Add forecasted data trace
fig.add_trace(go.Scatter(x=forecast_df['Date'], y=forecast_df['Forecasted CPI'], mode='lines', name='Forecasted CPI'))

# Update layout
fig.update_layout(
    title='CPI Forecasting with LSTM',
    xaxis_title='Date',
    yaxis_title='CPI',
    xaxis_rangeslider_visible=True
)

# Show the figure
fig.show()





X does not have valid feature names, but MinMaxScaler was fitted with feature names





In [50]:
# Assuming forecast_df is your DataFrame containing the forecasted data.
forecast_df['Date'] = forecast_df['Date'].dt.strftime('%Y-%m-%d')  # Format the dates for display.
print(forecast_df.to_string(index=False))

      Date  Forecasted CPI
2023-12-01        2.938392
2024-01-01        2.760715
2024-02-01        2.587004
2024-03-01        2.417291
2024-04-01        2.251598
2024-05-01        2.089941
2024-06-01        1.932329
2024-07-01        1.778768
2024-08-01        1.629254
2024-09-01        1.483779
2024-10-01        1.342326
2024-11-01        1.204877


In [51]:
# ... [previous code to prepare the data] ...

# Plotting the data
fig = go.Figure()

# Add actual data trace
fig.add_trace(go.Scatter(
    x=plot_data['Date'],
    y=plot_data['Actual CPI'],
    mode='lines',
    name='Actual CPI'
))

# To visually connect the data, make the first forecasted date the same as the last actual date
first_forecasted_date = plot_data['Date'].iloc[-1]
first_forecasted_value = plot_data['Actual CPI'].iloc[-1]
forecast_df.at[0, 'Date'] = first_forecasted_date
forecast_df.at[0, 'Forecasted CPI'] = first_forecasted_value

# Add forecasted data trace with a different style to distinguish it
fig.add_trace(go.Scatter(
    x=forecast_df['Date'],
    y=forecast_df['Forecasted CPI'],
    mode='lines',
    name='Forecasted CPI',
    line=dict(dash='dash')  # This makes the forecast line dashed
))

# Update layout
fig.update_layout(
    title='CPI Forecasting with LSTM',
    xaxis_title='Date',
    yaxis_title='CPI',
    xaxis_rangeslider_visible=True
)

# Show the figure
fig.show()


In [52]:
# Convert the 'Date' columns to datetime if they are not already
plot_data['Date'] = pd.to_datetime(plot_data['Date'])
forecast_df['Date'] = pd.to_datetime(forecast_df['Date'])

# Define the cutoff date
cutoff_date = pd.Timestamp('2023-07-01')

# Filter the data to include only points after November 2023
plot_data_filtered = plot_data[plot_data['Date'] > cutoff_date]
forecast_df_filtered = forecast_df[forecast_df['Date'] > cutoff_date]

# Now create the Plotly figure with the filtered data
fig = go.Figure()

# Add actual data trace if there is any data after the cutoff date
if not plot_data_filtered.empty:
    fig.add_trace(go.Scatter(
        x=plot_data_filtered['Date'],
        y=plot_data_filtered['Actual CPI'],
        mode='lines',
        name='Actual CPI'
    ))

# Add forecasted data trace
fig.add_trace(go.Scatter(
    x=forecast_df_filtered['Date'],
    y=forecast_df_filtered['Forecasted CPI'],
    mode='lines',
    name='Forecasted CPI',
    line=dict(dash='dash')  # This makes the forecast line dashed
))

# Update layout
fig.update_layout(
    title='CPI Forecasting with LSTM (Post November 2023)',
    xaxis_title='Date',
    yaxis_title='CPI',
    xaxis_rangeslider_visible=True
)

# Show the figure
fig.show()
