In [None]:
import pandas as pd

# Read the UK HPI export, parsing the 'Date' column as datetimes and using it as the row index.
# NOTE(review): this is an absolute Colab path and the recorded run of this cell ended in
# FileNotFoundError -- confirm the CSV has been uploaded before running.
csv_path = '/content/UK-HPI-full-file-2025-01.csv'
data = pd.read_csv(csv_path, index_col='Date', parse_dates=['Date'])
data.head()




FileNotFoundError: [Errno 2] No such file or directory: '/content/UK-HPI-full-file-2025-01.csv'

In [None]:
# Display pandas' descriptive-statistics summary of the loaded frame.
data.describe()

In [None]:
# Print the frame's column names, dtypes, non-null counts and memory usage.
data.info()

In [None]:
# Check stationarity using ADF test
from statsmodels.tsa.stattools import adfuller

# Run the Augmented Dickey-Fuller test on the raw 'Index' series. The full
# result tuple stays in `result`; the two reported values are named for clarity.
result = adfuller(data['Index'])
adf_statistic, p_value = result[0], result[1]
print('ADF Statistic:', adf_statistic)
print('p-value:', p_value)

# Reading the output: a p-value below 0.05 rejects the unit-root null hypothesis,
# i.e. the series is treated as stationary.


In [None]:
# First difference of the index: each row's change versus the previous row.
# The first row has no predecessor, so its value is NaN. Dropping that NaN before
# assigning had no effect (column assignment realigns on the index and puts it
# straight back), and it was the only reason the index had to be reset and
# restored. The un-trimmed diff already shares the frame's index, so it can be
# attached directly; `assign` returns a new frame rather than mutating in place.
data = data.assign(HPI_diff=data['Index'].diff())

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Rescale the 'Index' column into the [0, 1] range.
index_frame = data[['Index']]  # double brackets keep the selection 2-D (one column)
scaler = MinMaxScaler(feature_range=(0, 1))
data_scaled = scaler.fit_transform(index_frame)

# Prepare data for LSTM (supervised learning format)
import numpy as np

def create_dataset(data, time_step=1):
    """Slice a series into sliding-window samples for supervised learning.

    Each sample uses ``time_step`` consecutive values from column 0 of ``data``
    as its input and the value immediately after that window as its target.

    Args:
        data: 2-D NumPy array indexed as ``data[row, col]``; only column 0 is read.
        time_step: Number of consecutive rows in each input window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays. ``X`` has shape
        ``(len(data) - time_step, time_step)`` and ``y`` has shape
        ``(len(data) - time_step,)``. If ``data`` has no more than ``time_step``
        rows, both are empty and ``X`` is shaped ``(0, time_step)`` so callers
        can still read ``X.shape[1]``.
    """
    # A window starting at row i reads rows i .. i + time_step (target included),
    # so the last valid start is len(data) - time_step - 1, which gives
    # len(data) - time_step samples. The previous loop bound of
    # len(data) - time_step - 1 silently dropped the final sample.
    n_samples = len(data) - time_step
    if n_samples <= 0:
        return np.empty((0, time_step)), np.empty((0,))
    X = np.array([data[i:i + time_step, 0] for i in range(n_samples)])
    y = np.array([data[i + time_step, 0] for i in range(n_samples)])
    return X, y

# Look-back window: every sample is built from the previous 60 observations.
time_step = 60
X, y = create_dataset(data_scaled, time_step=time_step)
# Append a trailing feature axis: (samples, time steps) -> (samples, time steps, 1).
X = X.reshape((X.shape[0], X.shape[1], 1))


In [None]:
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
import matplotlib.pyplot as plt

# Fit an ARIMA(5, 1, 0) model to the 'Index' column of the `data` frame loaded
# in the first cell.
model = ARIMA(data['Index'], order=(5, 1, 0))
model_fit = model.fit()

# Forecast horizon in periods. One constant drives both the forecast and its
# dates, so the two cannot drift apart.
FORECAST_STEPS = 12
forecast = model_fit.forecast(steps=FORECAST_STEPS)

# Date the forecast points by stepping whole calendar months on from the last
# observation (this assumes monthly data, as the original freq='M' did).
# Unlike date_range(..., freq='M'), this keeps the forecast on the same
# day-of-month as the data instead of snapping every point to a month end,
# and it avoids the 'M' alias that pandas 2.2+ deprecates in favour of 'ME'.
last_observed = data.index[-1]
forecast_index = pd.DatetimeIndex(
    [last_observed + pd.DateOffset(months=step) for step in range(1, FORECAST_STEPS + 1)]
)

# Plot the historical series and the forecast on a shared date axis
plt.plot(data.index, data['Index'], label='Historical HPI')
plt.plot(forecast_index, forecast, label='ARIMA Forecast', color='red')

# Set labels and title
plt.xlabel('Date')
plt.ylabel('Index')
plt.title('Historical HPI and ARIMA Forecast')

# Add legend
plt.legend()

# Show the plot
plt.show()

In [None]:
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
import matplotlib.pyplot as plt

# Re-draw the history and the ARIMA forecast (`forecast` and `forecast_index`
# come from the ARIMA cell), with the x-axis narrowed around the forecast window.
fig, ax = plt.subplots()
ax.plot(data.index, data['Index'], label='Historical HPI')
ax.plot(forecast_index, forecast, label='ARIMA Forecast', color='red')

# Keep two years (2 x 365 days) of context on either side of the forecast period
zoom_margin = pd.Timedelta(days=365 * 2)
ax.set_xlim(forecast_index[0] - zoom_margin, forecast_index[-1] + zoom_margin)

# Axis labels, title and legend
ax.set_xlabel('Date')
ax.set_ylabel('Index')
ax.set_title('Historical HPI and ARIMA Forecast (Zoomed)')
ax.legend()

# Render the figure
plt.show()

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# Seed TensorFlow's global random generator so repeated runs start from the same
# random state (some backends/ops may still be non-deterministic).
tf.random.set_seed(42)

# Define the LSTM model: two stacked 50-unit LSTM layers, each followed by 20%
# dropout, feeding a single-unit dense output layer.
# NOTE: this rebinds `model`, which the ARIMA cell used for the ARIMA model.
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(X.shape[1], 1)))
model.add(Dropout(0.2))
model.add(LSTM(units=50, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(units=1))

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
model.fit(X, y, epochs=20, batch_size=32)

# Predict on the same windows the model was trained on (in-sample), then undo
# the min-max scaling to get values on the original index scale.
predictions_scaled = model.predict(X)
predictions = scaler.inverse_transform(predictions_scaled)

# Prediction i is the model's estimate for row i + time_step. Slice the dates
# and true values by len(predictions) so x and y always have equal length,
# however many windows were built; plotting against data.index[time_step:]
# fails when there are fewer windows than remaining rows.
target_rows = slice(time_step, time_step + len(predictions))
true_values = scaler.inverse_transform(data_scaled[target_rows])

# Plot the results
plt.plot(data.index[target_rows], true_values, label='True HPI')
plt.plot(data.index[target_rows], predictions, label='LSTM Prediction', color='red')
plt.legend()
plt.show()


In [None]:
from sklearn.metrics import mean_squared_error

# The series lives in the 'Index' column: every other cell reads it from there,
# and no cell in this notebook creates an 'HPI' column.
hpi_index = data['Index']

# ARIMA Model Evaluation
# `forecast` covers the periods AFTER the data ends, so there are no actual
# values to score it against. Back-test instead: refit the same ARIMA(5, 1, 0)
# on everything except the last HOLDOUT_STEPS observations and score its
# forecast of that held-out tail.
HOLDOUT_STEPS = 12
arima_backtest = (
    ARIMA(hpi_index.iloc[:-HOLDOUT_STEPS], order=(5, 1, 0))
    .fit()
    .forecast(steps=HOLDOUT_STEPS)
)
arima_rmse = np.sqrt(
    mean_squared_error(hpi_index.iloc[-HOLDOUT_STEPS:].to_numpy(), np.asarray(arima_backtest))
)
print(f'ARIMA RMSE: {arima_rmse}')

# LSTM Model Evaluation
# Prediction i targets row i + time_step, so take exactly len(predictions)
# actual values starting there; this keeps both arrays the same length.
# NOTE: these predictions are in-sample (made on the training windows), so this
# RMSE is not directly comparable with the out-of-sample ARIMA back-test above.
lstm_actual = hpi_index.to_numpy()[time_step:time_step + len(predictions)]
lstm_rmse = np.sqrt(mean_squared_error(lstm_actual, np.asarray(predictions).ravel()))
print(f'LSTM RMSE: {lstm_rmse}')
