<a href="https://colab.research.google.com/github/yuri-spizhovyi-mit/housing-insights-risk-dashboard/blob/main/ml/notebooks/forecasting_analysis.ipynb" target="_parent">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

# Housing Insights & Risk Dashboard
## Data Engineering & Forecasting Notebook  
### Models: ARIMA, Prophet, LSTM
#### Author: Yuri Spizhovyi
#### Environment: Google Colab + Python + Pandas + Statsmodels + TensorFlow
#### Objective:
- Load datasets (HPI, rent, demographics, macro, metrics)
- Explore trends, seasonality, missingness
- Define feature engineering strategy
- Prepare feature tables for ARIMA, Prophet, LSTM

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Notebook-wide display configuration.
# Apply the "seaborn-v0_8" matplotlib style sheet to every figure drawn below.
plt.style.use("seaborn-v0_8")
# Do not truncate columns when a DataFrame is printed or displayed.
pd.set_option("display.max_columns", None)

# Fetch the project repository into the current working directory.
# NOTE(review): the clone creates `housing-insights-risk-dashboard/`, yet the
# `%cd` below targets `ET6-ML/data/raw`, which this clone does not create on a
# fresh runtime — confirm the intended directory.
!git clone https://github.com/yuri-spizhovyi-mit/housing-insights-risk-dashboard.git
# NOTE(review): the path is relative, so re-running this cell resolves it
# against the directory the previous run already changed into.
%cd ET6-ML/data/raw

In [None]:
# Load datasets
# Single place to repoint the raw-data location.
# NOTE(review): this is an absolute path at the filesystem root, while the
# setup cell changes into a relative `.../data/raw` directory — confirm which
# location actually holds the CSVs on the target runtime.
DATA_DIR = "/data/raw"

df_hpi = pd.read_csv(f"{DATA_DIR}/house_price_index.csv")
df_rent = pd.read_csv(f"{DATA_DIR}/rent_index.csv")
df_demo = pd.read_csv(f"{DATA_DIR}/demographics.csv")
df_macro = pd.read_csv(f"{DATA_DIR}/macro_economic_data.csv")
df_metrics = pd.read_csv(f"{DATA_DIR}/metrics.csv")

# Name -> frame registry so later cells can iterate over every table.
# The values are the same objects as the df_* variables above.
dfs = {
    "house_price_index": df_hpi,
    "rent_index": df_rent,
    "demographics": df_demo,
    "macro_economic": df_macro,
    "metrics": df_metrics,
}

# Quick look at each table: first rows, then dtypes / non-null counts.
for name, df in dfs.items():
    print(f"\n===== {name.upper()} =====")
    print(df.head())
    # DataFrame.info() writes its report to stdout and returns None, so it is
    # called directly; wrapping it in print() would emit a stray "None" line.
    df.info()

In [None]:
# Parse the "date" column of every table that has one and order its rows
# chronologically. The frames are modified in place, so the df_* variables
# that alias the values of `dfs` see the converted, sorted data too.
for frame in dfs.values():
    if "date" not in frame.columns:
        continue  # nothing to convert for tables without a date column
    frame["date"] = pd.to_datetime(frame["date"])
    frame.sort_values("date", inplace=True)

print("Date columns converted and sorted.")

In [None]:
# Report the covered date range and per-column missing-value counts for every
# loaded dataset.
for name, df in dfs.items():
    print(f"\n{name}:")
    # The date-conversion cell only touches frames that have a "date" column,
    # so apply the same guard here instead of raising KeyError on tables
    # without one.
    if "date" in df.columns:
        print("Date range:", df["date"].min(), "→", df["date"].max())
    else:
        print("Date range: n/a (no 'date' column)")
    print("Missing values:\n", df.isna().sum())

In [None]:
# House Price Index over time, drawn on an explicit 12x5 figure/axes pair.
fig_hpi, ax_hpi = plt.subplots(figsize=(12, 5))
sns.lineplot(x="date", y="value", data=df_hpi, ax=ax_hpi)
ax_hpi.set_title("House Price Index Trend")
plt.show()

In [None]:
# Rent index over time, drawn on an explicit 12x5 figure/axes pair.
fig_rent, ax_rent = plt.subplots(figsize=(12, 5))
sns.lineplot(x="date", y="value", data=df_rent, ax=ax_rent)
ax_rent.set_title("Rent Index Trend")
plt.show()