In [None]:
!pip install hopsworks[python] statsmodels requests matplotlib pandas numpy scikit-learn


Collecting hopsworks[python]
  Downloading hopsworks-4.3.1-py3-none-any.whl.metadata (11 kB)
Collecting pyhumps==1.6.1 (from hopsworks[python])
  Downloading pyhumps-1.6.1-py3-none-any.whl.metadata (3.7 kB)
Collecting furl (from hopsworks[python])
  Downloading furl-2.1.4-py2.py3-none-any.whl.metadata (25 kB)
Collecting boto3 (from hopsworks[python])
  Downloading boto3-1.40.11-py3-none-any.whl.metadata (6.7 kB)
Collecting numpy
  Downloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pyjks (from hopsworks[python])
  Downloading pyjks-20.0.0-py2.py3-none-any.whl.metadata (1.7 kB)
Collecting mock (from hopsworks[python])
  Downloading mock-5.2.0-py3-none-any.whl.metadata (3.1 kB)
Collecting avro==1.11.3 (from hopsworks[python])
  Downloading avro-1.11.3.tar.gz (90 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import hopsworks

# ------------------------
# 0) Configuration
# ------------------------
# Hopsworks feature group (name + version) read in the load step.
FEATURE_GROUP_NAME, FEATURE_GROUP_VER = "aqi_weather_features", 2

# Site coordinates; not referenced by the visible pipeline steps.
LATITUDE, LONGITUDE = 33.5973, 73.0479

# Number of forecast steps requested from the model (one step per hour).
HORIZON_H = 72
# Time zone attached to the generated forecast timestamps.
TZ = "Asia/Karachi"

# Output locations: forecast CSV goes in ARTIFACT_DIR, figures in PLOTS_DIR.
ARTIFACT_DIR = "arima_aqi_artifacts"
PLOTS_DIR = os.path.join(ARTIFACT_DIR, "plots")
# Creating the nested plots directory also creates ARTIFACT_DIR.
os.makedirs(PLOTS_DIR, exist_ok=True)

# ------------------------
# 1) Load data
# ------------------------
# Reads the whole feature group into a DataFrame and puts it in chronological order.
print("[1/6] Loading Feature Group from Hopsworks...")
project = hopsworks.login()
fs = project.get_feature_store()
fg = fs.get_feature_group(name=FEATURE_GROUP_NAME, version=FEATURE_GROUP_VER)
df_raw = fg.read()

if df_raw.empty:
    # Fail here with a clear message instead of deep inside the model fit.
    raise ValueError(
        f"Feature group {FEATURE_GROUP_NAME!r} v{FEATURE_GROUP_VER} returned no rows"
    )

# Parse timestamps BEFORE sorting so the order is chronological whatever dtype
# the raw "time" column arrives as (strings, objects, datetimes).
# NOTE(review): tz_localize(None) drops any timezone without converting — confirm
# which zone the stored timestamps are in before treating them as local time.
df_raw["time"] = pd.to_datetime(df_raw["time"]).dt.tz_localize(None)
df_raw = df_raw.sort_values("time", ascending=True).reset_index(drop=True)

# ------------------------
# 2) Train ARIMA
# ------------------------
print("[2/6] Training ARIMA model...")
# Target series as a plain array, taken from the time-sorted frame.
y = df_raw["us_aqi"].values
# (p, d, q) below is only a starting point — tune it via AIC/BIC.
arima_order = (5, 1, 2)
model = ARIMA(endog=y, order=arima_order)
model_fit = model.fit()

# ------------------------
# 3) Forecast
# ------------------------
# Projects HORIZON_H steps past the end of the training series and writes them to CSV.
print(f"[3/6] Forecasting next {HORIZON_H}h...")
forecast = model_fit.forecast(steps=HORIZON_H)
last_time = df_raw["time"].iloc[-1]

# A Timedelta step avoids the "H" frequency alias, which is deprecated in
# pandas >= 2.2, and behaves the same on older releases.
one_hour = pd.Timedelta(hours=1)
# last_time is tz-naive, so tz=TZ labels the wall-clock values as TZ (no conversion).
# NOTE(review): this is only correct if the stored history is already in TZ — confirm.
future_times = pd.date_range(
    start=last_time + one_hour, periods=HORIZON_H, freq=one_hour, tz=TZ
)

forecast_df = pd.DataFrame({
    "datetime": future_times,
    # Plain array so the values are assigned positionally, never index-aligned.
    "predicted_us_aqi": np.asarray(forecast),
})
# The filename follows the configured horizon (identical to the old name when HORIZON_H == 72).
forecast_path = os.path.join(ARTIFACT_DIR, f"arima_{HORIZON_H}h_forecast.csv")
os.makedirs(ARTIFACT_DIR, exist_ok=True)
forecast_df.to_csv(forecast_path, index=False)

# ------------------------
# 4) Plot
# ------------------------
# Draws the last 200 history points and the forecast on one axis and saves a PNG.
print("[4/6] Saving forecast plot...")
fig, ax = plt.subplots(figsize=(12, 6))

history_tail = df_raw.tail(200)
ax.plot(history_tail["time"], history_tail["us_aqi"], label="History")

# History timestamps are tz-naive while the forecast ones carry a timezone.
# Plotting both as-is puts the tz-aware values on the axis as UTC instants,
# shifting the forecast by the UTC offset relative to the history line.
# Drop the tz (keeping wall-clock time) so the two series line up.
forecast_times = forecast_df["datetime"].dt.tz_localize(None)
ax.plot(
    forecast_times,
    forecast_df["predicted_us_aqi"],
    label="ARIMA Forecast",
    linestyle="--",
)

ax.set(title="US AQI: recent history and ARIMA forecast", xlabel="Time", ylabel="US AQI")
ax.legend()
ax.grid(True, alpha=0.3)
fig.tight_layout()
fig.savefig(os.path.join(PLOTS_DIR, "arima_forecast.png"), dpi=140)
# Close the figure explicitly so it is not held in memory or re-rendered.
plt.close(fig)

# ------------------------
# 5) Evaluate (last 20% of history)
# ------------------------
# Refits the same ARIMA order on the first 80% of the series and scores one
# multi-step forecast over the whole remaining 20% (not a rolling one-step
# evaluation), so errors grow with the length of the hold-out.
print("[5/6] Evaluating ARIMA on last 20% history...")
split_idx = int(len(y) * 0.8)
y_train, y_test = y[:split_idx], y[split_idx:]
model_val = ARIMA(y_train, order=(5,1,2)).fit()
pred_test = model_val.forecast(steps=len(y_test))
mae = mean_absolute_error(y_test, pred_test)
# mean_squared_error returns the MSE; take the square root to report a true
# RMSE in the same units as the target.
rmse = float(np.sqrt(mean_squared_error(y_test, pred_test)))
r2 = r2_score(y_test, pred_test)
print(f"MAE: {mae:.2f}, RMSE: {rmse:.2f}, R²: {r2:.4f}")

print("\n✅ ARIMA pipeline done.")






[1/6] Loading Feature Group from Hopsworks...
Connection closed.
Copy your Api Key (first register/login): https://c.app.hopsworks.ai/account/api/generated

Paste it here: ··········







Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1239199
Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (1.60s) 
[2/6] Training ARIMA model...
[3/6] Forecasting next 72h...
[4/6] Saving forecast plot...




[5/6] Evaluating ARIMA on last 20% history...
MAE: 30.09, RMSE: 1544.45, R²: -0.2713

✅ ARIMA pipeline done.


In [None]:
# arima_forecast.py
import requests
import pandas as pd
import joblib
from datetime import datetime, timedelta

# # === Load trained ARIMA model ===
# MODEL_PATH = "arima_model.pkl"
# model = joblib.load(MODEL_PATH)

# === Config ===
# NOTE(review): these coordinates differ from LATITUDE/LONGITUDE in the training
# cell (33.5973, 73.0479) — confirm which location is intended.
lat, lon = 33.6844, 73.0479  # Example: Islamabad
forecast_hours = 72

# === Step 1: Fetch AQI Forecast from Open-Meteo ===
aqi_url = (
    "https://air-quality-api.open-meteo.com/v1/air-quality"
    f"?latitude={lat}&longitude={lon}&hourly=us_aqi"
)

# A timeout stops the cell hanging on a stalled connection; raise_for_status
# surfaces HTTP errors here instead of as a KeyError on the JSON below.
aqi_response = requests.get(aqi_url, timeout=30)
aqi_response.raise_for_status()
aqi_data = aqi_response.json()
df_aqi = pd.DataFrame({
    "datetime": aqi_data["hourly"]["time"],
    "aqi": aqi_data["hourly"]["us_aqi"]
})

# Convert datetime to pandas datetime and index the API series by it
df_aqi["datetime"] = pd.to_datetime(df_aqi["datetime"])
df_aqi = df_aqi.set_index("datetime")

# === Step 2: Forecast with ARIMA ===
# Relies on `model_fit` (and `df_raw` below) from the training cell, which must
# have been run first in this kernel.
forecast = model_fit.forecast(steps=forecast_hours)
# The forecast method returns a numpy array, so create a DataFrame manually
pred_df = pd.DataFrame({"mean": forecast})


# === Step 3: Combine timestamps with predictions ===
# The ARIMA forecast continues the training series, so step k is k hours after
# the last training timestamp. Labelling it with the first rows of the API
# response would only be right if the two series happened to start together.
hourly_step = pd.Timedelta(hours=1)
forecast_start = df_raw["time"].iloc[-1] + hourly_step
pred_df["datetime"] = pd.date_range(
    start=forecast_start, periods=forecast_hours, freq=hourly_step
)
pred_df = pred_df.reset_index(drop=True)

print("\n=== ARIMA Forecast ===")
print(pred_df[["datetime", "mean"]])


=== ARIMA Forecast ===
              datetime        mean
0  2025-08-15 00:00:00   97.358120
1  2025-08-15 01:00:00  100.567657
2  2025-08-15 02:00:00  103.138915
3  2025-08-15 03:00:00  105.055568
4  2025-08-15 04:00:00  106.229798
..                 ...         ...
67 2025-08-17 19:00:00  105.713944
68 2025-08-17 20:00:00  105.697435
69 2025-08-17 21:00:00  105.713804
70 2025-08-17 22:00:00  105.697574
71 2025-08-17 23:00:00  105.713666

[72 rows x 2 columns]
