<a href="https://colab.research.google.com/github/windyrahayu45/ML-PadangPanjang/blob/main/Forecast_Migrasi_%26_Pertumbuhan_Penduduk_Kota.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np

# 1) Load the final household-level dataset (the same file the other use cases read).
df = pd.read_csv("dtsen_with_scores.csv")

# 2) Baseline population per kelurahan: sum of household members found in the data.
household_sizes = df.groupby("kelurahan")["jumlah_anggota_keluarga"]
pop_base = household_sizes.sum().rename("pop_base").reset_index()

kelurahan_list = pop_base["kelurahan"].tolist()

# 3) Monthly (month-start) timeline from 2019-01 through 2025-09; adjust as needed.
dates = pd.date_range(start="2019-01-01", end="2025-09-01", freq="MS")

# 4) Synthetic growth & migration scenario, simulated independently per kelurahan.
#    NOTE: the order of the rng calls below determines the generated numbers,
#    so reordering them changes the saved series even with the same seed.
rng = np.random.default_rng(42)
rows = []
for _, rec in pop_base.iterrows():
    kel, base = rec["kelurahan"], rec["pop_base"]

    # Annual growth is drawn uniformly from 0.2%–1.5% and converted to the
    # equivalent compounding monthly rate.
    annual_growth = rng.uniform(0.002, 0.015)
    monthly_growth = (1 + annual_growth) ** (1/12) - 1

    # Standard deviation (0.25%) of the normally distributed monthly migration shock.
    mig_sigma = 0.0025

    # Start the series slightly below the baseline.
    level = base * 0.9
    for d in dates:
        # Simple seasonality: the factor is above 1 for months 1–5 and below 1
        # for months 7–11, so the compounded level crests around mid-year.
        season = 1.0 + 0.003*np.sin(2*np.pi*(d.month/12))
        # Random migration shock for this month.
        migration = rng.normal(0, mig_sigma)
        level = level * (1 + monthly_growth) * season * (1 + migration)
        # Only the stored value is floored at 10; the running level is not.
        rows.append(dict(date=d, kelurahan=kel, population=max(level, 10)))

ts = pd.DataFrame(rows)

# 5) Add synthetic migration metrics (inflow/outflow) so they can be analysed:
#    a small share of the population scaled by multiplicative noise, clipped at
#    zero and rounded. Inflow noise is drawn before outflow noise.
n_obs = len(ts)
inflow_noise = 1 + rng.normal(0, 0.2, n_obs)
ts["mig_in"] = (ts["population"] * 0.003 * inflow_noise).clip(lower=0).round()
outflow_noise = 1 + rng.normal(0, 0.2, n_obs)
ts["mig_out"] = (ts["population"] * 0.0025 * outflow_noise).clip(lower=0).round()

# 6) Save the synthetic time series (for reference & reproducibility).
ts.to_csv("ts_penduduk_kelurahan_2019_2025.csv", index=False)
print("✅ Saved ts_penduduk_kelurahan_2019_2025.csv")


✅ Saved ts_penduduk_kelurahan_2019_2025.csv


In [2]:
# !pip install prophet
from prophet import Prophet

def forecast_kelurahan_prophet(ts_df, kelurahan, horizon_months=60):
    """Fit a Prophet model to one kelurahan's population series and forecast it.

    Parameters
    ----------
    ts_df : pandas.DataFrame
        Long-format series with at least the columns ``date``, ``kelurahan``
        and ``population``.
    kelurahan
        Value matched (with ``==``) against ``ts_df["kelurahan"]``.
    horizon_months : int, default 60
        Number of month-start periods appended after the observed dates.

    Returns
    -------
    pandas.DataFrame
        Columns ``ds``, ``kelurahan``, ``yhat``, ``yhat_lower``, ``yhat_upper``
        for every row Prophet predicts (the future frame built by
        ``make_future_dataframe``, which by default also contains the
        training dates).

    Raises
    ------
    ValueError
        If fewer than two non-null population values exist for ``kelurahan``
        (e.g. the name is misspelled or absent from ``ts_df``).
    """
    # Select and rename by column name (Prophet needs ds, y) instead of
    # overwriting ``.columns`` positionally.
    df_k = ts_df.loc[ts_df["kelurahan"] == kelurahan, ["date", "population"]].rename(
        columns={"date": "ds", "population": "y"}
    )

    # Fail early with a message that names the offending kelurahan; Prophet
    # would otherwise raise a generic ValueError for the same condition.
    n_obs = int(df_k["y"].notna().sum())
    if n_obs < 2:
        raise ValueError(
            f"Cannot forecast kelurahan {kelurahan!r}: found {n_obs} non-null "
            "population observation(s), need at least 2."
        )

    m = Prophet(
        yearly_seasonality=True,
        weekly_seasonality=False,
        daily_seasonality=False,
        seasonality_mode="additive",
        interval_width=0.9,  # width of the yhat_lower / yhat_upper interval
    )
    m.fit(df_k)

    future = m.make_future_dataframe(periods=horizon_months, freq="MS")
    fcst = m.predict(future)
    # Tag rows so per-kelurahan forecasts can be concatenated later.
    fcst["kelurahan"] = kelurahan
    return fcst[["ds", "kelurahan", "yhat", "yhat_lower", "yhat_upper"]]

# Forecast every kelurahan (5-year horizon) and stack the results into one frame.
all_fcst = [
    forecast_kelurahan_prophet(ts, kel, horizon_months=60)
    for kel in kelurahan_list
]
fcst_prophet = pd.concat(all_fcst, ignore_index=True)

# Save the combined per-kelurahan forecast.
fcst_prophet.to_csv("forecast_penduduk_prophet_5y.csv", index=False)
print("✅ Saved forecast_penduduk_prophet_5y.csv")


DEBUG:cmdstanpy:input tempfile: /tmp/tmpm9a8dfvt/cmcs8z6p.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpm9a8dfvt/deynkys3.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.12/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=59527', 'data', 'file=/tmp/tmpm9a8dfvt/cmcs8z6p.json', 'init=/tmp/tmpm9a8dfvt/deynkys3.json', 'output', 'file=/tmp/tmpm9a8dfvt/prophet_modelebpqg8m7/prophet_model-20250919160347.csv', 'method=optimize', 'algorithm=newton', 'iter=10000']
16:03:47 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
16:03:48 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
DEBUG:cmdstanpy:input tempfile: /tmp/tmpm9a8dfvt/o52pm8bw.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpm9a8dfvt/lh6qsobs.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local

✅ Saved forecast_penduduk_prophet_5y.csv


In [3]:
# Pick one forecast output (Prophet or SARIMAX) together with the saved history.
fcst = pd.read_csv("forecast_penduduk_prophet_5y.csv")  # or '...sarimax_5y.csv'
hist = pd.read_csv("ts_penduduk_kelurahan_2019_2025.csv")

# City-level monthly aggregation: sum all kelurahan per period, with the
# time column renamed to "period" for the Streamlit app.
hist_city = (
    hist.groupby("date")["population"]
    .sum()
    .reset_index(name="population")
    .rename(columns={"date": "period"})
)
fcst_city = (
    fcst.groupby("ds")["yhat"]
    .sum()
    .reset_index(name="yhat")
    .rename(columns={"ds": "period"})
)

# Save both city-level tables.
hist_city.to_csv("hist_penduduk_kota.csv", index=False)
fcst_city.to_csv("forecast_penduduk_kota_5y.csv", index=False)
print("✅ Saved hist_penduduk_kota.csv & forecast_penduduk_kota_5y.csv")


✅ Saved hist_penduduk_kota.csv & forecast_penduduk_kota_5y.csv
