## Daily Weather Data Retrieval for Bandung 2020–2025 from NASA POWER API

In [None]:
import requests, pandas as pd

# Bandung coordinates (latitude, longitude)
lat, lon = -6.9, 107.6

# Daily parameters to request (extend as needed).
# NOTE: in this cell's recorded output the last parameter comes back under the
# column name "TOA_SW_DWN", not "ALLSKY_TOA_SW_DWN" - downstream cells use
# the returned name.
parameters = [
    "T2M","T2M_MAX","T2M_MIN","RH2M","WS2M","WS10M","WS50M",
    "T2MDEW","T2MWET","PS","PRECTOTCORR",
    "ALLSKY_SFC_SW_DWN","ALLSKY_TOA_SW_DWN"
]
param_str = ",".join(parameters)

# Request the range 2020-01-01 .. 2025-12-31
url = f"https://power.larc.nasa.gov/api/temporal/daily/point?parameters={param_str}&community=AG&longitude={lon}&latitude={lat}&start=20200101&end=20251231&format=JSON"

# Call the API. requests has no default timeout, so set one to keep the cell
# from hanging on a stalled connection, and fail loudly on an HTTP error
# instead of hitting a confusing KeyError on the JSON lookup below.
response = requests.get(url, timeout=60)
response.raise_for_status()
r = response.json()

# One entry per parameter; turned into one DataFrame column each below,
# with the date strings as the index.
params = r["properties"]["parameter"]

# Build the DataFrame and expose the index as a regular 'Date' column
df = pd.DataFrame(params)
df["Date"] = df.index
df["City"] = "Bandung"

# Persist to CSV (the index is dropped; 'Date' carries the same values)
df.to_csv("weather_bandung_2020_2025.csv", index=False)

print("Jumlah data:", len(df))
print(df.head())


Jumlah data: 2104
          WS50M     PS  T2MWET  ALLSKY_SFC_SW_DWN   RH2M  PRECTOTCORR  \
20200101   4.44  91.07   20.82               7.61  94.14        49.22   
20200102   3.89  91.16   20.35              15.15  90.21         6.75   
20200103   4.25  91.04   20.13              15.74  90.19         4.13   
20200104   3.67  90.89   21.08              13.54  88.39         3.25   
20200105   3.74  90.91   21.43               8.42  90.51         7.44   

          T2M_MIN  WS10M    T2M  T2MDEW  WS2M  T2M_MAX  TOA_SW_DWN      Date  \
20200101    19.37   3.12  21.33   20.32  2.14    22.71       38.13  20200101   
20200102    18.79   2.73  21.25   19.46  1.85    24.85       38.15  20200102   
20200103    17.92   3.00  21.03   19.24  2.03    24.14       38.16  20200103   
20200104    18.26   2.55  22.17   20.00  1.71    26.57       38.18  20200104   
20200105    20.08   2.61  22.30   20.56  1.73    25.38       38.19  20200105   

             City  
20200101  Bandung  
20200102  Bandung  
20

## Cleaning the Bandung Weather Data (2020–2025) and Extracting Date Components

In [None]:
import pandas as pd

# Read the CSV written by the retrieval step so the notebook runs end-to-end
# after a kernel restart (no cell produces an .xlsx copy of the raw data).
file_path = "weather_bandung_2020_2025.csv"
df = pd.read_csv(file_path)

# Parse 'Date' (YYYYMMDD) into datetime. astype(str) keeps this working
# whether the column was read back as integers or as strings.
df['Date'] = pd.to_datetime(df['Date'].astype(str), format='%Y%m%d')

# NASA POWER marks missing observations with the sentinel -999 instead of
# leaving them empty. Convert those to NaN so that a later dropna() really
# removes incomplete days rather than training on -999 as if it were data.
FILL_VALUE = -999
numeric_cols = df.select_dtypes(include="number").columns
df[numeric_cols] = df[numeric_cols].replace(FILL_VALUE, float("nan"))

# Add Year, Month, Day columns derived from 'Date'
df['Year'] = df['Date'].dt.year
df['Month'] = df['Date'].dt.month
df['Day'] = df['Date'].dt.day

# Save the frame (datetime 'Date' + Year, Month, Day) to a new Excel file
output_path = "weather_bandung_2020_2025_clean_data.xlsx"
df.to_excel(output_path, index=False)

# Show the first 5 rows
df.head()

'weather_bandung_2020_2025_clean_data.xlsx'

In [None]:
# Preview the first five rows of the current frame
df.head(n=5)

Unnamed: 0,WS50M,PS,T2MWET,ALLSKY_SFC_SW_DWN,RH2M,PRECTOTCORR,T2M_MIN,WS10M,T2M,T2MDEW,WS2M,T2M_MAX,TOA_SW_DWN,Date,City,Year,Month,Day
0,4.44,91.07,20.82,7.61,94.14,49.22,19.37,3.12,21.33,20.32,2.14,22.71,38.13,2020-01-01,Bandung,2020,1,1
1,3.89,91.16,20.35,15.15,90.21,6.75,18.79,2.73,21.25,19.46,1.85,24.85,38.15,2020-01-02,Bandung,2020,1,2
2,4.25,91.04,20.13,15.74,90.19,4.13,17.92,3.0,21.03,19.24,2.03,24.14,38.16,2020-01-03,Bandung,2020,1,3
3,3.67,90.89,21.08,13.54,88.39,3.25,18.26,2.55,22.17,20.0,1.71,26.57,38.18,2020-01-04,Bandung,2020,1,4
4,3.74,90.91,21.43,8.42,90.51,7.44,20.08,2.61,22.3,20.56,1.73,25.38,38.19,2020-01-05,Bandung,2020,1,5


## Weather Forecast Modeling for Bandung (2020–2025) with Random Forest, XGBoost, and Neural Networks

In [None]:
import warnings

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.exceptions import ConvergenceWarning
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from xgboost import XGBRegressor

# Silence scikit-learn ConvergenceWarning messages
warnings.filterwarnings(action="ignore", category=ConvergenceWarning)


# =======================
# 1. Load dataset
# =======================
# Load the cleaned workbook and discard every row that contains a NaN
df = pd.read_excel("weather_bandung_2020_2025_clean_data.xlsx").dropna()

# =======================
# 2. Evaluation helper
# =======================
def evaluate(y_true, y_pred, model_name, target_name):
    """Print and return MAE and RMSE for one model/target pair.

    Args:
        y_true: Ground-truth values.
        y_pred: Predicted values.
        model_name: Label of the model; used in the printed line and the result.
        target_name: Label of the target; used in the printed line and the result.

    Returns:
        dict with the keys "Model", "Target", "MAE" and "RMSE".
    """
    metrics = {
        "Model": model_name,
        "Target": target_name,
        "MAE": mean_absolute_error(y_true, y_pred),
        # RMSE is the square root of the mean squared error
        "RMSE": np.sqrt(mean_squared_error(y_true, y_pred)),
    }
    print(f"[{model_name}] Target={target_name} | MAE={metrics['MAE']:.3f} | RMSE={metrics['RMSE']:.3f}")
    return metrics

results = []

# Shared hold-out settings so every experiment is split the same way.
# NOTE(review): the rows are consecutive days, and a shuffled split puts
# neighbouring days in both train and test; consider a chronological split
# (shuffle=False) if these scores are meant to reflect forecasting skill.
TEST_SIZE = 0.2
SEED = 42

# Candidate models. The MLP is wrapped in a StandardScaler pipeline because
# gradient-trained networks are sensitive to feature scale and the inputs here
# live on very different ranges (e.g. PS ~91, wind ~2-4, Year ~2020).
# Tree models are scale-invariant and are left as-is.
models = {
    "RandomForest": RandomForestRegressor(n_estimators=100, random_state=SEED),
    "XGBoost": XGBRegressor(n_estimators=200, random_state=SEED),
    "NeuralNet": make_pipeline(
        StandardScaler(),
        MLPRegressor(hidden_layer_sizes=(64, 32), max_iter=500, random_state=SEED),
    ),
}

# =======================
# 3. Temperature (T2M) inputs
# =======================
X_temp = df[["PS", "RH2M", "T2MDEW", "ALLSKY_SFC_SW_DWN", "TOA_SW_DWN"]]
y_temp = df["T2M"]

# =======================
# 4. Rainfall (PRECTOTCORR) inputs
# =======================
X_rain = df[["PS", "RH2M", "T2MWET", "T2MDEW", "WS2M", "ALLSKY_SFC_SW_DWN"]]
y_rain = df["PRECTOTCORR"]

# Fit and score every model on each single-target task (T2M first, then
# PRECTOTCORR). Each estimator is refit from scratch per task.
single_target_tasks = (
    ("T2M", X_temp, y_temp),
    ("PRECTOTCORR", X_rain, y_rain),
)
for target_name, X_task, y_task in single_target_tasks:
    X_train, X_test, y_train, y_test = train_test_split(
        X_task, y_task, test_size=TEST_SIZE, random_state=SEED
    )
    for name, model in models.items():
        model.fit(X_train, y_train)
        preds = model.predict(X_test)
        results.append(evaluate(y_test, preds, name, target_name))

# =======================
# 5. Multi-target: [T2M, PRECTOTCORR, RH2M]
# =======================
target_cols = ["T2M", "PRECTOTCORR", "RH2M"]

# Every numeric column except the targets is used as a feature.
# NOTE(review): this includes same-day T2M_MAX / T2M_MIN / T2MDEW / T2MWET and
# the calendar columns (Year, Month, Day) - confirm that is intended.
X_multi = df.select_dtypes(include=[np.number]).drop(columns=target_cols)
y_multi = df[target_cols]

X_train, X_test, y_train, y_test = train_test_split(
    X_multi, y_multi, test_size=TEST_SIZE, random_state=SEED
)

for name, model in models.items():
    # MultiOutputRegressor fits one clone of the estimator per target column
    multi_model = MultiOutputRegressor(model)
    multi_model.fit(X_train, y_train)
    preds = multi_model.predict(X_test)

    # Per-target scores: the three targets have different units, so only
    # these rows are directly interpretable.
    for col_idx, target_name in enumerate(target_cols):
        results.append(
            evaluate(y_test[target_name], preds[:, col_idx], f"{name} (multi)", target_name)
        )

    # Aggregate score averaged over all three targets; kept so the summary
    # still contains the original combined row.
    mae = mean_absolute_error(y_test, preds)
    rmse = np.sqrt(mean_squared_error(y_test, preds))
    print(f"[{name}] Multi-Target | MAE={mae:.3f} | RMSE={rmse:.3f}")
    results.append({"Model": name, "Target": "Multi(T2M,PRECTOTCORR,RH2M)", "MAE": mae, "RMSE": rmse})

# =======================
# 6. Final results
# =======================
results_df = pd.DataFrame(results)
print("\n=== Ringkasan Hasil ===")
print(results_df)


[RandomForest] Target=T2M | MAE=0.062 | RMSE=0.135
[XGBoost] Target=T2M | MAE=0.064 | RMSE=0.126
[NeuralNet] Target=T2M | MAE=0.046 | RMSE=0.078
[RandomForest] Target=PRECTOTCORR | MAE=6.256 | RMSE=13.542
[XGBoost] Target=PRECTOTCORR | MAE=6.654 | RMSE=14.244
[NeuralNet] Target=PRECTOTCORR | MAE=6.524 | RMSE=13.233
[RandomForest] Multi-Target | MAE=2.180 | RMSE=7.687
[XGBoost] Multi-Target | MAE=2.248 | RMSE=7.794
[NeuralNet] Multi-Target | MAE=3.029 | RMSE=7.904

=== Ringkasan Hasil ===
          Model                       Target       MAE       RMSE
0  RandomForest                          T2M  0.061873   0.135065
1       XGBoost                          T2M  0.063794   0.126219
2     NeuralNet                          T2M  0.046392   0.077922
3  RandomForest                  PRECTOTCORR  6.256013  13.542352
4       XGBoost                  PRECTOTCORR  6.653742  14.244156
5     NeuralNet                  PRECTOTCORR  6.524387  13.233169
6  RandomForest  Multi(T2M,PRECTOTCORR,RH2M) 

## RandomForest Multi-Output Regression for Weather Forecasting and Gzip Model Export

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.ensemble import RandomForestRegressor
from sklearn.multioutput import MultiOutputRegressor
import joblib
import gzip
import warnings
from sklearn.exceptions import ConvergenceWarning

# Silence scikit-learn ConvergenceWarning messages
warnings.filterwarnings(action="ignore", category=ConvergenceWarning)

# =======================
# 1. Load dataset
# =======================
# Load the cleaned workbook and discard every row that contains a NaN
df = pd.read_excel("weather_bandung_2020_2025_clean_data.xlsx").dropna()

# =======================
# 2. Evaluation helper
# =======================
def evaluate(y_true, y_pred, model_name, target_name):
    """Print and return MAE and RMSE for one model/target pair.

    Args:
        y_true: Ground-truth values.
        y_pred: Predicted values.
        model_name: Label of the model; used in the printed line and the result.
        target_name: Label of the target; used in the printed line and the result.

    Returns:
        dict with the keys "Model", "Target", "MAE" and "RMSE".
    """
    metrics = {
        "Model": model_name,
        "Target": target_name,
        "MAE": mean_absolute_error(y_true, y_pred),
        # RMSE is the square root of the mean squared error
        "RMSE": np.sqrt(mean_squared_error(y_true, y_pred)),
    }
    print(f"[{model_name}] Target={target_name} | MAE={metrics['MAE']:.3f} | RMSE={metrics['RMSE']:.3f}")
    return metrics

results = []

# =======================
# 3. Multi-target RandomForest
# =======================
target_cols = ["T2M", "PRECTOTCORR", "RH2M"]

# Use every numeric column except the targets as a feature
X_multi = df.select_dtypes(include=[np.number]).drop(columns=target_cols)
y_multi = df[target_cols]

X_train, X_test, y_train, y_test = train_test_split(X_multi, y_multi, test_size=0.2, random_state=42)

# MultiOutputRegressor fits one RandomForest per target column
rf_model = MultiOutputRegressor(RandomForestRegressor(n_estimators=100, random_state=42))
rf_model.fit(X_train, y_train)

# Predict on the hold-out set
preds = rf_model.predict(X_test)

# Per-target scores: the three targets have different units, so a single
# averaged number hides how well each one is actually predicted.
for col_idx, target_name in enumerate(target_cols):
    results.append(evaluate(y_test[target_name], preds[:, col_idx], "RandomForest Multi", target_name))

# Aggregate score averaged over all targets (kept alongside the per-target rows)
mae = mean_absolute_error(y_test, preds)
rmse = np.sqrt(mean_squared_error(y_test, preds))
print(f"[RandomForest Multi-Target] MAE={mae:.3f} | RMSE={rmse:.3f}")
results.append({"Model": "RandomForest Multi", "Target": "Multi(T2M,PRECTOTCORR,RH2M)", "MAE": mae, "RMSE": rmse})

# =======================
# 4. Save the model to disk (gzip-compressed)
# =======================
model_filename = "rf_multi_weather_model_compressed.joblib.gz"
# Let joblib write the gzip container itself (level 3). Passing compress=3
# while also writing through gzip.open() compresses the stream twice and
# yields a file that a plain joblib.load(model_filename) cannot read.
# With a single gzip layer, joblib.load(model_filename) works directly.
joblib.dump(rf_model, model_filename, compress=("gzip", 3))
print(f"Model RandomForest multi-output berhasil disimpan sebagai '{model_filename}' (terkompresi)")

# =======================
# 5. Results summary
# =======================
# Collect the metric records into one table and print it under a header line
results_df = pd.DataFrame(data=results)
print("\n=== Ringkasan Hasil ===", results_df, sep="\n")


[RandomForest Multi-Target] MAE=2.180 | RMSE=7.687
Model RandomForest multi-output berhasil disimpan sebagai 'rf_multi_weather_model_compressed.joblib.gz' (terkompresi)

=== Ringkasan Hasil ===
                Model                       Target       MAE      RMSE
0  RandomForest Multi  Multi(T2M,PRECTOTCORR,RH2M)  2.179871  7.687241
