In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error

In [2]:
# Read the weather observations from the CSV file in the working directory.
data = pd.read_csv(filepath_or_buffer='dataset.csv')

In [3]:
# Preview the first five rows to check the columns that were loaded.
data.head(n=5)

Unnamed: 0,name,tanggal,bulan,tahun,temp,humidity,cloudcover,sealevelpressure,windspeed,precip,temp_celsius
0,ACEH,1,1,2014,24.5,89.2,85.8,1010.7,13.0,8.806,24.5
1,ACEH,2,1,2014,24.7,85.5,78.7,1011.1,14.8,76.537,24.7
2,ACEH,3,1,2014,23.9,90.4,90.1,1012.4,16.2,1.14,23.9
3,ACEH,4,1,2014,24.5,86.1,83.4,1011.6,10.8,1.5,24.5
4,ACEH,5,1,2014,24.6,85.5,66.4,1010.3,9.7,10.012,24.6


In [4]:
# Accumulators filled by the per-province loop:
#   results     -> one forecast DataFrame per province
#   mae_results -> one dict of rounded MAE scores per province
results, mae_results = [], []

In [5]:
# Distinct values of the 'name' column, in order of first appearance;
# the training loop iterates over these one at a time.
unique_names = pd.unique(data['name'])

In [None]:
# For every province: train one random-forest regressor per weather variable
# on the calendar features, score each on a held-out 20 % split, and predict
# every day of the forecast calendar. Produces:
#   mae_results   -> list of dicts, one per province, with rounded MAE values
#   results       -> list of forecast DataFrames, one per province
#   final_results -> all forecast DataFrames concatenated

# Calendar columns used as model inputs.
FEATURE_COLUMNS = ['tanggal', 'bulan', 'tahun']

# Output column name -> dataset column used as the training target.
# Only 'temp' differs: it is trained on the 'temp_celsius' column.
TARGET_COLUMNS = {
    'temp': 'temp_celsius',
    'humidity': 'humidity',
    'cloudcover': 'cloudcover',
    'sealevelpressure': 'sealevelpressure',
    'windspeed': 'windspeed',
    'precip': 'precip',
}

# Start from empty lists so that re-running this cell does not append a
# second copy of every province to lists left over from a previous run.
results = []
mae_results = []

# The forecast calendar is identical for every province, so build it once.
# The range ends on 31 December so the last day of 2030 is included.
date_range = pd.date_range(start='2024-01-01', end='2030-12-31')
future_features = pd.DataFrame({
    'tanggal': date_range.day,
    'bulan': date_range.month,
    'tahun': date_range.year,
})

# NOTE(review): tree ensembles cannot extrapolate beyond the feature range
# seen in training; if the dataset ends before 2024, 'tahun' carries no
# additional signal for the forecast years -- confirm this is acceptable.
for name in unique_names:
    province_data = data[data['name'] == name]

    X = province_data[FEATURE_COLUMNS]
    y = province_data[list(TARGET_COLUMNS.values())]

    # A single split keeps features and all six targets aligned; it yields
    # the same partition as splitting each target separately with the same
    # random_state.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42)

    mae_row = {'name': name}
    forecast_columns = {
        'name': name,
        'tanggal': future_features['tanggal'],
        'bulan': future_features['bulan'],
        'tahun': future_features['tahun'],
    }

    for target, column in TARGET_COLUMNS.items():
        model = RandomForestRegressor(n_estimators=100, random_state=42)
        model.fit(X_train, y_train[column])

        mae = mean_absolute_error(y_test[column], model.predict(X_test))
        # Depending on the scikit-learn version the metric is returned as a
        # plain Python float, which has no .round() method, so use the
        # built-in round() on an explicit float instead.
        mae_row[f'mae_{target}'] = round(float(mae), 1)

        forecast_columns[target] = model.predict(
            future_features[FEATURE_COLUMNS]).round(1)

    mae_results.append(mae_row)
    results.append(pd.DataFrame(forecast_columns))

final_results = pd.concat(results, ignore_index=True)

In [None]:
# One row per province: the 'name' key plus the rounded MAE of each target.
mae_df = pd.DataFrame(data=mae_results)

In [None]:
# Write the forecasts and the per-province error table to CSV;
# index=False keeps the positional row index out of the files.
final_results.to_csv(path_or_buf='prediksi_harian_2024_2030.csv', index=False)
mae_df.to_csv(path_or_buf='mae_per_provinsi.csv', index=False)

# Confirmation messages for the user (emitted in Indonesian, unchanged).
for message in (
    "Prediksi harian untuk semua provinsi telah disimpan ke 'prediksi_harian_2024_2030.csv'",
    "MAE untuk setiap provinsi telah disimpan ke 'mae_per_provinsi.csv'",
):
    print(message)