In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

In [2]:
# Load the historical observations used for training.
# The path is relative to the notebook's working directory.
# NOTE(review): the file name suggests East Java ("jawa timur") weather data — confirm.
csv_path = "dataset_convert/jawa timur.csv"
data = pd.read_csv(csv_path)

In [3]:
# Features: the three calendar columns (day, month, year).
feature_columns = ['tanggal', 'bulan', 'tahun']
X = data[feature_columns]

# One target Series per weather variable; each gets its own model later on.
(
    y_temp,
    y_humidity,
    y_cloudcover,
    y_sealevelpressure,
    y_windspeed,
    y_precip,
) = (
    data[target_column]
    for target_column in (
        'temp',
        'humidity',
        'cloudcover',
        'sealevelpressure',
        'windspeed',
        'precip',
    )
)

In [4]:
# Split the features and all six targets in ONE call.
# train_test_split applies the same shuffled row partition to every array it
# receives, so X_train/X_test are guaranteed to line up with each y_*_train /
# y_*_test without relying on repeating identical arguments in separate calls.
# 80% of the rows go to training, 20% to testing; random_state fixes the shuffle.
# NOTE(review): this is a random (shuffled) split; if the rows form a time
# series, consider whether a chronological hold-out is more appropriate.
(
    X_train, X_test,
    y_temp_train, y_temp_test,
    y_humidity_train, y_humidity_test,
    y_cloudcover_train, y_cloudcover_test,
    y_sealevelpressure_train, y_sealevelpressure_test,
    y_windspeed_train, y_windspeed_test,
    y_precip_train, y_precip_test,
) = train_test_split(
    X,
    y_temp,
    y_humidity,
    y_cloudcover,
    y_sealevelpressure,
    y_windspeed,
    y_precip,
    test_size=0.2,
    random_state=42,
)

In [5]:
# Six independent, identically configured random forests (100 trees, fixed
# seed), one per target variable. The generator builds a fresh estimator for
# each name, so no two variables share the same object.
(
    rf_temp,
    rf_humidity,
    rf_cloudcover,
    rf_sealevelpressure,
    rf_windspeed,
    rf_precip,
) = (
    RandomForestRegressor(n_estimators=100, random_state=42)
    for _ in range(6)
)

In [6]:
# Fit each forest on the shared training features and its own training target.
# The order matches the model definitions: temp, humidity, cloudcover,
# sealevelpressure, windspeed, precip.
training_pairs = (
    (rf_temp, y_temp_train),
    (rf_humidity, y_humidity_train),
    (rf_cloudcover, y_cloudcover_train),
    (rf_sealevelpressure, y_sealevelpressure_train),
    (rf_windspeed, y_windspeed_train),
    (rf_precip, y_precip_train),
)
for estimator, train_target in training_pairs:
    estimator.fit(X_train, train_target)

In [7]:
# Predict every target on the held-out test features, one array per model.
(
    temp_pred_test,
    humidity_pred_test,
    cloudcover_pred_test,
    sealevelpressure_pred_test,
    windspeed_pred_test,
    precip_pred_test,
) = (
    estimator.predict(X_test)
    for estimator in (
        rf_temp,
        rf_humidity,
        rf_cloudcover,
        rf_sealevelpressure,
        rf_windspeed,
        rf_precip,
    )
)

In [8]:
# Mean absolute error per target: (true test values, test predictions) pairs,
# listed in the same order as the mae_* names they are unpacked into.
evaluation_pairs = (
    (y_temp_test, temp_pred_test),
    (y_humidity_test, humidity_pred_test),
    (y_cloudcover_test, cloudcover_pred_test),
    (y_sealevelpressure_test, sealevelpressure_pred_test),
    (y_windspeed_test, windspeed_pred_test),
    (y_precip_test, precip_pred_test),
)
(
    mae_temp,
    mae_humidity,
    mae_cloudcover,
    mae_sealevelpressure,
    mae_windspeed,
    mae_precip,
) = (
    mean_absolute_error(actual, predicted)
    for actual, predicted in evaluation_pairs
)

In [9]:
# Report the test-set MAE of every model, one line per target.
# Each value is in the unit of its own target column.
mae_report = [
    f"Mean Absolute Error (MAE) untuk suhu (temp): {mae_temp}",
    f"Mean Absolute Error (MAE) untuk kelembapan (humidity): {mae_humidity}",
    f"Mean Absolute Error (MAE) untuk tutupan awan (cloudcover): {mae_cloudcover}",
    f"Mean Absolute Error (MAE) untuk tekanan (sealevelpressure): {mae_sealevelpressure}",
    f"Mean Absolute Error (MAE) untuk kecepatan angin (windspeed): {mae_windspeed}",
    f"Mean Absolute Error (MAE) untuk curah hujan (precip): {mae_precip}",
]
for report_line in mae_report:
    print(report_line)

Mean Absolute Error (MAE) untuk suhu (temp): 0.6291949384404928
Mean Absolute Error (MAE) untuk kelembapan (humidity): 2.3828891928864624
Mean Absolute Error (MAE) untuk tutupan awan (cloudcover): 6.5471422708618325
Mean Absolute Error (MAE) untuk tekanan (sealevelpressure): 0.529666894664816
Mean Absolute Error (MAE) untuk kecepatan angin (windspeed): 3.2900745554035566
Mean Absolute Error (MAE) untuk curah hujan (precip): 4.1799439398084814


In [10]:
# Daily timestamps for the whole forecast horizon. Both endpoints are
# inclusive, so this covers every day of 2024 (a leap year) and 2025,
# including 31 December 2025 (731 days in total).
date_range = pd.date_range(start='2024-01-01', end='2025-12-31')

In [11]:
# Break every forecast date into the day / month / year columns that the
# models take as input.
future_dates = pd.DataFrame(
    dict(
        tanggal=date_range.day,
        bulan=date_range.month,
        tahun=date_range.year,
    )
)

In [12]:
# Label every forecast row with the first distinct value of the 'name' column
# in the training data.
# NOTE(review): if the dataset holds more than one name, only the first is used.
distinct_names = data['name'].unique()
unique_name = distinct_names[0]
future_dates = future_dates.assign(name=unique_name)

In [13]:
# Select the model inputs once and reuse them for every forecast.
# NOTE(review): tree ensembles predict from feature values seen during
# training; verify how the 'tahun' values here relate to the years in the
# training data.
future_features = future_dates[['tanggal', 'bulan', 'tahun']]
(
    pred_temp,
    pred_humidity,
    pred_cloudcover,
    pred_sealevelpressure,
    pred_windspeed,
    pred_precip,
) = (
    estimator.predict(future_features)
    for estimator in (
        rf_temp,
        rf_humidity,
        rf_cloudcover,
        rf_sealevelpressure,
        rf_windspeed,
        rf_precip,
    )
)

In [14]:
# Build the output table: identifying columns first (name, day, month, year),
# then one prediction column per weather variable, in the same column order
# as before.
results = future_dates[['name', 'tanggal', 'bulan', 'tahun']].assign(
    temp=pred_temp,
    humidity=pred_humidity,
    cloudcover=pred_cloudcover,
    sealevelpressure=pred_sealevelpressure,
    windspeed=pred_windspeed,
    precip=pred_precip,
)

In [15]:
# Write the forecast table to CSV in the working directory; index=False keeps
# the DataFrame's row index out of the file.
results.to_csv(path_or_buf='prediksi_harian_2024_2025.csv', index=False)

# Confirmation message for the notebook reader.
print("Prediksi harian telah disimpan ke 'prediksi_harian_2024_2025.csv'")

Prediksi harian telah disimpan ke 'prediksi_harian_2024_2025.csv'
