In [22]:
#Import library yang dibutuhkan
import pandas as pd
import streamlit as st
import pickle
from scipy.stats.mstats import winsorize
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error,root_mean_squared_error, accuracy_score
from sklearn.model_selection import train_test_split;
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

In [23]:
# Load the stunting dataset from 'stunting.csv' (path is relative to the working directory).
data = pd.read_csv('stunting.csv')
# Preview the first ten rows of the loaded frame.
data.head(10)


Unnamed: 0,Kabupaten,Tahun,PrevalensiStunting,BayiBBLR,IbuNifasVitA,K4,IPM,MinumLayak,SanitasiLayak
0,KAB ASAHAN,2023,11.0,0.69,86.75,92.6,71.56,93.94,92.13
1,KAB ASAHAN,2022,15.3,0.4,62.23,87.23,71.13,92.92,89.33
2,KAB ASAHAN,2021,18.9,0.23,95.29,80.0,70.49,95.78,89.09
3,KAB BATU BARA,2023,17.7,0.59,97.13,86.0,70.31,97.21,92.54
4,KAB BATU BARA,2022,21.7,0.09,99.7,94.98,69.51,96.96,88.25
5,KAB BATU BARA,2021,30.9,0.86,99.7,92.0,68.58,97.83,88.04
6,KAB DAIRI,2023,32.6,1.62,98.73,71.3,73.27,92.31,94.42
7,KAB DAIRI,2022,28.6,0.95,99.05,66.43,72.56,96.68,94.14
8,KAB DAIRI,2021,34.2,0.98,89.72,113.0,71.84,91.9,92.35
9,KAB DELI SERDANG,2023,33.8,0.75,88.95,98.2,76.52,95.51,96.78


In [24]:
# Show the (rows, columns) dimensions of the loaded frame.
data.shape

(99, 9)

In [25]:
# Predictor columns fed to the models, and the column being predicted.
features = [
    'BayiBBLR',
    'IbuNifasVitA',
    'K4',
    'IPM',
    'MinumLayak',
    'SanitasiLayak',
]
target = 'PrevalensiStunting'

In [26]:
# Winsorize every feature column separately within each kabupaten: values are
# clipped to that kabupaten's own 5th and 95th percentiles.
# NOTE(review): the percentiles are computed over all rows of a kabupaten,
# i.e. across every year present in `data`, including the year later held out
# as the test set -- confirm this is intended.

# Collect the per-kabupaten frames in a list and concatenate once at the end;
# growing a DataFrame with pd.concat inside the loop copies all previously
# accumulated rows on every iteration.
winsorized_frames = []

# sort=False keeps the groups in order of first appearance, which is the same
# order produced by data["Kabupaten"].unique().
for kabupaten, kabupaten_data in data.groupby("Kabupaten", sort=False):
    # Work on a copy so the clipping never writes back into `data`.
    kabupaten_data = kabupaten_data.copy()

    # Clip each feature to this kabupaten's 5%-95% quantile range.
    for col in features:
        lower_limit = kabupaten_data[col].quantile(0.05)
        upper_limit = kabupaten_data[col].quantile(0.95)
        kabupaten_data[col] = kabupaten_data[col].clip(lower=lower_limit, upper=upper_limit)

    winsorized_frames.append(kabupaten_data)

# Single concatenation with a fresh 0..n-1 index. pd.concat raises on an empty
# list, so an empty input yields an empty frame that still has the columns of
# `data`.
if winsorized_frames:
    winsorized_data = pd.concat(winsorized_frames, ignore_index=True)
else:
    winsorized_data = data.iloc[0:0].copy()

In [27]:
# Time-based split on the 'Tahun' (year) column:
# rows from 2021 and 2022 are used for training, rows from 2023 for testing.
year_column = winsorized_data["Tahun"]
train_data = winsorized_data.loc[year_column.isin([2021, 2022])]
test_data = winsorized_data.loc[year_column == 2023]

In [28]:
# Column-oriented containers that are turned into DataFrames after the loop:
# `results` holds one row of metrics per (kabupaten, model);
# `predicted_actual_results` holds one row per (kabupaten, model, test row).
results = {'Kabupaten': [], 'Model': [], 'MSE': [], 'MAE': [], 'RMSE': [], 'MAPE': []}
predicted_actual_results = {
    'Kabupaten': [],
    'Model': [],
    'Actual': [],
    'Predicted': []
}

# Train and evaluate the three models separately for every kabupaten.
# Iterate over the *distinct* kabupaten names: looping over the raw column
# would repeat the whole fit/evaluate step once per test row, duplicating the
# recorded metrics and predictions whenever a kabupaten has several test rows.
for city in test_data['Kabupaten'].unique():
    # Rows belonging to the current kabupaten.
    city_train_data = train_data[train_data['Kabupaten'] == city]
    city_test_data = test_data[test_data['Kabupaten'] == city]

    # Skip kabupaten that lack either training or test rows.
    if city_train_data.empty or city_test_data.empty:
        continue

    # Split into feature matrix and target vector.
    X_train_city = city_train_data[features]
    y_train_city = city_train_data[target]
    X_test_city = city_test_data[features]
    y_test_city = city_test_data[target]

    # Standardize the features for SVR; the scaler is fitted on the training
    # rows only and then applied to the test rows.
    scaler = StandardScaler()
    X_train_city_scaled = scaler.fit_transform(X_train_city)
    X_test_city_scaled = scaler.transform(X_test_city)

    # SVR with a linear kernel, trained on the standardized features.
    svr_model = SVR(kernel='linear')
    svr_model.fit(X_train_city_scaled, y_train_city)
    y_pred_svr = svr_model.predict(X_test_city_scaled)

    # Decision tree, trained on the unscaled features.
    dt_model = DecisionTreeRegressor(random_state=42)
    dt_model.fit(X_train_city, y_train_city)
    y_pred_dt = dt_model.predict(X_test_city)

    # Random forest with three trees, trained on the unscaled features.
    rf_model = RandomForestRegressor(n_estimators=3, random_state=42)
    rf_model.fit(X_train_city, y_train_city)
    y_pred_rf = rf_model.predict(X_test_city)

    # Compute the metrics and record the predictions for each model.
    for model_name, y_pred in zip(['SVR', 'Decision Tree', 'Random Forest'], [y_pred_svr, y_pred_dt, y_pred_rf]):
        mse = mean_squared_error(y_test_city, y_pred)
        mae = mean_absolute_error(y_test_city, y_pred)
        rmse = np.sqrt(mse)
        # MAPE expressed in percent; it divides by the actual values, so an
        # actual value of 0 would produce inf/NaN here.
        mape = np.mean(np.abs((y_test_city - y_pred) / y_test_city)) * 100

        # One metrics row per (kabupaten, model).
        results['Kabupaten'].append(city)
        results['Model'].append(model_name)
        results['MSE'].append(mse)
        results['MAE'].append(mae)
        results['RMSE'].append(rmse)
        results['MAPE'].append(mape)

        # One actual-vs-predicted row per test row of this kabupaten.
        predicted_actual_results['Kabupaten'].extend([city] * len(y_test_city))
        predicted_actual_results['Model'].extend([model_name] * len(y_test_city))
        predicted_actual_results['Actual'].extend(y_test_city.values)
        predicted_actual_results['Predicted'].extend(y_pred)

# Convert the collected columns into DataFrames.
results_df = pd.DataFrame(results)
predicted_actual_df = pd.DataFrame(predicted_actual_results)

In [29]:
# Print a heading, then display the actual-vs-predicted table as the cell output.
print("\nPredicted vs Actual:")
predicted_actual_df


Predicted vs Actual:


Unnamed: 0,Kabupaten,Model,Actual,Predicted
0,KAB ASAHAN,SVR,11.0,7.642992
1,KAB ASAHAN,Decision Tree,11.0,15.300000
2,KAB ASAHAN,Random Forest,11.0,16.500000
3,KAB BATU BARA,SVR,17.7,-9.409582
4,KAB BATU BARA,Decision Tree,17.7,21.700000
...,...,...,...,...
94,KOTA TANJUNG BALAI,Decision Tree,5.7,26.900000
95,KOTA TANJUNG BALAI,Random Forest,5.7,26.633333
96,KOTA TEBING TINGGI,SVR,10.4,19.492893
97,KOTA TEBING TINGGI,Decision Tree,10.4,17.300000


In [30]:
# Print a heading, then display the evaluation-metrics table as the cell output.
print("Evaluation Metrics:")
results_df

Evaluation Metrics:


Unnamed: 0,Kabupaten,Model,MSE,MAE,RMSE,MAPE
0,KAB ASAHAN,SVR,11.269502,3.357008,3.357008,30.518254
1,KAB ASAHAN,Decision Tree,18.490000,4.300000,4.300000,39.090909
2,KAB ASAHAN,Random Forest,30.250000,5.500000,5.500000,50.000000
3,KAB BATU BARA,SVR,734.929439,27.109582,27.109582,153.161480
4,KAB BATU BARA,Decision Tree,16.000000,4.000000,4.000000,22.598870
...,...,...,...,...,...,...
94,KOTA TANJUNG BALAI,Decision Tree,449.440000,21.200000,21.200000,371.929825
95,KOTA TANJUNG BALAI,Random Forest,438.204444,20.933333,20.933333,367.251462
96,KOTA TEBING TINGGI,SVR,82.680704,9.092893,9.092893,87.431664
97,KOTA TEBING TINGGI,Decision Tree,47.610000,6.900000,6.900000,66.346154


In [31]:
# Save the evaluation results into one pickle file per model and offer each for download
def save_model_results(results_df):
    """Write one pickle file per model and add a Streamlit download button for it.

    Parameters
    ----------
    results_df : pandas.DataFrame
        Evaluation table with a 'Model' column. The rows are split by the
        distinct values of that column.

    Notes
    -----
    Each file is named ``<model name lower-cased, spaces as underscores>_models.pkl``
    and is written to the current working directory. Despite the ``_models``
    suffix, the pickle holds the filtered rows of ``results_df`` (the metrics),
    not a fitted estimator.
    """
    for model_name in results_df['Model'].unique():
        # Rows that belong to the current model.
        model_data = results_df[results_df['Model'] == model_name]

        # File name derived from the model name, e.g. "Decision Tree" -> "decision_tree_models.pkl".
        file_name = f"{model_name.replace(' ', '_').lower()}_models.pkl"

        # Serialize once and reuse the bytes for both the file on disk and the
        # download button. This avoids re-opening the file through a handle
        # that is never closed.
        payload = pickle.dumps(model_data)
        with open(file_name, 'wb') as file:
            file.write(payload)

        # Streamlit download button serving the same bytes that were written.
        st.download_button(
            label=f"Download {model_name} Results",
            data=payload,
            file_name=file_name,
            mime="application/octet-stream"
        )

# Streamlit page: title, then the evaluation section.
st.title("Download Model Results")
st.subheader("Evaluation Metrics")

if results_df.empty:
    # No evaluation rows: warn instead of rendering the table and downloads.
    st.warning("Hasil evaluasi tidak ditemukan. Pastikan data tersedia.")
else:
    # Render the metrics table.
    st.write(results_df)

    # Write the per-model pickle files and add their download buttons.
    st.subheader("Download Results")
    save_model_results(results_df)




In [33]:
file_path = 'svr_models.pkl'

# Read the pickle back to inspect what was stored in it.
# pickle.load can execute arbitrary code, so only load files you trust.
with open(file_path, 'rb') as file:
    svr_models = pickle.load(file)

# Summarize the loaded object: its type, its keys, and the type of each item.
item_types = {key: type(value) for key, value in svr_models.items()}
type(svr_models), svr_models.keys(), item_types


(pandas.core.frame.DataFrame,
 Index(['Kabupaten', 'Model', 'MSE', 'MAE', 'RMSE', 'MAPE'], dtype='object'),
 {'Kabupaten': pandas.core.series.Series,
  'Model': pandas.core.series.Series,
  'MSE': pandas.core.series.Series,
  'MAE': pandas.core.series.Series,
  'RMSE': pandas.core.series.Series,
  'MAPE': pandas.core.series.Series})