In [27]:
import pandas as pd
import numpy as np
from colorama import Fore, Style
from scipy.stats import norm

# Atur opsi tampilan pandas
pd.set_option('display.float_format', lambda x: '%.2f' % x)

# Baca data dari file CSV
df = pd.read_csv('../dataset/Weather.csv')

# Hapus kolom 'date'
df.drop('date', axis=1, inplace=True, errors='ignore')

# Hitung rata-rata dan standar deviasi untuk setiap kelas
stats = df.groupby('weather').agg(['mean', 'std'])

In [28]:
df.head()

Unnamed: 0,precipitation,temp_max,temp_min,wind,weather
0,0.0,12.8,5.0,4.7,drizzle
1,10.9,10.6,2.8,4.5,rain
2,0.8,11.7,7.2,2.3,rain
3,20.3,12.2,5.6,4.7,rain
4,1.3,8.9,2.8,6.1,rain


In [29]:
# Menghitung nilai rata-rata dan standar deviasi untuk setiap fitur berdasarkan kelas cuaca
for weather in df['weather'].unique():
    print(Fore.GREEN + f'Weather: {weather}' + Style.RESET_ALL)
    for feature in ['precipitation', 'temp_max', 'temp_min', 'wind']:
        mean = stats.loc[weather, (feature, 'mean')]
        std = stats.loc[weather, (feature, 'std')]
        print(Fore.RED + f'Feature: {feature}' + Style.RESET_ALL)
        print(f'Mean: {mean:.2f}')
        print(f'Standard Deviation: {std:.2f}\n')

[32mWeather: drizzle[0m
[31mFeature: precipitation[0m
Mean: 0.00
Standard Deviation: 0.00

[31mFeature: temp_max[0m
Mean: 15.93
Standard Deviation: 8.81

[31mFeature: temp_min[0m
Mean: 7.11
Standard Deviation: 6.18

[31mFeature: wind[0m
Mean: 2.37
Standard Deviation: 0.91

[32mWeather: rain[0m
[31mFeature: precipitation[0m
Mean: 6.56
Standard Deviation: 8.65

[31mFeature: temp_max[0m
Mean: 13.45
Standard Deviation: 4.97

[31mFeature: temp_min[0m
Mean: 7.59
Standard Deviation: 3.95

[31mFeature: wind[0m
Mean: 3.67
Standard Deviation: 1.59

[32mWeather: sun[0m
[31mFeature: precipitation[0m
Mean: 0.00
Standard Deviation: 0.00

[31mFeature: temp_max[0m
Mean: 19.86
Standard Deviation: 7.70

[31mFeature: temp_min[0m
Mean: 9.34
Standard Deviation: 5.51

[31mFeature: wind[0m
Mean: 2.96
Standard Deviation: 1.16

[32mWeather: snow[0m
[31mFeature: precipitation[0m
Mean: 8.55
Standard Deviation: 7.02

[31mFeature: temp_max[0m
Mean: 5.57
Standard Deviation: 3.11


In [36]:
# Hitung prior
prior = df['weather'].value_counts() / len(df)

print(prior)

# Misalkan new_instance adalah instance baru untuk prediksi
new_instance = {'precipitation': 0.0,
                'temp_max': 12.8, 'temp_min': 5.0, 'wind': 4.7}

weather
rain      0.44
sun       0.44
fog       0.07
drizzle   0.04
snow      0.02
Name: count, dtype: float64


In [31]:
# Hitung likelihood dan posterior untuk setiap kelas
best_posterior = -1
best_weather = None
for weather in df['weather'].unique():
    print(Fore.GREEN + f'Weather: {weather}' + Style.RESET_ALL)
    likelihood = 1
    for feature in ['precipitation', 'temp_max', 'temp_min', 'wind']:
        mean = stats.loc[weather, (feature, 'mean')]
        std = stats.loc[weather, (feature, 'std')]
        # Jika standar deviasi adalah nol
        if std == 0:
            # Jika nilai fitur sama dengan mean, likelihood tetap 1
            # Jika tidak, likelihood menjadi 0 dan loop bisa dihentikan
            if new_instance[feature] != mean:
                likelihood = 0
                break
        else:
            # Tambahkan smoothing untuk mencegah likelihood menjadi 0
            std += 1e-6
            # Hitung likelihood fitur yang diberikan kelas
            likelihood *= norm.pdf(new_instance[feature], mean, std)
    # Hitung probabilitas posterior kelas
    posterior = prior[weather] * likelihood
    print(f'Original Posterior Probability: {format(posterior, ".10f")}')
    # Transformasi logaritmik dan invers
    transformed_posterior = (-np.log(posterior))**-1
    print(
    f'Transformed Posterior Probability: {format(transformed_posterior, ".10f")}\n')

[32mWeather: drizzle[0m
Original Posterior Probability: 0.0000015562
Transformed Posterior Probability: 0.0747761605

[32mWeather: rain[0m
Original Posterior Probability: 0.0000200346
Transformed Posterior Probability: 0.0924380851

[32mWeather: sun[0m
Original Posterior Probability: 0.0000882943
Transformed Posterior Probability: 0.1071256256

[32mWeather: snow[0m
Original Posterior Probability: 0.0000000191
Transformed Posterior Probability: 0.0562583684

[32mWeather: fog[0m
Original Posterior Probability: 0.0000136796
Transformed Posterior Probability: 0.0892888736



In [32]:
   # Tentukan kelas dengan probabilitas posterior terbaik
if transformed_posterior > best_posterior:
        best_posterior = transformed_posterior
        best_weather = weather

print(
    f"Best posterior: {best_weather} with transformed probability {format(best_posterior, '.6f')}")

Best posterior: fog with transformed probability 0.089289
