In [1]:
from naiveBayesModel import NaiveBayesClass
import pandas as pd
from collections import Counter

In [2]:
data = pd.read_pickle("data/train/indonesian_pos_train.pkl")
df = pd.DataFrame(data)

X = df['words'].values.tolist()
Y = df['pos'].values.tolist()

model = NaiveBayesClass(
    X=X, 
    Y=Y,
    type="discreate"
)
model.save_parameters(
    filepath="naive_bayes_pos.json"
)

Saved model in model/naive_bayes_pos.json


In [3]:
model = NaiveBayesClass()
model.load_parameters('model/naive_bayes_pos.json')

In [4]:
data_test = pd.read_pickle("data/test/indonesian_pos_test.pkl")
df_test = pd.DataFrame(data_test)

X_test = df['words'].values.tolist()
Y_test = df['pos'].values.tolist()

In [5]:
states = list(set([s for seq in Y_test for s in seq]))
tp_all = Counter()
total_all = sum([len(seq) for seq in Y_test])
for s in states:
    tp = 0
    total = 0
    for i in range(len(X_test)):
        for j in range(len(X_test[i])):
            Y_pred = model.predict(
                X=[X_test[i][j]],
                type="discreate"
            )
            if Y_test[i][j] == s:
                if Y_pred == Y_test[i][j]:
                    tp += 1
                    tp_all[s] += 1
                total += 1
    print(f"\t{s:<10} | {tp/total:.2f}")
print(f"\n{'Accuracy total':>10} | {sum(tp_all.values())/total_all:.2f}")

	PRON       | 0.98
	SCONJ      | 0.32
	NUM        | 0.77
	PART       | 0.95
	CCONJ      | 0.99
	SYM        | 0.00
	DET        | 0.90
	VERB       | 0.96
	X          | 0.00
	ADP        | 0.98
	AUX        | 0.97
	_          | 0.29
	NOUN       | 0.96
	PROPN      | 0.97
	PUNCT      | 1.00
	ADJ        | 0.68
	ADV        | 0.73
	INTJ       | 0.00

Accuracy total | 0.92


In [120]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.naive_bayes import GaussianNB
import seaborn as sns
import matplotlib.pyplot as plt

le = LabelEncoder()

In [None]:
data = pd.read_csv("data/raw/Heart_Attack_Risk_Levels_Dataset.csv")
df = pd.DataFrame(data)
df = df.drop(columns=['Recommendation'])

df['Result'] = le.fit_transform(df['Result'])
df['Risk_Level'] = le.fit_transform(df['Risk_Level'])


columns = ['Age', 'Gender', 'Heart rate', 'Systolic blood pressure', 'Diastolic blood pressure', 
           'Blood sugar', 'CK-MB', 'Troponin']

for feature in columns:
    mean = df[feature].mean()
    std = df[feature].std()
    df[feature] = (df[feature] - mean) / std

X = df.drop(columns=['Result'])
y = df['Result']
    
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
model = GaussianNB()

model.fit(X_train, y_train)
y_pred = model.predict(X_test)


print(classification_report(y_test, y_pred))

# Confusion Matrix
cm = confusion_matrix(y_test, y_pred)
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.title(f'Naive Bayes - Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

In [127]:
data = pd.read_csv("data/raw/Heart_Attack_Risk_Levels_Dataset.csv")
df = pd.DataFrame(data)


columns = ['Age', 'Heart rate', 'Systolic blood pressure', 'Diastolic blood pressure', 
           'Blood sugar', 'CK-MB', 'Troponin']
split_index = int(len(df) * 0.8)
x_train, x_test = df[columns][:split_index], df[columns][split_index:]
y_train, y_test = df["Result"][:split_index], df["Result"][split_index:]

In [128]:
# Gabungkan X dan Y sebagai kolom
train_data = pd.concat([x_train, y_train], axis=1)
test_data = pd.concat([x_test, y_test], axis=1)

train_data.to_pickle("data/train/heart_attack_train.pkl")
test_data.to_pickle("data/test/heart_attack_test.pkl")

In [109]:
data = pd.read_pickle("data/train/heart_attack_train.pkl")
df = pd.DataFrame(data)

X = df[columns].values.tolist()
Y = df['Result'].values.tolist()

model = NaiveBayesClass(
    X=X, 
    Y=Y,
    type="continous"
)
model.save_parameters(
    filepath="heart_attack_classifier.json"
)

Saved model in model/heart_attack_classifier.json


In [110]:
from naiveBayesModel import NaiveBayesClass
import pandas as pd
from collections import Counter

In [111]:
model = NaiveBayesClass()
model.load_parameters('model/heart_attack_classifier.json')

In [112]:
data_test = pd.read_pickle("data/test/heart_attack_test.pkl")
df_test = pd.DataFrame(data_test)

X_test = df_test[columns].values.tolist()
Y_test = df_test['Result'].values.tolist()

In [113]:
states = list(set(Y_test))
tp_all = Counter()
total_all = len(Y_test)
for s in states:
    tp = 0
    total = 0
    for i in range(len(X_test)):
        Y_pred = model.predict(
            X=X_test[i],
            type="continous"
        )
        if Y_test[i] == s:
            if Y_pred == Y_test[i]:
                tp += 1
                tp_all[s] += 1
            total += 1
    print(f"\t{s:<10} | {tp/total:.2f}")

print(f"\n{'Accuracy total':>10} | {sum(tp_all.values())/total_all:.2f}")

	negative   | 0.99
	positive   | 0.50

Accuracy total | 0.69
