In [1]:
import numpy as np
import cv2
import pandas as pd
from sklearn.model_selection import train_test_split

class NaiveBayes:
    """Gaussian Naive Bayes classifier.

    Each feature is modelled, per class, as an independent normal
    distribution whose mean and variance are estimated in ``fit``.
    ``predict`` picks the class with the largest log-posterior
    (log-prior plus the sum of per-feature log-densities).

    Attributes set by ``fit``:
        classes: sorted unique labels found in ``y_train``.
        mean, var, prior: dicts keyed by ``str(label)`` holding the
            per-feature mean, per-feature variance and the class prior.
    """

    def fit(self, X_train, y_train):
        """Estimate per-class feature means, variances and priors.

        Args:
            X_train: 2-D array-like of shape (n_samples, n_features).
            y_train: 1-D array-like of n_samples labels.
        """
        # Convert up front so DataFrame / list inputs behave like ndarrays
        # in the boolean-mask indexing below.
        X_train = np.asarray(X_train, dtype=float)
        y_train = np.asarray(y_train)
        self.X_train = X_train
        self.y_train = y_train
        self.classes = np.unique(y_train)
        self.mean = {}
        self.var = {}
        self.prior = {}
        for c in self.classes:
            key = str(c)
            X_c = X_train[y_train == c]
            self.mean[key] = np.mean(X_c, axis=0)
            self.var[key] = np.var(X_c, axis=0)
            # Prior = fraction of training samples that belong to the class.
            self.prior[key] = X_c.shape[0] / X_train.shape[0]

    def predict(self, X_test):
        """Return the most probable class for every row of ``X_test``.

        Args:
            X_test: 2-D array-like of shape (n_samples, n_features).

        Returns:
            A list with one predicted label per sample.
        """
        X_test = np.asarray(X_test, dtype=float)
        if X_test.size == 0:
            return []
        log_posteriors = []
        for c in self.classes:
            key = str(c)
            log_prior = np.log(self.prior[key])
            # Sum the per-feature log-densities for every sample at once.
            log_likelihood = np.sum(
                self._log_pdf(X_test, self.mean[key], self.var[key]), axis=1
            )
            log_posteriors.append(log_prior + log_likelihood)
        # Rows are classes, columns are samples; argmax keeps the first
        # class on ties, as a per-sample argmax over a list would.
        best = np.argmax(np.array(log_posteriors), axis=0)
        return list(self.classes[best])

    def _log_pdf(self, x, mean, var):
        """Log of ``pdf`` computed without going through ``np.exp``.

        Taking ``np.log(self.pdf(...))`` underflows to ``-inf`` when a value
        lies far from the mean; this form stays finite. The 1e-9 terms are
        placed exactly as in ``pdf`` so both agree wherever ``pdf`` does not
        underflow.
        """
        log_norm = -0.5 * np.log(2 * np.pi * var + 1e-9)
        return log_norm - (x - mean) ** 2 / (2 * var + 1e-9)

    # Probability density function (PDF) of the normal distribution
    def pdf(self, x, mean, var):
        """Gaussian density of ``x`` for the given mean and variance.

        The 1e-9 terms keep the divisions finite when ``var`` is 0.
        """
        exponent = np.exp(-((x - mean) ** 2 / (2 * var + 1e-9)))
        return (1 / np.sqrt(2 * np.pi * var + 1e-9)) * exponent

    def score(self, X_test, y_test):
        """Return the fraction of samples in ``X_test`` predicted correctly.

        Args:
            X_test: 2-D array-like of shape (n_samples, n_features).
            y_test: 1-D array-like of the n_samples true labels.
        """
        # Compare as arrays: a list-vs-list ``==`` would give a single bool
        # instead of an element-wise result.
        y_pred = np.asarray(self.predict(X_test))
        y_true = np.asarray(y_test)
        return np.mean(y_pred == y_true)

# Read the data (the CSV has no header row)
data = pd.read_csv('wine_data.csv', header=None, delimiter=",")
# The first column is the label: the kind of wine
label = data[0]
# The remaining columns are the wine-composition features (13 per the original note)
feature = data.iloc[:, 1:]

# Split the dataset into training and testing subsets, half each
X_train, X_test, y_train, y_test = train_test_split(
    feature, label, test_size=0.5, random_state=42
)
# Work on plain NumPy arrays from here on
X_train = np.array(X_train)
X_test = np.array(X_test)
y_train = np.array(y_train)
y_test = np.array(y_test)

# Train the classifier
nb = NaiveBayes()
nb.fit(X_train, y_train)

# Predict the classes of the test data
y_pred = nb.predict(X_test)

# Calculate the accuracy of the classifier on the test set
accuracy = nb.score(X_test, y_test)

# Print the predicted classes and accuracy.
# The test score reuses `accuracy` instead of scoring the test set a second time.
print("Predicted classes:", y_pred)
print('Training Set Score:', nb.score(X_train, y_train))
print('Test Set Score:', accuracy)

Predicted classes: [1, 1, 3, 1, 2, 1, 2, 3, 2, 3, 1, 3, 1, 2, 1, 2, 2, 2, 1, 2, 1, 2, 2, 3, 3, 3, 2, 2, 2, 1, 1, 2, 3, 1, 1, 1, 3, 3, 2, 3, 1, 2, 2, 2, 3, 1, 2, 2, 3, 1, 2, 1, 1, 3, 3, 2, 2, 1, 2, 1, 3, 2, 2, 3, 1, 1, 1, 3, 1, 1, 2, 3, 2, 1, 3, 2, 1, 3, 2, 2, 1, 2, 1, 1, 2, 2, 1, 3, 2]
Training Set Score: 0.9775280898876404
Test Set Score: 0.9887640449438202


In [2]:
from sklearn.naive_bayes import GaussianNB

# Reference run with scikit-learn's GaussianNB on the same split,
# for comparison with the hand-written NaiveBayes scores.
modelg = GaussianNB()
modelg.fit(X_train, y_train)
# The bare predict / predict_proba calls were dropped: their results were
# discarded mid-cell, so they displayed nothing and affected no output.
print('Training Set Score:', modelg.score(X_train, y_train))
print('Test Set Score:', modelg.score(X_test, y_test))

Training Set Score: 0.9775280898876404
Test Set Score: 0.9887640449438202
