In [4]:
import numpy as np
import pandas as pd
import math as m
import matplotlib.pyplot as plt

class NaiveBayesClassifier:
    """Gaussian Naive Bayes classifier over numeric columns of a CSV file.

    Each selected feature is modelled, per class, as an independent normal
    distribution whose mean and standard deviation are estimated from the
    data the model is fitted on.  A row is assigned to the class with the
    highest prior-weighted likelihood.

    Attributes:
        df: The full dataset as loaded from ``filename``.
        fac: Column names of ``df`` in file order; feature specifications
            such as ``"0:3"`` or ``"1,2,3"`` index into this list.
    """

    # Lower bound applied to the per-class standard deviations so that a
    # constant feature (std == 0) or a class with a single sample (std is
    # NaN) does not turn the densities into inf/NaN.
    _MIN_STD = 1e-9

    def __init__(self, filename):
        """Load the dataset and remember its column order.

        Args:
            filename: Path (or anything ``pandas.read_csv`` accepts) of the
                CSV file to load.
        """
        self.df = self._load_data(filename)
        self.fac = list(self.df.columns)

    def _load_data(self, filename):
        """Return the contents of the CSV file ``filename`` as a DataFrame."""
        return pd.read_csv(filename)

    def _normdist(self, x, m1, s1):
        """Return the normal probability density at ``x``.

        Args:
            x: Point(s) at which to evaluate the density (scalar or array).
            m1: Mean of the distribution.
            s1: Standard deviation of the distribution (must be non-zero).

        Returns:
            The density value(s), with the same shape as the broadcast
            inputs.
        """
        coeff = 1 / (np.sqrt(2 * m.pi) * s1)
        return coeff * np.exp(-0.5 * ((x - m1) / s1) ** 2)

    def _parse_features(self, input_features, target_column):
        """Translate a feature specification into a list of column names.

        Args:
            input_features: Either an inclusive index range ``"a:b"`` or a
                comma-separated list of indices ``"i,j,k"``; indices refer
                to positions in ``self.fac``.
            target_column: Name of the label column; it is never returned as
                a feature even if the specification covers its index.

        Returns:
            The selected feature column names, without duplicates, in the
            order given by the specification.

        Raises:
            ValueError: If an index is not an integer or no feature remains.
            IndexError: If an index is outside ``self.fac``.
        """
        if ':' in input_features:
            bounds = input_features.split(':')
            indices = range(int(bounds[0]), int(bounds[1]) + 1)
        else:
            indices = [int(token) for token in input_features.split(',')]

        names = []
        for idx in indices:
            name = self.fac[idx]
            # Using the label as a predictor would leak the answer, and a
            # repeated column would be counted twice in the likelihood.
            if name != target_column and name not in names:
                names.append(name)
        if not names:
            raise ValueError("no feature columns selected")
        return names

    def _fit(self, df, target_column, input_features):
        """Estimate the per-class Gaussian parameters and priors from ``df``.

        Args:
            df: Data to fit on; must contain ``target_column`` and the
                selected feature columns.
            target_column: Name of the label column.
            input_features: Feature specification (see ``_parse_features``).

        Returns:
            A dict with keys ``features`` (column names), ``classes``
            (sorted class labels), ``mean`` and ``std`` (arrays of shape
            ``(n_classes, n_features)``) and ``log_prior`` (array of shape
            ``(n_classes,)``).

        Raises:
            ValueError: If ``df`` has no rows.
        """
        if df.empty:
            raise ValueError("cannot fit on an empty dataset")

        features = self._parse_features(input_features, target_column)
        by_class = df.groupby(target_column)
        grouped = by_class[features]
        mean = grouped.mean()
        # Sample standard deviation (ddof=1), requested explicitly instead
        # of handing np.std to pandas and depending on how it is dispatched.
        std = grouped.std()
        # groupby sorts the class labels, so mean, std and prior rows all
        # share the same class order.
        prior = by_class.size() / len(df)

        sigma = np.nan_to_num(std.to_numpy(dtype=float), nan=0.0)
        return {
            "features": features,
            "classes": mean.index.to_numpy(),
            "mean": mean.to_numpy(dtype=float),
            "std": np.maximum(sigma, self._MIN_STD),
            "log_prior": np.log(prior.to_numpy(dtype=float)),
        }

    def _score(self, model, df, target_column):
        """Return the fraction of rows of ``df`` that ``model`` labels correctly.

        Args:
            model: Parameters as returned by ``_fit``.
            df: Data to classify; must contain the model's feature columns
                and ``target_column``.
            target_column: Name of the column holding the true labels.

        Returns:
            Accuracy in ``[0, 1]`` as a float.

        Raises:
            ValueError: If ``df`` has no rows.
        """
        if df.empty:
            raise ValueError("cannot compute accuracy on an empty dataset")

        x = df[model["features"]].to_numpy(dtype=float)
        # Broadcast (rows, 1, features) against (classes, features).
        z = (x[:, None, :] - model["mean"]) / model["std"]
        # Work with log-densities: the arg-max is the same as for the
        # product of densities, but many features cannot underflow to 0.
        log_density = (
            -0.5 * z ** 2 - np.log(model["std"]) - 0.5 * np.log(2 * m.pi)
        )
        joint = log_density.sum(axis=2) + model["log_prior"]
        predicted = model["classes"][joint.argmax(axis=1)]
        actual = df[target_column].to_numpy()
        return float(np.mean(predicted == actual))

    def _naive_bayes(self, df, target_column, input_features, eval_df=None):
        """Fit on ``df`` and return the classification accuracy.

        Args:
            df: Data used to estimate the model parameters.
            target_column: Name of the label column.
            input_features: Feature specification, e.g. ``"0:3"`` or
                ``"1,2,3"`` (indices into ``self.fac``).
            eval_df: Data to measure accuracy on.  Defaults to ``df``
                itself, i.e. the accuracy on the fitting data.

        Returns:
            Accuracy in ``[0, 1]`` as a float.
        """
        model = self._fit(df, target_column, input_features)
        if eval_df is None:
            eval_df = df
        return self._score(model, eval_df, target_column)

    def train_and_test(self, target_column, input_features, random_state=None):
        """Fit on a random 80% split and evaluate on both splits.

        The held-out 20% is scored with the model fitted on the training
        split, so the second value is a genuine out-of-sample accuracy.

        Args:
            target_column: Name of the label column.
            input_features: Feature specification, e.g. ``"0:3"`` or
                ``"1,2,3"`` (indices into ``self.fac``).
            random_state: Optional seed forwarded to ``DataFrame.sample``
                to make the split reproducible.

        Returns:
            A ``(train_accuracy, test_accuracy)`` tuple of floats in
            ``[0, 1]``.
        """
        train_df = self.df.sample(frac=0.80, random_state=random_state)
        test_df = self.df.drop(train_df.index)

        model = self._fit(train_df, target_column, input_features)
        train_accuracy = self._score(model, train_df, target_column)
        test_accuracy = self._score(model, test_df, target_column)

        return train_accuracy, test_accuracy

if __name__ == "__main__":
    # Interactive entry point: ask for a CSV file, a target column and a
    # feature specification, then report train/test accuracy.
    filename = input("Enter your file name: ")

    # Build the classifier up front so the CSV is parsed only once; its
    # column list is reused for the prompt below.
    classifier = NaiveBayesClassifier(filename)
    print("The features are: ")
    print(classifier.fac)

    target_column = input("Enter your target name: ")
    if target_column not in classifier.fac:
        # Fail early with a readable message instead of a KeyError raised
        # from inside pandas during training.
        raise SystemExit("Unknown target column: " + repr(target_column))
    input_features = input('Input your variable features (e.g., "0:3" or "1,2,3"): ')

    train_accuracy, test_accuracy = classifier.train_and_test(target_column, input_features)

    print("Naive Bayes accuracy for training dataset:", train_accuracy * 100, "%")
    print("Naive Bayes accuracy for testing dataset:", test_accuracy * 100, "%")


Enter your file name: iris.csv
The features are: 
['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm', 'Species']
Enter your target name: Species
Input your variable features (e.g., "0:3" or "1,2,3"): 0:3
Naive Bayes accuracy for training dataset: 98.33333333333333 %
Naive Bayes accuracy for testing dataset: 93.33333333333333 %
