#### Here I implement the Naive Bayes algorithm from scratch. Naive Bayes comes in several variants, such as Bernoulli NB, Gaussian NB, and multinomial NB. I implement Bernoulli NB because I am using the famous weather dataset, which has binary features.

In [6]:
# imports 
import numpy as np
import pandas as pd

In [7]:
# Accuracy helper named after sklearn's accuracy_score; this version returns a percentage rounded to two decimals
def accuracy_score(y_true, y_pred):
	"""Return the percentage of predictions that equal the true labels.

	Both inputs are compared element by element, by position. They may be
	lists, numpy arrays or pandas Series; pandas index labels are ignored.

	Args:
		y_true: the reference labels.
		y_pred: the predicted labels, same length as ``y_true``.

	Returns:
		The share of matching positions as a percentage (0-100), rounded to
		two decimals.

	Raises:
		ValueError: if the two inputs do not have the same shape.
		ZeroDivisionError: if the inputs are empty.
	"""
	# Convert to arrays first: comparing two plain lists with == yields a
	# single bool, and two Series with different indexes refuse to compare.
	truth = np.asarray(y_true)
	guess = np.asarray(y_pred)
	if truth.shape != guess.shape:
		raise ValueError(
			"y_true and y_pred must have the same shape, got {} and {}".format(
				truth.shape, guess.shape
			)
		)

	hits = int(np.count_nonzero(truth == guess))
	return round(hits / truth.size * 100, 2)

In [8]:
def split(df):
	"""Separate a DataFrame into its feature columns and its target column.

	The last column of ``df`` is treated as the target; every other column
	is a feature.

	Args:
		df: pandas DataFrame whose final column holds the target.

	Returns:
		A ``(X, y)`` tuple: ``X`` is ``df`` without the target column and
		``y`` is the target column selected from ``df``.
	"""
	target_name = df.columns[-1]
	feature_frame = df.drop(columns=[target_name])
	target_series = df[target_name]
	return feature_frame, target_series

In [9]:
class Bernoulli_NB_Classifier:
	"""Naive Bayes classifier for discrete (e.g. binary) feature values.

	All probabilities are plain relative frequencies counted on the training
	data. No smoothing is applied, so a feature value that never occurs
	together with a class keeps a likelihood of 0 for that class.

	Attributes (populated by ``fit``):
		features: list of feature (column) names of the training frame.
		likelihoods: ``{feature: {"<value>_<class>": P(value | class)}}``.
		class_priors: ``{class: P(class)}``.
		pred_priors: ``{feature: {value: P(value)}}``.
		X: the training features exactly as passed to ``fit``.
		y: the training labels exactly as passed to ``fit``.
		n: number of training rows.
		m: number of features.
	"""

	def __init__(self):
		# Empty placeholders; the real values are set by fit()
		self.features = []
		self.likelihoods = {}
		self.class_priors = {}
		self.pred_priors = {}

		self.X = None
		self.y = None
		self.n = 0
		self.m = 0

	@staticmethod
	def _key(feat_val, outcome):
		# Key of one likelihood entry. Formatting (rather than '+') keeps the
		# "<value>_<class>" layout while also accepting non-string values.
		return '{}_{}'.format(feat_val, outcome)

	def _labels(self):
		# Training labels as a numpy array, so they are compared by position
		# whatever container (Series, list, array) was given to fit().
		return np.asarray(self.y)

	def calculate_prior_prob(self):
		"""Fill ``class_priors`` with the relative frequency of each class."""
		labels = self._labels()
		for outcome in np.unique(labels):
			outcome_count = int(np.count_nonzero(labels == outcome))
			self.class_priors[outcome] = outcome_count / self.n

	def calculate_likelihoods(self):
		"""Fill ``likelihoods`` with P(feature value | class).

		Expects ``likelihoods[feature]`` to exist for every feature (``fit``
		creates them). Only value/class pairs that occur in the training data
		are written; other entries keep whatever value they already had.
		"""
		labels = self._labels()
		for outcome in np.unique(labels):
			# Positional boolean mask: rows of X that belong to this class
			in_class = labels == outcome
			outcome_count = int(np.count_nonzero(in_class))
			for feature in self.features:
				value_counts = self.X[feature][in_class].value_counts().to_dict()
				for feat_val, count in value_counts.items():
					self.likelihoods[feature][self._key(feat_val, outcome)] = count / outcome_count

	def calc_predictor(self):
		"""Fill ``pred_priors`` with the relative frequency of each feature value.

		Expects ``pred_priors[feature]`` to exist for every feature (``fit``
		creates them).
		"""
		for feature in self.features:
			feat_vals = self.X[feature].value_counts().to_dict()
			for feat_val, count in feat_vals.items():
				self.pred_priors[feature][feat_val] = count / self.n

	def fit(self, X, y):
		"""Estimate all probability tables from the training data.

		Args:
			X: pandas DataFrame of discrete features, one column per feature.
			y: the class label of every row of ``X`` (Series, array or list),
				in the same row order as ``X``.

		Raises:
			ValueError: if ``X`` and ``y`` do not have the same number of rows.
		"""
		if len(y) != X.shape[0]:
			raise ValueError(
				"X and y must have the same number of rows, got {} and {}".format(
					X.shape[0], len(y)
				)
			)

		self.features = list(X.columns)
		self.X = X
		self.y = y
		self.n = X.shape[0]
		self.m = X.shape[1]

		# Rebuild every table from scratch so that fitting again on other
		# data does not keep classes or features of the previous fit.
		classes = np.unique(self._labels())
		self.class_priors = {outcome: 0 for outcome in classes}
		self.likelihoods = {}
		self.pred_priors = {}

		for feature in self.features:
			feat_vals = np.unique(self.X[feature])
			self.pred_priors[feature] = {feat_val: 0 for feat_val in feat_vals}
			# Every value/class pair starts at 0, so pairs that never occur
			# in the data are still present when predict() looks them up.
			self.likelihoods[feature] = {
				self._key(feat_val, outcome): 0
				for feat_val in feat_vals
				for outcome in classes
			}

		self.calculate_prior_prob()
		self.calculate_likelihoods()
		self.calc_predictor()

	def predict(self, X):
		"""Predict the most probable class for every row of ``X``.

		Args:
			X: rows to classify (DataFrame or 2-D array-like); the values of
				each row must be in the same order as ``features``.

		Returns:
			numpy array with one predicted class per row.

		Raises:
			KeyError: if a row contains a feature value that did not occur in
				the training data.
		"""
		classes = np.unique(self._labels())
		results = []
		for query in np.array(X):
			probs_outcome = {}
			for outcome in classes:
				likelihood = 1
				evidence = 1
				for feat, feat_val in zip(self.features, query):
					likelihood *= self.likelihoods[feat][self._key(feat_val, outcome)]
					evidence *= self.pred_priors[feat][feat_val]

				# Bayes' rule: likelihood * prior / evidence
				probs_outcome[outcome] = (likelihood * self.class_priors[outcome]) / evidence

			# On ties the first class in np.unique order wins
			results.append(max(probs_outcome, key=probs_outcome.get))

		return np.array(results)

In [10]:
# Load the weather dataset from the working directory
df = pd.read_csv('weather.csv')

In [11]:
if __name__ == "__main__":
	# Separate the feature columns from the target column
	X, y = split(df)

	# Train the classifier on the whole dataset
	nb = Bernoulli_NB_Classifier()
	nb.fit(X, y)

	# Score the model on the same rows it was trained on
	train_predictions = nb.predict(X)
	train_accuracy = accuracy_score(y, train_predictions)
	print("Training Accuracy: {}".format(train_accuracy))


Training Accuracy: 92.86
