In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv('cleaned_data.csv')
df = df[['Support', 'Resistance', 'Hammer', 'Williams%R', 'ZeroCross', 'SignalCross', 'Decision']]
df = df[33:-2]  # We exclud the first 33 days and the last two days
df

Unnamed: 0,Support,Resistance,Hammer,Williams%R,ZeroCross,SignalCross,Decision
33,0,0,0.0,0,1.0,1.0,1
34,1,0,0.0,0,1.0,1.0,1
35,0,0,0.0,1,1.0,1.0,0
36,0,0,0.0,1,1.0,1.0,0
37,0,0,0.0,1,1.0,1.0,0
...,...,...,...,...,...,...,...
1035,0,0,0.0,0,0.0,1.0,1
1036,0,1,0.0,1,0.0,1.0,1
1037,0,0,0.0,0,0.0,1.0,1
1038,0,0,0.0,1,0.0,1.0,1


In [3]:
def cal_prior(df, Y):

    classes = sorted(list(df[Y].unique()))
    prior = []

    for i in classes:
        prior.append(len(df[df[Y] == i]) / len(df[Y]))
    
    return prior

In [4]:
def cal_likelihood(df, feature_name, feature_value, Y, label):

    df = df[df[Y] == label]
    mean = df[feature_name].mean()
    std = df[feature_name].std()

    p_x_given_y = len(df[df[feature_name] == feature_value]) / len(df)

    return p_x_given_y

In [5]:
def naive_bayes(df, X, Y):

    features = list(df.columns)[:-1]
    prior = cal_prior(df, Y)

    Y_pred = []

    for x in X:
        labels = sorted(list(df[Y].unique()))
        likelihood = [1]*len(labels)
        for i in range(len(labels)):
            for j in range(len(features)):
                likelihood[i] *= cal_likelihood(df, features[j], x[j], Y, labels[i])
        
        # Calculate posterior probability (we ignore denominator)
        post_prob = [1]*len(labels)
        for i in range(len(labels)):
            post_prob[i] = likelihood[i] * prior[i]
        
        Y_pred.append(np.argmax(post_prob))

    return np.array(Y_pred)

In [6]:
from sklearn.model_selection import train_test_split
train, test = train_test_split(df, test_size=0.25, random_state=40, shuffle = False)

X_test = test.iloc[:, :-1].values
Y_test = test.iloc[:, -1].values
Y_pred = naive_bayes(train, X=X_test, Y='Decision')

from sklearn.metrics import confusion_matrix, f1_score
from sklearn.metrics import balanced_accuracy_score
print(confusion_matrix(Y_test, Y_pred))
print(f1_score(Y_test, Y_pred, average='micro'))
print(balanced_accuracy_score(Y_test, Y_pred))

[[ 18 103]
 [  5 126]]
0.5714285714285714
0.5552961958236073


## Comparing results with pre-built models
### taken on https://scikit-learn.org/stable/modules/generated/sklearn.naive_bayes.CategoricalNB.html#sklearn.naive_bayes.CategoricalNB

In [7]:
from sklearn.naive_bayes import CategoricalNB

In [8]:
classifier = CategoricalNB()
classifier.fit(train.iloc[:,:-1].values, train.iloc[:,-1].values)

CategoricalNB()

In [9]:
Y_pred2 = classifier.predict(test.iloc[:, :-1].values)

In [10]:
print(confusion_matrix(Y_test, Y_pred2))
print(f1_score(Y_test, Y_pred2, average='micro'))

[[ 18 103]
 [  5 126]]
0.5714285714285714


In [11]:
classifier.score(test.iloc[:, :-1].values,Y_test)

0.5714285714285714

In [12]:
from sklearn.naive_bayes import BernoulliNB

In [13]:
clf = BernoulliNB(binarize =1);
clf.fit(train.iloc[:,:-1].values, train.iloc[:,-1].values)
clf.predict(test.iloc[:, :-1].values)
clf.score(test.iloc[:, :-1].values,Y_test)

0.5198412698412699

In [14]:
from sklearn.naive_bayes import MultinomialNB

In [15]:
mnb = MultinomialNB(class_prior = [1.025,1])
mnb.fit(train.iloc[:,:-1].values, train.iloc[:,-1].values)
mnb.predict(test.iloc[:, :-1].values)
print(mnb.score(test.iloc[:, :-1].values,Y_test))
print(balanced_accuracy_score(Y_test, mnb.predict(test.iloc[:, :-1].values)))

0.6071428571428571
0.6095199041069964


In [16]:
pred = mnb.predict(test.iloc[:, :-1].values)

In [17]:
print(confusion_matrix(Y_test,pred))

[[81 40]
 [59 72]]


In [18]:
mnb.classes_

array([0, 1], dtype=int64)

In [19]:
print(train["Decision"].value_counts())
print(test["Decision"].value_counts())

1    420
0    335
Name: Decision, dtype: int64
1    131
0    121
Name: Decision, dtype: int64


In [20]:
pred

array([1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1,
       1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1,
       1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1,
       0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 1, 1, 1, 1, 0, 1, 1, 1], dtype=int64)

In [21]:
test

Unnamed: 0,Support,Resistance,Hammer,Williams%R,ZeroCross,SignalCross,Decision
788,1,0,0.0,0,1.0,0.0,1
789,0,0,0.0,0,1.0,0.0,1
790,0,0,0.0,1,1.0,1.0,0
791,0,0,0.0,1,1.0,1.0,0
792,0,0,0.0,0,1.0,1.0,0
...,...,...,...,...,...,...,...
1035,0,0,0.0,0,0.0,1.0,1
1036,0,1,0.0,1,0.0,1.0,1
1037,0,0,0.0,0,0.0,1.0,1
1038,0,0,0.0,1,0.0,1.0,1
