In [1]:
import numpy as np
import pandas as pd
import math
import matplotlib as plt

In [2]:
data_path = "data/"
gen_data_path = "generated/"
m0_file = "DS1_m_0.txt"
m1_file = "DS1_m_1.txt"
cov_file = "DS1_Cov.txt"
num_features = 20
train_val_test_sizes = [0.6, 0.2, 0.2]

In [3]:
def get_matrix(feat, file_path, squeeze = True):
    if squeeze:
        return np.squeeze(pd.read_csv(file_path, header = None).drop(columns = feat).values, axis = 0)
    else:
        return pd.read_csv(file_path, header = None).drop(columns = feat).values

In [4]:
def get_x_y_values(dataset):
    return dataset.drop('class', axis = 1).values, dataset['class'].values

In [5]:
df = pd.read_csv(gen_data_path + 'DS1', index_col = 'Unnamed: 0')

In [6]:
test_x, test_y = get_x_y_values(df)

In [7]:
m0 = get_matrix(num_features, data_path + m0_file)
m1 = get_matrix(num_features, data_path + m1_file)
cov = get_matrix(num_features, data_path + cov_file, False)

In [8]:
def sigmoid(a):
    return 1 / (1 + math.exp(-a))

def set_coeff():
    inv_cov = np.linalg.inv(cov)
    w = np.matmul(inv_cov, (m0-m1))
    
    w0_0 = -0.5 * np.matmul(m0.T, np.matmul(inv_cov, m0))
    w0_1 = 0.5 * np.matmul(m1.T, np.matmul(inv_cov, m1))
    
    # adding b/c 50/50 percent chances
    w0 = w0_0 + w0_1 + 1
    return w, w0

def eval_x(x):
    a = np.matmul(w_glob, x) + w0_glob
    return round(1 - sigmoid(a))

In [9]:
w_glob, w0_glob = set_coeff()

In [10]:
def test_model(data_x, data_y):
    TN, TP, FN, FP = 0, 0, 0, 0
    for x, y in zip(data_x, data_y):
        y_pred = eval_x(x)
        if y_pred == y:
            if y_pred:
                TP += 1
            else: 
                TN += 1
        else: 
            if y_pred:
                FP += 1
            else: 
                FN += 1
    
    accuracy = (TN + TP)/len(data_y)
    precision = TP / (TP + FP)
    recall = TP / (TP + FN)
    F_measure = (2 * recall * precision) / (recall + precision)
    
    return accuracy, precision, recall, F_measure

In [11]:
accuracy, precision, recall, F_measure = test_model(test_x, test_y)

In [12]:
print("The accuracy is: %s." % (accuracy))
print("The precision is: %s." % (precision))
print("The recall is: %s." % (recall))
print("The F-measure is: %s." % (F_measure))

The accuracy is: 0.95.
The precision is: 0.9742268041237113.
The recall is: 0.9264705882352942.
The F-measure is: 0.949748743718593.


In [13]:
print("The learnt w0 coefficiants is: %s." % w0_glob)
print("The learnt w coefficiants are: %s." % w_glob)

The learnt w0 coefficiants is: 27.9753091904.
The learnt w coefficiants are: [ 14.25856816  -8.4855298   -5.53042923  -3.08598499  -9.58250653
  -4.25451151  16.67788119 -23.81922914 -28.75865812   9.08190859
 -13.0029778  -12.11999481  15.34983455  12.76433091  -5.60967679
  12.90527982  28.99599201  -6.64595429  -0.50925941  -4.97775505].
