In [None]:
import numpy as np
import datasets.datasets as datasets

def get_patient_class(patient_data):
    """Return the regression target for one patient record.

    The last element of ``patient_data`` is the class code.  A code equal to
    4 maps to ``1 - 1e-8``; every other code maps to ``1e-8``.  Both targets
    sit strictly inside (0, 1), so ``log(1 / y - 1)`` is finite for them.
    """
    class_code = patient_data[-1]
    if class_code == 4:
        return 1 - 1e-8
    return 1e-8

def get_patient_data(patient_data):
    """Return the record without its final element.

    The final element is the one ``get_patient_class`` reads as the class
    code; everything before it is returned unchanged, in the same container
    type that slicing ``patient_data`` produces.
    """
    features = patient_data[:-1]
    return features

def perform_test(patient_data, parameters):
    """Predict the class score for one patient record.

    Evaluates ``1 / (1 + exp(w . x + b))`` where ``w`` is every entry of
    ``parameters`` except the last, ``b`` is the last entry, and ``x`` is the
    leading ``len(parameters) - 1`` values of ``patient_data``.  Any further
    elements of ``patient_data`` (such as a trailing class code) are ignored.

    Args:
        patient_data: Sequence of numeric values with at least
            ``len(parameters) - 1`` elements.
        parameters: Weights followed by the bias, either as a flat sequence
            or as a column vector of shape ``(n + 1, 1)``.

    Returns:
        The score between 0 and 1: a NumPy scalar for flat ``parameters``
        and a length-1 array for column-vector ``parameters``.

    Raises:
        IndexError: If ``parameters`` is empty or ``patient_data`` holds
            fewer values than there are weights.
    """
    weights = np.asarray(parameters, dtype=float)
    if weights.ndim == 0 or weights.shape[0] == 0:
        raise IndexError("parameters must contain at least the bias term")

    n_features = weights.shape[0] - 1
    if len(patient_data) < n_features:
        raise IndexError(
            "patient_data has fewer values than parameters has weights"
        )

    features = np.asarray(patient_data[:n_features], dtype=float)

    # Sum the weighted features and the bias *before* exponentiating.
    # Computing exp(bias) * exp(w . x) separately can yield 0 * inf = nan
    # when the two terms are large with opposite signs.
    logit = features.dot(weights[:-1]) + weights[-1]

    # The exponent is +logit (not -logit): this matches the original formula
    # 1 / (1 + e^b * e^(w . x)).
    return 1 / (1 + np.exp(logit))

def calc_precision_recall(predicted, actual):
    """Compute precision and recall at a 0.5 decision threshold.

    An entry counts as positive when its value is greater than 0.5; this is
    applied to both ``predicted`` and ``actual``.

    Args:
        predicted: Sequence of predicted scores.
        actual: Sequence of ground-truth scores, indexable at every position
            of ``predicted``.

    Returns:
        A ``(precision, recall)`` tuple.  A ratio whose denominator is zero
        (no predicted positives, or no actual positives) is reported as
        ``0.0`` instead of raising ``ZeroDivisionError``.
    """
    tp = 0
    fp = 0
    fn = 0
    for i, score in enumerate(predicted):
        if actual[i] > 0.5:
            if score > 0.5:
                tp += 1
            else:
                fn += 1
        elif score > 0.5:
            fp += 1

    predicted_positives = tp + fp
    actual_positives = tp + fn
    # Guard the undefined cases so empty or one-sided inputs do not crash.
    precision = tp / predicted_positives if predicted_positives else 0.0
    recall = tp / actual_positives if actual_positives else 0.0
    return precision, recall

def calc_f1(predicted, actual):
    """Return the F1 score (harmonic mean of precision and recall).

    Precision and recall come from ``calc_precision_recall``.  When both are
    zero the harmonic mean is undefined; ``0.0`` is returned in that case
    instead of raising ``ZeroDivisionError``.
    """
    precision, recall = calc_precision_recall(predicted, actual)
    total = precision + recall
    if total == 0:
        return 0.0
    return 2 * (precision * recall) / total

# Materialise every record from the generator, then split by position:
# the first 550 records are used for training and the rest for testing.
data = list(datasets.cancer_data_gen())
train_data = data[:550]
test_data = data[550:]

<h3>Data Labels</h3>
1. Clump Thickness
2. Uniformity of Cell Size
3. Uniformity of Cell Shape
4. Marginal Adhesion
5. Single Epithelial Cell Size
6. Bare Nuclei
7. Bland Chromatin
8. Normal Nucleoli 
9. Mitoses

In [None]:
# Load in the data: one design-matrix row per training patient, with a
# constant 1 appended so the last regression weight acts as the bias term.
# The array sizes come from train_data itself, so this cell keeps working if
# the split size changes or fewer records are available than expected.
x = np.array(
    [get_patient_data(patient) + [1] for patient in train_data],
    dtype=float,
)
# Column vector of targets, one row per training patient.
y = np.array(
    [[get_patient_class(patient)] for patient in train_data],
    dtype=float,
)


In [None]:
# Map the targets through the link z = log(1/y - 1), so that
# y = 1 / (1 + exp(z)).  The targets from get_patient_class are strictly
# inside (0, 1), which keeps the logarithm finite.
z = np.log((1 / y) - 1)
# Fit z ~ x . parameters by ordinary least squares.  lstsq solves the problem
# directly rather than forming inv(x.T x), which loses accuracy on
# ill-conditioned data and raises LinAlgError when x.T x is singular.
# The result keeps the (n_columns, 1) shape of the normal-equation solution.
parameters = np.linalg.lstsq(x, z, rcond=None)[0]

In [None]:
# Score the held-out records: collect the true targets and the model's
# predictions, then report F1 followed by the (precision, recall) pair.
test_truths = list(map(get_patient_class, test_data))
test_predictions = [perform_test(record, parameters) for record in test_data]
for metric in (calc_f1, calc_precision_recall):
    print(metric(test_predictions, test_truths))