# 1vsAll Logstic Regression 
In this notebook, we'll train 3 binary logistic regression models to classify the iris dataset using the one vs all method. 

In [1]:
import csv
import numpy as np
import math 
import copy
import random

## Read Data from CSV file 
Place the relative file path to the csv file in the path variable

In [2]:
path = "../../data/Chp2/iris_full_set-Copy1.csv"

def read_data(path): 
    with open(path, 'r') as f:
        data = csv.reader(f, delimiter=',')        
        training_inputs = [] 
        for i, line in enumerate(data):
            if i == 0: #first line is title
                continue 
            else:
                feature_vector = [1] #account for bias term 
                feature_vector.extend(line[:-1])
                feature_vector = np.array(feature_vector).astype('float64')
                label = line[-1] 
                training_inputs.append((feature_vector, label))
        return training_inputs
    
data = read_data(path)


## Model
Compute_full_model stores all the indvidual binary logistic regression weights. 
Train_model trains a simple binary logistic regression  

In [3]:
def compute_full_model(data, epoch): #finds all weights using 1 vs all, this will train n models with n being the number of different classes
    labels = []
    for line in data:
        if line[-1] not in labels:
            labels.append(line[-1])
    models = [] 
    for label in labels:
        models.append(train_model(epoch, data, label))
    return (models, labels)

def train_model(epoch, data, postive_label): #1 vs all 
    weights = np.zeros(len(data[0][0])) #data is [(feature_vector, label),...]
    learner_rate = 0.1 #learning rate for stochastic gradient decent 
    decay = 0.99
    data = copy.copy(data) #prevent aliasing issues
    for _ in range(epoch):
        learner_rate = learner_rate*decay
        random.shuffle(data)
        for feature_vector, label in data:  #implement shuffle 
            gradient_vector = compute_graident(weights, feature_vector, label, postive_label)*learner_rate*feature_vector #formula
            weights = weights + gradient_vector
    return weights 

def compute_graident(weights, feature_vector, label, postive_label):
    if label == postive_label: y = 1
    else: y = 0
    assert(len(feature_vector)   == len(weights))
    dot_product = np.dot(weights, feature_vector)
    if dot_product > 600: #so exp function doesn't overflow 
        return y - 1
    elif dot_product < -600:
        return y 
    exp_val = math.exp(dot_product)
    return y - ((exp_val)/(1+exp_val))

Makes a prediction for one data set 

In [4]:

def predict(models, labels, feature_vector, show = False):
    probability_list = []
    assert(len(models) == len(labels))
    for model, label in zip(models, labels):
        dot_product = np.dot(model, feature_vector)
        if dot_product > 400: #so exp function doesn't overflow 
            probability = 0
        elif dot_product < -400:
            probability = 1
        else:
            exp_val = math.exp(dot_product)
            probability = exp_val/(1+exp_val)
        probability_list.append((probability, label))
    if show:
        return probability_list, max(probability_list, key = lambda probs: probs[0])[1]
    return max(probability_list, key = lambda probs: probs[0])[1]

## Train and Calculate Error
Trains the model and calculates training and test error 

In [5]:
def error_rate(training_set, models, labels):
    error = 0
    for feature_vector, label in training_set:
        if predict(models, labels, feature_vector) != label:
            error += 1
    return error/len(training_set)


for _ in range(20):
    np.random.shuffle(data)

index_80 = int(0.8*len(data)) #index of 80% 

training_set = data[:index_80]
test_set = data[index_80:]
models, labels = compute_full_model(training_set, 200)

models, labels = compute_full_model(data, 200)

print("train error: %0.4f" %(error_rate(training_set, models,labels)))
print("test error: %0.4f" %error_rate(test_set, models,labels))

train error: 0.0250
test error: 0.0333


## Predict an datapoint

In [6]:
 #to predict one lable, replace the lst with the features 
prob_list, prediction = predict(models, labels, [10,2,5,4,4], True)
print("The probability for each label is: ", prob_list)
print("Predicted output is: ", prediction)


The probability for each label is:  [(0.004119959182037474, 'Iris-setosa'), (1.0, 'Iris-versicolor'), (1.3924810513730622e-12, 'Iris-virginica')]
Predicted output is:  Iris-versicolor
