# Logistic Regression on Shape Factor and Migrating Clusters
Apply logistic regression with input variables: shape factor of cell sphere, and number of migrating clusters

In [31]:
import numpy as np
import csv
import matplotlib.pyplot as plt
from sklearn.preprocessing import scale
from sklearn.model_selection import LeaveOneOut
from sklearn.linear_model import LogisticRegression

### Parse 
Parse the csv file for inputs matrix (X) and outputs vector (Y)

In [32]:
data = []
with open('data.csv', 'r') as f:
    reader = csv.reader(f)
    for row in reader:
        data.append(row[1:])
        # TODO: get your X and Y
data = np.array(data)
X = data[:, 0:-1]
y = data[:, -1]
X = X.astype(float)
y = y.astype(float)

num_migrating_clusters = X[:, 0]
shape_factor = X[:, 1]

In [33]:
print("Inputs matrix: ")
print(X)
print("\n\n")

Inputs matrix: 
[[  1.00000000e+01   1.82125577e-01   1.36800146e+03   1.66630000e+03]
 [  4.40000000e+01   1.86546646e-01   1.70848237e+03   1.76821591e+03]
 [  1.20000000e+01   1.18197605e-01   1.40882540e+03   2.68537500e+03]
 [  6.00000000e+00   2.06828505e-01   1.72587659e+03   1.85909167e+04]
 [  7.00000000e+00   2.42266517e-01   1.17678758e+03   7.33071429e+02]
 [  1.90000000e+01   2.39511659e-01   1.37570818e+03   2.52947368e+02]
 [  1.40000000e+01   3.16555997e-01   1.53193864e+03   1.20957143e+03]
 [  1.70000000e+01   3.99008516e-01   1.59545918e+03   6.06202941e+03]
 [  1.20000000e+01   3.57063577e-01   1.37080451e+03   9.60416667e+01]
 [  2.90000000e+01   4.01967572e-02   1.47527116e+03   1.86374138e+03]
 [  3.30000000e+01   1.61292914e-01   1.84120395e+03   1.77359545e+04]
 [  2.30000000e+01   3.85966792e-01   1.51463329e+03   1.74304348e+02]
 [  1.50000000e+01   3.90886249e-01   1.60580976e+03   8.06233333e+02]
 [  6.00000000e+00   4.03992853e-01   1.28849563e+03   2.7304

### Fit model
Use scikit-learn to fit to a logistic regression model. Use LOOCV to evaluate prediction error

In [34]:
y_pred = []
correct_count = 0

for train_index, test_index in LeaveOneOut().split(X):
    X_train = X[train_index]
    y_train = y[train_index]
    X_test = X[test_index]
    y_test = y[test_index]
    
    # Fit based on the training data
    logreg = LogisticRegression(random_state=0, solver='lbfgs')
    logreg.fit(X_train, y_train)
    
    # Test against the held out data
    pred = logreg.predict(X_test)
    y_pred.append(pred)
    
    # Report results
    if (pred == y_test):
        correct_count += 1
    
    print("Predicted class: {}, True class: {}".format(pred, y_test))
    print()

# Print final report
accuracy = correct_count / len(y)
print("RESULTS:")
print("Correct predictions: {}".format(correct_count))
print("Incorrect predictions: {}".format(len(y) - correct_count))
print("Overall Accuracy: {}".format(accuracy))



Predicted class: [ 1.], True class: [ 1.]

Predicted class: [ 2.], True class: [ 2.]

Predicted class: [ 1.], True class: [ 1.]

Predicted class: [ 1.], True class: [ 1.]

Predicted class: [ 1.], True class: [ 1.]

Predicted class: [ 2.], True class: [ 2.]

Predicted class: [ 2.], True class: [ 2.]

Predicted class: [ 1.], True class: [ 1.]

Predicted class: [ 1.], True class: [ 2.]

Predicted class: [ 2.], True class: [ 1.]

Predicted class: [ 1.], True class: [ 1.]

Predicted class: [ 2.], True class: [ 2.]

Predicted class: [ 2.], True class: [ 2.]

Predicted class: [ 1.], True class: [ 1.]

Predicted class: [ 2.], True class: [ 2.]

Predicted class: [ 1.], True class: [ 1.]

Predicted class: [ 2.], True class: [ 2.]

Predicted class: [ 1.], True class: [ 1.]

Predicted class: [ 2.], True class: [ 2.]

Predicted class: [ 1.], True class: [ 1.]

Predicted class: [ 1.], True class: [ 1.]

Predicted class: [ 1.], True class: [ 2.]

Predicted class: [ 1.], True class: [ 2.]

Predicted c

