In [6]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.model_selection import cross_val_score

# Load the dataset; the path is relative to the current working directory.
df = pd.read_csv('./column_2C_weka.csv', sep=',')

# re-encode class column: 'Abnormal' -> 1, 'Normal' -> 0.
# The result is assigned back to df['class'] instead of calling
# replace(..., inplace=True) on the selected column: an in-place call on a
# selection is a chained assignment, which pandas warns about and which does
# not update df when Copy-on-Write is active.
df['class'] = df['class'].map({'Abnormal': 1, 'Normal': 0})

# map() turns any label outside the mapping into NaN, so fail loudly here
# rather than letting a bad label reach the model.
assert df['class'].notna().all(), "unexpected label found in 'class' column"
df.head(10)

Unnamed: 0,pelvic_incidence,pelvic_tilt,lumbar_lordosis_angle,sacral_slope,pelvic_radius,degree_spondylolisthesis,class
0,63.027818,22.552586,39.609117,40.475232,98.672917,-0.2544,1
1,39.056951,10.060991,25.015378,28.99596,114.405425,4.564259,1
2,68.832021,22.218482,50.092194,46.613539,105.985135,-3.530317,1
3,69.297008,24.652878,44.311238,44.64413,101.868495,11.211523,1
4,49.712859,9.652075,28.317406,40.060784,108.168725,7.918501,1
5,40.2502,13.921907,25.12495,26.328293,130.327871,2.230652,1
6,53.432928,15.864336,37.165934,37.568592,120.567523,5.988551,1
7,45.366754,10.755611,29.038349,34.611142,117.270068,-10.675871,1
8,43.79019,13.533753,42.690814,30.256437,125.002893,13.289018,1
9,36.686353,5.010884,41.948751,31.675469,84.241415,0.664437,1


In [7]:
# Separate predictors from the target by column position:
# columns 0..5 are the explanatory variables, column 6 is the response.
target_position = 6
X, Y = df.iloc[:, :target_position], df.iloc[:, target_position]

In [18]:
# build and fit model
# Logistic regression with the estimator's default settings, fitted on the
# complete X / Y (no hold-out split is made in this cell).
reg = LogisticRegression()
reg.fit(X, Y)

print("Coefficients: ",reg.coef_)
print("Intercept: ", reg.intercept_)

# compute predicted values from training set
# (these predictions are on the same rows the model was fitted on)
Y_pred = reg.predict(X)

Coefficients:  [[ 0.02704094  0.08559402 -0.02888808 -0.05855307 -0.0178026   0.15320162]]
Intercept:  [1.96490613]




In [10]:
# Confusion matrix of true labels (rows) against predictions (columns),
# computed on the same data the model was fitted on.
cm = confusion_matrix(Y, Y_pred)
print("Confusion matrix:\n",cm)

# Unpack the 2x2 matrix in row-major order: [[tn, fp], [fn, tp]].
tn, fp, fn, tp = cm.ravel()
correct = tn + tp
total = tn + tp + fp + fn
accuracy = correct / total
print("Accuracy calculated from the training set = %.3f" % (accuracy))

# Per-class precision / recall / F1; names are listed in label order (0, 1).
report = classification_report(Y, Y_pred, target_names=['normal', 'abnormal'])
print(report)

Confusion matrix:
 [[ 77  23]
 [ 29 181]]
Accuracy calculated from the training set = 0.832
              precision    recall  f1-score   support

      normal       0.73      0.77      0.75       100
    abnormal       0.89      0.86      0.87       210

    accuracy                           0.83       310
   macro avg       0.81      0.82      0.81       310
weighted avg       0.84      0.83      0.83       310



In [21]:
# cross-validate
# k is the number of folds, passed to cross_val_score through cv
k = 10
scores = cross_val_score(reg, X, y=Y, scoring="accuracy", cv=k)

# Report the mean of the per-fold accuracy scores.
mean_accuracy = scores.mean()
print("Accuracy calculated using %d-fold cross validation = %.3f" % (k, mean_accuracy))

# Column count of the full frame (features plus the class column).
len(df.columns)

Accuracy calculated using 10-fold cross validation = 0.797




7

In [67]:
# Diagnostic Tool
# Prompts for one value per feature, builds a one-row DataFrame with the same
# column names the model was fitted on, and prints the predicted class.

# Take the feature names from X (the frame used in reg.fit) so the prompts and
# the DataFrame columns always match the model's inputs.
feature_names = list(X.columns)

ds = []

for feature_name in feature_names:
    str1 = input(f"Enter {feature_name}:")
    # input() returns text; convert explicitly so the model receives numbers
    # and a typo is reported against the feature it was entered for.
    try:
        ds.append(float(str1))
    except ValueError:
        raise ValueError(
            f"{feature_name} must be a number, got {str1!r}"
        ) from None

dfn = pd.DataFrame([ds], columns=feature_names)
Y_pred_single = reg.predict(dfn)

# predict() returns an array with one entry per row; compare its single
# element rather than testing the truth value of the whole array.
if Y_pred_single[0] == 0:
    str2 = 'normal'
else:
    str2 = 'abnormal'
print(f"Sample was predicted to be {str2}")

# Example input (first row of the dataset, labelled abnormal):
# 63.027818 22.552586 39.609117 40.475232 98.672917 -0.254400

Enter pelvic_incidence:39
Enter pelvic_tilt:10
Enter lumbar_lordosis_angle:25
Enter sacral_slope:29
Enter pelvic_radius:114
Enter degree_spondylolisthesis:4.5
Sample was predicted to be abnormal


In [65]:
# Display the one-row DataFrame of feature values built in the diagnostic tool cell.
dfn

Unnamed: 0,pelvic_incidence,pelvic_tilt,lumbar_lordosis_angle,sacral_slope,pelvic_radius,degree_spondylolisthesis
0,63.027818,22.552586,39.609117,40.475232,98.672917,-0.2544
