In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import StandardScaler

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [2]:
# Read the kyphosis dataset from a CSV given by relative path (resolved against
# the current working directory) and display the resulting frame.
data = pd.read_csv("kyphosis.csv")
data

Unnamed: 0,Kyphosis,Age,Number,Start
0,absent,71,3,5
1,absent,158,3,14
2,present,128,4,5
3,absent,2,5,1
4,absent,1,4,15
...,...,...,...,...
76,present,157,3,13
77,absent,26,7,13
78,absent,120,2,13
79,present,42,7,6


In [3]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
data.describe()

Unnamed: 0,Age,Number,Start
count,81.0,81.0,81.0
mean,83.654321,4.049383,11.493827
std,58.104251,1.619423,4.883962
min,1.0,2.0,1.0
25%,26.0,3.0,9.0
50%,87.0,4.0,13.0
75%,130.0,5.0,16.0
max,206.0,10.0,18.0


In [4]:
# Pairwise correlation of the numeric columns. numeric_only=True leaves out the
# Kyphosis column, which still holds strings at this point of a top-to-bottom run.
data.corr(numeric_only=True)

Unnamed: 0,Age,Number,Start
Age,1.0,-0.016687,0.057828
Number,-0.016687,1.0,-0.425099
Start,0.057828,-0.425099,1.0


In [5]:
# Encode the target column as integers: 'absent' -> 0, 'present' -> 1.
# The guard makes this cell safe to re-run: mapping the already-encoded 0/1
# column a second time would replace every value with NaN.
if not pd.api.types.is_numeric_dtype(data['Kyphosis']):
    data['Kyphosis'] = data['Kyphosis'].map({'absent': 0, 'present': 1})

# Any label outside the mapping becomes NaN; fail here with a clear message
# instead of carrying missing targets into model training.
assert data['Kyphosis'].notna().all(), "unexpected Kyphosis label (expected 'absent' or 'present')"
data

Unnamed: 0,Kyphosis,Age,Number,Start
0,0,71,3,5
1,0,158,3,14
2,1,128,4,5
3,0,2,5,1
4,0,1,4,15
...,...,...,...,...
76,1,157,3,13
77,0,26,7,13
78,0,120,2,13
79,1,42,7,6


In [6]:
# Feature matrix: the three numeric predictor columns, in this order.
X = data.loc[:, ['Age', 'Number', 'Start']]
X.head(3)

Unnamed: 0,Age,Number,Start
0,71,3,5
1,158,3,14
2,128,4,5


In [7]:
# Target vector: the Kyphosis column (integer-encoded by the mapping cell above).
y = data['Kyphosis']
y.head(3)

0    0
1    0
2    1
Name: Kyphosis, dtype: int64

In [8]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)


In [9]:
# Standardising transformer; it is only constructed here and is fitted in the next cell.
scaler = StandardScaler()

In [10]:
# Learn the per-feature scaling statistics from the training split only and
# apply them to the training features.
X_train_scaled = scaler.fit_transform(X_train)

# Apply the *training* statistics to the test features. Calling fit_transform
# here would refit the scaler on the test split, leaking test statistics and
# putting the two splits on different scales. The result is feature data, so it
# is named X_test_scaled (it was previously stored as `y_train_scaled`).
X_test_scaled = scaler.transform(X_test)

# NOTE(review): the model cells below fit and predict on the unscaled
# X_train / X_test; switch them to the scaled arrays if scaling is intended.

In [11]:
# First classifier: logistic regression with scikit-learn's default settings.
model = LogisticRegression()
model  # show the estimator repr

In [12]:
# Train the logistic regression on the training split.
# NOTE(review): this uses the raw X_train, not the X_train_scaled array computed
# in the scaling cell — confirm that unscaled features are intended.
model.fit(X_train, y_train)
model

In [13]:
# Predicted class labels (0 = absent, 1 = present) for the held-out test rows.
predictions = model.predict(X_test)
predictions

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0,
       0, 0, 0], dtype=int64)

In [14]:
# Fraction of test rows whose predicted label equals the true label.
print("Accuracy:", accuracy_score(y_test, predictions))

Accuracy: 0.8


In [15]:
# Confusion matrix: rows are the true classes, columns the predicted classes.
print("Confusion Matrix:", confusion_matrix(y_test, predictions))

Confusion Matrix: [[18  1]
 [ 4  2]]


In [16]:
# Per-class precision, recall, F1 and support for the logistic regression.
print("Classification Report:", classification_report(y_test, predictions))

Classification Report:               precision    recall  f1-score   support

           0       0.82      0.95      0.88        19
           1       0.67      0.33      0.44         6

    accuracy                           0.80        25
   macro avg       0.74      0.64      0.66        25
weighted avg       0.78      0.80      0.77        25



In [17]:
# Second classifier: k-nearest neighbours with default settings.
# `model` is rebound, so the logistic regression is no longer reachable by name.
model = KNeighborsClassifier()
model

In [18]:
# Train the k-NN classifier on the training split.
# NOTE(review): fitted on the raw X_train; k-NN is distance-based, so consider
# whether the scaled features were meant to be used here.
model.fit(X_train, y_train)
model

In [19]:
# k-NN predictions for the test rows; overwrites the previous `predictions`.
predictions = model.predict(X_test)
predictions

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 0], dtype=int64)

In [20]:
# Test-set accuracy of the k-NN classifier.
print("Accuracy:", accuracy_score(y_test, predictions))

Accuracy: 0.8


In [21]:
# True-vs-predicted counts for the k-NN classifier (rows: true, columns: predicted).
print("Confusion Matrix:", confusion_matrix(y_test, predictions))

Confusion Matrix: [[19  0]
 [ 5  1]]


In [22]:
# Precision / recall / F1 per class for the k-NN classifier.
print("Classification Report:", classification_report(y_test, predictions))

Classification Report:               precision    recall  f1-score   support

           0       0.79      1.00      0.88        19
           1       1.00      0.17      0.29         6

    accuracy                           0.80        25
   macro avg       0.90      0.58      0.58        25
weighted avg       0.84      0.80      0.74        25



In [23]:
# Third classifier: support vector classifier with default settings.
model = SVC()
model

In [24]:
# Train the SVC on the training split.
# NOTE(review): fitted on the raw X_train rather than X_train_scaled; the
# recorded predictions for this model are all class 0 — confirm whether scaled
# features were intended.
model.fit(X_train, y_train)
model

In [25]:
# SVC predictions for the test rows; overwrites the previous `predictions`.
predictions = model.predict(X_test)
predictions

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0], dtype=int64)

In [26]:
# Test-set accuracy of the SVC.
print("Accuracy:", accuracy_score(y_test, predictions))

Accuracy: 0.76


In [27]:
# True-vs-predicted counts for the SVC (rows: true, columns: predicted).
print("Confusion Matrix:", confusion_matrix(y_test, predictions))

Confusion Matrix: [[19  0]
 [ 6  0]]


In [28]:
# Precision / recall / F1 per class for the SVC. In the recorded run the model
# predicts no positive samples, so class-1 precision is undefined; scikit-learn
# reports it as 0.00 and emits the warnings shown below the report.
print("Classification Report:", classification_report(y_test, predictions))

Classification Report:               precision    recall  f1-score   support

           0       0.76      1.00      0.86        19
           1       0.00      0.00      0.00         6

    accuracy                           0.76        25
   macro avg       0.38      0.50      0.43        25
weighted avg       0.58      0.76      0.66        25



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [29]:
# Fourth classifier: decision tree with default hyper-parameters.
# random_state is pinned (same seed as the train/test split) because tree
# construction involves randomness; without a seed the fitted tree, and the
# metrics reported below, can change from one run to the next.
model = DecisionTreeClassifier(random_state=42)
model

In [30]:
# Train the decision tree on the training split (raw, unscaled features).
model.fit(X_train, y_train)
model

In [31]:
# Decision-tree predictions for the test rows; overwrites the previous `predictions`.
predictions = model.predict(X_test)
predictions

array([0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 0], dtype=int64)

In [32]:
# Test-set accuracy of the decision tree.
print("Accuracy:", accuracy_score(y_test, predictions))

Accuracy: 0.76


In [33]:
# True-vs-predicted counts for the decision tree (rows: true, columns: predicted).
print("Confusion Matrix:", confusion_matrix(y_test, predictions))

Confusion Matrix: [[17  2]
 [ 4  2]]


In [34]:
# Precision / recall / F1 per class for the decision tree.
print("Classification Report:", classification_report(y_test, predictions))

Classification Report:               precision    recall  f1-score   support

           0       0.81      0.89      0.85        19
           1       0.50      0.33      0.40         6

    accuracy                           0.76        25
   macro avg       0.65      0.61      0.62        25
weighted avg       0.74      0.76      0.74        25



In [35]:
# Fifth classifier: random forest of 100 trees.
# random_state is pinned (same seed as the train/test split) so that the
# bootstrap samples and feature subsets, and therefore the metrics reported
# below, are reproducible between runs.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model

In [36]:
# Train the random forest on the training split (raw, unscaled features).
model.fit(X_train, y_train)
model

In [37]:
# Random-forest predictions for the test rows; overwrites the previous `predictions`.
predictions = model.predict(X_test)
predictions

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 0], dtype=int64)

In [38]:
# Test-set accuracy of the random forest.
print("Accuracy:", accuracy_score(y_test, predictions))

Accuracy: 0.8


In [39]:
# True-vs-predicted counts for the random forest (rows: true, columns: predicted).
print("Confusion Matrix:", confusion_matrix(y_test, predictions))

Confusion Matrix: [[19  0]
 [ 5  1]]


In [40]:
# Precision / recall / F1 per class for the random forest.
print("Classification Report:", classification_report(y_test, predictions))

Classification Report:               precision    recall  f1-score   support

           0       0.79      1.00      0.88        19
           1       1.00      0.17      0.29         6

    accuracy                           0.80        25
   macro avg       0.90      0.58      0.58        25
weighted avg       0.84      0.80      0.74        25

