In [1]:
from sklearn.model_selection import train_test_split
import pandas as pd
from lem2Alg import *
from sklearn.preprocessing import StandardScaler

import numpy as np
from sklearn.metrics import mean_squared_error as mse

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score
from sklearn.metrics import roc_curve, auc

import matplotlib.pyplot as plt


In [4]:
# heartdisease.txt is read as a space-separated file; the column names are
# supplied here through `names=` rather than taken from the file.
columns = [
    "age",
    "sex",
    "chest_pain",
    "blood_presure",
    "cholestoral",
    "blood_sugar",
    "rer",
    "max_heart_rate",
    "exercise_angina",
    "oldpeak",
    "slope_of_peak",
    "major_vessels",
    "thal",
    "oc",
]

df = pd.read_csv("heartdisease.txt", names=columns, sep=" ")
df.head()

Unnamed: 0,age,sex,chest_pain,blood_presure,cholestoral,blood_sugar,rer,max_heart_rate,exercise_angina,oldpeak,slope_of_peak,major_vessels,thal,oc
0,70.0,1.0,4.0,130.0,322.0,0.0,2.0,109.0,0.0,2.4,2.0,3.0,3.0,2
1,67.0,0.0,3.0,115.0,564.0,0.0,2.0,160.0,0.0,1.6,2.0,0.0,7.0,1
2,57.0,1.0,2.0,124.0,261.0,0.0,0.0,141.0,0.0,0.3,1.0,0.0,7.0,2
3,64.0,1.0,4.0,128.0,263.0,0.0,0.0,105.0,1.0,0.2,2.0,1.0,7.0,1
4,74.0,0.0,2.0,120.0,269.0,0.0,2.0,121.0,1.0,0.2,1.0,1.0,3.0,1


In [5]:
# Count missing values per column (isna is the alias of isnull).
df.isna().sum()

age                0
sex                0
chest_pain         0
blood_presure      0
cholestoral        0
blood_sugar        0
rer                0
max_heart_rate     0
exercise_angina    0
oldpeak            0
slope_of_peak      0
major_vessels      0
thal               0
oc                 0
dtype: int64

In [6]:
# Number of rows for each distinct value of the target column "oc".
df.loc[:, "oc"].value_counts()

1    150
2    120
Name: oc, dtype: int64

In [13]:
# Print the distinct values of every column, followed by the column name.
for item, column_values in df.items():
    print(column_values.unique(), " ** ", item)

[70. 67. 57. 64. 74. 65. 56. 59. 60. 63. 53. 44. 61. 71. 46. 40. 48. 43.
 47. 54. 51. 58. 66. 37. 50. 42. 62. 49. 52. 45. 41. 76. 39. 35. 55. 34.
 38. 69. 68. 77. 29.]  **  age
[1. 0.]  **  sex
[4. 3. 2. 1.]  **  chest_pain
[130. 115. 124. 128. 120. 110. 140. 150. 135. 142. 134. 112. 132. 138.
 160. 170. 144. 122. 152. 101. 126. 118. 136. 105. 174. 145. 108. 156.
 106. 104.  94. 146. 148. 178. 125. 100. 165. 180. 158. 200. 117. 192.
 123. 129. 102. 155. 172.]  **  blood_presure
[322. 564. 261. 263. 269. 177. 256. 239. 293. 407. 234. 226. 235. 303.
 149. 311. 203. 211. 199. 229. 245. 204. 288. 275. 243. 295. 230. 265.
 228. 215. 326. 200. 207. 273. 180. 222. 223. 209. 233. 197. 218. 246.
 225. 315. 205. 417. 195. 198. 166. 178. 249. 281. 126. 305. 240. 276.
 319. 242. 260. 354. 309. 208. 236. 270. 214. 201. 244. 306. 221. 330.
 266. 206. 212. 302. 313. 141. 237. 289. 254. 274. 258. 160. 327. 304.
 271. 283. 188. 286. 360. 267. 196. 232. 277. 210. 213. 282. 167. 224.
 268. 250. 219. 217.

In [14]:
# Summary statistics (count, mean, std, min, quartiles, max) for each column.
df.describe()

Unnamed: 0,age,sex,chest_pain,blood_presure,cholestoral,blood_sugar,rer,max_heart_rate,exercise_angina,oldpeak,slope_of_peak,major_vessels,thal,oc
count,270.0,270.0,270.0,270.0,270.0,270.0,270.0,270.0,270.0,270.0,270.0,270.0,270.0,270.0
mean,54.433333,0.677778,3.174074,131.344444,249.659259,0.148148,1.022222,149.677778,0.32963,1.05,1.585185,0.67037,4.696296,1.444444
std,9.109067,0.468195,0.95009,17.861608,51.686237,0.355906,0.997891,23.165717,0.470952,1.14521,0.61439,0.943896,1.940659,0.497827
min,29.0,0.0,1.0,94.0,126.0,0.0,0.0,71.0,0.0,0.0,1.0,0.0,3.0,1.0
25%,48.0,0.0,3.0,120.0,213.0,0.0,0.0,133.0,0.0,0.0,1.0,0.0,3.0,1.0
50%,55.0,1.0,3.0,130.0,245.0,0.0,2.0,153.5,0.0,0.8,2.0,0.0,3.0,1.0
75%,61.0,1.0,4.0,140.0,280.0,0.0,2.0,166.0,1.0,1.6,2.0,1.0,7.0,2.0
max,77.0,1.0,4.0,200.0,564.0,1.0,2.0,202.0,1.0,6.2,3.0,3.0,7.0,2.0


In [15]:
# Estimator under test; lem2Classifier comes from the wildcard lem2Alg import.
model = lem2Classifier()

# "oc" is the target column; every other column is used as a feature.
y = df['oc']
X = df.drop(columns=['oc'])

In [16]:
# Hold out 20% of the rows for testing; the fixed random_state makes the
# shuffle, and therefore the split, reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=0)

# train_test_split keeps the original row labels of pandas inputs, so each
# subset ends up with a shuffled, gappy index. Label-based lookups such as
# y_train[0] then raise KeyError whenever that row landed in the other subset.
# Renumber every subset 0..n-1 so label-based and positional access agree.
# NOTE(review): presumably this is what triggers the `KeyError: 0` raised by
# the fit/predict cell -- lem2Alg is not visible here, confirm against its source.
X_train, X_test, y_train, y_test = (
    subset.reset_index(drop=True)
    for subset in (X_train, X_test, y_train, y_test)
)

In [17]:
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, classification_report

# Fit the lem2Classifier instance on the training split and predict the
# held-out test split.
# predict() is called on `model` itself, not on fit()'s return value, because
# it is not visible here whether lem2Classifier.fit returns the estimator.
model_l2 = model.fit(X_train, y_train)
l2_predict = model.predict(X_test)

# Compare the predictions with the true test labels.
l2_conf_matrix = confusion_matrix(y_test, l2_predict)
l2_acc_score = accuracy_score(y_test, l2_predict)

print("confussion matrix")
print(l2_conf_matrix)
print("\n")
# The estimator is lem2Classifier, not logistic regression, so the accuracy
# line is labelled accordingly; accuracy is shown as a percentage.
print("Accuracy of LEM2 classifier:", l2_acc_score*100, '\n')
print(classification_report(y_test, l2_predict))

KeyError: 0