In [54]:
# https://archive.ics.uci.edu/ml/datasets/heart+disease

In [1]:
pip install gradio

Note: you may need to restart the kernel to use updated packages.


In [2]:
import gradio as gr
import pandas as pd
import numpy as np

In [3]:
# Load the dataset from 'heart2.csv' (relative path, resolved against the
# notebook's working directory) into a DataFrame.
df = pd.read_csv('heart2.csv')

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 303 entries, 0 to 302
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype
---  ------       --------------  -----
 0   Age          303 non-null    int64
 1   Sex          303 non-null    int64
 2   Cholesterol  303 non-null    int64
 3   Flag         303 non-null    int64
 4   cardio       303 non-null    int64
dtypes: int64(5)
memory usage: 12.0 KB


In [5]:
# Human-readable labels for the four input features, in feature order.
columns = (
    'Age',
    'Sex',
    'Cholestoral (in mg/dl)',
    'Resting electrocardiographic results',
)

In [6]:
# 'Flag' predicted heart disease
df

Unnamed: 0,Age,Sex,Cholesterol,Flag,cardio
0,63,1,233,1,0
1,37,1,250,1,1
2,41,0,204,1,0
3,56,1,236,1,1
4,57,0,354,1,1
...,...,...,...,...,...
298,57,0,241,0,1
299,45,1,264,0,1
300,68,1,193,0,1
301,57,1,131,0,1


In [7]:
# Summary statistics, transposed so each dataset column becomes one row.
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Age,303.0,54.366337,9.082101,29.0,47.5,55.0,61.0,77.0
Sex,303.0,0.683168,0.466011,0.0,0.0,1.0,1.0,1.0
Cholesterol,303.0,246.264026,51.830751,126.0,211.0,240.0,274.5,564.0
Flag,303.0,0.544554,0.498835,0.0,0.0,1.0,1.0,1.0
cardio,303.0,0.528053,0.52586,0.0,0.0,1.0,1.0,2.0


In [8]:
# Target: the 'Flag' column, kept as a pandas Series.
y = df['Flag']
# Features: every remaining column, converted to a NumPy array.
X = df.drop(columns=['Flag']).to_numpy()

In [9]:
type(X)

numpy.ndarray

In [10]:
X[:1]

array([[ 63,   1, 233,   0]], dtype=int64)

In [11]:
type(y)

pandas.core.series.Series

In [12]:
y.head()

0    1
1    1
2    1
3    1
4    1
Name: Flag, dtype: int64

In [13]:
from sklearn.preprocessing import StandardScaler

In [14]:
# Standardise every feature column and keep the fitted scaler in `ss` so the
# same transformation can be applied to new inputs later.
# NOTE(review): the scaler is fitted on the full dataset, i.e. before the
# train/test split, so test rows contribute to the scaling statistics.
ss = StandardScaler().fit(X)
X = ss.transform(X)

In [15]:
from sklearn.model_selection import train_test_split

In [16]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [17]:
type(X_train)

numpy.ndarray

In [18]:
type(y_train)

pandas.core.series.Series

In [25]:
from sklearn.linear_model import LogisticRegression
from sklearn import svm
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report,confusion_matrix

In [24]:
# Linear SVM baseline: fit on the training split, evaluate on the held-out split.
# LinearSVC's solver can shuffle the data with a pseudo-random generator, so the
# seed is pinned (same value as the train/test split) to keep the metrics below
# reproducible across re-runs.
trained_svm = svm.LinearSVC(random_state=0).fit(X_train, y_train)

pred_svm = trained_svm.predict(X_test)

print(confusion_matrix(y_test, pred_svm))
print()
print(classification_report(y_test, pred_svm))

[[16 11]
 [15 19]]

              precision    recall  f1-score   support

           0       0.52      0.59      0.55        27
           1       0.63      0.56      0.59        34

    accuracy                           0.57        61
   macro avg       0.57      0.58      0.57        61
weighted avg       0.58      0.57      0.58        61



In [26]:
# Gaussian Naive Bayes baseline: fit on the training split, score the test split.
trained_nb = GaussianNB()
trained_nb.fit(X_train, y_train)
pred_nb = trained_nb.predict(X_test)

# Confusion matrix and per-class report, separated by one blank line.
print(
    confusion_matrix(y_test, pred_nb),
    classification_report(y_test, pred_nb),
    sep='\n\n',
)

[[18  9]
 [15 19]]

              precision    recall  f1-score   support

           0       0.55      0.67      0.60        27
           1       0.68      0.56      0.61        34

    accuracy                           0.61        61
   macro avg       0.61      0.61      0.61        61
weighted avg       0.62      0.61      0.61        61



In [27]:
# Logistic-regression baseline, evaluated on the held-out split.
trained_logis_reg = LogisticRegression()
trained_logis_reg.fit(X_train, y_train)

pred = trained_logis_reg.predict(X_test)

# Confusion matrix, a blank line, then the per-class report.
print(
    confusion_matrix(y_test, pred),
    classification_report(y_test, pred),
    sep='\n\n',
)

[[16 11]
 [12 22]]

              precision    recall  f1-score   support

           0       0.57      0.59      0.58        27
           1       0.67      0.65      0.66        34

    accuracy                           0.62        61
   macro avg       0.62      0.62      0.62        61
weighted avg       0.62      0.62      0.62        61



In [28]:
# Legend for the cell layout of the 2x2 confusion matrices printed above.
print(np.array([['TN', 'FP'], ['FN', 'TP']]))

[['TN' 'FP']
 ['FN' 'TP']]


In [None]:
'''
PK:

FN (= False Negative) counts the cases where a patient is likely to have 'Heart Disease' but is predicted 'No Heart Disease'.
So, the higher the FN count, the riskier the predictions are.

As you can see, Logistic Regression has 12 false negatives, fewer than the other two prediction models, which have 15 each.

'''

In [31]:
def inference(age, sex, ch, cardio):
    """Return class probabilities for one patient as a label -> probability dict.

    Parameters
    ----------
    age : number
        Value of the "Age" input.
    sex : str
        'female' is encoded as 0; any other value is encoded as 1.
    ch : number
        Value of the "Cholestoral (in mg/dl)" input.
    cardio : number
        Value of the "Resting electrocardiographic results" input.

    Returns
    -------
    dict
        ``{'No Heart Disease': P(Flag == 0), 'Has Heart Disease': P(Flag == 1)}``
        with plain Python floats.
    """
    s = 0 if sex == 'female' else 1

    # Plain 1 x 4 array in the same column order the scaler/model were fitted
    # on (Age, Sex, Cholesterol, cardio). The scaler was fitted on a NumPy
    # array, so a DataFrame with column labels would only trigger a
    # feature-name warning without adding information.
    features = np.array([[age, s, ch, cardio]], dtype=float)

    scaled = ss.transform(features)

    # predict_proba columns follow the model's sorted class labels:
    # index 0 -> Flag == 0, index 1 -> Flag == 1.
    proba = trained_logis_reg.predict_proba(scaled)[0]

    # This notebook treats Flag == 1 as the positive ("heart disease") class
    # (see the TN/FP/FN/TP discussion), so index 1 is the disease probability.
    # NOTE(review): confirm the encoding of 'Flag' against the data source.
    return {
        'No Heart Disease': float(proba[0]),
        'Has Heart Disease': float(proba[1]),
    }

In [32]:
# Input widgets for the demo; each label is the caption shown next to the widget.
sex = gr.inputs.Radio(['female', 'male'], label="Sex")
age = gr.inputs.Slider(minimum=1, maximum=100, default=22, label="Age")
ch = gr.inputs.Slider(minimum=120, maximum=570, default=200, label="Cholestoral (in mg/dl)")
cardio = gr.inputs.Radio([0, 1, 2], label="Resting electrocardiographic results")

# The widget order must match the parameter order of inference(age, sex, ch, cardio).
demo = gr.Interface(
    fn=inference,
    inputs=[age, sex, ch, cardio],
    outputs="label",
    live=False,
)

# share=True also requests a public URL. (Add debug=True when running in Colab.)
demo.launch(share=True)

Running on local URL:  http://127.0.0.1:7861/
Running on public URL: https://21477.gradio.app

This share link expires in 72 hours. For free permanent hosting, check out Spaces (https://huggingface.co/spaces)


(<fastapi.applications.FastAPI at 0x2492ddf6220>,
 'http://127.0.0.1:7861/',
 'https://21477.gradio.app')