# Predicting Liver Disease in Patients

In [25]:
#importing library
import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf

In [26]:
#loading the dataset
# Read the liver-patient records CSV into a DataFrame, then display it.
df = pd.read_csv(
    filepath_or_buffer='../input/indian-liver-patient-records/indian_liver_patient.csv'
)
df

Unnamed: 0,Age,Gender,Total_Bilirubin,Direct_Bilirubin,Alkaline_Phosphotase,Alamine_Aminotransferase,Aspartate_Aminotransferase,Total_Protiens,Albumin,Albumin_and_Globulin_Ratio,Dataset
0,65,Female,0.7,0.1,187,16,18,6.8,3.3,0.90,1
1,62,Male,10.9,5.5,699,64,100,7.5,3.2,0.74,1
2,62,Male,7.3,4.1,490,60,68,7.0,3.3,0.89,1
3,58,Male,1.0,0.4,182,14,20,6.8,3.4,1.00,1
4,72,Male,3.9,2.0,195,27,59,7.3,2.4,0.40,1
...,...,...,...,...,...,...,...,...,...,...,...
578,60,Male,0.5,0.1,500,20,34,5.9,1.6,0.37,2
579,40,Male,0.6,0.1,98,35,31,6.0,3.2,1.10,1
580,52,Male,0.8,0.2,245,48,49,6.4,3.2,1.00,1
581,31,Male,1.3,0.5,184,29,32,6.8,3.4,1.00,1


In [27]:
#checking for null value in the dataset
# info() lists each column's non-null count and dtype; a count below the
# total number of entries marks a column that has missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 583 entries, 0 to 582
Data columns (total 11 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   Age                         583 non-null    int64  
 1   Gender                      583 non-null    object 
 2   Total_Bilirubin             583 non-null    float64
 3   Direct_Bilirubin            583 non-null    float64
 4   Alkaline_Phosphotase        583 non-null    int64  
 5   Alamine_Aminotransferase    583 non-null    int64  
 6   Aspartate_Aminotransferase  583 non-null    int64  
 7   Total_Protiens              583 non-null    float64
 8   Albumin                     583 non-null    float64
 9   Albumin_and_Globulin_Ratio  579 non-null    float64
 10  Dataset                     583 non-null    int64  
dtypes: float64(5), int64(5), object(1)
memory usage: 50.2+ KB


In [28]:
# Number of missing entries in each column
df.isnull().sum()

Age                           0
Gender                        0
Total_Bilirubin               0
Direct_Bilirubin              0
Alkaline_Phosphotase          0
Alamine_Aminotransferase      0
Aspartate_Aminotransferase    0
Total_Protiens                0
Albumin                       0
Albumin_and_Globulin_Ratio    4
Dataset                       0
dtype: int64

# Preprocessing 

In [29]:
#replacing na with mean
# Only Albumin_and_Globulin_Ratio has missing entries; fill them with that column's mean.
df = df.fillna(
    value={'Albumin_and_Globulin_Ratio': df['Albumin_and_Globulin_Ratio'].mean()}
)

In [30]:
#Encoding
def binary_encode(df,column,positive_value):
    """Return a copy of ``df`` with ``column`` recoded as 0/1 integers.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame. It is copied first, so the caller's object is not modified.
    column : label
        Name of the column to encode.
    positive_value : scalar
        Entries equal to this value become 1; every other entry becomes 0.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` in which ``column`` holds int64 zeros and ones.
    """
    df=df.copy()
    # Vectorised comparison instead of a per-row Python lambda; casting the
    # boolean mask yields an int64 column even when the frame has no rows.
    df[column]=(df[column]==positive_value).astype('int64')
    return df


In [31]:
#encoding Gender
# 'Male' -> 1, anything else -> 0.
# Not idempotent: re-running this cell on the already-encoded column
# compares integers with 'Male' and would turn every row into 0.
df = binary_encode(df, column='Gender', positive_value='Male')

In [32]:
#encoding dataset
# Label 1 stays 1; every other label becomes 0, giving a binary target.
df = binary_encode(df, column='Dataset', positive_value=1)

In [33]:
# Display the frame after encoding: Gender and Dataset now hold 0/1 integers.
df

Unnamed: 0,Age,Gender,Total_Bilirubin,Direct_Bilirubin,Alkaline_Phosphotase,Alamine_Aminotransferase,Aspartate_Aminotransferase,Total_Protiens,Albumin,Albumin_and_Globulin_Ratio,Dataset
0,65,0,0.7,0.1,187,16,18,6.8,3.3,0.90,1
1,62,1,10.9,5.5,699,64,100,7.5,3.2,0.74,1
2,62,1,7.3,4.1,490,60,68,7.0,3.3,0.89,1
3,58,1,1.0,0.4,182,14,20,6.8,3.4,1.00,1
4,72,1,3.9,2.0,195,27,59,7.3,2.4,0.40,1
...,...,...,...,...,...,...,...,...,...,...,...
578,60,1,0.5,0.1,500,20,34,5.9,1.6,0.37,0
579,40,1,0.6,0.1,98,35,31,6.0,3.2,1.10,1
580,52,1,0.8,0.2,245,48,49,6.4,3.2,1.00,1
581,31,1,1.3,0.5,184,29,32,6.8,3.4,1.00,1


In [34]:
#Splitting the dataset into features and target
# Features are every column except the label; the label is the 'Dataset' column.
x = df.drop(columns='Dataset')
y = df['Dataset']

In [35]:
#train_test_split, then scaling the dataset
# Split BEFORE scaling so the scaler's mean and variance come from the
# training rows only; fitting it on the full dataset leaks test-set
# statistics into training.
# random_state pins the shuffle so the split is reproducible;
# stratify=y keeps the class ratio the same in both partitions.
x_train,x_test,y_train,y_test=train_test_split(
    x,y,train_size=0.7,random_state=42,stratify=y)

scaler=StandardScaler()
x_train=scaler.fit_transform(x_train)  # learn the scaling parameters on train only
x_test=scaler.transform(x_test)        # apply those same parameters to the held-out rows

In [37]:
# Share of rows whose target is 1 (the target is 0/1, so the sum counts the positives)
y.sum() / y.size

0.7135506003430532

In [38]:
# Shapes of the training and test feature matrices, one per line
print(x_train.shape, x_test.shape, sep='\n')

(408, 10)
(175, 10)


In [51]:
#tensorflow creating layers
# Take the input width from the training matrix instead of hard-coding 10.
inputs=tf.keras.Input(shape=(x_train.shape[1],))
# The intermediate tensor is named `hidden` rather than `x`, so the feature
# matrix `x` built in the preprocessing cells is not overwritten by a Keras tensor.
hidden=tf.keras.layers.Dense(64,activation='relu')(inputs)
hidden=tf.keras.layers.Dense(64,activation='relu')(hidden)
# One sigmoid unit: a single value in (0, 1) for the binary target.
outputs=tf.keras.layers.Dense(1,activation='sigmoid')(hidden)
model=tf.keras.Model(inputs=inputs,outputs=outputs)


In [66]:
#compiling the model
# Training hyper-parameters
batch_size = 64
epochs = 31

# Adam optimizer with binary cross-entropy; track accuracy and AUC
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy', tf.keras.metrics.AUC(name='auc')],
)

# Train silently (verbose=0), holding out 20% of the training data for validation
history = model.fit(
    x=x_train,
    y=y_train,
    validation_split=0.2,
    batch_size=batch_size,
    epochs=epochs,
    verbose=0,
)

In [67]:
#Results
# Line chart of training loss and validation loss over the recorded epochs
fig = px.line(
    data_frame=history.history,
    y=['loss', 'val_loss'],
    title='Training and Validation Loss',
    labels={'index': 'Epoch', 'value': 'Loss'},
)
fig.show()

In [65]:
# 1-based epoch at which the validation loss was lowest
1 + np.argmin(history.history['val_loss'])

8

In [69]:
#evaluating the model
# Score the held-out test split; the result lists the loss followed by the
# compiled metrics (accuracy, auc).
model.evaluate(x=x_test, y=y_test)



[0.5093448162078857, 0.7428571581840515, 0.7749706506729126]