In [24]:
from sklearn.compose import make_column_transformer
from sklearn.pipeline import make_pipeline
from xgboost import XGBClassifier 
import pandas as pd 
from sklearn.model_selection import train_test_split 
from sklearn.preprocessing import StandardScaler , OneHotEncoder
from sklearn.metrics import classification_report
import pickle

In [15]:
df=pd.read_csv('liver_cirrhosis.csv')
df['Age'] = (df['Age'] / 365).astype('int')
df['N_Years'] = (df['N_Days'] / 365).astype('int')
df.drop('N_Days',axis=1,inplace=True)
df.head()

Unnamed: 0,Status,Drug,Age,Sex,Ascites,Hepatomegaly,Spiders,Edema,Bilirubin,Cholesterol,Albumin,Copper,Alk_Phos,SGOT,Tryglicerides,Platelets,Prothrombin,Stage,N_Years
0,C,Placebo,50,F,N,Y,N,N,0.5,149.0,4.04,227.0,598.0,52.7,57.0,256.0,9.9,1,6
1,C,Placebo,54,M,Y,N,Y,N,0.5,219.0,3.93,22.0,663.0,45.0,75.0,220.0,10.8,2,3
2,C,Placebo,32,F,N,N,N,N,0.5,320.0,3.54,51.0,1243.0,122.45,80.0,225.0,10.0,2,11
3,D,Placebo,45,F,N,N,N,N,0.7,255.0,3.74,23.0,1024.0,77.5,58.0,151.0,10.2,2,5
4,D,Placebo,59,F,N,Y,N,N,1.9,486.0,3.54,74.0,1052.0,108.5,109.0,151.0,11.5,1,5


In [16]:
X= df.drop('Stage',axis=1)
y= df['Stage']-1 #-1 is for label encoding 
X_train ,X_test , y_train , y_test = train_test_split(X,y,test_size=0.025,random_state=42,stratify=y)

In [17]:
col_trans = make_column_transformer(
    (OneHotEncoder(drop='first'),['Status','Drug','Age','Sex','Ascites','Hepatomegaly','Spiders','Edema']),
    (StandardScaler(),['Age','Bilirubin','Cholesterol','Albumin','Copper','Alk_Phos','SGOT','Tryglicerides','Platelets','Prothrombin','N_Years']),
    remainder='passthrough'
)
pipeline=make_pipeline(col_trans,XGBClassifier())

In [22]:
pipeline.fit(X_train,y_train)

In [23]:
y_pred=pipeline.predict(X_test)
print(classification_report(y_pred,y_test))

              precision    recall  f1-score   support

           0       0.94      0.97      0.96       200
           1       0.98      0.96      0.97       214
           2       0.98      0.96      0.97       211

    accuracy                           0.97       625
   macro avg       0.97      0.97      0.97       625
weighted avg       0.97      0.97      0.97       625



In [30]:
y_pred_val = pipeline.predict(X_train)
print(classification_report(y_pred_val,y_train))

              precision    recall  f1-score   support

           0       0.98      0.99      0.98      7981
           1       0.99      0.98      0.98      8309
           2       0.99      0.99      0.99      8085

    accuracy                           0.98     24375
   macro avg       0.98      0.98      0.98     24375
weighted avg       0.98      0.98      0.98     24375



In [25]:
pickle.dump(pipeline,open('saved_pipeline.sav','wb'))

In [29]:
X_test['Stage']=y_test+1
X_test.to_csv('Test_data.csv',index=False)