<a href="https://colab.research.google.com/github/zoro1324/Chronic-kidney-disease/blob/main/Chronic_kidney_disease.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

In [None]:
# Location of the raw CSV. The '/content' prefix is the Colab upload directory;
# change DATA_PATH when running the notebook elsewhere.
DATA_PATH = '/content/kidney_disease.csv'
df = pd.read_csv(DATA_PATH)
# A bare `df.shape` on a non-final line of a cell is evaluated and thrown away,
# so print it explicitly to actually show the row/column counts.
print(df.shape)
df.info()

In [None]:
# Preview the first rows of the freshly loaded frame.
df.head()

In [None]:
# Remove the 'id' column. Rebinding `df` (rather than inplace=True) together with
# errors='ignore' makes this cell idempotent: re-running it no longer raises
# KeyError because 'id' is already gone.
df = df.drop(columns='id', errors='ignore')
df.head()

In [None]:
# Summary statistics for the columns pandas currently treats as numeric.
df.describe()

In [None]:
# Expand the dataset's abbreviated column names into readable ones.
# Fixes four wrong labels from the previous mapping:
#   'bu'  -> Blood_Urea        (was "Blood_Unicorn")
#   'sc'  -> Serum_Creatinine  (was "Specific_Chromatin")
#   'pot' -> Potassium         (was "Potasium")
#   'ane' -> Anemia            (was "Ann_Artery_Disease")
# `rename` ignores keys that are not present, so rebinding `df` keeps the cell
# safe to re-run.
COLUMN_NAMES = {
    'bp': 'Blood_Pressure',
    'sg': 'Specific_Gravity',
    'al': 'Albumin',
    'su': 'Sugar',
    'rbc': 'Red_Blood_Cells',
    'pc': 'Pus_Cell',
    'pcc': 'Pus_Cell_Clumps',
    'ba': 'Bacteria',
    'bgr': 'Blood_Glucose_Random',
    'bu': 'Blood_Urea',
    'sc': 'Serum_Creatinine',
    'sod': 'Sodium',
    'pot': 'Potassium',
    'hemo': 'Hemoglobin',
    'pcv': 'Packed_Cell_Volume',
    'wc': 'White_Blood_Cell_Count',
    'rc': 'Red_Blood_Cell_Count',
    'htn': 'Hypertension',
    'dm': 'Diabetes_Mellitus',
    'cad': 'Coronary_Artery_Disease',
    'appet': 'Appetite',
    'pe': 'Pedal_Edema',
    'ane': 'Anemia',
}
df = df.rename(columns=COLUMN_NAMES)

In [None]:
# Re-check column names, non-null counts and dtypes after the rename.
df.info()

In [None]:
# Preview the first rows with the renamed columns.
df.head()

In [None]:
# Coerce the three blood-count columns to numeric; any value that cannot be
# parsed becomes NaN (errors='coerce') and is picked up by the imputation later.
for count_col in ('Packed_Cell_Volume', 'White_Blood_Cell_Count', 'Red_Blood_Cell_Count'):
    df[count_col] = pd.to_numeric(df[count_col], errors='coerce')
df.info()

In [None]:
# Per-column count of missing values; show only the columns that have any,
# largest first.
missing = df.isna().sum()
missing.loc[missing.gt(0)].sort_values(ascending=False)

In [None]:
# Show the distinct raw labels of three categorical columns to spot dirty values.
for label_col in ('classification', 'Diabetes_Mellitus', 'Appetite'):
    print(df[label_col].unique())

In [None]:
# Map the whitespace-polluted label variants onto their clean spelling.
# NOTE(review): only these two columns are cleaned; other text columns may
# carry similar stray tabs/spaces - confirm with .unique() before encoding.
classification_fixes = {'ckd\t': 'ckd'}
diabetes_fixes = {' yes': 'yes', '\tyes': 'yes', '\tno': 'no'}

df['classification'] = df['classification'].replace(classification_fixes)
df['Diabetes_Mellitus'] = df['Diabetes_Mellitus'].replace(diabetes_fixes)


In [None]:
# Re-print the distinct labels to confirm the cleanup took effect.
for label_col in ('classification', 'Diabetes_Mellitus', 'Appetite'):
    print(df[label_col].unique())

In [None]:
# Fill missing numeric values with the mean of the row's own class
# ('classification' group).
#
# The previous version also computed a per-group mode as a fallback, but that
# branch could never fire: a group mean is NaN only when every value in the
# group is NaN, in which case the mode is empty too. It is removed here, and
# the per-group row masks are replaced by one vectorized groupby/transform.
#
# NOTE(review): this imputation uses the target label and runs on the full
# frame before the train/test split, so test rows see label-derived values.
numeric_cols = [col for col in df.columns if df[col].dtype in ['float64', 'int64']]
for col in numeric_cols:
    # transform('mean') returns a Series aligned to df's index holding each
    # row's class mean, so fillna touches only the NaN cells.
    class_means = df.groupby('classification')[col].transform('mean')
    df[col] = df[col].fillna(class_means)

In [None]:
# Fill missing categorical values with the most frequent value inside the
# row's own class. The target column itself is never imputed.
categorical_cols = [
    name for name in df.columns
    if df[name].dtype == 'object' and name != 'classification'
]
for col in categorical_cols:
    for label in df['classification'].unique():
        in_class = df['classification'] == label
        modes = df.loc[in_class, col].mode()
        if modes.empty:
            # Nothing observed for this class/column pair; leave the NaNs.
            continue
        df.loc[in_class & df[col].isna(), col] = modes[0]

In [None]:
# Recount missing values after imputation; only columns that still have gaps
# are listed.
missing = df.isna().sum()
missing.loc[missing.gt(0)].sort_values(ascending=False)

In [None]:
# Distinct Hypertension labels, checked before they are label-encoded.
df['Hypertension'].unique()

In [None]:
# Distinct Diabetes_Mellitus labels, checked before they are label-encoded.
df['Diabetes_Mellitus'].unique()

In [None]:
# Final look at the distinct labels before encoding.
for label_col in ('classification', 'Diabetes_Mellitus', 'Appetite'):
    print(df[label_col].unique())

In [None]:
from sklearn.preprocessing import LabelEncoder

# Encode every text column as integers. LabelEncoder numbers the classes in
# sorted order, so e.g. 'no' -> 0 / 'yes' -> 1.
#
# One encoder is fitted per column and kept in `label_encoders`. Previously a
# single encoder was refit on each column, which discarded every mapping except
# the last one and made it impossible to decode values afterwards
# (label_encoders[col].classes_ / .inverse_transform).
label_encoders = {}
le = LabelEncoder()  # kept for compatibility; ends up bound to the last fitted encoder
for col in df.columns:
  if df[col].dtype == 'object':
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le
df.info()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
# Annotated correlation heatmap of all (now numeric) columns, drawn on an
# explicitly created 15x8 figure.
fig, ax = plt.subplots(figsize=(15, 8))
sns.heatmap(df.corr(), annot=True, linewidths=0.75, ax=ax)

In [None]:
from sklearn.model_selection import train_test_split
# Separate the target column from the feature matrix.
TARGET = 'classification'
y = df[TARGET]
X = df.drop(columns=[TARGET])

In [None]:
from sklearn.feature_selection import SelectKBest,f_classif
# Keep the 7 features with the highest ANOVA F-score against the target.
# NOTE(review): the selector is fitted on every row, before the train/test
# split in the next cell, so the held-out rows influence feature selection.
selector = SelectKBest(score_func=f_classif, k=7).fit(X, y)
X_new = selector.transform(X)

keep_mask = selector.get_support()
selected_features = X.columns[keep_mask]
print(selected_features)

X = X[selected_features]

In [None]:
# A fixed seed makes the split, and therefore every score computed from it,
# reproducible across kernel restarts. Stratifying on y keeps the class ratio
# identical in the train and test partitions.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify=y)

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB

In [None]:
# Candidate classifiers, keyed by display name. The two tree-based models are
# randomized, so they are seeded to give the same fitted model on every run.
models = {
    'Random_Forest' : RandomForestClassifier(n_estimators=100, random_state=42),
    'Decision_Tree' : DecisionTreeClassifier(max_depth=9, random_state=42),
    'KNeighbors' : KNeighborsClassifier(n_neighbors=8,n_jobs=-1),
    'SVC' : SVC(kernel='linear'),
    'NB' : GaussianNB()
}

In [None]:
from sklearn.metrics import accuracy_score,f1_score,confusion_matrix,precision_score,recall_score

In [None]:
# Fit every candidate on the training split and report its test-set metrics.
#
# Encoded label 0 is the CKD class (predict_kidney_disease reports CKD when the
# prediction equals 0). scikit-learn's f1/precision/recall default to
# pos_label=1, which would describe the *non*-CKD class, so the positive label
# is set explicitly to make these metrics describe disease detection.
CKD_LABEL = 0
for name,model in models.items():
  model.fit(X_train,y_train)
  y_pred = model.predict(X_test)
  print("------------------------------------------")
  print(f'{name}-trained')
  print("------------------------------------------")
  print('accuracy_score:',accuracy_score(y_test,y_pred))
  print('f1_score:',f1_score(y_test,y_pred,pos_label=CKD_LABEL))
  # Rows are true labels, columns are predicted labels, in sorted label order.
  print('confusion_matrix:',confusion_matrix(y_test,y_pred))
  print('precision_score:',precision_score(y_test,y_pred,pos_label=CKD_LABEL))
  print('recall_score:',recall_score(y_test,y_pred,pos_label=CKD_LABEL))
  print("------------------------------------------")


In [None]:
# The model served by the prediction UI. The choice is hard-coded here rather
# than derived from the scores printed by the evaluation loop.
BEST_MODEL_NAME = 'Random_Forest'
best_model = models[BEST_MODEL_NAME]

In [None]:
# Announce the chosen model between two separator lines.
banner_lines = (
    "=============================",
    "Best Model is Random_Forest",
    "==============================",
)
print(*banner_lines, sep="\n")

In [72]:
# List the selected feature columns and their dtypes; the prediction function
# below must supply values in exactly this column order.
X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 7 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Specific_Gravity      400 non-null    float64
 1   Albumin               400 non-null    float64
 2   Hemoglobin            400 non-null    float64
 3   Packed_Cell_Volume    400 non-null    float64
 4   Red_Blood_Cell_Count  400 non-null    float64
 5   Hypertension          400 non-null    int64  
 6   Diabetes_Mellitus     400 non-null    int64  
dtypes: float64(5), int64(2)
memory usage: 22.0 KB


In [None]:
import gradio as gr

In [None]:
def predict_kidney_disease(Specific_Gravity, Albumin, Hemoglobin, Packed_Cell_Volume, Red_Blood_Cell_Count, Hypertension, Diabetes_Mellitus, *, model=None):
    """Classify one patient record and return a human-readable verdict.

    Parameters
    ----------
    Specific_Gravity, Albumin, Hemoglobin, Packed_Cell_Volume, Red_Blood_Cell_Count
        Numeric measurements, in the same order as the training columns.
    Hypertension, Diabetes_Mellitus
        'yes' is encoded as 1; any other value (including None, i.e. nothing
        selected) is encoded as 0, matching the integer encoding used in training.
    model
        Optional classifier exposing ``predict``. Defaults to the notebook-level
        ``best_model``; mainly useful for testing.

    Returns
    -------
    str
        The CKD / no-CKD message, or a prompt naming the numeric fields that
        were left blank.
    """
    numeric_inputs = {
        'Specific_Gravity': Specific_Gravity,
        'Albumin': Albumin,
        'Hemoglobin': Hemoglobin,
        'Packed_Cell_Volume': Packed_Cell_Volume,
        'Red_Blood_Cell_Count': Red_Blood_Cell_Count,
    }

    # A blank number field arrives as None; report it instead of letting the
    # classifier fail on a non-numeric value.
    blank_fields = [field for field, value in numeric_inputs.items() if value is None]
    if blank_fields:
        return 'Please provide a value for: ' + ', '.join(blank_fields)

    # Build a one-row frame with the training column names (dict order is the
    # column order) so the input matches what the model was fitted on, rather
    # than passing an unnamed array.
    features = pd.DataFrame([{
        **numeric_inputs,
        'Hypertension': 1 if Hypertension == 'yes' else 0,
        'Diabetes_Mellitus': 1 if Diabetes_Mellitus == 'yes' else 0,
    }])

    classifier = best_model if model is None else model
    prediction = classifier.predict(features)

    # Encoded label 0 is the CKD class.
    return 'You have Chronic kidney disease (CKD) ' if prediction[0] == 0 else "You Don't have Chronic kidney disease (CKD) "

In [None]:
# Input widgets, in the same order as predict_kidney_disease's parameters.
YES_NO = ["yes", "no"]
inputs = [
    gr.Number(label="Specific Gravity"),
    gr.Number(label="Albumin"),
    gr.Number(label="Hemoglobin"),
    gr.Number(label="Packed Cell Volume"),
    gr.Number(label="Red Blood Cell Count"),
    # A Radio without `choices` renders no options, so the value could never be
    # 'yes'. Offer both answers and default to "no", which is how an unselected
    # radio was already interpreted by the prediction function.
    gr.Radio(choices=YES_NO, value="no", label="Hypertension"),
    gr.Radio(choices=YES_NO, value="no", label="Diabetes Mellitus")
]

output = gr.Textbox(label="Kidney Disease Prediction")

gr.Interface(fn=predict_kidney_disease, inputs=inputs, outputs=output, title="Kidney Disease Prediction").launch()