In [None]:
# Importing necessary libraries
import numpy as np         # For numerical computations, especially with arrays and matrices
import pandas as pd        # For data manipulation and analysis, working with data in DataFrames
import matplotlib.pyplot as plt   # For creating static, animated, and interactive visualizations
import seaborn as sns      # For statistical data visualization, built on top of Matplotlib


In [None]:

# Load the dataset from a CSV file into a pandas DataFrame.
#
# pd.read_csv() returns a DataFrame: a tabular structure similar to a database
# table or a spreadsheet. The file name is given as a relative path.
Data = pd.read_csv("kidney_disease.csv")


In [None]:

# Preview the first five rows (only a cell's last expression is rendered).
Data.head()


In [None]:
Data.tail()


In [None]:
# Checking the dimensions of the dataset (number of rows and columns)
Data.shape


In [None]:

# Displaying concise summary of the dataset
Data.info()


In [None]:

# Generating summary statistics for numerical columns in the dataset
Data.describe()


In [None]:

# Generating summary statistics for all columns (numerical and categorical)
Data.describe(include='all')


In [None]:

# Count the missing values in each column (only a cell's last expression is
# rendered, so this is the single statement the cell needs).
Data.isnull().sum()


In [None]:
Data.isnull().any()


In [None]:
Data.columns


In [None]:
# Preview the rows once more; the 'id' column is dropped in the next cell.
Data.head()


In [None]:


# Drop the 'id' column (presumably a row identifier rather than a feature).
#
# Data.drop(columns="id", errors="ignore"):
#   1. columns="id"    : the column to remove (same as drop("id", axis=1)).
#   2. errors="ignore" : do nothing when 'id' is already gone, so re-running
#                        this cell does not raise KeyError.
#   3. The result is assigned back to Data instead of using inplace=True,
#      which keeps the cell safe to execute more than once.
Data = Data.drop(columns="id", errors="ignore")


In [None]:
Data.head()


In [None]:
Data.describe()

In [None]:
Data.info()

In [None]:
# Replace the column labels with descriptive names. The labels are assigned
# positionally, so the list order must match the columns currently in Data.
Data.columns = [
    "age",
    "blood_pressure",
    "spcific_gravity",
    "albumin",
    "sugar",
    "red_blood_cells",
    "pus_cell",
    "pus_cell_clumps",
    "bacteria",
    "blood_glucose_random",
    "blood_urea",
    "serum_creatinine",
    "sodium",
    "potassium",
    "haemoglobin",
    "packed_cell_volume",
    "white_blood_cell_count",
    "red_blood_cell_count",
    "hypertension",
    "diabetes_mellitus",
    "coronry_artery_disease",
    "appetite",
    "peda_edema",
    "anemia",
    "CLASS",
]


In [None]:
Data.head()


In [None]:
Data.info()


In [None]:
# Collect the labels of every column whose dtype is numeric.
numerical_columns = Data.select_dtypes(include="number").columns
print("Numerical columns:", numerical_columns)

In [None]:


# Collect the labels of every column stored with the generic object dtype.
categorical_columns = Data.select_dtypes(include="object").columns
print("Categorical columns:", categorical_columns)


In [None]:
Data.head(3)


In [None]:
# Columns whose dtype is inspected and then converted to numeric further down.
text_columns = [
    "packed_cell_volume", "white_blood_cell_count", "red_blood_cell_count",
]


In [None]:
# Report the current dtype of each column listed in text_columns.
for column_name in text_columns:
    current_dtype = Data[column_name].dtype
    print(f"{column_name} : {current_dtype}")


In [None]:
def convert_text_to_numeric(data, column):
    """Convert ``data[column]`` to a numeric dtype, modifying ``data`` in place.

    Values that cannot be parsed as numbers become NaN (``errors="coerce"``).

    Parameters
    ----------
    data : pandas.DataFrame
        Frame that owns the column; it is modified in place.
    column : str
        Label of the column to convert.

    Returns
    -------
    None
    """
    # Operate on the frame that was passed in, not on the notebook-global Data,
    # so the helper also works for any other DataFrame.
    data[column] = pd.to_numeric(data[column], errors="coerce")
    
# Coerce each column listed in text_columns to numeric and report its new dtype.
for name in text_columns:
    convert_text_to_numeric(Data, name)
    converted_dtype = Data[name].dtype
    print(f"{name}: {converted_dtype}")


In [None]:
Data.info()


In [None]:
Data.isnull().sum()


In [None]:
missing_values=Data.isnull().sum()


In [None]:

missing_values[missing_values>0]


In [None]:

missing_values[missing_values>0].sort_values(ascending=False)



In [None]:

missing_values[missing_values>0].sort_values(ascending=False).head(8)


In [None]:
def mean_values(Data, column):
    """Fill missing values in ``Data[column]`` with the column mean, in place.

    Intended for numeric columns.

    Parameters
    ----------
    Data : pandas.DataFrame
        Frame that owns the column; it is modified in place.
    column : str
        Label of the numeric column to impute.

    Returns
    -------
    None
    """
    mean_val = Data[column].mean()
    # Assign the filled column back instead of calling fillna(inplace=True) on
    # Data[column]: that chained in-place call works on an intermediate object,
    # emits a FutureWarning in pandas 2.x and leaves Data unchanged when
    # Copy-on-Write is active.
    Data[column] = Data[column].fillna(mean_val)

In [None]:
def mode_values(Data, column):
    """Fill missing values in ``Data[column]`` with its most frequent value, in place.

    Intended for text (categorical) columns. A column without any non-missing
    value has no mode and is left unchanged instead of raising.

    Parameters
    ----------
    Data : pandas.DataFrame
        Frame that owns the column; it is modified in place.
    column : str
        Label of the column to impute.

    Returns
    -------
    None
    """
    modes = Data[column].mode()
    if modes.empty:
        # Nothing to impute with: every value in the column is missing.
        return
    # mode() may return several values on ties; the first one is used.
    Data[column] = Data[column].fillna(modes.iloc[0])

In [None]:
  
Data.columns


In [None]:
# Labels of every column that is not stored with the object dtype.
num_columns = [label for label in Data.columns if Data[label].dtype != "object"]


In [None]:

# Impute every non-object column with its mean.
for column_name in num_columns:
    mean_values(Data, column_name)
    


In [None]:

# Labels of every column stored with the object dtype.
categ_columns = [label for label in Data.columns if Data[label].dtype == "object"]


In [None]:

# Impute every object column with its most frequent value.
for column_name in categ_columns:
    mode_values(Data, column_name)




In [None]:

Data.isnull().sum()


In [None]:
Data.head()


In [None]:
# Show the distinct labels of three text columns.
# Single quotes are used inside the f-strings: reusing the outer quote
# character within a replacement field is a SyntaxError before Python 3.12.
print(f"diabetes_mellitus : {Data['diabetes_mellitus'].unique()}")
print(f"coronry_artery_disease : {Data['coronry_artery_disease'].unique()}")
print(f"CLASS : {Data['CLASS'].unique()}")


In [None]:
# Normalise the labels that carry a stray leading space or tab.
diabetes_label_fixes = {" yes": "yes", "\tno": "no", "\tyes": "yes"}
Data["diabetes_mellitus"] = Data["diabetes_mellitus"].replace(diabetes_label_fixes)


In [None]:

# Normalise the one label that carries a leading tab.
artery_label_fixes = {"\tno": "no"}
Data["coronry_artery_disease"] = Data["coronry_artery_disease"].replace(artery_label_fixes)


In [None]:

# Strip the trailing tab from "ckd\t" and rewrite "notckd" as "not ckd".
class_label_fixes = {"ckd\t": "ckd", "notckd": "not ckd"}
Data["CLASS"] = Data["CLASS"].replace(class_label_fixes)


In [None]:
# Show the distinct labels of the same three columns again.
# Single quotes are used inside the f-strings: reusing the outer quote
# character within a replacement field is a SyntaxError before Python 3.12.
print(f"diabetes_mellitus : {Data['diabetes_mellitus'].unique()}")
print(f"coronry_artery_disease : {Data['coronry_artery_disease'].unique()}")
print(f"CLASS : {Data['CLASS'].unique()}")


In [None]:
Data.head()


In [None]:
# Encode every two-valued text column as 0/1 using one lookup table per column.
# Series.map turns any label that is absent from its table into NaN.
binary_encodings = {
    "CLASS": {"ckd": 1, "not ckd": 0},
    "red_blood_cells": {"normal": 1, "abnormal": 0},
    "pus_cell": {"normal": 1, "abnormal": 0},
    "pus_cell_clumps": {"present": 1, "notpresent": 0},
    "bacteria": {"present": 1, "notpresent": 0},
    "hypertension": {"yes": 1, "no": 0},
    "diabetes_mellitus": {"yes": 1, "no": 0},
    "coronry_artery_disease": {"yes": 1, "no": 0},
    "appetite": {"good": 1, "poor": 0},
    "peda_edema": {"yes": 1, "no": 0},
    "anemia": {"yes": 1, "no": 0},
}

for column_name, encoding in binary_encodings.items():
    Data[column_name] = Data[column_name].map(encoding)
                                           

            

In [None]:
                                                       
                                                       

Data.head()


In [None]:
# Annotated heatmap of the pairwise correlations between the columns of Data.
fig, ax = plt.subplots(figsize=(15, 8))
correlations = Data.corr()
sns.heatmap(correlations, annot=True, linewidths=0.5, ax=ax)
plt.show()


In [None]:
Data.corr()["CLASS"]


In [None]:
Data["CLASS"].value_counts()


In [None]:
Data["CLASS"].unique()

In [None]:
from sklearn.model_selection import train_test_split


In [None]:
# Separate the target column from the feature columns.
Y = Data["CLASS"]
X = Data.drop(columns="CLASS")


In [None]:
X.head()


In [None]:
Y.head()


In [None]:
# Hold out 25% of the rows for testing; random_state fixes the shuffle.
xtrain, xtest, ytrain, ytest = train_test_split(
    X,
    Y,
    test_size=0.25,
    random_state=25,
)


In [None]:
xtrain.shape


In [None]:
xtest.shape


In [None]:
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Fix the estimator's seed: scikit-learn permutes the features at every split,
# so without random_state equally good splits can be chosen differently from
# run to run, changing the fitted tree and the reported metrics.
Model_DTC = DecisionTreeClassifier(random_state=25)

In [None]:
Model_DTC.fit(xtrain,ytrain)

In [None]:
xtest.shape

In [None]:
prediction=Model_DTC.predict(xtest)

In [None]:
prediction

In [None]:
ytest

In [None]:
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score,confusion_matrix

In [None]:
accuracy_score(ytest,prediction)

In [None]:
precision_score(ytest,prediction)

In [None]:
recall_score(ytest,prediction)

In [None]:
f1_score(ytest,prediction)

In [None]:
confusion_matrix(ytest,prediction)

In [None]:
# Predict the class of one hand-written sample (24 feature values).
# The values are wrapped in a DataFrame that carries the training column
# names: the model was fitted on a DataFrame, and scikit-learn warns about
# missing feature names when it is handed a bare nested list.
sample = pd.DataFrame(
    [[1, 2, 3, 2, 2, 43, 2, 4, 4, 2, 21, 3, 2, 4, 4, 2, 44, 4, 2, 1, 2, 3, 4, 2]],
    columns=xtrain.columns,
)
Model_DTC.predict(sample)