In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report



In [4]:
df = pd.read_csv('deficency2.5.csv')


In [5]:
print("First 5 rows of the dataset:")
display(df.head())




First 5 rows of the dataset:


Unnamed: 0,Name,Age,Gender,Blood_Type,Anxiety,Appetite_Changes,Balance_Problems,Bleeding_Gums,Bone_Pain,Brittle_Nails,...,Slow_Healing,Tongue_Problems,Vision_Problems,Weak_Bones,Weak_Immunity,Weakened_Immune_System,Weakness,Weight_Gain,Weight_Loss,Deficiency
0,Adam Mitchell,18,Male,A+,No,No,No,No,Yes,No,...,No,No,No,Yes,No,No,No,No,No,Calcium
1,Alexandra Arnold,65,Female,O-,No,Yes,No,No,No,No,...,No,No,No,Yes,No,No,No,No,No,Calcium
2,Alexis Ellison,66,Male,O+,Yes,No,No,No,No,No,...,No,No,No,Yes,No,No,Yes,No,No,Calcium
3,Alison Hunter,76,Female,AB-,No,No,No,No,No,No,...,No,No,No,Yes,Yes,No,No,No,No,Calcium
4,Amanda Cunningham,58,Female,AB-,No,No,No,No,No,No,...,No,No,No,Yes,No,No,No,No,No,Calcium


In [6]:
print("\nMissing values in each column:")
print(df.isnull().sum())



Missing values in each column:
Name                      0
Age                       0
Gender                    0
Blood_Type                0
Anxiety                   0
                         ..
Weakened_Immune_System    0
Weakness                  0
Weight_Gain               0
Weight_Loss               0
Deficiency                0
Length: 68, dtype: int64


In [7]:
df = df.dropna()


In [8]:
label_encoders = {}
for column in df.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    df[column] = le.fit_transform(df[column])
    label_encoders[column] = le



In [9]:
X = df.iloc[:, :-1]
y = df.iloc[:, -1]



In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)



In [11]:
# Initialize the Decision Tree Classifier
dt_classifier = DecisionTreeClassifier(random_state=42)



In [12]:
dt_classifier.fit(X_train, y_train)



In [13]:
# Predict on the test set
y_pred = dt_classifier.predict(X_test)



In [14]:
acc = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred, average='weighted', zero_division=0)
rec = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)



In [15]:
print("\nModel Evaluation Metrics:")
print(f"Accuracy: {acc:.4f}")
print(f"Precision: {prec:.4f}")
print(f"Recall: {rec:.4f}")
print(f"F1 Score: {f1:.4f}")
print("\nConfusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(class_report)


Model Evaluation Metrics:
Accuracy: 0.8580
Precision: 0.8596
Recall: 0.8580
F1 Score: 0.8576

Confusion Matrix:
[[47  1  0  2  0  1  0  0  1  0]
 [ 0 44  2  1  1  0  1  1  1  0]
 [ 0  2 37  0  0  2  1  1  0  0]
 [ 0  2  0 33  0  3  1  0  0  0]
 [ 1  1  2  3 33  1  2  1  0  1]
 [ 2  0  2  0  3 50  0  0  1  1]
 [ 0  0  1  0  0  1 45  2  0  0]
 [ 1  0  1  1  1  2  5 53  1  0]
 [ 2  2  0  0  0  1  0  0 40  2]
 [ 0  0  0  1  0  0  0  1  1 47]]

Classification Report:
              precision    recall  f1-score   support

           0       0.89      0.90      0.90        52
           1       0.85      0.86      0.85        51
           2       0.82      0.86      0.84        43
           3       0.80      0.85      0.82        39
           4       0.87      0.73      0.80        45
           5       0.82      0.85      0.83        59
           6       0.82      0.92      0.87        49
           7       0.90      0.82      0.85        65
           8       0.89      0.85      0.87  

In [16]:
import pickle
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Load your dataset
df = pd.read_csv("deficency2.5.csv")

# Create encoders
le_gender = LabelEncoder()
le_blood = LabelEncoder()
le_def = LabelEncoder()

# Fit them
df["Gender"] = le_gender.fit_transform(df["Gender"])
df["Blood_Type"] = le_blood.fit_transform(df["Blood_Type"])
df["Deficiency"] = le_def.fit_transform(df["Deficiency"])

# Save as a dictionary — CORRECT format for Streamlit app
encoders = {
    "gender": le_gender,
    "blood_type": le_blood,
    "deficiency": le_def
}

with open("label_encoders.pkl", "wb") as f:
    pickle.dump(encoders, f)

print("✅ label_encoders.pkl saved in correct format.")


✅ label_encoders.pkl saved in correct format.


In [17]:
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
import pickle

# Load sample dataset (you can replace this with your own data)
data = load_iris()
X, y = data.data, data.target

# Train a Decision Tree model
model = DecisionTreeClassifier()
model.fit(X, y)

# Save the trained model to a .pkl file
with open("decision_tree_model.pkl", "wb") as f:
    pickle.dump(model, f)

print("Model saved as decision_tree_model.pkl")


Model saved as decision_tree_model.pkl


In [None]:
from sklearn.tree import DecisionTreeClassifier
model = DecisionTreeClassifier()
model.fit(X_train, y_train)  # Or whatever code you used
