In [4]:
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [5]:
!ls "/content/drive/MyDrive/AI-Powered Disease Symptom Analyzer"


disease_rf_model.pkl		label_encoder.pkl  Training.csv
Disease_Symptom_Analyzer_Model	Testing.csv


In [6]:
import pandas as pd

file_path = "/content/drive/MyDrive/AI-Powered Disease Symptom Analyzer/Training.csv"

df = pd.read_csv(file_path)

df.head()


Unnamed: 0,itching,skin_rash,nodal_skin_eruptions,continuous_sneezing,shivering,chills,joint_pain,stomach_pain,acidity,ulcers_on_tongue,...,scurring,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,blister,red_sore_around_nose,yellow_crust_ooze,prognosis,Unnamed: 133
0,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,Fungal infection,
1,0,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,Fungal infection,
2,1,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,Fungal infection,
3,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,Fungal infection,
4,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,Fungal infection,


In [7]:
print("Shape:", df.shape)
df.info()


Shape: (4920, 134)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4920 entries, 0 to 4919
Columns: 134 entries, itching to Unnamed: 133
dtypes: float64(1), int64(132), object(1)
memory usage: 5.0+ MB


In [8]:
# Drop unnecessary column
df = df.drop(columns=["Unnamed: 133"])

print("New shape:", df.shape)


New shape: (4920, 133)


In [9]:
# Feature columns (all except last)
X = df.iloc[:, :-1]

# Target column (last column)
y = df.iloc[:, -1]

print("Features shape:", X.shape)
print("Target shape:", y.shape)


Features shape: (4920, 132)
Target shape: (4920,)


In [10]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
y_encoded = le.fit_transform(y)

print("Sample encoded labels:", y_encoded[:10])
print("Number of disease classes:", len(le.classes_))


Sample encoded labels: [15 15 15 15 15 15 15 15 15 15]
Number of disease classes: 41


In [11]:
import pickle

with open("label_encoder.pkl", "wb") as f:
    pickle.dump(le, f)


In [12]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded,
    test_size=0.2,
    random_state=42,
    stratify=y_encoded
)

print("Train size:", X_train.shape)
print("Test size:", X_test.shape)


Train size: (3936, 132)
Test size: (984, 132)


In [13]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report


In [14]:
rf_model = RandomForestClassifier(
    n_estimators=200,
    max_depth=None,
    random_state=42,
    n_jobs=-1
)


In [16]:
rf_model.fit(X_train, y_train)


In [17]:
# Predict on test data
y_pred = rf_model.predict(X_test)

# Accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Model Accuracy:", accuracy)


Model Accuracy: 1.0


In [None]:
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       1.00      1.00      1.00        24
           1       1.00      1.00      1.00        24
           2       1.00      1.00      1.00        24
           3       1.00      1.00      1.00        24
           4       1.00      1.00      1.00        24
           5       1.00      1.00      1.00        24
           6       1.00      1.00      1.00        24
           7       1.00      1.00      1.00        24
           8       1.00      1.00      1.00        24
           9       1.00      1.00      1.00        24
          10       1.00      1.00      1.00        24
          11       1.00      1.00      1.00        24
          12       1.00      1.00      1.00        24
          13       1.00      1.00      1.00        24
          14       1.00      1.00      1.00        24
          15       1.00      1.00      1.00        24
          16       1.00      1.00      1.00        24
          17       1.00    

In [None]:
import pickle

with open("disease_rf_model.pkl", "wb") as f:
    pickle.dump(rf_model, f)


In [None]:
import shutil

shutil.move(
    "disease_rf_model.pkl",
    "/content/drive/MyDrive/AI-Powered Disease Symptom Analyzer/disease_rf_model.pkl"
)


'/content/drive/MyDrive/AI-Powered Disease Symptom Analyzer/disease_rf_model.pkl'

In [None]:
import pickle

# Load model
with open("/content/drive/MyDrive/AI-Powered Disease Symptom Analyzer/disease_rf_model.pkl", "rb") as f:
    model = pickle.load(f)

# Load label encoder
with open("/content/drive/MyDrive/AI-Powered Disease Symptom Analyzer/label_encoder.pkl", "rb") as f:
    le = pickle.load(f)


In [None]:
symptom_list = X.columns.tolist()
symptom_index = {symptom: idx for idx, symptom in enumerate(symptom_list)}

# Quick check
list(symptom_index.items())[:5]


[('itching', 0),
 ('skin_rash', 1),
 ('nodal_skin_eruptions', 2),
 ('continuous_sneezing', 3),
 ('shivering', 4)]

In [None]:
import numpy as np

def predict_disease(selected_symptoms):
    # Create zero vector
    input_vector = np.zeros(len(symptom_list))

    # Set 1 for selected symptoms
    for symptom in selected_symptoms:
        if symptom in symptom_index:
            input_vector[symptom_index[symptom]] = 1

    # Predict
    predicted_label = model.predict([input_vector])[0]
    disease_name = le.inverse_transform([predicted_label])[0]

    return disease_name


In [None]:
# Example symptoms (you can change these)
test_symptoms = [
    "fever",
    "headache",
    "fatigue",
    "cough"
]

prediction = predict_disease(test_symptoms)
print("Predicted Disease:", prediction)


Predicted Disease: Bronchial Asthma




In [None]:
test_symptoms = [
    "chest_pain",
    "breathlessness",
    "sweating"
]

print("Predicted Disease:", predict_disease(test_symptoms))


Predicted Disease: Heart attack




In [14]:
X.columns.tolist()


['itching',
 'skin_rash',
 'nodal_skin_eruptions',
 'continuous_sneezing',
 'shivering',
 'chills',
 'joint_pain',
 'stomach_pain',
 'acidity',
 'ulcers_on_tongue',
 'muscle_wasting',
 'vomiting',
 'burning_micturition',
 'spotting_ urination',
 'fatigue',
 'weight_gain',
 'anxiety',
 'cold_hands_and_feets',
 'mood_swings',
 'weight_loss',
 'restlessness',
 'lethargy',
 'patches_in_throat',
 'irregular_sugar_level',
 'cough',
 'high_fever',
 'sunken_eyes',
 'breathlessness',
 'sweating',
 'dehydration',
 'indigestion',
 'headache',
 'yellowish_skin',
 'dark_urine',
 'nausea',
 'loss_of_appetite',
 'pain_behind_the_eyes',
 'back_pain',
 'constipation',
 'abdominal_pain',
 'diarrhoea',
 'mild_fever',
 'yellow_urine',
 'yellowing_of_eyes',
 'acute_liver_failure',
 'fluid_overload',
 'swelling_of_stomach',
 'swelled_lymph_nodes',
 'malaise',
 'blurred_and_distorted_vision',
 'phlegm',
 'throat_irritation',
 'redness_of_eyes',
 'sinus_pressure',
 'runny_nose',
 'congestion',
 'chest_pain',
