In [1]:
import pandas as pd

df = pd.read_csv('merged_symptoms_data.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,Disease,Symptom
0,0,hypertensive disease,shortness_of_breath syncope sweat_sweatin...
1,1,diabetes,shortness_of_breath sweat_sweating_incre...
2,2,"depression mental , depressive disorder",sleeplessness wo...
3,3,"coronary arteriosclerosis ,coronary heart disease",pain_chest shortness_of_breath sweat_swea...
4,4,pneumonia,shortness_of_breath rale ...


In [2]:
df.columns

Index(['Unnamed: 0', 'Disease', 'Symptom'], dtype='object')

In [3]:
df.drop('Unnamed: 0', axis=1, inplace=True)

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7484 entries, 0 to 7483
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   Disease  7484 non-null   object
 1   Symptom  7484 non-null   object
dtypes: object(2)
memory usage: 117.1+ KB


In [5]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(df['Symptom'], df['Disease'], test_size = .25, shuffle = True, random_state=44)

In [6]:
# Import required libraries
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score, classification_report

# 1. Multinomial Probabilistic Model (Naive Bayes)
multinomial_pipeline = Pipeline([
    ('tfidf', TfidfVectorizer()),
    ('clf', MultinomialNB())
])

multinomial_pipeline.fit(X_train, y_train)
multinomial_predictions = multinomial_pipeline.predict(X_test)

print("Multinomial Naive Bayes:")
print(f"Accuracy: {accuracy_score(y_test, multinomial_predictions)}")
print(classification_report(y_test, multinomial_predictions))

# 2. ExtraTree Classifier Model
extra_tree_pipeline = Pipeline([
    ('tfidf', TfidfVectorizer()),
    ('clf', ExtraTreesClassifier(random_state=42))
])

extra_tree_pipeline.fit(X_train, y_train)
extra_tree_predictions = extra_tree_pipeline.predict(X_test)

print("\nExtraTree Classifier:")
print(f"Accuracy: {accuracy_score(y_test, extra_tree_predictions)}")
print(classification_report(y_test, extra_tree_predictions))

# 3. Decision Tree Classifier Model
decision_tree_pipeline = Pipeline([
    ('tfidf', TfidfVectorizer()),
    ('clf', DecisionTreeClassifier(random_state=42))
])

decision_tree_pipeline.fit(X_train, y_train)
decision_tree_predictions = decision_tree_pipeline.predict(X_test)

print("\nDecision Tree Classifier:")
print(f"Accuracy: {accuracy_score(y_test, decision_tree_predictions)}")
print(classification_report(y_test, decision_tree_predictions))

# 4. SVM Classifier Model
svm_pipeline = Pipeline([
    ('tfidf', TfidfVectorizer()),
    ('clf', LinearSVC())
])

svm_pipeline.fit(X_train, y_train)
svm_predictions = svm_pipeline.predict(X_test)

print("\nSVM Classifier:")
print(f"Accuracy: {accuracy_score(y_test, svm_predictions)}")
print(classification_report(y_test, svm_predictions))

Multinomial Naive Bayes:
Accuracy: 0.9182255478353821
                                                          precision    recall  f1-score   support

                 (vertigo) Paroymsal  Positional Vertigo       0.97      1.00      0.99        34
                                                    AIDS       0.93      1.00      0.97        28
                                                    Acne       1.00      1.00      1.00        37
                                     Alcoholic hepatitis       1.00      1.00      1.00        30
                                                 Allergy       1.00      1.00      1.00        34
                                     Alzheimer's disease       1.00      1.00      1.00         4
                                               Arthritis       1.00      1.00      1.00        32
                                        Bronchial Asthma       0.94      1.00      0.97        33
                                    Cervical spondylosis       

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



ExtraTree Classifier:
Accuracy: 0.9652592196686264
                                                          precision    recall  f1-score   support

                 (vertigo) Paroymsal  Positional Vertigo       1.00      1.00      1.00        34
                                                    AIDS       1.00      1.00      1.00        28
                                                    Acne       1.00      1.00      1.00        37
                                     Alcoholic hepatitis       1.00      1.00      1.00        30
                                                 Allergy       1.00      1.00      1.00        34
                                     Alzheimer's disease       1.00      1.00      1.00         4
                                               Arthritis       1.00      1.00      1.00        32
                                        Bronchial Asthma       1.00      1.00      1.00        33
                                    Cervical spondylosis       1.

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Decision Tree Classifier:
Accuracy: 0.848743987172635
                                                          precision    recall  f1-score   support

                 (vertigo) Paroymsal  Positional Vertigo       1.00      1.00      1.00        34
                                                    AIDS       1.00      1.00      1.00        28
                                                    Acne       1.00      1.00      1.00        37
                                     Alcoholic hepatitis       1.00      1.00      1.00        30
                                                 Allergy       1.00      1.00      1.00        34
                                     Alzheimer's disease       0.57      1.00      0.73         4
                                               Arthritis       1.00      1.00      1.00        32
                                        Bronchial Asthma       1.00      1.00      1.00        33
                                    Cervical spondylosis      

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



SVM Classifier:
Accuracy: 0.9818278995189738
                                                          precision    recall  f1-score   support

                 (vertigo) Paroymsal  Positional Vertigo       1.00      1.00      1.00        34
                                                    AIDS       1.00      1.00      1.00        28
                                                    Acne       1.00      1.00      1.00        37
                                     Alcoholic hepatitis       1.00      1.00      1.00        30
                                                 Allergy       1.00      1.00      1.00        34
                                     Alzheimer's disease       1.00      1.00      1.00         4
                                               Arthritis       1.00      1.00      1.00        32
                                        Bronchial Asthma       1.00      1.00      1.00        33
                                    Cervical spondylosis       1.00    

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# Building Simple User Interaction

In [7]:
ds = pd.read_csv('Datasets/symptom_Description.csv')
ds.head(5)

Unnamed: 0,Disease,Description
0,Drug Reaction,An adverse drug reaction (ADR) is an injury ca...
1,Malaria,An infectious disease caused by protozoan para...
2,Allergy,An allergy is an immune system response to a f...
3,Hypothyroidism,"Hypothyroidism, also called underactive thyroi..."
4,Psoriasis,Psoriasis is a common skin disorder that forms...


In [8]:
ds.index=ds['Disease']
ds=ds.drop('Disease',axis=1)
ds.head()

Unnamed: 0_level_0,Description
Disease,Unnamed: 1_level_1
Drug Reaction,An adverse drug reaction (ADR) is an injury ca...
Malaria,An infectious disease caused by protozoan para...
Allergy,An allergy is an immune system response to a f...
Hypothyroidism,"Hypothyroidism, also called underactive thyroi..."
Psoriasis,Psoriasis is a common skin disorder that forms...


In [9]:
pr=pd.read_csv('Datasets/symptom_precaution.csv')
pr.head()

Unnamed: 0,Disease,Precaution_1,Precaution_2,Precaution_3,Precaution_4
0,Drug Reaction,stop irritation,consult nearest hospital,stop taking drug,follow up
1,Malaria,Consult nearest hospital,avoid oily food,avoid non veg food,keep mosquitos out
2,Allergy,apply calamine,cover area with bandage,,use ice to compress itching
3,Hypothyroidism,reduce stress,exercise,eat healthy,get proper sleep
4,Psoriasis,wash hands with warm soapy water,stop bleeding using pressure,consult doctor,salt baths


In [10]:
pr = pr.fillna("")

pr['precautions']=""
pr['punc']=', '

for i in range(1,5):
    pr['s']=pr["Precaution_{}".format(i)]+pr['punc']
    pr['precautions']=pr['precautions']+pr['s']

for i in range(1,5):
    pr=pr.drop("Precaution_{}".format(i),axis=1)

pr=pr.drop(['s','punc'],axis=1)
pr.head()

Unnamed: 0,Disease,precautions
0,Drug Reaction,"stop irritation, consult nearest hospital, sto..."
1,Malaria,"Consult nearest hospital, avoid oily food, avo..."
2,Allergy,"apply calamine, cover area with bandage, , use..."
3,Hypothyroidism,"reduce stress, exercise, eat healthy, get prop..."
4,Psoriasis,"wash hands with warm soapy water, stop bleedin..."


In [11]:
pr.index=pr['Disease']
pr=pr.drop('Disease',axis=1)
pr.head()

Unnamed: 0_level_0,precautions
Disease,Unnamed: 1_level_1
Drug Reaction,"stop irritation, consult nearest hospital, sto..."
Malaria,"Consult nearest hospital, avoid oily food, avo..."
Allergy,"apply calamine, cover area with bandage, , use..."
Hypothyroidism,"reduce stress, exercise, eat healthy, get prop..."
Psoriasis,"wash hands with warm soapy water, stop bleedin..."


In [12]:
rm = pd.read_csv('recommended_medicines.csv')
rm.head()

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,total_pred
0,,,mean
1,condition,drugName,
2,ADHD,Adderall,0.02342432391602323
3,ADHD,Adderall XR,0.015477133782930466
4,ADHD,Adzenys XR-ODT,0.0037711206823587418


In [13]:
rm.rename(columns={'Unnamed: 0': 'Disease', 'Unnamed: 1': 'drugName'}, inplace=True)
rm = rm.iloc[2:]
rm.head()

Unnamed: 0,Disease,drugName,total_pred
2,ADHD,Adderall,0.0234243239160232
3,ADHD,Adderall XR,0.0154771337829304
4,ADHD,Adzenys XR-ODT,0.0037711206823587
5,ADHD,Amantadine,0.0036568442980448
6,ADHD,Amphetamine,0.004938021333847


In [14]:
s=input('Please specify your symptoms : ')

# Few examples to try-
# Acne - skin_rash pus_filled_pimples blackheads scurringskin_rash pus_filled_pimples blackheads scurring
# Migraine - acidity indigestion headache blurred_and_distorted_vision excessive_hunger stiff_neck depression irritability visual_disturbances
# Malaria - chills vomiting high_fever sweating headache nausea muscle_pain

disease_of_patient=svm_pipeline.predict([s])[0]

if disease_of_patient in ds['Description']:
    description = ds['Description'][disease_of_patient]
    precaution = pr['precautions'][disease_of_patient]
    print('\nPredicted Disease: {0}\n\nDescription: {1}\n\nPrecautions: You should {2}'.format(disease_of_patient, description, precaution))
else:
    print('\nPredicted Disease: {0}'.format(disease_of_patient))

if disease_of_patient in rm["Disease"].values:
        recommended_drugs = (
            rm[rm["Disease"] == disease_of_patient]
            .sort_values(by="total_pred", ascending=False)
            .head(3)
        )
        print("\nRecommended Drugs:")
        for i, row in recommended_drugs.iterrows():
            print(f"{row['drugName']}")

Please specify your symptoms :  chills vomiting high_fever sweating headache nausea muscle_pain



Predicted Disease: Malaria

Description: An infectious disease caused by protozoan parasites from the Plasmodium family that can be transmitted by the bite of the Anopheles mosquito or by a contaminated needle or transfusion. Falciparum malaria is the most deadly type.

Precautions: You should Consult nearest hospital, avoid oily food, avoid non veg food, keep mosquitos out, 

Recommended Drugs:
Doxycycline
Coartem
Fansidar
