In [213]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.multioutput import MultiOutputClassifier
import joblib

print("Libraries imported successfully!")

Libraries imported successfully!


In [214]:
# Load dataset
df = pd.read_csv("mDataSet.csv")
print("Dataset Shape:", df.shape)
print("\nFirst 5 Rows:\n", df.head())
print("\nMissing Values:\n", df.isnull().sum())

# Handle missing values
numerical_cols = df.select_dtypes(include=['int64', 'float64']).columns
categorical_cols = df.select_dtypes(include=['object']).columns
df[numerical_cols] = df[numerical_cols].fillna(df[numerical_cols].median())
df[categorical_cols] = df[categorical_cols].fillna(df[categorical_cols].mode().iloc[0])
print("\nMissing Values After Handling:\n", df.isnull().sum())

# Separate features and labels
features = df.drop(columns=["Recommended_Drug", "Tablets_Per_Day"])
labels = df[["Recommended_Drug", "Tablets_Per_Day"]].copy()

# Encode categorical columns
le_region = LabelEncoder()
le_gender = LabelEncoder()
le_drug = LabelEncoder()
le_tablets = LabelEncoder()

features["Region"] = le_region.fit_transform(features["Region"])
features["Gender"] = le_gender.fit_transform(features["Gender"])
labels["Recommended_Drug"] = le_drug.fit_transform(labels["Recommended_Drug"].astype(str))
labels["Tablets_Per_Day"] = le_tablets.fit_transform(labels["Tablets_Per_Day"].astype(int))

# Split data
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)
print("\nTraining Features Shape:", X_train.shape)
print("Testing Features Shape:", X_test.shape)

Dataset Shape: (10000, 24)

First 5 Rows:
    fever  chills  sweats  headache  nausea  vomiting  body_aches  \
0      0       0       0         0       0         0           0   
1      0       0       0         0       0         0           0   
2      0       0       1         0       1         1           0   
3      0       0       0         0       0         0           0   
4      0       1       0         1       0         0           0   

   impaired_consciousness  prostration  convulsions  ...  severe_anemia  Age  \
0                       1            0            0  ...              0   31   
1                       1            1            0  ...              0   41   
2                       0            0            0  ...              0   23   
3                       0            0            0  ...              0   59   
4                       0            0            0  ...              0   45   

   Weight              Region  Gender  Pregnant  G6PD_Deficiency  \

In [215]:
# Train the model
rf = RandomForestClassifier(n_estimators=100, random_state=42)
model = MultiOutputClassifier(rf)
model.fit(X_train, y_train)

# Evaluate
train_score = model.score(X_train, y_train)
test_score = model.score(X_test, y_test)
print("Training Accuracy:", train_score)
print("Test Accuracy:", test_score)

# Detailed evaluation
y_pred = model.predict(X_test)
y_pred_drug = y_pred[:, 0]
y_pred_tablets = y_pred[:, 1]
print("\nDrug Accuracy:", accuracy_score(y_test["Recommended_Drug"], y_pred_drug))
print("Drug Classification Report:\n", classification_report(y_test["Recommended_Drug"], y_pred_drug, target_names=le_drug.classes_))
print("Tablets Accuracy:", accuracy_score(y_test["Tablets_Per_Day"], y_pred_tablets))
print("Tablets Classification Report:\n", classification_report(y_test["Tablets_Per_Day"], y_pred_tablets, target_names=le_tablets.classes_.astype(str)))

Training Accuracy: 1.0
Test Accuracy: 0.821

Drug Accuracy: 0.8435
Drug Classification Report:
                          precision    recall  f1-score   support

Artemether-lumefantrine       0.60      0.55      0.57       382
   Atovaquone-proguanil       0.48      0.53      0.51       302
  Chloroquine phosphate       1.00      1.00      1.00       713
          IV artesunate       1.00      1.00      1.00       603

               accuracy                           0.84      2000
              macro avg       0.77      0.77      0.77      2000
           weighted avg       0.85      0.84      0.84      2000

Tablets Accuracy: 0.841
Tablets Classification Report:
               precision    recall  f1-score   support

           1       0.83      0.87      0.85      1015
           2       0.86      0.81      0.83       985

    accuracy                           0.84      2000
   macro avg       0.84      0.84      0.84      2000
weighted avg       0.84      0.84      0.84      2000

In [216]:
joblib.dump(model, "malaria_model.pkl")
joblib.dump(le_region, "le_region.pkl")
joblib.dump(le_gender, "le_gender.pkl")
joblib.dump(le_drug, "le_drug.pkl")
joblib.dump(le_tablets, "le_tablets.pkl")
print("Model and encoders saved!")

Model and encoders saved!


In [217]:
import pandas as pd
import joblib

# Load model and encoders
model = joblib.load("malaria_model.pkl")
le_region = joblib.load("le_region.pkl")
le_gender = joblib.load("le_gender.pkl")
le_drug = joblib.load("le_drug.pkl")
le_tablets = joblib.load("le_tablets.pkl")

def calculate_dosage(drug, weight):
    if drug == "Artemether-lumefantrine":
        if weight < 15:
            return "1 tablet per dose"
        elif weight < 25:
            return "2 tablets per dose"
        elif weight < 35:
            return "3 tablets per dose"
        else:
            return "4 tablets per dose"
    elif drug == "IV artesunate":
        return f"{2.4 * weight} mg per dose"
    elif drug == "Chloroquine phosphate":
        initial = 10 * weight
        subsequent = 5 * weight
        return f"Initial: {initial} mg base, Subsequent: {subsequent} mg base per dose"
    elif drug == "Atovaquone-proguanil":
        if weight < 8:
            return "2 pediatric tablets (62.5 mg atovaquone/25 mg proguanil) per dose"
        elif weight < 10:
            return "3 pediatric tablets (62.5 mg atovaquone/25 mg proguanil) per dose"
        elif weight < 20:
            return "1 adult tablet (250 mg atovaquone/100 mg proguanil) per dose"
        elif weight < 30:
            return "2 adult tablets (250 mg atovaquone/100 mg proguanil) per dose"
        elif weight < 40:
            return "3 adult tablets (250 mg atovaquone/100 mg proguanil) per dose"
        else:
            return "4 adult tablets (250 mg atovaquone/100 mg proguanil) per dose"
    else:
        return "Dosage not defined"

print("Model loaded!")

Model loaded!


In [218]:
# New test cases with detailed patient scenarios

test_cases = [
    # Case 1: Adult male, uncomplicated malaria, Sub-Saharan Africa
    pd.DataFrame({
        "fever": [1], "chills": [1], "sweats": [0], "headache": [1], "nausea": [0], 
        "vomiting": [0], "body_aches": [0], "impaired_consciousness": [0], 
        "prostration": [0], "convulsions": [0], "deep_breathing": [0], 
        "respiratory_distress": [0], "abnormal_bleeding": [0], "jaundice": [0], 
        "severe_anemia": [0], "Age": [30], "Weight": [70], 
        "Region": le_region.transform(["Sub-Saharan Africa"]), 
        "Gender": le_gender.transform(["Male"]), "Pregnant": [0], 
        "G6PD_Deficiency": [0], "Previous_Medications": [0]
    }),

    # Case 2: Child, severe malaria, Papua New Guinea
    pd.DataFrame({
        "fever": [0], "chills": [0], "sweats": [0], "headache": [0], "nausea": [0], 
        "vomiting": [0], "body_aches": [0], "impaired_consciousness": [1], 
        "prostration": [0], "convulsions": [1], "deep_breathing": [0], 
        "respiratory_distress": [0], "abnormal_bleeding": [0], "jaundice": [0], 
        "severe_anemia": [0], "Age": [5], "Weight": [15], 
        "Region": le_region.transform(["Papua New Guinea"]), 
        "Gender": le_gender.transform(["Female"]), "Pregnant": [0], 
        "G6PD_Deficiency": [0], "Previous_Medications": [0]
    }),

    # Case 3: Pregnant woman, uncomplicated malaria, Haiti
    pd.DataFrame({
        "fever": [1], "chills": [0], "sweats": [1], "headache": [0], "nausea": [1], 
        "vomiting": [0], "body_aches": [0], "impaired_consciousness": [0], 
        "prostration": [0], "convulsions": [0], "deep_breathing": [0], 
        "respiratory_distress": [0], "abnormal_bleeding": [0], "jaundice": [0], 
        "severe_anemia": [0], "Age": [25], "Weight": [10], 
        "Region": le_region.transform(["Haiti"]), 
        "Gender": le_gender.transform(["Female"]), "Pregnant": [1], 
        "G6PD_Deficiency": [0], "Previous_Medications": [0]
    })
]

# Confirm that the test cases are defined
print("Test cases defined!")


Test cases defined!


In [219]:
for i, case in enumerate(test_cases, 1):
    prediction = model.predict(case)
    pred_drug = le_drug.inverse_transform([prediction[0][0]])[0]
    pred_tablets = le_tablets.inverse_transform([prediction[0][1]])[0]
    dosage = calculate_dosage(pred_drug, case["Weight"][0])
    print(f"\nTest Case {i}:")
    print(f"Recommended Drug: {pred_drug}")
    print(f"Tablets Per Day: {pred_tablets}")
    print(f"Dosage: {dosage}")


Test Case 1:
Recommended Drug: Artemether-lumefantrine
Tablets Per Day: 2
Dosage: 4 tablets per dose

Test Case 2:
Recommended Drug: IV artesunate
Tablets Per Day: 2
Dosage: 36.0 mg per dose

Test Case 3:
Recommended Drug: Chloroquine phosphate
Tablets Per Day: 1
Dosage: Initial: 100 mg base, Subsequent: 50 mg base per dose


In [220]:
# Add to Cell 2 of TestMalariaModel.ipynb
df = pd.read_csv("mDataSet.csv")
print(df["Recommended_Drug"].value_counts())

Recommended_Drug
Chloroquine phosphate      3514
IV artesunate              2967
Artemether-lumefantrine    1860
Atovaquone-proguanil       1659
Name: count, dtype: int64
