<a href="https://colab.research.google.com/github/web-designer-coder/HomeDocAI-HackATone2025/blob/main/chapter_appendix-tools-for-deep-learning/jupyter.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
# ===============================
# Substitute for Generic Medicine Project
# Algorithms used: Naive Bayes + KNN + SVM
# ===============================

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import NearestNeighbors
from sklearn.svm import SVC

# ===============================
# Step 1: Load Dataset
# ===============================
from google.colab import files
uploaded = files.upload()

df = pd.read_csv("medicine_dataset.csv")

print("Dataset loaded with shape:", df.shape)
print(df.head())

# ===============================
# Step 2: Preprocessing
# ===============================
# Convert Strength into numeric (remove "mg")
df['Strength_num'] = df['Strength'].str.replace(" mg", "").astype(float)

# Encode categorical columns
encoders = {}
for col in ['Category', 'Dosage Form', 'Indication', 'Manufacturer', 'Classification']:
    encoders[col] = LabelEncoder()
    df[col + "_enc"] = encoders[col].fit_transform(df[col])

# Features for training
X = df[['Category_enc', 'Dosage Form_enc', 'Strength_num', 'Indication_enc']]
y_category = df['Category_enc']             # For Naive Bayes (Category Prediction)
y_safety = df['Classification_enc']         # For SVM (Prescription vs OTC)

# Scale features for KNN + SVM
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# ===============================
# Step 3: Train Models
# ===============================
# Naive Bayes
nb = MultinomialNB()
nb.fit(X, y_category)

# KNN (Similarity Search)
knn = NearestNeighbors(n_neighbors=6, metric='euclidean')
knn.fit(X_scaled)

# SVM (Safety Filter)
svm = SVC(kernel='linear', probability=True)
svm.fit(X_scaled, y_safety)

print("Models trained successfully!")

# ===============================
# Step 4: Substitute Finder Function
# ===============================
def find_substitutes(medicine_name):
    """Find substitutes for given medicine"""
    try:
        query = df[df['Name'] == medicine_name].iloc[0]
    except:
        return f"Medicine {medicine_name} not found in dataset."

    # Query features
    query_features = [[
        query['Category_enc'],
        query['Dosage Form_enc'],
        query['Strength_num'],
        query['Indication_enc']
    ]]
    query_scaled = scaler.transform(query_features)

    # Step 1: Predict category with Naive Bayes
    predicted_category = nb.predict(query_features)[0]

    # Step 2: Find similar medicines with KNN
    distances, indices = knn.kneighbors(query_scaled)
    similar_meds = df.iloc[indices[0]]

    # Step 3: Apply SVM safety filter
    safe_preds = svm.predict(scaler.transform(X.iloc[indices[0]]))
    final_subs = similar_meds[safe_preds == svm.predict(query_scaled)[0]]

    # Step 4: Filter by same predicted category
    final_subs = final_subs[final_subs['Category_enc'] == predicted_category]

    return final_subs[['Name', 'Category', 'Dosage Form', 'Strength', 'Manufacturer', 'Indication']]

# ===============================
# Step 5: Example Run
# ===============================
medicine_name = "Ibuprocillin"   # <-- replace with any medicine in dataset
substitutes = find_substitutes(medicine_name)

print("\nInput Medicine:", medicine_name)
print("Suggested Substitutes:\n", substitutes)


Saving medicine_dataset.csv to medicine_dataset.csv
Dataset loaded with shape: (50000, 7)
           Name      Category Dosage Form Strength  \
0   Acetocillin  Antidiabetic       Cream   938 mg   
1  Ibuprocillin     Antiviral   Injection   337 mg   
2    Dextrophen    Antibiotic    Ointment   333 mg   
3   Clarinazole    Antifungal       Syrup   362 mg   
4   Amoxicillin    Antifungal      Tablet   802 mg   

                          Manufacturer Indication    Classification  
0                     Roche Holding AG      Virus  Over-the-Counter  
1                          CSL Limited  Infection  Over-the-Counter  
2                    Johnson & Johnson      Wound      Prescription  
3                          AbbVie Inc.       Pain      Prescription  
4  Teva Pharmaceutical Industries Ltd.      Wound  Over-the-Counter  
Models trained successfully!

Input Medicine: Ibuprocillin
Suggested Substitutes:
                Name   Category Dosage Form Strength           Manufacturer  \
1   

