In [9]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [7]:
# Load the dataset with medicine descriptions
medicine_df = pd.read_excel("../datasets/Medicine_description.xlsx")
medicine_df

Unnamed: 0,Drug_Name,Reason,Description
0,A CN Gel(Topical) 20gmA CN Soap 75gm,Acne,Mild to moderate acne (spots)
1,A Ret 0.05% Gel 20gmA Ret 0.1% Gel 20gmA Ret 0...,Acne,A RET 0.025% is a prescription medicine that i...
2,ACGEL CL NANO Gel 15gm,Acne,It is used to treat acne vulgaris in people 12...
3,ACGEL NANO Gel 15gm,Acne,It is used to treat acne vulgaris in people 12...
4,Acleen 1% Lotion 25ml,Acne,treat the most severe form of acne (nodular ac...
...,...,...,...
22476,T Muce Ointment 5gm,Wound,used for treating warts
22477,Wokadine 10% Solution 100mlWokadine Solution 5...,Wound,used to soften the skin cells
22478,Wokadine M Onit 10gm,Wound,used for scars
22479,Wound Fix Solution 100ml,Wound,used for wounds


In [11]:
# Fill missing values in the 'Description' column with an empty string
medicine_df['Description'].fillna('', inplace=True)

# Define TF-IDF vectorizer
tfidf_vectorizer = TfidfVectorizer()

# Fit and transform the medicine descriptions to TF-IDF vectors
tfidf_matrix = tfidf_vectorizer.fit_transform(medicine_df['Description'])

# Compute cosine similarity between medicine descriptions
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

In [12]:
def recommend_similar_medicines(med_name, top_n=5):
    # Find the index of the medicine in the dataset
    idx = medicine_df.index[medicine_df['Drug_Name'] == med_name].tolist()[0]
    
    # Get similarity scores of the medicine with other medicines
    sim_scores = list(enumerate(cosine_sim[idx]))
    
    # Sort the medicines based on similarity scores
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    
    # Get top N similar medicines (excluding the medicine itself)
    similar_medicines = [(medicine_df.iloc[sim_score[0]]['Drug_Name'], sim_score[1]) 
                         for sim_score in sim_scores[1:top_n+1]]
    
    return similar_medicines

In [13]:
# Example usage:
target_med_name = 'Acne UV Gel 60gm'
recommended_medicines = recommend_similar_medicines(target_med_name)
print(f"Medicines similar to '{target_med_name}':")
for med, similarity in recommended_medicines:
    print(f"{med} (Similarity: {similarity:.2f})")

Medicines similar to 'Acne UV Gel 60gm':
Acne UV Gel 60gm (Similarity: 1.00)
Acnerex Soap 75gm (Similarity: 1.00)
Acneril 1% Gel 10gmAcneril Tablet 10Acneril 0.10% Cream 20gm (Similarity: 1.00)
Acnezyl Gel(Topical) 10gm (Similarity: 1.00)
Acnicin Gel 15gmAcnicin 1/1% Solution 25ml (Similarity: 1.00)
