In [17]:
import pandas as pd
from textblob import TextBlob
from fuzzywuzzy import fuzz

# Input CSVs: the Reddit comments to scan and the side-effect keyword table
comments_file = 'cleaned_reddit_posts.csv'
side_effects_file = 'Updated_Side_Effects.csv'

# Read both tables into DataFrames
comments_df = pd.read_csv(comments_file)
side_effects_df = pd.read_csv(side_effects_file)

# Per-side-effect tally: number of matching comments plus the comments themselves.
# Each side effect gets its own fresh inner dict/list (nothing is shared).
side_effects_counts = {
    effect_name: {"count": 0, "comments": []}
    for effect_name in side_effects_df['side_effect']
}

# Function: Analyze sentiment and return True if the sentiment is negative
def is_negative_sentiment(text):
    """Return True when TextBlob assigns *text* a polarity strictly below zero."""
    polarity = TextBlob(text).sentiment.polarity
    # A polarity of exactly 0 (neutral) is NOT treated as negative
    return polarity < 0

# Preprocessing function: Remove punctuation and convert text to lowercase
def preprocess_text(text):
    """Normalize *text* for keyword matching.

    Keeps only alphanumeric and whitespace characters and lowercases them;
    every other character (punctuation, symbols) is dropped.

    Non-string input -- e.g. the NaN that pandas produces for an empty CSV
    cell, or None -- yields an empty string instead of raising TypeError.
    """
    if not isinstance(text, str):
        return ''
    # Filter on the original character, then lowercase it
    return ''.join(ch.lower() for ch in text if ch.isalnum() or ch.isspace())

# Fuzzy matching function to check if a keyword matches the text
def keyword_match(text, keywords):
    """Return True if any keyword fuzzily matches *text*.

    A keyword matches when its case-insensitive ``fuzz.partial_ratio``
    against the text is strictly greater than 80. An empty keyword
    collection never matches.
    """
    return any(
        fuzz.partial_ratio(candidate.lower(), text.lower()) > 80
        for candidate in keywords
    )

# Parse every side effect's keywords once, up front. The keyword table does not
# change while comments are scanned, so re-splitting and re-preprocessing it for
# every comment is wasted work.
side_effect_keywords = []
for _, se_row in side_effects_df.iterrows():
    parsed_keywords = []
    for keyword_col in se_row.index:
        # Only columns whose name ends with 'keywords' hold keyword lists
        if not keyword_col.endswith('keywords'):
            continue
        cell = se_row[keyword_col]
        # Skip empty (NaN) or non-text cells instead of failing on .split
        if not isinstance(cell, str):
            continue
        for raw_keyword in cell.split(','):
            # Strip the whitespace left around comma-separated entries and
            # drop entries that are empty after cleaning (e.g. a trailing comma)
            keyword = preprocess_text(raw_keyword).strip()
            if keyword:
                parsed_keywords.append(keyword)
    side_effect_keywords.append((se_row['side_effect'], parsed_keywords))

# Iterate through the comments
for idx, row in comments_df.iterrows():
    raw_text = row['text']

    # Empty CSV cells are read as NaN (a float); there is nothing to analyze
    if not isinstance(raw_text, str) or not raw_text.strip():
        continue

    # Score sentiment on the original text: stripping punctuation first turns
    # contractions such as "don't" into "dont", which removes the negation cue
    # the sentiment analyzer needs. Only negative comments are processed.
    if not is_negative_sentiment(raw_text):
        continue

    comment_text = preprocess_text(raw_text)  # Normalized text for keyword matching

    for side_effect, keywords in side_effect_keywords:
        # A comment is counted at most once per side effect, no matter how many
        # of that side effect's keywords (or keyword columns) it matches
        if keyword_match(comment_text, keywords):
            side_effects_counts[side_effect]["count"] += 1
            side_effects_counts[side_effect]["comments"].append(raw_text)

# Keep only the side effects that were mentioned at least once
side_effects_list = [
    {"side_effect": side_effect, "count": data["count"], "comments": data["comments"]}
    for side_effect, data in side_effects_counts.items()
    if data["count"] > 0
]

# Build the report table sorted by mention count (most mentioned first).
# The columns are given explicitly so the frame still has a "count" column when
# nothing matched; otherwise sort_values would raise KeyError on an empty frame.
side_effects_df_sorted = pd.DataFrame(
    side_effects_list, columns=["side_effect", "count", "comments"]
).sort_values(by="count", ascending=False)

# Print each mentioned side effect with its mention count and related comments
for _, row in side_effects_df_sorted.iterrows():
    print(f"Side Effect: {row['side_effect']}")
    print(f"Mentions: {row['count']}")
    print("Related Comments:")
    for comment in row['comments']:
        print(f"- {comment}")
    print("=" * 50)


Side Effect: Somnolence
Mentions: 4
Related Comments:
- The only way I sleep is when I drink and itâs absolutely horrible
- really only getting headaches and having a hard time sleeping
- Like up and down effect one minute I am drowsy next over stimulated
- and having a hard time with sleep so far..
Side Effect: Drug Ineffective
Mentions: 3
Related Comments:
- What were his suggestions? I have tried taking 2 weeks break to see if I could lower my tolerance level but it was not effective.
- Iâve been done with schooling for a few years now and have been working FT for a while
- Iâm not sure if telehealth practices can just do that 24/7, the doctors donât work 24/7, even though they might have customer service 24/7
Side Effect: Feeling Abnormal
Mentions: 3
Related Comments:
- Who is the manufacturer? I tried a weird manufacturer during the shortage because thatâs all my pharmacy had, and they were horrible
- I swear it makes me feel like I took molly my jaw start moving weird, 