In [3]:
import json
import os
import warnings

import google.generativeai as genai
import nltk
import pandas as pd
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from sklearn.feature_extraction.text import CountVectorizer
from tqdm import tqdm

In [None]:
# Fetch the VADER lexicon; this runs before the analyzer below is constructed.
nltk.download('vader_lexicon')

# Product reviews table. Later cells group it by 'Product' and read the
# 'Review' column.
df = pd.read_csv('data/products.csv')

# Shared analyzer instance used by get_sentiment().
sid = SentimentIntensityAnalyzer()

In [None]:
def get_sentiment(phrase):
    """Score ``phrase`` with the shared VADER analyzer.

    Args:
        phrase: Text to score.

    Returns:
        The ``'compound'`` entry of ``sid.polarity_scores(phrase)``.
    """
    scores = sid.polarity_scores(phrase)
    return scores['compound']

def analyze_reviews(reviews, top_n=20, ngram_range=(2, 3), min_df=2):
    """Find the most frequent positive and negative n-grams in a set of reviews.

    Each n-gram that occurs in at least ``min_df`` reviews is counted across
    all reviews and scored with ``get_sentiment``; n-grams with a score above
    zero are "positive", below zero "negative" (exactly zero is discarded).

    Args:
        reviews: Iterable of review texts (e.g. a pandas Series). Missing
            values (None/NaN) are ignored; other values are converted to str.
        top_n: Maximum number of n-grams returned per polarity.
        ngram_range: ``(min_n, max_n)`` word n-gram sizes passed to
            CountVectorizer.
        min_df: Minimum number of reviews an n-gram must appear in, passed to
            CountVectorizer.

    Returns:
        A tuple ``(positive_ngrams, negative_ngrams)`` of lists of strings,
        each ordered by descending total count. Both lists are empty when
        there is nothing to analyze (no usable reviews, or no n-gram meets
        ``min_df``).
    """
    # read_csv yields NaN for missing reviews, and CountVectorizer refuses
    # non-string documents, so clean the input before vectorizing.
    docs = pd.Series(list(reviews), dtype=object).dropna().astype(str)
    if docs.empty:
        return [], []

    vectorizer = CountVectorizer(ngram_range=ngram_range, min_df=min_df)
    try:
        X = vectorizer.fit_transform(docs)
    except ValueError:
        # scikit-learn raises ValueError when the vocabulary ends up empty or
        # when there are fewer documents than min_df (e.g. a product with a
        # single review). That means "no recurring phrases", not a failure,
        # so report empty results instead of aborting the caller's batch.
        return [], []

    # Column sums = total occurrences of each n-gram across all reviews.
    counts = X.sum(axis=0).A1
    features = vectorizer.get_feature_names_out()
    df_ngrams = pd.DataFrame({'ngram': features, 'count': counts})

    df_ngrams['sentiment'] = df_ngrams['ngram'].apply(get_sentiment)

    by_count = df_ngrams.sort_values(by='count', ascending=False)
    positive = by_count[by_count['sentiment'] > 0].head(top_n)
    negative = by_count[by_count['sentiment'] < 0].head(top_n)

    return list(positive['ngram']), list(negative['ngram'])

In [None]:
# SECURITY: never commit credentials to a notebook. The key is read from the
# GOOGLE_API_KEY environment variable; any key that was previously hardcoded
# in this cell must be treated as leaked and revoked.
_gemini_api_key = os.environ.get("GOOGLE_API_KEY")
if not _gemini_api_key:
    raise RuntimeError(
        "GOOGLE_API_KEY is not set; export it before running this notebook."
    )
genai.configure(api_key=_gemini_api_key)

def _parse_gemini_json(text):
    """Return the JSON object embedded in ``text`` as a dict.

    Tolerates markdown code fences or prose around the object by parsing only
    the span from the first ``{`` to the last ``}``. Raises ValueError
    (json.JSONDecodeError is a subclass) when no valid object is found.
    """
    start, end = text.find("{"), text.rfind("}")
    if start == -1 or end < start:
        raise ValueError("no JSON object found in model response")
    return json.loads(text[start:end + 1])


def get_gemini_analysis(product, positives, negatives):
    """Ask Gemini to summarize a product's pros, cons and suggestions.

    Args:
        product: Product name inserted into the prompt.
        positives: Positive phrases inserted into the prompt.
        negatives: Negative phrases inserted into the prompt.

    Returns:
        The dict parsed from the model's JSON reply (requested keys: ``Pros``,
        ``Cons``, ``Suggestions``). If the reply cannot be read or parsed, a
        warning is emitted and a dict with those three keys mapped to empty
        strings is returned.

    Raises:
        Whatever ``generate_content`` raises; API failures are not caught here.
    """
    prompt = f"""
    Product: {product}
    Positive Phrases: {positives}
    Negative Phrases: {negatives}

    Based on the above, try to understand the positive and negative phrases from the customers and provide:
    1. Pros
    2. Cons
    3. Suggestions for improvement
    Return in JSON format with keys: Pros, Cons, Suggestions
    """
    model = genai.GenerativeModel("gemini-1.5-flash")
    response = model.generate_content(prompt)

    try:
        # Parse as data, never eval(): the reply is untrusted model output,
        # and valid JSON (null/true/false) is not valid Python anyway.
        # NOTE(review): accessing response.text can itself raise when the
        # reply carries no text (e.g. a blocked response) - hence inside try.
        result = _parse_gemini_json(response.text)
    except Exception as exc:
        # Best-effort by design: one unusable reply must not abort the batch,
        # but make the fallback visible instead of silently writing blanks.
        warnings.warn(f"Could not parse Gemini response for {product!r}: {exc}")
        result = {"Pros": "", "Cons": "", "Suggestions": ""}

    return result

In [None]:
# Run the pipeline once per product: mine sentiment-bearing phrases from its
# reviews, have Gemini summarize them, and collect one output row per product.
output_data = []
for product, product_reviews in tqdm(df.groupby('Product')):
    pos_phrases, neg_phrases = analyze_reviews(product_reviews['Review'])
    summary = get_gemini_analysis(product, pos_phrases, neg_phrases)

    row = {"Product": product}
    # Missing keys in the model's answer become empty strings.
    for field in ("Pros", "Cons", "Suggestions"):
        row[field] = summary.get(field, "")
    output_data.append(row)

# Persist the per-product summaries.
output_df = pd.DataFrame(output_data)
output_df.to_csv("products_pros_cons_sug.csv", index=False)

print("Analysis complete. Saved to products_pros_cons_sug.csv")