# Sentiment Analysis & Feedback System

This notebook implements a complete sentiment analysis pipeline for customer reviews.
It performs:
1. Data Loading & Cleaning
2. Language Translation
3. Duplicate Removal
4. Sentiment Analysis (BERT)
5. Aggregation & Reporting

In [None]:
import pandas as pd
import os
import sys
import matplotlib.pyplot as plt
from IPython.display import display, Image

# Add current directory to path to import pipeline.
# Guarded so that re-running this cell does not keep appending duplicate
# entries to sys.path.
if os.getcwd() not in sys.path:
    sys.path.append(os.getcwd())

# Import functions from our pipeline script
from sentiment_pipeline import (
    load_data, preprocess_reviews, translate_and_clean, 
    handle_duplicates, analyze_sentiment, aggregate_model_stats, 
    generate_feedback_report, plot_results
)

%matplotlib inline

In [None]:
# 1. Load Data
# The path is relative, so it is resolved against the kernel's working directory.
dataset_path = 'final_dataset.csv'
df = load_data(dataset_path)

# Preview the first rows of the loaded frame.
head_rows = df.head()
display(head_rows)

In [None]:
# 2. Preprocess (Clean & Filter)
# df is rebound to the preprocessed result; the row count printed below is
# measured after this step.
df = preprocess_reviews(df)

print(
    f"Rows after cleaning: {len(df)}"
)

In [None]:
# 3. Translate & Re-clean
# This step might take time if using actual translation API
df = translate_and_clean(df)

# Show the pre- and post-translation text side by side for the first rows.
review_columns = ['original_review', 'final_review']
display(df[review_columns].head())

In [None]:
# 4. Handle Duplicates
# Rebinds df to whatever handle_duplicates (imported from sentiment_pipeline)
# returns; every later cell works on this result.
# NOTE(review): re-running this cell feeds the already-processed frame back
# in -- presumably harmless for de-duplication, but confirm against the pipeline.
df = handle_duplicates(df)

In [None]:
# 5. Sentiment Analysis
df = analyze_sentiment(df)

# Preview the columns relevant to the sentiment result for the first rows.
sentiment_columns = ['model', 'final_review', 'sentiment_label', 'sentiment_score']
display(df[sentiment_columns].head())

In [None]:
# 6. Aggregate Stats
# stats_df is reused by later cells: it is the input to
# generate_feedback_report, is shown in the summary tables, is exported as
# per_model_summary.csv, and is passed to plot_results.
stats_df = aggregate_model_stats(df)

In [None]:
# 7. Generate Feedback Report
# feedback_df is built from the aggregated stats only. The Markdown export
# later in this notebook reads its 'model', 'summary', 'strengths',
# 'weaknesses' and 'recommendations' columns, so those must be present.
feedback_df = generate_feedback_report(stats_df)

In [None]:
# Display Summary Tables (Requirement 7)

# Each entry pairs a printed heading with the table whose first 10 rows follow it.
summary_sections = (
    ("=== Model Summary ===", stats_df),
    ("\n=== Feedback Report ===", feedback_df),
)
for heading, table in summary_sections:
    print(heading)
    display(table.head(10))

In [None]:
# 8. Save Outputs
# Writes three CSV exports and a Markdown report under output_dir, and makes
# sure the plots sub-directory exists for the plotting cell that follows.
output_dir = 'outputs'
plots_dir = os.path.join(output_dir, 'plots')
# makedirs creates intermediate directories too, so this one call covers both
# output_dir and plots_dir. Previously only output_dir was created, leaving
# plots_dir missing unless plot_results happened to create it itself.
os.makedirs(plots_dir, exist_ok=True)

df.to_csv(os.path.join(output_dir, 'sentiment_output.csv'), index=False)
stats_df.to_csv(os.path.join(output_dir, 'per_model_summary.csv'), index=False)
feedback_df.to_csv(os.path.join(output_dir, 'feedback_report.csv'), index=False)

# Markdown report: one section per row of feedback_df.
md_path = os.path.join(output_dir, 'manufacturer_recommendations.md')
# Explicit UTF-8 so that non-ASCII characters in the report text do not raise
# UnicodeEncodeError on platforms whose default encoding is not UTF-8.
with open(md_path, 'w', encoding='utf-8') as f:
    f.write("# Manufacturer Feedback Report\n\n")
    for _, row in feedback_df.iterrows():
        f.write(f"## Model: {row['model']}\n")
        f.write(f"**Summary**: {row['summary']}\n\n")
        f.write(f"**Strengths**: {row['strengths']}\n\n")
        f.write(f"**Weaknesses**: {row['weaknesses']}\n\n")
        f.write(f"**Recommendations**: {row['recommendations']}\n\n")
        f.write("---\n\n")

print(f"Outputs saved to {output_dir}")

In [None]:
# 9. Plots
# Best-effort: any failure while generating or rendering the figures is
# reported as a message instead of stopping the notebook.
try:
    plot_results(df, stats_df, plots_dir)

    # Display plots inline, each preceded by its caption.
    saved_figures = (
        ("Global Sentiment Distribution:", 'global_sentiment_distribution.png'),
        ("Per-Model Sentiment Count:", 'per_model_sentiment_count.png'),
    )
    for caption, png_name in saved_figures:
        print(caption)
        display(Image(filename=os.path.join(plots_dir, png_name)))
except Exception as e:
    print(f"Error plotting or displaying: {e}")