queries to run
-overall sentitiment analysis
-categories (preprocessing) with positive, neutral, and negative percentile

In [9]:
import pandas as pd
import matplotlib.pyplot as plt
import os


def plot_sentiment_distribution(input_file, output_file):
    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    
    df = pd.read_csv(input_file)
    sentiment_counts = df['sentiment'].value_counts()
    total = len(df)
    percentages = [count/total*100 for count in sentiment_counts]
    labels = [f'{sentiment}\n({percentage:.1f}%)' 
             for sentiment, percentage in zip(sentiment_counts.index, percentages)]
    
    colors = ['#3498db', '#9b59b6', '#1abc9c']

    plt.figure(figsize=(8, 6))
    plt.pie(sentiment_counts, 
           labels=labels,
           colors=colors,
           autopct='',
           startangle=90)
    
    plt.title('Overall Sentiment Distribution of Reviews for Electronic Products', pad=20)
    plt.axis('equal')
    plt.text(0, -1.2, f'Total Reviews: {total:,}', 
             ha='center', va='center')
    
    plt.savefig(output_file, format='png', bbox_inches='tight', dpi=300)
    plt.close()
    
    print(f"\nPlot saved: {output_file}")

In [10]:
input_path = "../data/processed/sentitiment_analysis_reviews_w_metadata.csv"
output_path = "../data/visuals/overall_sentiment_analysis.png"

plot_sentiment_distribution(input_path, output_path)


Plot saved: ../data/visuals/overall_sentiment_analysis.png


In [14]:
def plot_sentiment_by_rating(input_file, output_file):
    try:
        full_output_path = os.path.abspath(output_file)
        os.makedirs(os.path.dirname(full_output_path), exist_ok=True)
        
        df = pd.read_csv(input_file)
        ratings = sorted(df['overall'].unique())
        
        sentiments = ['positive', 'negative', 'neutral']
        colors = ['#3498db', '#9b59b6', '#1abc9c']
        
        plt.figure(figsize=(10, 6))
        bar_width = 0.25
        
        for idx, sentiment in enumerate(sentiments):
            counts = [len(df[(df['overall'] == rating) & (df['sentiment'] == sentiment)]) for rating in ratings]
            plt.bar([x + idx*bar_width for x in range(len(ratings))], counts, bar_width, 
                    label=sentiment, color=colors[idx])
        
        plt.title('Sentiment Distribution by Rating for Electronic Products', pad=20)
        plt.xlabel('Rating Score')
        plt.ylabel('Number of Reviews')
        plt.legend()
        
        plt.xticks([x + bar_width for x in range(len(ratings))], ratings)
        
        plt.savefig(full_output_path, dpi=300, bbox_inches='tight')
        plt.close()
        
        if os.path.exists(full_output_path):
            print(f"Plot successfully saved to: {full_output_path}")
        else:
            print("Failed to save the plot")
            
    except Exception as e:
        print(f"Error occurred: {str(e)}")

In [15]:
input_path = "../data/processed/sentitiment_analysis_reviews_w_metadata.csv"
output_path = "../data/visuals/sentiment_by_rating.png"
plot_sentiment_by_rating(input_path, output_path)

Plot successfully saved to: c:\Users\nisht\Desktop\final_project_idmp\Five-Star\data\visuals\sentiment_by_rating.png


In [16]:
def plot_average_rating_by_sentiment(input_file, output_file):
    try:
        full_output_path = os.path.abspath(output_file)
        os.makedirs(os.path.dirname(full_output_path), exist_ok=True)
        
        df = pd.read_csv(input_file)
        avg_rating = df.groupby('sentiment')['overall'].mean()
        
        plt.figure(figsize=(8, 6))
        avg_rating.plot(kind='bar', color=['#3498db', '#9b59b6', '#1abc9c'])
        
        plt.title('Average Rating by Sentiment for Electronic Products', pad=20)
        plt.xlabel('Sentiment')
        plt.ylabel('Average Rating')
        
        plt.savefig(full_output_path, dpi=300, bbox_inches='tight')
        plt.close()
        
        if os.path.exists(full_output_path):
            print(f"Plot successfully saved to: {full_output_path}")
        else:
            print("Failed to save the plot")
            
    except Exception as e:
        print(f"Error occurred: {str(e)}")

In [17]:
input_path = "../data/processed/sentitiment_analysis_reviews_w_metadata.csv"
output_path = "../data/visuals/average_rating_by_sentiment.png"
plot_average_rating_by_sentiment(input_path, output_path)

Plot successfully saved to: c:\Users\nisht\Desktop\final_project_idmp\Five-Star\data\visuals\average_rating_by_sentiment.png


In [25]:
def analyze_sentiment_by_category(input_file, output_file):
    try:
        full_output_path = os.path.abspath(output_file)
        os.makedirs(os.path.dirname(full_output_path), exist_ok=True)
        
        df = pd.read_csv(input_file)
        
        def extract_second_category(category_string):
            if pd.isna(category_string):
                return 'Unknown'
            categories = str(category_string).split('|')
            return categories[1] if len(categories) > 1 else 'Unknown'
        
        df['main_category'] = df['category'].apply(extract_second_category)
        
        category_sentiment = pd.crosstab(df['main_category'], df['sentiment'], normalize='index') * 100
        
        plt.figure(figsize=(12, 6))
        category_sentiment.plot(kind='bar', stacked=True, color=['#3498db', '#9b59b6', '#1abc9c'])
        
        plt.title('Sentiment Distribution by Product Category (%)', pad=20)
        plt.xlabel('Product Category')
        plt.ylabel('Percentage of Reviews')
        plt.legend(title='Sentiment')
        plt.xticks(rotation=45, ha='right')
        plt.grid(True, linestyle='--', alpha=0.7)
        
        plt.tight_layout()
        plt.savefig(full_output_path, dpi=300, bbox_inches='tight')
        plt.close()
        
        if os.path.exists(full_output_path):
            print(f"Plot saved: {full_output_path}")
            
    except Exception as e:
        print(f"Error occurred: {str(e)}")

In [26]:
input_path = "../data/processed/sentitiment_analysis_reviews_w_metadata.csv"
output_path = "../visuals/sentiment_by_category.png"
analyze_sentiment_by_category(input_path, output_path)

Plot saved: c:\Users\nisht\Desktop\final_project_idmp\Five-Star\visuals\sentiment_by_category.png


<Figure size 1200x600 with 0 Axes>