In [1]:
import pandas as pd
import sys
import os

data_ingestion_path = os.path.abspath(os.path.join('..', 'Data_Ingestion')) 
sys.path.append(data_ingestion_path)

from model import AspectSentimentAnalyzer

In [2]:
df = pd.read_csv("../Data/Cleaned/cleaned.csv")
i = df.sample(10)

In [3]:
model = AspectSentimentAnalyzer()
model

Device set to use cpu
Device set to use cpu
Device set to use cpu
Device set to use cpu


<model.AspectSentimentAnalyzer at 0x7f84f2372e90>

In [6]:
df_pred = i['text'].apply(model.analyze_text).apply(pd.Series)

In [5]:
model.analyze_text("My trip ended up triple amount")

{'text': 'My trip ended up triple amount',
 'overall_sentiment': 'neutral',
 'overall_emotion': 'disappointment',
 'aspect_analysis': {'price': 'negative'},
 'churn_risk': 'low'}

In [8]:
final = pd.concat([i,df_pred],axis=1)

In [11]:
#final.to_csv("../Data/Cleaned/predicted.csv")

In [15]:
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import textwrap
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from collections import Counter
import ast

In [19]:
final = final.iloc[:, ~final.columns.duplicated(keep='first')]

#### Overall Sentiment

In [20]:
fig1 = px.pie(final, names='overall_sentiment', title='<b>Overall Sentiment Distribution</b>',
             color_discrete_sequence=px.colors.qualitative.Pastel)
fig1.update_traces(textposition='inside', textinfo='percent+label')
fig1.update_layout(uniformtext_minsize=12, uniformtext_mode='hide')
fig1.show()

#### Emotion Frequency

In [21]:
emotion_counts = final['overall_emotion'].value_counts().reset_index()
fig2 = px.line_polar(emotion_counts, r='count', theta='overall_emotion', 
                    line_close=True, title='<b>Emotion Frequency Analysis</b>')
fig2.update_traces(fill='toself')
fig2.show()

In [22]:
fig3 = px.box(final, x='rating', y='overall_sentiment', color='overall_sentiment',
             title='<b>Rating Distribution by Sentiment</b>',
             color_discrete_map={
                 'positive': '#2ecc71',
                 'negative': '#e74c3c',
                 'neutral': '#f39c12'
             })
fig3.update_layout(xaxis_title='Star Rating', yaxis_title='Sentiment')

In [35]:
from pywaffle import Waffle

# Prepare data
top_aspects = final['aspect'].value_counts().nlargest(5).index.tolist()
filtered_df = final[final['aspect'].isin(top_aspects)]
data = filtered_df.groupby(['aspect', 'sentiment']).size().unstack()

# Create plot
plt.figure(figsize=(12, 8))
for i, aspect in enumerate(top_aspects, 1):
    plt.subplot(2, 3, i)
    Waffle.make_waffle(
        data.loc[aspect].to_dict(),
        rows=5,
        colors=['#27ae60', '#e74c3c', '#f39c12'],
        title={'label': aspect, 'loc': 'left'}
    )
plt.suptitle('Sentiment Composition for Top 5 Aspects', y=1.02)
plt.tight_layout()
plt.show()

KeyError: 'aspect'

In [23]:
aspect_data = []
for _, row in final.iterrows():
    for aspect, sentiment in row['aspect_analysis'].items():
        aspect_data.append({'aspect': aspect, 'sentiment': sentiment})
aspect_df = pd.DataFrame(aspect_data)

# Create stacked bar chart
fig4 = px.histogram(aspect_df, x='aspect', color='sentiment',
                   title='<b>Sentiment Distribution by Aspect</b>',
                   barmode='stack',
                   color_discrete_map={
                       'positive': '#27ae60',
                       'negative': '#c0392b',
                       'neutral': '#bdc3c7'
                   })
fig4.update_layout(xaxis_title='Aspect', yaxis_title='Count')

In [31]:
import plotly.express as px

# Calculate churn percentages
churn_by_company = pd.crosstab(final['company'], final['churn_risk'], normalize='index') * 100
churn_by_company = churn_by_company.reset_index()

# Create diverging bar chart
fig = px.bar(churn_by_company, 
             x='company', 
             y='high',  # Focus on high churn risk percentage
             color='high',
             color_continuous_scale=['#2ecc71', '#f39c12', '#e74c3c'],
             range_color=[0, 100],
             title='<b>Percentage of High Churn Risk Reviews by Company</b>',
             labels={'high': 'High Churn Risk (%)'},
             text='high',
             height=600)

# Add reference line for average
avg_churn = churn_by_company['high'].mean()
fig.add_hline(y=avg_churn, line_dash="dot", 
              annotation_text=f"Industry Avg: {avg_churn:.1f}%", 
              annotation_position="top left")

# Customize layout
fig.update_layout(
    xaxis_title="Company",
    yaxis_title="Percentage of High Churn Risk Reviews",
    coloraxis_showscale=False,
    uniformtext_minsize=8,
    uniformtext_mode='hide'
)
fig.update_traces(texttemplate='%{text:.1f}%', textposition='outside')

fig.show()