# DEMO - Code for the Graphs

## Percentage of Conversations Participated by the Airlines

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import seaborn as sns
import os
import datetime 

# configuration
# Input CSV exports; judging by the names, one holds tweets that received a
# reply and the other holds tweets that did not.
REPLIED_TOPICS_CSV_PATH = r"C:/Users/20243898/Downloads/tweets_classified_Lufthansa.csv"
NOT_REPLIED_TOPICS_CSV_PATH = r"C:/Users/20243898/Downloads/tweets_wo_reply_classified.csv"

# Column names looked up in both CSV files.
TOPIC_COLUMN_NAME_IN_CSV = 'primary_topic'
DATE_COLUMN_NAME_IN_CSV = 'created_at'

# Calendar month to analyse.
TARGET_YEAR = 2019
TARGET_MONTH = 5

# Human-readable label for the target month (e.g. "May 2019").
month_name_for_print = f"{datetime.datetime(TARGET_YEAR, TARGET_MONTH, 1):%B %Y}"
print(f"\nAnalyzing data for: {month_name_for_print}")

# Plots go to a per-month folder under the user's Downloads directory;
# the folder is created up front so saving a figure later cannot fail on it.
PLOTS_OUTPUT_DIR = os.path.join(
    os.path.expanduser('~'),
    'Downloads',
    f'lufthansa_topic_reply_monthly_{TARGET_YEAR}_{TARGET_MONTH:02d}',
)
os.makedirs(PLOTS_OUTPUT_DIR, exist_ok=True)
print(f"Replied CSV: {REPLIED_TOPICS_CSV_PATH}\nNot Replied CSV: {NOT_REPLIED_TOPICS_CSV_PATH}\nPlots to: {PLOTS_OUTPUT_DIR}")

# Global look-and-feel: seaborn grid style, one decimal place for floats
# when pandas objects are printed.
sns.set_style("whitegrid")
pd.options.display.float_format = '{:.1f}'.format

# load data
def load_filter_and_count_topics(file_path, topic_col, date_col, year, month, date_format=None):
    """Count tweets per topic in one CSV for a single calendar month.

    The CSV is read in full, ``date_col`` is parsed to datetimes (values that
    cannot be parsed become ``NaT`` and are excluded), the rows falling in
    ``year``/``month`` are kept, and the values of ``topic_col`` are counted.
    Rows with a missing topic are counted under ``'Unknown Topic'``.

    The function is deliberately best-effort: every problem is reported with
    ``print`` and results in an empty Series rather than an exception, so a
    caller can combine the results of several files without extra guards.

    Args:
        file_path: Path of the CSV file to read.
        topic_col: Name of the column holding the topic label.
        date_col: Name of the column holding the tweet timestamp.
        year: Calendar year to keep.
        month: Calendar month (1-12) to keep.
        date_format: Optional ``strftime``-style format forwarded to
            ``pd.to_datetime``. ``None`` (the default) lets pandas infer the
            format, exactly as before; passing an explicit format avoids the
            format-inference warning and removes day-first/month-first
            ambiguity.

    Returns:
        A Series of counts indexed by topic (as produced by ``value_counts``
        on a categorical column), or an empty ``int64`` Series when the file
        or a required column is missing, no date can be parsed, no row falls
        in the requested month, or any other error occurs.
    """
    if not os.path.exists(file_path):
        print(f"ERROR: File not found: {file_path}")
        return pd.Series(dtype='int64')

    try:
        df_temp = pd.read_csv(file_path, low_memory=False)

        if topic_col not in df_temp.columns:
            print(f"ERROR: Topic col '{topic_col}' missing in {file_path}")
            return pd.Series(dtype='int64')
        if date_col not in df_temp.columns:
            print(f"ERROR: Date col '{date_col}' missing in {file_path}")
            return pd.Series(dtype='int64')

        # errors='coerce' turns unparseable timestamps into NaT instead of
        # aborting the whole file; those rows are dropped by the filter below.
        df_temp[date_col] = pd.to_datetime(df_temp[date_col], format=date_format, errors='coerce')
        if df_temp[date_col].isnull().all() and len(df_temp) > 0:
            print(f"WARN: All dates NaT in {file_path}")
            return pd.Series(dtype='int64')

        dates = df_temp[date_col]
        in_target_month = dates.notna() & (dates.dt.year == year) & (dates.dt.month == month)
        df_filtered = df_temp[in_target_month]
        print(f"  {os.path.basename(file_path)}: Loaded {len(df_temp)}, Filtered to {len(df_filtered)} for {year}-{month:02d}")

        if df_filtered.empty:
            return pd.Series(dtype='int64')
        return df_filtered[topic_col].fillna('Unknown Topic').astype('category').value_counts()
    except Exception as e:
        # Broad on purpose: this is the best-effort boundary for one input
        # file (parse errors, empty files, unexpected dtypes, ...). The error
        # is reported rather than swallowed silently.
        print(f"Error processing {file_path}: {e}")
        return pd.Series(dtype='int64')

# Per-topic counts for the target month from each CSV. The Series names
# become the column labels of the combined table and the legend entries.
replied_counts = load_filter_and_count_topics(
    REPLIED_TOPICS_CSV_PATH,
    TOPIC_COLUMN_NAME_IN_CSV,
    DATE_COLUMN_NAME_IN_CSV,
    TARGET_YEAR,
    TARGET_MONTH,
).rename('Lufthansa Replied')
not_replied_counts = load_filter_and_count_topics(
    NOT_REPLIED_TOPICS_CSV_PATH,
    TOPIC_COLUMN_NAME_IN_CSV,
    DATE_COLUMN_NAME_IN_CSV,
    TARGET_YEAR,
    TARGET_MONTH,
).rename('Lufthansa Did Not Reply')

# plot graph
if replied_counts.empty and not_replied_counts.empty:
    print(f"No data processed for {month_name_for_print}. Check CSVs or filters.")
else:
    # Align both count Series on topic; a topic absent from one file gets 0.
    df_plot = pd.concat([replied_counts, not_replied_counts], axis=1).fillna(0).astype(int)
    # Order topics by combined volume using a temporary 'total' column.
    df_plot = (
        df_plot.assign(total=df_plot.sum(axis=1))
        .sort_values('total', ascending=False)
        .drop(columns='total')
    )

    print(f"\nCombined Counts for {month_name_for_print}:\n", df_plot)
    if df_plot.empty or df_plot.sum().sum() <= 0:
        print(f"No aggregated data with counts > 0 for {month_name_for_print}.")
    else:
        # Only the 15 largest topics are drawn.
        plot_df_final = df_plot.head(15)[['Lufthansa Replied', 'Lufthansa Did Not Reply']]
        if plot_df_final.empty or plot_df_final.sum().sum() <= 0:
            print(f"No data to plot for top topics in {month_name_for_print}.")
        else:
            ax = plot_df_final.plot(
                kind='bar',
                stacked=True,
                figsize=(14, 8),
                color={'Lufthansa Replied': 'green', 'Lufthansa Did Not Reply': 'red'},
            )
            plt.title(f'Lufthansa Reply Status by Topic - {month_name_for_print} (Top 15)', fontsize=14)
            plt.ylabel('Number of Tweets', fontsize=12)
            plt.xlabel('Topic Category', fontsize=12)
            plt.xticks(rotation=45, ha="right", fontsize=10)
            plt.yticks(fontsize=10)
            plt.legend(title='Response Status', loc='upper right')

            # Write each segment's count in its centre; zero-height segments
            # get an empty label so no "0" is drawn.
            for bar_group in ax.containers:
                segment_labels = [
                    f'{int(bar.get_height()):,}' if bar.get_height() > 0 else ''
                    for bar in bar_group
                ]
                ax.bar_label(
                    bar_group,
                    labels=segment_labels,
                    label_type='center',
                    fontsize=8,
                    color='white',
                    weight='bold',
                )

            plt.tight_layout()
            plt.savefig(os.path.join(PLOTS_OUTPUT_DIR, f'lh_reply_status_{TARGET_YEAR}_{TARGET_MONTH:02d}.png'))
            plt.show()
print(f"\n--- Analysis for {month_name_for_print} Complete ---")


Analyzing data for: May 2019
Replied CSV: C:/Users/20243898/Downloads/tweets_classified_Lufthansa.csv
Not Replied CSV: C:/Users/20243898/Downloads/tweets_wo_reply_classified.csv
Plots to: C:\Users\20243898\Downloads\lufthansa_topic_reply_monthly_2019_05


  df_temp[date_col] = pd.to_datetime(df_temp[date_col], errors='coerce')


  tweets_classified_Lufthansa.csv: Loaded 51433, Filtered to 1310 for 2019-05


  df_temp[date_col] = pd.to_datetime(df_temp[date_col], errors='coerce')


  tweets_wo_reply_classified.csv: Loaded 82790, Filtered to 1035 for 2019-05

Combined Counts for May 2019:
                               Lufthansa Replied  Lufthansa Did Not Reply
primary_topic                                                           
contact / reachability                      588                      290
other                                       304                      461
baggage / luggage issues                    191                       77
seat / upgrade issues                       105                       94
app / website issues                         74                       32
flight delay or cancellation                 27                       69
refund or compensation                       21                       12
