### **Imports**

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

### **Factuality**

In [None]:
# Load the factuality predictions. Columns read below: 'alexa_rank',
# 'factuality' (gold label) and 'predicted_factuality' (model output).
df_test = pd.read_csv('factuality_predictions_with_alexa.csv')

# Bin outlets into four quantile-based groups of Alexa rank. qcut orders the
# bins by ascending value, so the smallest rank numbers receive the first
# label ("Q1 (Most Popular)") and the largest receive the last one.
df_test['alexa_rank_quartile'] = pd.qcut(
    df_test['alexa_rank'],
    4,
    labels=["Q1 (Most Popular)", "Q2", "Q3", "Q4 (Least Popular)"],
)

# Calculate accuracy per quartile: the mean of a boolean column is the
# fraction of rows where gold and predicted labels agree.
df_test['correct_prediction'] = df_test['factuality'] == df_test['predicted_factuality']
# 'alexa_rank_quartile' is categorical; passing `observed` explicitly keeps the
# current result (one entry per quartile label) and avoids the FutureWarning
# newer pandas versions emit when the argument is omitted.
accuracy_by_quartile = (
    df_test.groupby('alexa_rank_quartile', observed=False)['correct_prediction'].mean()
)

# Bar plot of accuracy by quartile, drawn on an explicit Axes rather than the
# implicit pyplot "current figure".
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x=accuracy_by_quartile.index, y=accuracy_by_quartile.values, ax=ax)
ax.set_title("Prediction Accuracy by Media Popularity Quartile")
ax.set_xlabel("Alexa Rank Quartile")
ax.set_ylabel("Prediction Accuracy")
plt.show()

### **Political Bias**

In [None]:
# Load the political-bias predictions. Columns read below: 'alexa_rank',
# 'bias_label_3_class' (gold label) and 'predicted_bias' (model output).
df_test_bias = pd.read_csv('bias_predictions_with_alexa.csv')

# Create quartiles for Alexa Rank. qcut orders the bins by ascending value, so
# the smallest rank numbers receive the first label ("Q1 (Most Popular)").
df_test_bias['alexa_rank_quartile'] = pd.qcut(
    df_test_bias['alexa_rank'],
    4,
    labels=["Q1 (Most Popular)", "Q2", "Q3", "Q4 (Least Popular)"],
)

# Calculate accuracy per quartile: the mean of a boolean column is the
# fraction of rows where gold and predicted labels agree.
df_test_bias['correct_prediction'] = (
    df_test_bias['bias_label_3_class'] == df_test_bias['predicted_bias']
)
# 'alexa_rank_quartile' is categorical; passing `observed` explicitly keeps the
# current result (one entry per quartile label) and avoids the FutureWarning
# newer pandas versions emit when the argument is omitted.
accuracy_by_quartile = (
    df_test_bias.groupby('alexa_rank_quartile', observed=False)['correct_prediction'].mean()
)

# Bar plot of accuracy by quartile, drawn on an explicit Axes rather than the
# implicit pyplot "current figure".
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x=accuracy_by_quartile.index, y=accuracy_by_quartile.values, ax=ax)
ax.set_title("Prediction Accuracy by Media Popularity Quartile")
ax.set_xlabel("Alexa Rank Quartile")
ax.set_ylabel("Prediction Accuracy")
plt.show()

### **Media Popularity: Bias & Factuality**

In [None]:
# Load datasets
df_test_factuality = pd.read_csv('factuality_predictions_with_alexa.csv')
df_test_bias = pd.read_csv('bias_predictions_with_alexa.csv')

# Factuality plot mappings: x position, marker and legend order per gold label
factuality_mapping = {'LOW': 0, 'MIXED': 1, 'HIGH': 2}
df_test_factuality['factuality_num'] = df_test_factuality['factuality'].map(factuality_mapping)
factuality_marker_styles = {'LOW': '.', 'MIXED': 'x', 'HIGH': '*'}
factuality_manual_order = ['Low - Correct', 'Low - Incorrect', 'Mixed - Correct', 'Mixed - Incorrect', 'High - Correct', 'High - Incorrect']

# Political bias plot mappings: x position, marker and legend order per gold label
bias_mapping = {'left': 0, 'center': 1, 'right': 2}
df_test_bias['bias_num'] = df_test_bias['bias_label_3_class'].map(bias_mapping)
bias_marker_styles = {'left': '.', 'center': 'x', 'right': '*'}
bias_manual_order = ['Left - Correct', 'Left - Incorrect', 'Center - Correct', 'Center - Incorrect', 'Right - Correct', 'Right - Incorrect']

# Add jitter so points sharing a class do not collapse onto one vertical line.
# A seeded generator makes the saved figure identical on every run.
JITTER_SEED = 42
jitter_rng = np.random.default_rng(JITTER_SEED)
factuality_jitter = jitter_rng.uniform(-0.1, 0.1, size=len(df_test_factuality))
bias_jitter = jitter_rng.uniform(-0.1, 0.1, size=len(df_test_bias))


def _scatter_by_outcome(ax, df, label_col, predicted_col, x_col, jitter, marker_styles):
    """Scatter 'alexa_rank' against jittered class positions on ``ax``.

    Points are drawn with one ``scatter`` call per (gold class, outcome) pair
    instead of one call per row. Each call is labelled
    ``"<Class> - Correct"`` or ``"<Class> - Incorrect"`` so the caller can
    build a legend from the Axes handles.

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    df : DataFrame containing ``label_col``, ``predicted_col``, ``x_col`` and
        'alexa_rank'.
    label_col : name of the gold-label column.
    predicted_col : name of the predicted-label column.
    x_col : name of the numeric class-position column.
    jitter : array with one horizontal offset per row of ``df``, applied by
        row position (not by index label).
    marker_styles : dict mapping each gold label to a matplotlib marker.

    Raises
    ------
    KeyError
        If ``label_col`` holds a value that has no entry in ``marker_styles``.
    """
    has_marker = df[label_col].isin(list(marker_styles))
    if not has_marker.all():
        unexpected = df.loc[~has_marker, label_col].unique().tolist()
        raise KeyError(f"No marker style for values in {label_col!r}: {unexpected}")

    x_positions = df[x_col].to_numpy(dtype=float) + jitter
    ranks = df['alexa_rank'].to_numpy()
    is_correct = (df[label_col] == df[predicted_col]).to_numpy()

    for class_name, marker in marker_styles.items():
        in_class = (df[label_col] == class_name).to_numpy()
        for outcome_mask, color, outcome in (
            (is_correct, 'green', 'Correct'),
            (~is_correct, 'red', 'Incorrect'),
        ):
            selected = in_class & outcome_mask
            # Skip empty groups so no legend handle is created for them.
            if selected.any():
                ax.scatter(
                    x_positions[selected],
                    ranks[selected],
                    marker=marker,
                    color=color,
                    s=75,
                    label=f"{class_name.capitalize()} - {outcome}",
                )


# Create the combined figure
fig, axes = plt.subplots(1, 2, figsize=(16, 7))  # Two plots side-by-side

### Political Bias Plot ###
_scatter_by_outcome(
    axes[0], df_test_bias,
    label_col='bias_label_3_class', predicted_col='predicted_bias', x_col='bias_num',
    jitter=bias_jitter, marker_styles=bias_marker_styles,
)

axes[0].set_xticks(list(bias_mapping.values()))
axes[0].set_xticklabels(['Left', 'Center', 'Right'])
axes[0].set_yscale('log')
axes[0].set_xlabel('Political Bias Labels')
axes[0].set_ylabel('Alexa Rank (Log Scale)')
axes[0].set_title('(a) Political Bias vs. Alexa Rank')
axes[0].grid(True)

# Legend for bias, in the manual order; combinations with no points are
# omitted instead of raising KeyError.
handles, labels = axes[0].get_legend_handles_labels()
by_label = dict(zip(labels, handles))
bias_legend_labels = [label for label in bias_manual_order if label in by_label]
axes[0].legend([by_label[label] for label in bias_legend_labels], bias_legend_labels, loc='lower left')

### Factuality Plot ###
_scatter_by_outcome(
    axes[1], df_test_factuality,
    label_col='factuality', predicted_col='predicted_factuality', x_col='factuality_num',
    jitter=factuality_jitter, marker_styles=factuality_marker_styles,
)

axes[1].set_xticks(list(factuality_mapping.values()))
axes[1].set_xticklabels(['Low', 'Mixed', 'High'])
axes[1].set_yscale('log')
axes[1].set_xlabel('Factuality Labels')
axes[1].set_ylabel('Alexa Rank (Log Scale)')
axes[1].set_title('(b) Factuality vs. Alexa Rank')
axes[1].grid(True)

# Legend for factuality, in the manual order; combinations with no points are
# omitted instead of raising KeyError.
handles, labels = axes[1].get_legend_handles_labels()
by_label = dict(zip(labels, handles))
factuality_legend_labels = [label for label in factuality_manual_order if label in by_label]
axes[1].legend([by_label[label] for label in factuality_legend_labels], factuality_legend_labels, loc='lower right')

# Adjust layout and save the combined figure (save before show, since show
# may release the figure in some backends).
fig.tight_layout()
fig.savefig('side_by_side_bias_factuality_with_labels.png', dpi=300)
plt.show()
