In [None]:
# Enable IPython's autoreload extension so edits to imported project modules
# are picked up without restarting the kernel.
%load_ext autoreload
# Mode 2: reload all imported modules before each cell is executed.
%autoreload 2

In [None]:
import json

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Apply the default seaborn theme and widen every figure in one call
# (sns.set is just an alias of sns.set_theme, so a single call is enough).
sns.set_theme(rc={"figure.figsize": (20, 6)})

from polarization import get_polarization, get_polarization_by_time
from utils import load_event
from constants import OUTPUT_DIR, EVENTS, EVENTS_INFO, FIGURES_DIR

In [None]:
# For every event: compute its polarization result, persist it as JSON, and
# save/show a box plot of the per-user polarization scores by affiliation.
event_polarization = {}

for e in EVENTS:
    print(e)
    data = load_event(e)
    event_polarization[e] = get_polarization(e, data)

    with open(f"{OUTPUT_DIR}/{e}_polarization_leaveout.json", 'w') as f:
        json.dump(event_polarization[e], f)

    # Plot user polarization
    # The result unpacks as (first item, (left scores, right scores)); only
    # the two score sequences are needed for the plot.
    _, (pol_left, pol_right) = event_polarization[e]

    # Each group's labels must be sized by that group's own length. Sizing the
    # "right" labels by len(pol_left) and zipping made zip() silently drop
    # right-group scores whenever that group was the larger one. Building the
    # frame from equal-length columns makes any mismatch raise instead.
    user_pols = pd.DataFrame({
        "polarization": list(pol_left) + list(pol_right),
        "affiliation": ["left"] * len(pol_left) + ["right"] * len(pol_right),
    })

    ax = sns.boxplot(y="polarization", x="affiliation", data=user_pols)

    ax.set_ylabel("User polarization")

    plt.savefig(
        fname=f"{FIGURES_DIR}/{e}_user_pol.pdf",
        bbox_inches='tight', pad_inches=0, format="pdf"
    )

    plt.show()

In [None]:
# Weekly polarization time series for every event, saved as one CSV per event.
pol_by_week_events = {}

for e in EVENTS:
    # Load this event's own data explicitly. Relying on a `data` variable left
    # over from an earlier cell would feed the last-loaded event's data to
    # every event (and fail outright on a fresh kernel).
    event_data = load_event(e)
    pol_by_week_events[e] = get_polarization_by_time(e, event_data, freq="week")
    # Weekly results get their own file; writing them to the "*_by_day.csv"
    # path would let the daily export overwrite them.
    pol_by_week_events[e].to_csv(f"{OUTPUT_DIR}/{e}_polarization_by_week.csv")

In [None]:
# Plot weekly
# One figure per event: the weekly polarization score against the
# "random user assignment" series, with the event date as a dashed red line.

for e in EVENTS:
    weekly_pol = pol_by_week_events[e]
    event_info = EVENTS_INFO[e]

    ax = sns.lineplot(data=weekly_pol, x="created_utc", y="pol",
                      label="Polarization")
    sns.lineplot(
        data=weekly_pol,
        x="created_utc",
        y="random_pol",
        label="Polarization with random user assignment",
        ax=ax,
    )

    ax.axvline(event_info["date"], linestyle="--", color="red",
               label=f"{event_info['name']} date")

    ax.set_xlabel("Date")
    ax.set_ylabel("Polarization")
    ax.set_title("Weekly polarization score")
    ax.legend()

    plt.savefig(
        fname=f"{FIGURES_DIR}/{e}_pol_by_week.pdf",
        bbox_inches='tight', pad_inches=0, format="pdf"
    )
    plt.show()

In [None]:
# Daily polarization time series for every event, saved as one CSV per event.
pol_by_day_events = {}

for e in EVENTS:
    # Load this event's own data explicitly. Relying on a `data` variable left
    # over from an earlier cell would feed the last-loaded event's data to
    # every event (and fail outright on a fresh kernel).
    event_data = load_event(e)
    pol_by_day_events[e] = get_polarization_by_time(e, event_data, freq="day")
    pol_by_day_events[e].to_csv(f"{OUTPUT_DIR}/{e}_polarization_by_day.csv")

In [None]:
# Plot daily
# One figure per event: the daily polarization score against the
# "random user assignment" series, with the event date as a dashed red line.

for e in EVENTS:
    daily_pol = pol_by_day_events[e]
    event_info = EVENTS_INFO[e]

    ax = sns.lineplot(data=daily_pol, x="created_utc", y="pol",
                      label="Polarization")
    sns.lineplot(
        data=daily_pol,
        x="created_utc",
        y="random_pol",
        label="Polarization with random user assignment",
        ax=ax,
    )

    ax.axvline(event_info["date"], linestyle="--", color="red",
               label=f"{event_info['name']} date")

    ax.set_xlabel("Date")
    ax.set_ylabel("Polarization")
    ax.set_title("Daily polarization score")
    ax.legend()

    plt.savefig(
        fname=f"{FIGURES_DIR}/{e}_pol_by_day.pdf",
        bbox_inches='tight', pad_inches=0, format="pdf"
    )
    plt.show()