In [None]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# reloaded before each cell executes, so edits to the project packages
# (eda, polarization, preprocessing) are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [None]:
# Standard library
import json

# Third-party: tabular data and plotting
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Apply seaborn's default theme to the plots drawn in this notebook.
sns.set_theme()

# Project-local constants and helpers
from eda.constants import FIGURES_DIR, FIG_SIZE
from polarization.utils import get_polarization, get_polarization_by_time
from preprocessing.utils import load_event_comments, load_event_vocab
from preprocessing.constants import OUTPUT_DIR, EVENTS, EVENTS_INFO

# Use FIG_SIZE as the default matplotlib figure size for every figure below.
# NOTE(review): seaborn documents `sns.set` as an alias of `sns.set_theme`, so
# this call re-applies the theme together with the rc override and the bare
# `sns.set_theme()` above looks redundant — confirm before removing it.
sns.set(rc={"figure.figsize": FIG_SIZE})


In [None]:
# Leave-out polarization per event.
#
# For every event in EVENTS this cell:
#   1. loads the event's comments and vocabulary,
#   2. computes the polarization with `get_polarization`,
#   3. writes the raw result to "<OUTPUT_DIR>/<event>_leaveout_polarization.json",
#   4. draws a box plot of the per-user scores split by affiliation and saves
#      it to "<FIGURES_DIR>/<event>_user_polarization.pdf".
#
# The results are kept in `event_polarization`, keyed by event name.
event_polarization = {}

# Only the event names are used here, so iterate the keys directly (as the
# other cells of this notebook do) instead of unpacking unused item values.
for event_name in EVENTS:
    print(event_name)
    event_comments = load_event_comments(event_name)
    event_vocab = load_event_vocab(event_name)
    event_polarization[event_name] = get_polarization(
        event_comments,
        event_vocab,
    )

    with open(f"{OUTPUT_DIR}/{event_name}_leaveout_polarization.json", "w") as file:
        json.dump(event_polarization[event_name], file)

    # Plot user polarization
    # The result unpacks as (first_item, (dem_scores, rep_scores)); the first
    # item is not needed for this plot.
    # NOTE(review): presumably the first item is the overall score — confirm
    # against polarization.utils.get_polarization.
    _, (dem_polarization, rep_polarization) = event_polarization[event_name]

    # One label per score. Each group is labelled with its OWN length: the
    # previous code used len(dem_polarization) for both groups and combined the
    # columns with zip(), which silently dropped "rep" rows whenever there were
    # more "rep" than "dem" users. Building the frame from a dict of columns
    # additionally makes pandas raise if the two columns ever differ in length.
    user_pols = pd.DataFrame(
        {
            "polarization": list(dem_polarization) + list(rep_polarization),
            "affiliation": (
                ["dem"] * len(dem_polarization) + ["rep"] * len(rep_polarization)
            ),
        }
    )

    # Explicit figure/axes so each event gets its own figure and it can be
    # released at the end of the iteration.
    fig, ax = plt.subplots()
    sns.boxplot(y="polarization", x="affiliation", data=user_pols, ax=ax)

    ax.set_ylabel("User polarization")

    fig.savefig(
        fname=f"{FIGURES_DIR}/{event_name}_user_polarization.pdf",
        bbox_inches="tight",
        pad_inches=0,
        format="pdf",
    )

    plt.show()
    # Free the figure; a no-op when the backend already closed it in show().
    plt.close(fig)


In [None]:
# Weekly polarization per event: computed with freq="week", kept in
# `pol_by_week_events` (keyed by event name) for the plotting cell, and
# written to "<OUTPUT_DIR>/<event>_polarization_by_week.csv".
pol_by_week_events = {}

for event_name in EVENTS:
    event_comments = load_event_comments(event_name)
    event_vocab = load_event_vocab(event_name)

    weekly_polarization = get_polarization_by_time(
        event_comments, event_vocab, freq="week"
    )
    pol_by_week_events[event_name] = weekly_polarization

    # Persist a copy next to the other per-event outputs.
    weekly_csv_path = f"{OUTPUT_DIR}/{event_name}_polarization_by_week.csv"
    weekly_polarization.to_csv(weekly_csv_path)


In [None]:
# Plot weekly
# One figure per event: the "polarization" and "random_polarization" columns
# over "date", plus a dashed red marker at the event's date. Each figure is
# saved to "<FIGURES_DIR>/<event>_polarization_by_week.pdf" and then shown.

for event_name in EVENTS:
    weekly = pol_by_week_events[event_name]

    # First curve; seaborn returns the axes it drew on, reused for the rest.
    ax = sns.lineplot(
        x=weekly["date"],
        y=weekly["polarization"],
        label="Polarization",
    )
    # Second curve, drawn on the same axes.
    sns.lineplot(
        data=weekly,
        x="date",
        y="random_polarization",
        label="Polarization with random user assignment",
        ax=ax,
    )

    # Vertical marker at the date recorded for this event in EVENTS_INFO.
    event_info = EVENTS_INFO[event_name]
    ax.axvline(
        event_info["date"],
        linestyle="--",
        color="red",
        label=f"{event_info['name']} date",
    )

    ax.set_xlabel("Date")
    ax.set_ylabel("Polarization")
    ax.set_title("Weekly polarization score")
    ax.legend()

    plt.savefig(
        fname=f"{FIGURES_DIR}/{event_name}_polarization_by_week.pdf",
        bbox_inches="tight",
        pad_inches=0,
        format="pdf",
    )
    plt.show()


In [None]:
# Daily polarization per event: computed with freq="day", kept in
# `pol_by_day_events` (keyed by event name) for the plotting cell, and
# written to "<OUTPUT_DIR>/<event>_polarization_by_day.csv".
pol_by_day_events = {}

for event_name in EVENTS:
    event_comments = load_event_comments(event_name)
    event_vocab = load_event_vocab(event_name)

    daily_polarization = get_polarization_by_time(
        event_comments,
        event_vocab,
        freq="day",
    )
    pol_by_day_events[event_name] = daily_polarization

    # Persist a copy next to the other per-event outputs.
    daily_csv_path = f"{OUTPUT_DIR}/{event_name}_polarization_by_day.csv"
    daily_polarization.to_csv(daily_csv_path)


In [None]:
# Plot daily
# One figure per event: the "polarization" and "random_polarization" columns
# over "date", plus a dashed red marker at the event's date. Each figure is
# saved to "<FIGURES_DIR>/<event>_polarization_by_day.pdf" and then shown.

for event_name in EVENTS:
    daily = pol_by_day_events[event_name]

    # First curve; seaborn returns the axes it drew on, reused for the rest.
    ax = sns.lineplot(
        x=daily["date"],
        y=daily["polarization"],
        label="Polarization",
    )
    # Second curve, drawn on the same axes.
    sns.lineplot(
        data=daily,
        x="date",
        y="random_polarization",
        label="Polarization with random user assignment",
        ax=ax,
    )

    # Vertical marker at the date recorded for this event in EVENTS_INFO.
    event_info = EVENTS_INFO[event_name]
    ax.axvline(
        event_info["date"],
        linestyle="--",
        color="red",
        label=f"{event_info['name']} date",
    )

    ax.set_xlabel("Date")
    ax.set_ylabel("Polarization")
    ax.set_title("Daily polarization score")
    ax.legend()

    plt.savefig(
        fname=f"{FIGURES_DIR}/{event_name}_polarization_by_day.pdf",
        bbox_inches="tight",
        pad_inches=0,
        format="pdf",
    )
    plt.show()
