In [1]:
# Enable IPython's autoreload extension; mode 2 reloads all imported modules
# before each cell runs, so edits to the project's .py files are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Change the working directory to the repository checkout (presumably so the
# project packages imported in the next cell resolve — confirm).
# NOTE(review): the active path is an absolute, machine-specific location; the
# commented-out line is an alternative checkout location.
# %cd '/workspaces/polarization_reddit'
%cd '/home/xavi_oliva/Documents/Github/polarization_reddit'

/home/xavi_oliva/Documents/Github/polarization_reddit


In [2]:
import json

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from eda.constants import FIGURES_DIR, FIG_SIZE
from polarization.utils import calculate_polarization, calculate_polarization_by_time
from preprocessing.utils import load_event_comments, load_event_vocab
from preprocessing.constants import OUTPUT_DIR, EVENTS_INFO

# Configure the seaborn theme once, with the project-wide figure size.
# `sns.set` is only an alias of `sns.set_theme`, so a bare `set_theme()` followed
# by `set(rc=...)` applied the default theme twice; one call yields the same state.
sns.set_theme(rc={"figure.figsize": FIG_SIZE})


In [3]:
# Key of the event analysed by this notebook; it is reused below to look up the
# event metadata, load the event data, and build every output file name.
EVENT_KEY = "us_elections_2016"
# Metadata entry for the selected event, read from the project's EVENTS_INFO mapping.
EVENT_DETAILS = EVENTS_INFO[EVENT_KEY]

# Show the metadata so the selected event is visible in the notebook output.
print(EVENT_DETAILS)

{'name': '2016 US presidential election', 'date': Timestamp('2016-11-08 00:00:00')}


In [4]:
# Print the human-readable event name before loading its data.
print(EVENT_DETAILS["name"])

# Load the inputs passed to the polarization functions in the cells below.
# NOTE(review): both loaders live in preprocessing.utils and are not visible
# here; presumably they return the event's comments and its vocabulary — confirm.
event_comments = load_event_comments(EVENT_KEY)
event_vocab = load_event_vocab(EVENT_KEY)

2016 US presidential election


In [49]:
# Compute the polarization of the whole event from its comments and vocabulary.
# NOTE(review): the saved output of this cell ends in a KeyboardInterrupt, so the
# computation appears to be slow — consider caching the JSON written below.
event_polarization = calculate_polarization(
    event_comments,
    event_vocab,
)

# Persist the raw result next to the other outputs of this event.
with open(f"{OUTPUT_DIR}/{EVENT_KEY}_leaveout_polarization.json", "w") as file:
    json.dump(event_polarization, file)

# Plot user polarization
# The result unpacks into a first element (unused here) and a pair holding the
# scores of each affiliation (presumably one score per user — confirm against
# calculate_polarization).
_, (dem_polarization, rep_polarization) = event_polarization

# Each group is labelled using its OWN length. The previous version sized the
# "rep" labels with len(dem_polarization); because zip() stops at the shorter
# input, Republican scores were silently dropped whenever that group was larger.
# Building the frame from a dict makes any length mismatch raise instead.
user_pols = pd.DataFrame(
    {
        "polarization": list(dem_polarization) + list(rep_polarization),
        "affiliation": (
            ["dem"] * len(dem_polarization) + ["rep"] * len(rep_polarization)
        ),
    }
)

# One box per affiliation showing the distribution of the scores.
ax = sns.boxplot(y="polarization", x="affiliation", data=user_pols)

ax.set_xlabel("Affiliation")
ax.set_ylabel("User polarization")
ax.set_title(EVENT_DETAILS["name"])

# Save the figure as a tightly cropped PDF before displaying it.
plt.savefig(
    fname=f"{FIGURES_DIR}/{EVENT_KEY}_user_polarization.pdf",
    bbox_inches="tight",
    pad_inches=0,
    format="pdf",
)

plt.show()


113254
113777
113254
113777


  return dem_q / (dem_q + rep_q)
  self._set_arrayXarray(i, j, x)


113254
113777
113253
113777
113244
113777
113244
113777
113244
113777
113244
113777
113244
113777
113243
113777
113243
113777
113243
113777
113241
113777
113237
113777
113237
113777
113237
113777
113237
113777
113237
113777
113237
113777
113237
113777
113237
113777
113236
113777
113235
113777
113235
113777
113230
113777
113230
113777
113228
113777
113228
113777
113228
113777
113227
113777
113227
113777
113225
113777
113225
113777
113225
113777
113225
113777
113225
113777
113225
113777
113225
113777
113225
113777
113225
113777
113223
113777
113223
113777
113222
113777
113222
113777
113219
113777
113219
113777
113213
113777
113212
113777
113212
113777
113180
113777
113180
113777
113178
113777
113172
113777
113172
113777
113172
113777
113172
113777
113145
113777
113145
113777
113136
113777
113136
113777
113135
113777
113133
113777
113133
113777
113126
113777
113126
113777
113126
113777
113126
113777
113126
113777
113126
113777
113126
113777
113126
113777
113126
113777
113109
113777
113109

KeyboardInterrupt: 

In [None]:
# Polarization over time at weekly frequency, computed from the same comments
# and vocabulary as the event-level score.
pol_by_week_events = calculate_polarization_by_time(event_comments, event_vocab, freq="week")

# Store the weekly series alongside the other outputs of this event.
pol_by_week_events.to_csv(
    f"{OUTPUT_DIR}/{EVENT_KEY}_polarization_by_week.csv"
)


In [None]:
# Weekly plot: the "polarization" series against the "random_polarization"
# series, both drawn on the same axes.

ax = sns.lineplot(
    data=pol_by_week_events,
    x="date",
    y="polarization",
    label="Polarization",
)
sns.lineplot(
    data=pol_by_week_events,
    x="date",
    y="random_polarization",
    label="Polarization with random user assignment",
    ax=ax,
)

# Dashed red vertical line at the date of the event itself.
ax.axvline(
    EVENTS_INFO[EVENT_KEY]["date"],
    color="red",
    linestyle="--",
    label=f"{EVENTS_INFO[EVENT_KEY]['name']} date",
)

ax.set_xlabel("Date")
ax.set_ylabel("Polarization")
ax.set_title("Weekly polarization score")
ax.legend()

# Write the figure to a tightly cropped PDF, then render it inline.
plt.savefig(
    fname=f"{FIGURES_DIR}/{EVENT_KEY}_polarization_by_week.pdf",
    format="pdf",
    bbox_inches="tight",
    pad_inches=0,
)
plt.show()


In [None]:
# Polarization over time at daily frequency, computed from the same comments
# and vocabulary as the event-level score.
pol_by_day_events = calculate_polarization_by_time(event_comments, event_vocab, freq="day")

# Store the daily series alongside the other outputs of this event.
pol_by_day_events.to_csv(
    f"{OUTPUT_DIR}/{EVENT_KEY}_polarization_by_day.csv"
)


In [None]:
# Daily plot: the "polarization" series against the "random_polarization"
# series, both drawn on the same axes.

ax = sns.lineplot(
    data=pol_by_day_events,
    x="date",
    y="polarization",
    label="Polarization",
)
sns.lineplot(
    data=pol_by_day_events,
    x="date",
    y="random_polarization",
    label="Polarization with random user assignment",
    ax=ax,
)

# Dashed red vertical line at the date of the event itself.
ax.axvline(
    EVENTS_INFO[EVENT_KEY]["date"],
    color="red",
    linestyle="--",
    label=f"{EVENTS_INFO[EVENT_KEY]['name']} date",
)

ax.set_xlabel("Date")
ax.set_ylabel("Polarization")
ax.set_title("Daily polarization score")
ax.legend()

# Write the figure to a tightly cropped PDF, then render it inline.
plt.savefig(
    fname=f"{FIGURES_DIR}/{EVENT_KEY}_polarization_by_day.pdf",
    format="pdf",
    bbox_inches="tight",
    pad_inches=0,
)
plt.show()
