In [None]:
# Load IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

import os

# Switch the working directory to the parent of the folder the kernel started
# in (presumably the project root -- the relative "data/..." paths and the
# project imports below are resolved from there).
# The starting directory is remembered on the first run so the cell is
# idempotent: re-running it no longer climbs one extra level each time.
if "_NOTEBOOK_START_DIR" not in globals():
    _NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.dirname(_NOTEBOOK_START_DIR))
print(os.getcwd())

In [None]:
from pathlib import Path

import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

sns.set_theme()

from eda.constants import PARTIES_COLORS
from load.utils import load_df_from_parquet
from preprocessing.utils import (
    load_event_comments,
)

In [None]:
# Load the 2016 US elections comments (the "with_embeddings" variant of the event).
# NOTE(review): `embeddings` is not referenced by any of the visible cells --
# confirm whether this load is still needed.
embeddings = load_event_comments(
    event_name="us_elections_2016_with_embeddings", theme="elections"
)

In [None]:
# Read the user-level embedding table from parquet; the cells below rely on
# its "party" and "mean" columns.
user_embeddings = load_df_from_parquet(
    date=None, file_name="us_elections_2016_user_embeddings.parquet"
)

In [None]:
# Number of non-null entries in every column, broken down by party.
user_embeddings.groupby("party").count()


In [None]:
# Stack the per-row "mean" vectors into one array (one row per entry of
# `user_embeddings`) so it can be fed to the dimensionality reducers below.
user_embeddings_np = np.stack(list(user_embeddings["mean"]))

In [None]:
# PCA: project the full user embedding matrix onto three components.
# random_state is pinned so repeated runs produce the same projection.
pca = PCA(n_components=3, random_state=0)
low_dim_embeddings = pca.fit_transform(user_embeddings_np)


In [None]:
# 3-D scatter of the PCA projection: one point per row of `user_embeddings`,
# coloured by looking up each row's party in PARTIES_COLORS.
fig = plt.figure()
ax = fig.add_subplot(projection="3d")

ax.scatter(
    low_dim_embeddings[:, 0],
    low_dim_embeddings[:, 1],
    low_dim_embeddings[:, 2],
    c=user_embeddings["party"].map(PARTIES_COLORS),
    alpha=0.3,
)

# Label the axes so the exported figure can be read on its own.
ax.set_xlabel("PC 1")
ax.set_ylabel("PC 2")
ax.set_zlabel("PC 3")

# savefig does not create missing directories, so make sure the output folder
# exists before writing (otherwise a fresh checkout raises FileNotFoundError).
pca_figure_path = Path("data/figures/pca_user_embeddings.pdf")
pca_figure_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(pca_figure_path, bbox_inches="tight")

plt.show()


In [None]:
# t-SNE: embed the same user matrix in three dimensions, comparing users by
# cosine distance. random_state is pinned for repeatable layouts and
# verbose=1 makes scikit-learn report progress while fitting.
tsne = TSNE(n_components=3, metric="cosine", random_state=0, verbose=1)
low_dim_embeddings_tsne = tsne.fit_transform(user_embeddings_np)


In [None]:
# 3-D scatter of the t-SNE embedding: one point per row of `user_embeddings`,
# coloured by looking up each row's party in PARTIES_COLORS.
fig = plt.figure()
ax = fig.add_subplot(projection="3d")

ax.scatter(
    low_dim_embeddings_tsne[:, 0],
    low_dim_embeddings_tsne[:, 1],
    low_dim_embeddings_tsne[:, 2],
    c=user_embeddings["party"].map(PARTIES_COLORS),
    alpha=0.3,
)

# Label the axes so the exported figure can be read on its own.
ax.set_xlabel("t-SNE 1")
ax.set_ylabel("t-SNE 2")
ax.set_zlabel("t-SNE 3")

# savefig does not create missing directories, so make sure the output folder
# exists before writing (otherwise a fresh checkout raises FileNotFoundError).
tsne_figure_path = Path("data/figures/tsne_user_embeddings.pdf")
tsne_figure_path.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(tsne_figure_path, bbox_inches="tight")

plt.show()
