In [1]:
import pandas as pd
import numpy as np
import pickle
import json
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.manifold import TSNE
from scipy.spatial.distance import cdist

from plot_text import header, description
from call_backs import input_callback, selected_code
import bokeh
from bokeh.models import ColumnDataSource, HoverTool, LinearColorMapper, CustomJS, Slider, TapTool, TextInput
from bokeh.palettes import Category20
from bokeh.transform import linear_cmap, transform
from bokeh.io import output_file, show, output_notebook
from bokeh.plotting import figure
from bokeh.models import RadioButtonGroup, TextInput, Div, Paragraph
from bokeh.layouts import column, widgetbox, row, layout
from bokeh.layouts import column
import random

In [2]:
# Load the user metadata table and build per-user lookup structures:
#   id_lst             - user ids in file order
#   id2name            - user id -> screen name
#   id2num_of_followed - user id -> value of the "followersCount" column
#
# The separator is a regular expression: a comma immediately followed by a
# non-whitespace character. It is written as a raw string so that "\S" reaches
# the regex engine unchanged instead of being an invalid string escape.
# Regex separators require the python parsing engine.
dat = pd.read_csv("./data.csv", sep=r",(?=\S)", engine="python")

id2name = {}
id_lst = []
id2num_of_followed = {}
# Iterate over the three needed columns directly rather than via iterrows();
# this also avoids a loop variable named `row`, which would overwrite the
# `row` layout helper imported from bokeh.layouts.
for raw_id, raw_name, num_of_followed in zip(
    dat["id"], dat["screenName"], dat["followersCount"]
):
    # The id and screen name values carry literal double quotes; drop them.
    user_id = raw_id.strip('"')
    user_name = raw_name.strip('"')
    id_lst.append(user_id)
    id2name[user_id] = user_name
    id2num_of_followed[user_id] = num_of_followed
# The raw frame is no longer needed once the lookups are built.
del dat

# Load the embedding file into user2vec: first token of each line -> numpy
# vector built from the remaining space-separated tokens.
user2vec = {}
with open("./node2vec_50.emb", "r", encoding="utf_8") as f:
    # The first line is skipped (presumably a "count dimension" header --
    # TODO confirm against the file). next() with a default tolerates an
    # empty file.
    next(f, None)
    # Stream the file line by line instead of materialising it with
    # readlines(); the `with` block closes the handle, so no explicit close().
    for emb_line in f:
        record = emb_line.strip("\n")
        if not record:
            # Blank lines (e.g. a trailing newline) carry no embedding.
            continue
        fields = record.split(" ")
        user2vec[fields[0]] = np.array([float(x) for x in fields[1:]])

# Build the user table as a dict of parallel lists:
#   "u_id" - user id
#   "C-"   - 1-based number of the input line (cluster) the user was taken from
#
# Each line of full_neighbors.txt is "<influencer>\t<neighbor>,<neighbor>,...".
# Only the first MAX_CLUSTERS lines are used.
MAX_CLUSTERS = 20

df = {"u_id": [], "C-": []}
# Set mirror of df["u_id"] so the membership test below is O(1) instead of a
# scan over a list that grows to tens of thousands of ids.
seen_ids = set()
with open("./full_neighbors.txt", "r", encoding="utf_8") as f:
    for cluster_id, record in enumerate(f, start=1):
        if cluster_id > MAX_CLUSTERS:
            break
        influencer, neighbors = record.strip("\n").split("\t")
        # The influencer is appended unconditionally, even if it already
        # appeared as a neighbor of an earlier line. This is kept on purpose:
        # the row order and count must not change, because later cells pair
        # these rows positionally with the embedding matrix.
        df["u_id"].append(influencer)
        df["C-"].append(cluster_id)
        seen_ids.add(influencer)
        for neighbor in neighbors.split(","):
            # A neighbor is assigned to the first cluster it is seen in.
            if neighbor not in seen_ids:
                seen_ids.add(neighbor)
                df["u_id"].append(neighbor)
                df["C-"].append(cluster_id)
    
# Enrich the user table with screen name and follower count, convert it to a
# DataFrame, and stack the users' embedding vectors into the matrix X
# (one row per df row, in the same order).
df["u_name"] = [id2name[x] for x in df["u_id"]]
df["follow_cnt"] = [id2num_of_followed[x] for x in df["u_id"]]
df = pd.DataFrame(df)

# Only the "u_id" column is needed, so iterate over it directly instead of
# using iterrows(); this also avoids rebinding the name `row`, which is the
# layout helper imported from bokeh.layouts. A user without an embedding
# raises KeyError, as before.
X = np.array([user2vec[u_id] for u_id in df["u_id"]])
print(X.shape)

(21177, 50)


In [3]:
# Attach a sentiment value to every user in df (new column "sent") and report
# how many users have no entry in the sentiment file.
#
# Only the first line of the file is used; read just that line and release
# the file handle before doing any further processing.
with open("./neighbor_sentiments.txt", "r", encoding="utf_8") as f:
    line = f.readline()

# NOTE(security): eval() executes whatever Python expression the file
# contains. This is only acceptable for a trusted, locally produced file.
# If the line is a plain literal (e.g. a dict of strings), prefer
# ast.literal_eval -- TODO confirm the file format before switching.
id2sents = eval(line)

# Users missing from id2sents receive the fallback value "fun".
# NOTE(review): confirm "fun" is the intended default label.
sents = [id2sents.get(u_id, "fun") for u_id in df["u_id"]]
non_sent_cnt = sum(1 for u_id in df["u_id"] if u_id not in id2sents)

df["sent"] = sents
print(non_sent_cnt)

7495


In [4]:
# 2-D t-SNE projection of X. The projection is expensive, so it is cached in
# ./tsne_emb.pickle: load the cache when it exists, otherwise compute the
# projection once and write the cache so that a fresh "Run All" still works.
#
# NOTE: pickle.load can execute arbitrary code; only load a cache file that
# was produced locally by this notebook.
try:
    # Context manager so the file handle is closed after loading.
    with open("./tsne_emb.pickle", "rb") as cache_file:
        X_emb = pickle.load(cache_file)
except FileNotFoundError:
    # Same parameters the cache was originally generated with.
    # NOTE(review): recent scikit-learn releases rename `n_iter` to
    # `max_iter` -- adjust if this call fails with a TypeError.
    tsne = TSNE(verbose=1, perplexity=50, n_iter=1000, random_state=42)
    X_emb = tsne.fit_transform(X)
    with open("./tsne_emb.pickle", "wb") as cache_file:
        pickle.dump(X_emb, cache_file)

In [5]:
# Render Bokeh output inline in the notebook.
output_notebook()

# Column data backing the scatter plot: the two embedding coordinates (plus a
# second copy under *_backup keys), the cluster number, user metadata, and a
# per-point legend label of the form "C-<cluster number>".
cluster_ids = df["C-"]
plot_columns = {
    "x": X_emb[:, 0],
    "y": X_emb[:, 1],
    "x_backup": X_emb[:, 0],
    "y_backup": X_emb[:, 1],
    "desc": cluster_ids,
    "user_id": df["u_id"],
    "user_name": df["u_name"],
    "follow_cnt": df["follow_cnt"],
    "sentiment": df["sent"],
    "labels": ["C-" + str(cid) for cid in cluster_ids],
}
source = ColumnDataSource(data=plot_columns)

# Tooltip shown while hovering a point; it follows the mouse cursor.
hover_fields = [
    ("user_id", "@user_id{safe}"),
    ("user_name", "@user_name"),
    ("follow_cnt", "@follow_cnt"),
    ("sentiment", "@sentiment"),
]
hover = HoverTool(tooltips=hover_fields, point_policy="follow_mouse")

# 20-colour palette; the cluster number in the "desc" column is mapped
# linearly onto it between the smallest and largest cluster number.
initial_palette = Category20[20]
mapper = linear_cmap(
    field_name='desc',
    palette=initial_palette,
    low=min(cluster_ids),
    high=max(cluster_ids),
)

plot_tools = [hover, 'pan', 'wheel_zoom', 'box_zoom', 'reset', 'save', 'tap']
plot = figure(
    plot_width=1000,
    plot_height=1000,
    tools=plot_tools,
    title="Visualization of Twitter Users",
    toolbar_location="above",
)

# One marker per user, filled by cluster colour, with a thin translucent
# black outline; legend entries come from the "labels" column.
plot.scatter(
    'x', 'y',
    size=5,
    source=source,
    fill_color=mapper,
    line_alpha=0.3,
    line_width=1.1,
    line_color="black",
    legend='labels',
)
plot.legend.background_fill_alpha = 0.6



In [7]:
# Stack the page vertically -- header, description, then the scatter plot,
# each on its own layout row -- write it to ./interactive.html and display it.
l = layout([[component] for component in (header, description, plot)])
output_file('./interactive.html')
show(l)