### Packages

In [None]:
import pickle 
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, date, time, timezone
import json

## Tweets

In [None]:
# Read the raw tweets of both topics from their JSON dumps
with open('vax_final.json') as vax_file:
    vax_data = json.load(vax_file)  # vaccination tweets

with open('ukrainewar_final.json') as war_file:
    war_data = json.load(war_file)  # war tweets

## Labelling Tweets

In [None]:
# Unique, time-ordered integer for each (year, month, day, hour)
def make_timestamp(x):
    """Map a tweet's creation date to an integer that increases with time.

    x: tweet record whose 'created_at' entry holds 'year', 'month', 'day'
       and 'hour'.
    Returns an int with hour resolution. Values are only meaningful relative
    to each other (ordering / bucketing); they are not real elapsed hours.
    """
    created = x['created_at']
    # Every month gets a 31-day slot. With 30-day slots, day 31 of one month
    # landed on the same value as day 1 of the next month, so timestamps were
    # neither unique nor ordered. The year slot (40*12*24 hours) is wide enough
    # to hold twelve 31-day months.
    return (12*24*40*(created['year'] - 2019)
            + 24*31*created['month']
            + 24*created['day']
            + created['hour'])

In [None]:
# Loading TweetID -> opinion rating
# Context managers close the files once the labels are read.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
with open("vax_gpt_labels.pkl", 'rb') as labels_file:
    tid_vax_label = pickle.load(labels_file)
with open("war_gpt_labels.pkl", 'rb') as labels_file:
    tid_war_label = pickle.load(labels_file)

In [None]:
# Rating given to a tweet when neither it nor any earlier tweet of the same
# user has a label
NEUTRAL_OPINION = 5


def _label_user_tweets(data, labels):
    """Attach a timestamp and an opinion rating to every tweet of every user.

    data:   dict user_id -> list of tweets; each tweet provides 'tweet_id'
            and the 'created_at' fields read by make_timestamp
    labels: mapping tweet_id -> opinion rating

    Returns (user_tweets, min_timestamp, max_timestamp) where user_tweets maps
    each user_id to a list of [tweet_id, timestamp, rating] triplets sorted by
    timestamp. An unlabelled tweet inherits the rating of the user's previous
    tweet, or NEUTRAL_OPINION when it is the user's first one.
    """
    user_tweets = {}
    min_timestamp = 10000*23000  # starting value of the running minimum
    max_timestamp = 0            # starting value of the running maximum
    for nodeid, tweets in data.items():
        entries = sorted(
            ([tweet['tweet_id'], make_timestamp(tweet)] for tweet in tweets),
            key=lambda entry: entry[1],
        )
        if entries:
            # entries are sorted, so the extremes sit at both ends
            min_timestamp = min(min_timestamp, entries[0][1])
            max_timestamp = max(max_timestamp, entries[-1][1])
        opinion = NEUTRAL_OPINION
        for entry in entries:
            if entry[0] in labels:
                opinion = labels[entry[0]]
            # otherwise keep the rating carried over from the previous tweet
            entry.append(opinion)
        user_tweets[nodeid] = entries
    return user_tweets, min_timestamp, max_timestamp


# Dictionary with keys: user_ids
#           and values: list of triplets (tweet_id, timestamp, rating)
vax_nodeid_to_tweetsids, vax_min_timestamp, vax_max_timestamp = _label_user_tweets(
    vax_data, tid_vax_label)

# Dictionary with keys: user_ids
#           and values: list of triplets (tweet_id, timestamp, rating)
war_nodeid_to_tweetsids, war_min_timestamp, war_max_timestamp = _label_user_tweets(
    war_data, tid_war_label)

In [None]:
# Earliest and latest timestamp of each dataset
print(f'{vax_min_timestamp} {vax_max_timestamp}')
print(f'{war_min_timestamp} {war_max_timestamp}')

# Who-Follows-Whom Network

In [None]:
# create_using=nx.Graph already yields an undirected graph. The former bare
# `G.to_undirected()` call only built a copy that was thrown away (the method
# returns a new graph and does not modify G), so it has been dropped.
G = nx.read_edgelist("final.edgelist", create_using=nx.Graph)
print(f'Number of nodes: {len(G.nodes())}')
print(f'Number of edges: {len(G.edges())}')

# Degree histogram, log-scaled counts
degrees = [degree for _, degree in G.degree]
plt.hist(degrees, bins=50)
plt.yscale("log")
plt.title("Who-Follows-Whom - Degree Distribution")
plt.show()

In [None]:
# Forward and inverse lookup between twitter node ids and consecutive
# integer indices, numbered in the graph's node iteration order
index_to_ids = dict(enumerate(G.nodes()))
ids_to_index = {node_id: position for position, node_id in index_to_ids.items()}
counter = len(index_to_ids)  # number of indexed nodes

In [None]:
# Write the follow graph as an edge list over integer indices
with open("G.edgelist", "w") as edge_file:
    for source, target in G.edges():
        u, v = ids_to_index[source], ids_to_index[target]
        # each edge is stored in both directions; keep only the first line
        # for a directed graph
        edge_file.write(f'{u} {v}\n')
        edge_file.write(f'{v} {u}\n')

## Making Timestamps

In [None]:
# Number of timestamps, i.e. how many buckets the tweets are split into;
# passed as the T argument of make_pairs in the cells below.
T = 51

In [None]:
# Split a sequence `a` into `n` consecutive buckets of near-equal size
def equal_split(a, n):
    """Lazily yield `n` consecutive slices of `a`.

    Bucket sizes differ by at most one; the first len(a) % n buckets get the
    extra element. Buckets are empty when `a` has fewer than `n` elements.
    """
    base_size, extra = divmod(len(a), n)
    bucket_ids = range(n)

    def _buckets():
        start = 0
        for bucket_id in bucket_ids:
            stop = start + base_size + (1 if bucket_id < extra else 0)
            yield a[start:stop]
            start = stop

    return _buckets()

# Split all tweets of the graph's users into T buckets, one per timestamp.
# Every bucket holds (up to rounding) the same number of tweets.
def make_pairs(G, nodeid_to_tweetsids, T = 20):
    """Return, for each of the T buckets, its [first, last] tweet timestamp.

    G:                   graph whose nodes are the user ids to consider
    nodeid_to_tweetsids: user_id -> list of (tweet_id, timestamp, rating)
    T:                   number of buckets
    """
    timestamps = sorted(
        tweet[1]
        for nodeid in G.nodes()
        for tweet in nodeid_to_tweetsids[nodeid]
    )
    return [[bucket[0], bucket[-1]] for bucket in equal_split(timestamps, T)]

# Build the opinion of each user at each timestamp
def make_opinions(G, ids_to_index, nodeid_to_tweetsids, pairs, filname='./test1.txt'):
    """Compute and save the per-user, per-period opinion matrix.

    G:                   graph whose nodes are the user ids to score
    ids_to_index:        user_id -> row index in the result
    nodeid_to_tweetsids: user_id -> list of (tweet_id, timestamp, rating),
                         sorted by timestamp
    pairs:               list of [tmin, tmax] periods in chronological order
    filname:             path the matrix is written to with np.savetxt

    Returns an array of shape (len(G.nodes), len(pairs)). Entry (i, t) is the
    mean rating of the tweets user i posted with tmin <= timestamp < tmax of
    period t. If there is none, the opinion of period t-1 is carried over,
    and a user starts at the neutral value 5.

    NOTE(review): tweets whose timestamp equals the tmax of the last period
    are never counted, as in the original scan.
    """
    neutral = 5
    #an nxT matrix, with value of (i,j) element being the opinion rating of node i at time j
    ids_to_score = np.zeros((len(G.nodes), len(pairs)))
    for nodeid in G.nodes():
        index = ids_to_index[nodeid]
        #list of triplets (tweet_id, timestamp, rating)
        list_el = nodeid_to_tweetsids[nodeid]
        i = 0  # next tweet of this user not yet consumed
        for t in range(len(pairs)):
            tmin = pairs[t][0]
            tmax = pairs[t][1]
            curall = []
            # Consume every tweet posted before the end of the period; those
            # older than tmin are skipped without being counted.
            while i < len(list_el) and list_el[i][1] < tmax:
                if list_el[i][1] >= tmin:
                    curall.append(list_el[i][2])
                i += 1
            if curall:
                # Mean rating of the period. This also applies when the user's
                # tweets run out here: previously those ratings were dropped
                # and column t-1 was copied, which at t == 0 is the still-zero
                # last column.
                ids_to_score[index][t] = np.sum(curall)/len(curall)
            elif t > 0:
                # no tweet in this period: keep the previous opinion
                ids_to_score[index][t] = ids_to_score[index][t-1]
            else:
                # no tweet in the first period (or no tweets at all)
                ids_to_score[index][t] = neutral
    np.savetxt(filname, ids_to_score, delimiter=' ')
    return ids_to_score

### Saving Opinions

In [None]:
# Vaccination topic: period boundaries, then the opinion matrix
vax_pairs = make_pairs(G, vax_nodeid_to_tweetsids, T=T)
vax_ids_to_score = make_opinions(
    G,
    ids_to_index,
    vax_nodeid_to_tweetsids,
    vax_pairs,
    filname='./vax_opinions.txt',
)

# War topic: same two steps
war_pairs = make_pairs(G, war_nodeid_to_tweetsids, T=T)
war_ids_to_score = make_opinions(
    G,
    ids_to_index,
    war_nodeid_to_tweetsids,
    war_pairs,
    filname='./war_opinions.txt',
)

In [None]:
# Mean vax opinion over all users, one bar per timestamp
n = vax_ids_to_score.shape[0]
plt.bar(range(1, T+1), vax_ids_to_score.sum(axis=0) / n)
plt.title('Distribution of averaged vax-opinions over time')
plt.show()

# Same plot for the war opinions
n = war_ids_to_score.shape[0]
plt.title('Distribution of averaged war-opinions over time')
plt.bar(range(1, T+1), war_ids_to_score.sum(axis=0) / n)
plt.show()

In [None]:
# One curve per timestamp: number of users per integer opinion value (0..10)
for t in range(T):
    bucket = [0] * 11
    for score in vax_ids_to_score[:, t]:
        bucket[int(score)] += 1
    plt.plot(bucket)
plt.title("Vax Tweets - Opinion Distribution")
plt.show()

# Same curves for the war opinions
for t in range(T):
    bucket = [0] * 11
    for score in war_ids_to_score[:, t]:
        bucket[int(score)] += 1
    plt.plot(bucket)
plt.title("War Tweets - Opinion Distribution")
plt.show()

# Vax-Mention Network

In [None]:
#load data
# The context manager closes the file once the dictionary is read.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
with open("vax_mention_tid_dict.pkl", "rb") as mention_file:
    vax_mention = pickle.load(mention_file)

In [None]:
# Every user id that occurs in the mention data, either as a key or inside
# one of the mention lists, stored as strings
seen_vax_ids = set()
for source, mentioned in vax_mention.items():
    seen_vax_ids.add(source)
    for target in mentioned:
        seen_vax_ids.add(target)
all_vax_ids = [str(user_id) for user_id in seen_vax_ids]

### Making the graph 

In [None]:
# One undirected edge per (user, mentioned user) pair
vax_mention_edges = [
    (source, target)
    for source, mentioned in vax_mention.items()
    for target in mentioned
]
G_vax = nx.Graph()
G_vax.add_edges_from(vax_mention_edges)

### LCC

In [None]:
# Optional step, switched off by the constant-false guard: when enabled it
# replaces G_vax with the subgraph induced by its largest connected component
# and prints that component and its size.
if False:
    largest_cc = max(nx.connected_components(G_vax), key=len)
    print(len(largest_cc))
    print(largest_cc)
    G_vax = nx.induced_subgraph(G_vax, largest_cc).copy()

### Find the induced subgraph of G

In [None]:
# Follow graph restricted to the users that occur in the vax mention data
G_v = nx.induced_subgraph(G, all_vax_ids).copy()
# to_undirected() returns a new graph rather than converting in place, so the
# result must be assigned (the war section does the same for G_w)
G_v = G_v.to_undirected()
print(G_v)

In [None]:
# Forward and inverse lookup between twitter node ids and consecutive
# integer indices, numbered in the order of all_vax_ids
ids_to_indexvax = {node_id: position for position, node_id in enumerate(all_vax_ids)}
indexvax_to_ids = dict(enumerate(all_vax_ids))
counter = len(all_vax_ids)  # number of indexed ids

### Saving the 2-Layer vax-networks and nodes opinions

In [None]:
# Mention edges with both endpoints turned into strings. G_v only holds string
# ids (its nodes come from all_vax_ids), so the two layers are compared on
# strings in both directions. The second loop used to query G_vax with the raw
# string ids, which cannot match if the mention graph stores non-string ids.
vax_mention_pairs = {frozenset((str(a), str(b))) for a, b in G_vax.edges()}

# Layer 1: mention graph
cont = 0  # mention edges absent from the follow layer
with open("./G_vax_scc.edgelist", "w") as f:
    for edge in G_vax.edges():
        source, target = str(edge[0]), str(edge[1])
        u = ids_to_indexvax[source]
        v = ids_to_indexvax[target]
        if not G_v.has_edge(source, target): cont+=1
        f.write(f'{u} {v}\n')
        f.write(f'{v} {u}\n') #remove it for directed
print(f'There are {cont} out of {len(G_vax.edges())} edges not included in the other layer...')

# Layer 2: follow graph induced on the same users
cont = 0  # follow edges absent from the mention layer
with open("./G_v_scc.edgelist", "w") as f:
    for edge in G_v.edges():
        source, target = str(edge[0]), str(edge[1])
        u = ids_to_indexvax[source]
        v = ids_to_indexvax[target]
        if frozenset((source, target)) not in vax_mention_pairs: cont+=1
        f.write(f'{u} {v}\n')
        f.write(f'{v} {u}\n') #remove it for directed
print(f'There are {cont} out of {len(G_v.edges())} edges not included in the other layer...')

# Opinions of the users of the follow layer, indexed like the edge lists
# NOTE(review): the matrix has len(G_v.nodes) rows while the row indices come
# from ids_to_indexvax (built over all_vax_ids) - confirm both have equal size.
vax_pairs = make_pairs(G_v, vax_nodeid_to_tweetsids, T=T)
vax_ids_to_score = make_opinions(
    G_v,
    ids_to_indexvax,
    vax_nodeid_to_tweetsids,
    vax_pairs,
    filname='./vax_ops.txt',
)

# War-Mention Network

In [None]:
#load data
# The context manager closes the file once the dictionary is read.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
with open("war_mention_tid_dict.pkl", "rb") as mention_file:
    war_mention = pickle.load(mention_file)

# Cast every mentioned id to int, in place
for el in war_mention:
    mentioned = war_mention[el]
    for i, user_id in enumerate(mentioned):
        mentioned[i] = int(user_id)

In [None]:
# Every user id that occurs in the mention data, either as a key or inside
# one of the mention lists, stored as strings
seen_war_ids = set()
for source, mentioned in war_mention.items():
    seen_war_ids.add(source)
    for target in mentioned:
        seen_war_ids.add(target)
all_war_ids = [str(user_id) for user_id in seen_war_ids]

### Making the graph 

In [None]:
# One undirected edge per (user, mentioned user) pair
war_mention_edges = [
    (source, target)
    for source, mentioned in war_mention.items()
    for target in mentioned
]
G_war = nx.Graph()
G_war.add_edges_from(war_mention_edges)
print(G_war)

### LCC

In [None]:
# Optional step, switched off by the constant-false guard: restrict the
# mention graph to its largest connected component.
if False:
    largest_cc = max(nx.connected_components(G_war), key=len)
    print(len(largest_cc))
    print(largest_cc)
    # Store the result in G_war, as the vax section does with G_vax. It used
    # to go to G_w, which is rebuilt from G in the next cell, so enabling the
    # step had no effect.
    G_war = nx.induced_subgraph(G_war, largest_cc).copy()

### Find the induced subgraph of G

In [None]:
# Follow graph restricted to the users that occur in the war mention data,
# taken as an independent undirected copy
war_follow_view = nx.induced_subgraph(G, all_war_ids)
G_w = war_follow_view.copy().to_undirected()
print(G_w)

In [None]:
# Forward and inverse lookup between twitter node ids and consecutive
# integer indices, numbered in the order of all_war_ids
ids_to_indexwar = {node_id: position for position, node_id in enumerate(all_war_ids)}
indexwar_to_ids = dict(enumerate(all_war_ids))
counter = len(all_war_ids)  # number of indexed ids

### Saving the 2-Layer war-networks and nodes opinions

In [None]:
# Mention edges with both endpoints turned into strings. The mentioned ids in
# G_war are ints while G_w only holds string ids (its nodes come from
# all_war_ids), so G_war.has_edge(str, str) could never find an edge. Both
# layers are therefore compared on stringified endpoints.
war_mention_pairs = {frozenset((str(a), str(b))) for a, b in G_war.edges()}

# Layer 1: mention graph
cont = 0  # mention edges absent from the follow layer
with open("./G_war_scc.edgelist", "w") as f:
    for edge in G_war.edges():
        source, target = str(edge[0]), str(edge[1])
        u = ids_to_indexwar[source]
        v = ids_to_indexwar[target]
        if not G_w.has_edge(source, target): cont+=1
        f.write(f'{u} {v}\n')
        f.write(f'{v} {u}\n') #remove it for directed
print(f'There are {cont} out of {len(G_war.edges())} edges not included in the other layer...')

# Layer 2: follow graph induced on the same users
cont = 0  # reset, otherwise the count of the first layer leaks into this one
with open("./G_w_scc.edgelist", "w") as f:
    for edge in G_w.edges():
        source, target = str(edge[0]), str(edge[1])
        u = ids_to_indexwar[source]
        v = ids_to_indexwar[target]
        if frozenset((source, target)) not in war_mention_pairs: cont+=1
        f.write(f'{u} {v}\n')
        f.write(f'{v} {u}\n') #remove it for directed
print(f'There are {cont} out of {len(G_w.edges())} edges not included in the other layer...')

# Opinions of the users of the follow layer, indexed like the edge lists
# NOTE(review): the matrix has len(G_w.nodes) rows while the row indices come
# from ids_to_indexwar (built over all_war_ids) - confirm both have equal size.
war_pairs = make_pairs(G_w, war_nodeid_to_tweetsids, T=T)
war_ids_to_score = make_opinions(
    G_w,
    ids_to_indexwar,
    war_nodeid_to_tweetsids,
    war_pairs,
    filname='./war_ops.txt',
)