In [None]:
import networkx as nx
from networkx.algorithms import approximation as ap

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
np.set_printoptions(suppress=True)

In [None]:
# First-degree retweet network; the "Unnamed: 0" column is discarded right after loading.
rt_1st = pd.read_csv("RT_network_1st_degree.csv").drop(columns="Unnamed: 0")

# Retweet edge list, read as-is.
rt_edges = pd.read_csv("full_RT_edge_list.csv")


In [None]:
# Read the line-delimited JSON file (lines=True: one JSON record per line) into a DataFrame.
user_timeline = pd.read_json("user_timeline_RT_com.jsonl", lines=True)

In [None]:
# Summary statistics of user_timeline, shown as this cell's output.
user_timeline.describe()

In [None]:
# NOTE(review): this bare expression is not the last one in the cell, so it displays nothing.
rt_edges
#rt_1st

# Rows of the edge list whose "source" column equals "57Frog" (this is what the cell displays).
rt_edges[rt_edges["source"]=="57Frog"]

In [None]:
# (target, source) pairs, one per row of rt_edges; consumed when the graph is built.
basic_edges = []


def tuple_convert(row):
    """Append one row's (target, source) pair to the module-level ``basic_edges``.

    Retained so anything that still calls it keeps working; the edge list
    below is now built directly from the columns instead of row by row.

    Parameters
    ----------
    row : mapping with 'target' and 'source' keys (e.g. one DataFrame row).

    Returns
    -------
    None
    """
    basic_edges.append((row['target'], row['source']))
    return None


# Pair the two columns directly. The previous row-wise ``apply`` was used only
# for its side effect, built a throwaway Series per row, and left a Series of
# None as the cell's output. ``extend`` returns None, so nothing is displayed.
basic_edges.extend(zip(rt_edges['target'], rt_edges['source']))


    

In [None]:
# Undirected graph: one node per value of rt_1st['user_name'], then every pair in basic_edges as an edge.
G = nx.Graph()
G.add_nodes_from(rt_1st['user_name'])
G.add_edges_from(basic_edges)


In [None]:
"""
nx.draw(G, with_labels=True, font_weight='bold')
plt.subplot(122)
"""

#biggest_c = ap.max_clique(G)

# Density of the whole graph, rendered in fixed-point form so it is not shown in scientific notation.
d = nx.density(G)
output = f"{d:.9f}"
print("Network density is:", output, sep="\n")

In [None]:
# nx.degree_histogram returns a list indexed by degree: degree_histo[k] is the
# number of nodes in G whose degree is k.
degree_histo = nx.degree_histogram(G)


# Length of the histogram list (one slot per degree value starting at 0).
print(len(degree_histo))
#print(degree_histo)

In [None]:
# Highest degree shown in the bar chart.
top = 20
# Node count for the first degree that is left out of the chart.
print(degree_histo[top+1])

# Degrees 0..top inclusive and the matching node counts taken from the histogram.
range_wanted = range(0, top+1)
range_hist = (0, top+1)
histo_nums = degree_histo[0:top+1]

print(histo_nums)

#plt.hist(x=histo_nums, range=range_hist, align='mid', rwidth=.8)
# One bar per degree; the y-axis is capped at the tallest bar.
plt.bar(x=range_wanted, height=histo_nums)
plt.xlabel(xlabel="Number of connections")
plt.ylabel(ylabel="Frequency")
plt.xticks(ticks=range_wanted)
plt.ylim(0, max(histo_nums))

plt.show()

In [None]:

top_degree = []    # (node, neighbour_count) pairs chosen by the latest ranking
degrees_list = []  # neighbour counts, index-aligned with top_degree


def _neighbor_count(graph, node):
    """Return how many nodes ``nx.all_neighbors`` yields for ``node`` in ``graph``."""
    return sum(1 for _ in nx.all_neighbors(graph, node))


def node_check(node, top_wanted, graph=None):
    """Offer ``node`` to the running top-``top_wanted`` selection.

    The selection lives in the module-level ``top_degree`` / ``degrees_list``
    lists, which are kept index-aligned (entry i of each describes the same node).

    Parameters
    ----------
    node : node whose neighbour count is evaluated.
    top_wanted : maximum number of entries to keep.
    graph : graph to count neighbours in. When omitted, the notebook-level
        ``G`` is used, which is what this helper always did before.

    Returns
    -------
    None
    """
    if graph is None:
        graph = G
    degrees = _neighbor_count(graph, node)

    if len(top_degree) < top_wanted:
        top_degree.append((node, degrees))
        degrees_list.append(degrees)
    elif degrees_list and degrees > min(degrees_list):
        # Replace the entry that really has the fewest neighbours. The earlier
        # code dropped index 0, which is only the minimum once the lists have
        # been sorted -- the first eviction could discard the best node.
        weakest = degrees_list.index(min(degrees_list))
        top_degree[weakest] = (node, degrees)
        degrees_list[weakest] = degrees


def get_most_connected(G, top_wanted):
    """Return the ``top_wanted`` nodes of ``G`` with the most neighbours.

    Neighbour counts are taken from the graph passed in, via ``nx.all_neighbors``.

    Parameters
    ----------
    G : graph to rank.
    top_wanted : number of nodes to return; values <= 0 give an empty list.

    Returns
    -------
    list of (node, neighbour_count) tuples ordered from fewest to most
    neighbours. On ties at the cut-off, nodes met earlier while iterating
    ``nx.nodes(G)`` are the ones kept. The same list is also stored in the
    module-level ``top_degree`` (with the counts in ``degrees_list``).
    """
    global top_degree, degrees_list

    counted = [(node, _neighbor_count(G, node)) for node in nx.nodes(G)]
    # list.sort is stable even with reverse=True, so ties keep iteration order.
    counted.sort(key=lambda pair: pair[1], reverse=True)
    ranked = counted[:max(top_wanted, 0)]
    ranked.reverse()  # ascending by neighbour count

    top_degree = ranked
    degrees_list = [count for _node, count in ranked]
    return top_degree


def get_top_n_graph(g, n=50):
    """Return the subgraph of ``g`` induced by its ``n`` most connected nodes.

    The nodes come from ``get_most_connected``; the graph is returned, not drawn.
    """
    ranked = get_most_connected(g, n)
    return nx.subgraph(g, nbunch=[entry[0] for entry in ranked])

def get_top_n_plus_neighbors_graph(g, n=200):
    """Return the subgraph of ``g`` induced by its ``n`` most connected nodes and all their neighbours.

    The most connected nodes come from ``get_most_connected``; every node that
    ``nx.all_neighbors`` yields for one of them is included as well.
    """
    hubs = [entry[0] for entry in get_most_connected(g, n)]

    # Hubs first, then each hub's neighbours; the set removes repeats.
    members = set(hubs)
    for hub in hubs:
        members.update(nx.all_neighbors(g, hub))

    return nx.subgraph(g, nbunch=members)
    


def top_n_density(G, top_wanted):
    """Return the density of the subgraph induced by the ``top_wanted`` most connected nodes of ``G``.

    Parameters
    ----------
    G : graph to analyse.
    top_wanted : how many of the most connected nodes (per ``get_most_connected``) to keep.

    Returns
    -------
    float
        ``nx.density`` of the induced subgraph.
    """
    top_n_nodes = [entry[0] for entry in get_most_connected(G, top_wanted)]
    top_n_G = nx.subgraph(G, nbunch=top_n_nodes)
    # Return the number itself. Formatting it to 20 decimals and parsing it
    # back (as before) only risks losing precision; display formatting belongs
    # to the caller.
    return float(nx.density(top_n_G))


def make_top_n_df(g, n=10):
    """Build the plotting table for the ``n`` most connected nodes of ``g`` and their neighbours.

    Parameters
    ----------
    g : graph to analyse. All neighbour counts are taken from this graph.
    n : number of most connected nodes (per ``get_most_connected``) to start from.

    Returns
    -------
    pandas.DataFrame with one row per node and the columns
        node    - node identifier
        degrees - neighbour count of the node
        bucket  - 0 for the top-n nodes; 1-4 for neighbours, from ``pd.cut`` of
                  their neighbour counts into 4 equal-width bins (4 = most connected)
        label   - "bot?" for top-n nodes, the node name for buckets 3 and 4, ' ' otherwise
        size    - marker size chosen from the bucket

    Top-n rows come first (in the order ``get_most_connected`` returns them),
    followed by neighbours in first-seen order. Each node appears once: a
    top-n node that is also a neighbour of another top-n node stays in bucket 0.
    """
    top_n = get_most_connected(g, n)
    top_n_nodes = [entry[0] for entry in top_n]
    top_n_degrees = [entry[1] for entry in top_n]
    top_n_set = set(top_n_nodes)

    # Neighbours of the top-n nodes, de-duplicated in first-seen order. Top-n
    # nodes are left out so they are not listed twice and do not lose bucket 0.
    neighbor_nodes = list(dict.fromkeys(
        neigh
        for node in top_n_nodes
        for neigh in nx.all_neighbors(g, node)
        if neigh not in top_n_set
    ))
    # Count neighbours in the graph that was passed in, not the notebook-level G.
    neighbor_degrees = [sum(1 for _ in nx.all_neighbors(g, node)) for node in neighbor_nodes]

    top_n_df = pd.DataFrame({
        'node': top_n_nodes + neighbor_nodes,
        'degrees': top_n_degrees + neighbor_degrees,
    })
    # Rows after the first len(top_n_nodes) are the neighbours.
    is_neighbor = top_n_df.index >= len(top_n_nodes)

    top_n_df['bucket'] = 0
    if neighbor_nodes:  # pd.cut cannot bin an empty selection
        top_n_df.loc[is_neighbor, 'bucket'] = list(pd.cut(top_n_df.loc[is_neighbor, 'degrees'],
                                                          bins=4,
                                                          labels=[1, 2, 3, 4]))

    top_n_df['label'] = ' '
    top_n_df.loc[~is_neighbor, 'label'] = "bot?"
    # Name only the two most connected neighbour buckets.
    well_connected = top_n_df['bucket'].isin([3, 4])
    top_n_df.loc[well_connected, 'label'] = top_n_df.loc[well_connected, 'node']

    # Marker size per bucket; bucket 0 holds the top-n ("bot?") nodes.
    size_by_bucket = {0: 4000, 1: 100, 2: 300, 3: 7000, 4: 15000}
    top_n_df['size'] = top_n_df['bucket'].map(size_by_bucket)

    return top_n_df
    
def draw_from_df(g, df, f_size=(50,50)):
    """Draw the nodes listed in ``df`` with a spring layout.

    ``df`` supplies four parallel columns: "node" (which nodes to draw),
    "label" (text drawn on each node), "bucket" (colour value) and "size"
    (marker size). ``f_size`` is the matplotlib figure size.
    """
    node_names = list(df["node"])
    label_dic = dict(zip(node_names, list(df["label"])))

    # Draw only the subgraph induced by the listed nodes, never the whole of g.
    df_subgraph = nx.subgraph(g, nbunch=node_names)

    draw_options = dict(
        with_labels=True,
        labels=label_dic,
        font_size=20,
        font_weight='bold',
        nodelist=node_names,
        node_color=list(df["bucket"]),
        node_size=list(df["size"]),
        cmap="prism_r",  # "inferno" is an alternative
        width=.5,
    )

    plt.figure(figsize=f_size)
    nx.draw_spring(df_subgraph, **draw_options)
    plt.show()
    
    

In [None]:
# NOTE(review): the variable is called "top10" but the call requests n=15.
top10_df = make_top_n_df(G, n=15)
top10_df

In [None]:
# Draw the nodes listed in top10_df on a 50x50 figure.
draw_from_df(G, top10_df, f_size=(50,50))

In [None]:
#nodelist=[found_node] node_color="r" node_size= (default is 300)
#top500_G

def range_of_degrees(g, list_not_range=False):
    """Summarise the neighbour counts of the nodes of ``g``.

    Neighbours are counted in ``g`` itself (via ``nx.all_neighbors``). This
    function used to count them in the notebook-level ``G``, which gave
    full-network counts whenever a subgraph was passed in.

    Parameters
    ----------
    g : graph to inspect.
    list_not_range : when falsy (default) return ``(max, min)`` of the counts;
        when truthy return the full list of counts, one per node in
        ``nx.nodes(g)`` order.

    Raises
    ------
    ValueError
        From ``max`` when ``g`` has no nodes and the (max, min) form is requested.
    """
    the_degrees = [sum(1 for _ in nx.all_neighbors(g, node)) for node in nx.nodes(g)]

    if list_not_range:
        return the_degrees
    return max(the_degrees), min(the_degrees)


def get_nodes_and_bucket_lists(g, buckets_wanted=5):
    """Return the nodes of ``g`` together with a bucket label for each one.

    Each node's neighbour count (from ``nx.all_neighbors`` on ``g``) is binned
    by ``pd.cut`` into ``buckets_wanted`` equal-width bins labelled
    1..``buckets_wanted``; a higher label means more neighbours.

    Returns
    -------
    (nodes, bucket_labeled) : two lists of equal length, in ``nx.nodes(g)`` order.
    """
    nodes = list(nx.nodes(g))
    neighbor_counts = [len(list(nx.all_neighbors(g, member))) for member in nodes]

    bucket_ids = list(range(1, buckets_wanted + 1))
    bucket_labeled = list(pd.cut(neighbor_counts, bins=buckets_wanted, labels=bucket_ids))
    return nodes, bucket_labeled

def draw_network(g, buckets_needed=5, f_size=(100,100)):
    """Draw ``g`` with a spring layout, colouring and sizing nodes by connectivity bucket.

    Parameters
    ----------
    g : graph to draw.
    buckets_needed : number of buckets passed to ``get_nodes_and_bucket_lists``.
    f_size : matplotlib figure size.

    Node sizes are defined for bucket labels 1-5; any other label falls back
    to the largest size, so asking for more than 5 buckets no longer puts
    ``None`` entries in ``node_size``.
    """
    # Compute nodes and labels once; this used to run the whole pass twice.
    nodes, bucket_labels = get_nodes_and_bucket_lists(g, buckets_wanted=buckets_needed)

    # As the label gets bigger, the number of connections increases.
    map_dict = {1:100, 2:200, 3:300, 4:4000, 5:10000}
    largest = max(map_dict.values())
    sizes = [map_dict.get(label, largest) for label in bucket_labels]

    plt.figure(figsize=f_size)
    nx.draw_spring(g,
                   with_labels=True,
                   font_weight='bold',
                   nodelist=nodes,
                   node_color=bucket_labels,
                   node_size=sizes,
                   cmap="prism_r",  # "inferno" is an alternative
                   width=.5)
    plt.show()

In [None]:
# (node, neighbour count) tuples for the 50 most connected nodes of G.
top50 = get_most_connected(G, 50)
print(len(top50))
print(top50)

In [None]:
# Subgraph of G induced by the nodes listed in top50.
top_50_nodes = [entry[0] for entry in top50]
top50_G = nx.subgraph(G, nbunch=top_50_nodes)

# Density of that subgraph, in fixed-point form so it is not shown in scientific notation.
d50 = nx.density(top50_G)
output = f"{d50:.20f}"
print("Network density is:", output, sep="\n")

In [None]:
# Density of the subgraph formed by the 500 most connected nodes (displayed as the cell output).
top_n_density(G, 500) #0.000022244 <-- this commented number is the density of the network as a whole

In [None]:
# Subgraph sizes 10, 60, 110, ... (step 50, below 2001) and the density of each top-N subgraph.
top_x = list(range(10, 2001, 50))
density_y = [top_n_density(G, num) for num in top_x]

In [None]:
# Line plot of subgraph density against the number of top connected nodes kept.
plt.figure(2, figsize=(20, 10))
plt.plot(top_x, density_y)
plt.xlabel("Number of top connected nodes in graph")
plt.ylabel('Network density')
plt.show()

In [None]:
# The 500 most connected nodes of G as (node, neighbour count) tuples ...
top500 = get_most_connected(G, 500)
# ... their node names only ...
top_500_nodes = [tup[0] for tup in top500]
# ... and the subgraph of G induced by those nodes.
top500_G = nx.subgraph(G, nbunch=top_500_nodes)

In [None]:
"""
COLOR MAP (cmap) OPTIONS:

Possible values are: Accent, Accent_r, Blues, Blues_r, BrBG, BrBG_r, BuGn, BuGn_r, BuPu, BuPu_r, 
CMRmap, CMRmap_r, Dark2, Dark2_r, GnBu, GnBu_r, Greens, Greens_r, Greys, Greys_r, OrRd, OrRd_r, Oranges, Oranges_r, 
PRGn, PRGn_r, Paired, Paired_r, Pastel1, Pastel1_r, Pastel2, Pastel2_r, PiYG, PiYG_r, PuBu, PuBuGn, PuBuGn_r, 
PuBu_r, PuOr, PuOr_r, PuRd, PuRd_r, Purples, Purples_r, RdBu, RdBu_r, RdGy, RdGy_r, RdPu, RdPu_r, RdYlBu, 
RdYlBu_r, RdYlGn, RdYlGn_r, Reds, Reds_r, Set1, Set1_r, Set2, Set2_r, Set3, Set3_r, Spectral, Spectral_r, 
Wistia, Wistia_r, YlGn, YlGnBu, YlGnBu_r, YlGn_r, YlOrBr, YlOrBr_r, YlOrRd, YlOrRd_r, afmhot, afmhot_r, autumn, 
autumn_r, binary, binary_r, bone, bone_r, brg, brg_r, bwr, bwr_r, cividis, cividis_r, cool, cool_r, coolwarm, 
coolwarm_r, copper, copper_r, cubehelix, cubehelix_r, flag, flag_r, gist_earth, gist_earth_r, gist_gray, 
gist_gray_r, gist_heat, gist_heat_r, gist_ncar, gist_ncar_r, gist_rainbow, gist_rainbow_r, gist_stern, 
gist_stern_r, gist_yarg, gist_yarg_r, gnuplot, gnuplot2, gnuplot2_r, gnuplot_r, gray, gray_r, hot, hot_r, 
hsv, hsv_r, inferno, inferno_r, jet, jet_r, magma, magma_r, nipy_spectral, nipy_spectral_r, ocean, ocean_r, 
pink, pink_r, plasma, plasma_r, prism, prism_r, rainbow, rainbow_r, seismic, seismic_r, spring, spring_r, 
summer, summer_r, tab10, tab10_r, tab20, tab20_r, tab20b, tab20b_r, tab20c, tab20c_r, terrain, terrain_r, 
twilight, twilight_r, twilight_shifted, twilight_shifted_r, viridis, viridis_r, winter, winter_r


"""

In [None]:
# NOTE(review): the name says "top200" but the call requests n=10.
top200_plus_neighbors = get_top_n_plus_neighbors_graph(G, n=10)
# Draw it with 5 buckets on a 50x50 figure.
draw_network(top200_plus_neighbors, 5, (50,50))

In [None]:
# NOTE(review): "top50" is rebound here to a graph of the 210 most connected nodes;
# earlier in the notebook the same name held a list of (node, count) tuples.
top50 = get_top_n_graph(G, n=210)
draw_network(top50, 5, (50,50))

In [None]:
# NOTE(review): same as the previous cell with n=250; "top50" again names a graph, not 50 nodes.
top50 = get_top_n_graph(G, n=250)
draw_network(top50, 5, (50,50))

In [None]:
# Draw top500_G with 5 buckets at draw_network's default figure size.
draw_network(top500_G,buckets_needed=5)

In [None]:
# (nodes, bucket labels) for top500_G, using the default number of buckets.
test_bucket = get_nodes_and_bucket_lists(top500_G)

In [None]:
# Bin the per-node degree list returned by range_of_degrees into 5 bins labelled 1-5.
label_test = pd.cut(range_of_degrees(top500_G, list_not_range=True), bins=5, labels=[1,2,3,4,5])
label_test

In [None]:
# ap is networkx's approximation module, so this is an approximate maximum clique of top500_G.
max_clique_top500 = ap.max_clique(top500_G)

In [None]:
# Display the clique found in the previous cell.
max_clique_top500

In [None]:
# NOTE(review): nx.find_cliques returns a one-shot iterator over the maximal cliques;
# once a later cell has looped over it, this cell must be re-run before looping again.
cliques500 = nx.find_cliques(top500_G)

In [None]:
# Print every clique yielded by cliques500, one per line.
for clique in cliques500:
    print(clique)