In [None]:
import networkx as nx
from networkx.algorithms import approximation as ap

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
np.set_printoptions(suppress=True)

# Hand-picked account names (presumably high-frequency posters -- confirm the
# source); later cells intersect them with the nodes of the graph.
high_freq = [
    "viriyabot",
    "notfarmerwife",
    "fotopak",
    "Ollissya",
    "kuppp005",
    "global79619367",
    "_ForeignService",
    "DerekMaher3",
    "MathersLig",
    "standtallnroar",
    "yuuji_K1",
    "Explorador_IT",
    "A1Patriot2020",
]


In [None]:
# Load the edge list. Malformed CSV rows are skipped with a warning instead of
# aborting the load. `on_bad_lines` replaces `error_bad_lines`, which was
# deprecated in pandas 1.3 and removed in pandas 2.0.
sput_edges = pd.read_csv("sputnik_full_edge_list.csv", on_bad_lines="warn")


def tuple_convert(row):
    """Append one row's ``(target, source)`` pair to the global ``basic_edges``.

    Kept so it can still be used row-wise (e.g. with
    ``DataFrame.apply(..., axis=1)``); this cell now builds ``basic_edges``
    column-wise below.

    Parameters
    ----------
    row : mapping-like with ``'target'`` and ``'source'`` keys.

    Returns
    -------
    None
    """
    basic_edges.append((row['target'], row['source']))
    return None


# One (target, source) tuple per CSV row. Zipping the two columns avoids
# building a Series per row and does not leave a Series of None values as the
# cell's displayed output.
basic_edges = list(zip(sput_edges['target'], sput_edges['source']))

In [None]:
# Undirected graph built from the (target, source) pairs; nodes are created
# implicitly from the endpoints of each edge.
G = nx.from_edgelist(basic_edges)

In [None]:
# Density of the full graph.
d = nx.density(G)
# Fixed-point with 9 decimals so a small value is not shown in scientific notation.
output = format(d, ".9f")
print("Network density is:", output, sep="\n")

In [None]:
# degree_histo[k] is the number of nodes of G whose degree is k.
degree_histo = nx.degree_histogram(G)

print(len(degree_histo))

top = 20  # highest degree shown on the bar chart

# The histogram stops at the graph's maximum degree. Pad it with zeros so that
# indexing up to top + 1 cannot raise IndexError on a graph whose maximum
# degree is smaller than that (no node has such a degree, so 0 is the count).
padded_histo = degree_histo + [0] * max(0, top + 2 - len(degree_histo))
print(padded_histo[top + 1])

range_wanted = range(0, top + 1)
range_hist = (0, top + 1)  # not used by the bar chart; kept for cells that may still reference it
histo_nums = [padded_histo[num] for num in range_wanted]

print(histo_nums)

# One bar per degree 0..top; bar height is the number of nodes with that degree.
plt.bar(x=range_wanted,
        height=histo_nums)

plt.xlabel(xlabel="Number of connections")
plt.ylabel(ylabel="Frequency")
plt.xticks(ticks=range_wanted)
plt.ylim(0, max(histo_nums))

plt.show()

In [None]:

# Scratch state shared by node_check() and get_most_connected(). Both lists
# are kept sorted by ascending degree and always describe the same nodes.
top_degree = []    # (node, degree) pairs of the current best candidates
degrees_list = []  # the degrees of top_degree, in the same order


def node_check(node, top_wanted, graph=None):
    """Offer ``node`` as a candidate for the ``top_wanted`` most connected nodes.

    The node's degree is the number of entries yielded by
    ``nx.all_neighbors``. While fewer than ``top_wanted`` candidates are held
    the node is always accepted; afterwards it replaces the current
    lowest-degree candidate only when its degree is strictly larger.

    The candidate lists are re-sorted after every change, so index 0 is always
    the true minimum. Previously the lists were unsorted until the first
    replacement, so that replacement evicted whichever node had been inserted
    first, possibly the best-connected one.

    Parameters
    ----------
    node : node whose neighbours are counted.
    top_wanted : int, maximum number of candidates to keep; values <= 0 keep none.
    graph : graph to count neighbours in; defaults to the notebook-level ``G``.

    Returns
    -------
    None. Updates the module-level ``top_degree`` and ``degrees_list`` in place.
    """
    if top_wanted <= 0:
        return
    if graph is None:
        graph = G
    degrees = sum(1 for _ in nx.all_neighbors(graph, node))

    if len(top_degree) < top_wanted:
        top_degree.append((node, degrees))
    elif degrees > top_degree[0][1]:
        top_degree[0] = (node, degrees)  # index 0 is the current minimum
    else:
        return

    # Stable sort: equal degrees keep their relative order.
    top_degree.sort(key=lambda pair: pair[1])
    degrees_list[:] = [deg for _, deg in top_degree]


def get_most_connected(G, top_wanted):
    """Return the ``top_wanted`` nodes of ``G`` that have the most neighbours.

    Parameters
    ----------
    G : graph to scan (every node from ``nx.nodes(G)`` is considered).
    top_wanted : int, number of nodes to return; values <= 0 give ``[]``.

    Returns
    -------
    list of ``(node, degree)`` tuples sorted by ascending degree, so the best
    connected node is last. At most ``top_wanted`` entries. The same list is
    also left in the module-level ``top_degree``.
    """
    global top_degree, degrees_list
    top_degree = []
    degrees_list = []
    for node in nx.nodes(G):
        # Pass the graph explicitly so neighbours are counted in the graph
        # being scanned rather than in the notebook-level G.
        node_check(node, top_wanted, graph=G)

    return top_degree


def get_top_n_graph(g, n=50):
    """Return (without drawing) the subgraph of ``g`` induced by its ``n`` most connected nodes."""
    hub_names = [pair[0] for pair in get_most_connected(g, n)]
    return nx.subgraph(g, nbunch=hub_names)

def get_top_n_plus_neighbors_graph(g, n=200):
    """Return the subgraph of ``g`` spanning its ``n`` most connected nodes and every neighbour of those nodes."""
    hub_names = [pair[0] for pair in get_most_connected(g, n)]

    # Start from the hubs and add each hub's neighbours; the set removes repeats.
    members = set(hub_names)
    for hub in hub_names:
        members.update(nx.all_neighbors(g, hub))

    return nx.subgraph(g, nbunch=members)
    


def top_n_density(G, top_wanted):
    """Density of the subgraph induced by the ``top_wanted`` most connected nodes of ``G``.

    Parameters
    ----------
    G : graph to analyse.
    top_wanted : int, how many of the most connected nodes to keep.

    Returns
    -------
    float
        ``nx.density`` of the induced subgraph. The value is returned directly
        instead of being formatted to a 20-decimal string and parsed back,
        which discarded precision for small densities.
    """
    top_n_nodes = [node for node, _ in get_most_connected(G, top_wanted)]
    top_n_G = nx.subgraph(G, nbunch=top_n_nodes)
    # float(): nx.density may return the integer 0; callers always got a float.
    return float(nx.density(top_n_G))


def make_top_n_df(g, n=10):
    """Build the node table used to draw the ``n`` most connected nodes of ``g`` with their neighbours.

    Parameters
    ----------
    g : graph to analyse. Neighbour degrees are counted in ``g`` itself
        (previously they were read from the notebook-level ``G``).
    n : int, number of most connected nodes to start from.

    Returns
    -------
    pandas.DataFrame with one row per node and the columns

    * ``node``    -- node name; the top nodes come first, then their neighbours.
    * ``degrees`` -- number of neighbours of the node.
    * ``bucket``  -- 0 for the top nodes; 1-4 for neighbours, from
      ``pd.cut(..., bins=4)`` over the neighbours' degrees.
    * ``label``   -- ``"bot?"`` for top nodes, the node name for buckets 3
      and 4, a single space otherwise.
    * ``size``    -- marker size chosen per bucket.

    Top nodes that are neighbours of other top nodes are listed once and stay
    in bucket 0; they are no longer duplicated and re-bucketed as neighbours.
    """
    top_n = get_most_connected(g, n)
    top_n_nodes = [tup[0] for tup in top_n]
    top_n_degrees = [tup[1] for tup in top_n]

    # Every neighbour of a top node, minus the top nodes themselves.
    neighbor_set = set()
    for node in top_n_nodes:
        neighbor_set.update(nx.all_neighbors(g, node))
    neighbor_set.difference_update(top_n_nodes)

    neighbor_nodes = list(neighbor_set)
    neighbor_degrees = [sum(1 for _ in nx.all_neighbors(g, node)) for node in neighbor_nodes]

    all_nodes = top_n_nodes + neighbor_nodes
    all_degrees = top_n_degrees + neighbor_degrees
    top_n_df = pd.DataFrame(list(zip(all_nodes, all_degrees)), columns=['node', 'degrees'])

    top_n_df['bucket'] = 0
    is_neighbor = top_n_df["node"].isin(neighbor_nodes)
    if is_neighbor.any():  # pd.cut cannot bin an empty selection
        top_n_df.loc[is_neighbor, 'bucket'] = list(pd.cut(top_n_df.loc[is_neighbor, "degrees"],
                                                          bins=4,
                                                          labels=[1, 2, 3, 4]))

    top_n_df['label'] = ' '
    top_n_df.loc[top_n_df["node"].isin(top_n_nodes), "label"] = "bot?"
    # Only the two highest-degree neighbour buckets are labelled with their name.
    named = top_n_df["bucket"].isin([3, 4])
    top_n_df.loc[named, "label"] = top_n_df.loc[named, "node"]

    top_n_df['size'] = 1
    bucket_sizes = {4: 15000, 3: 7000, 2: 300, 1: 100, 0: 4000}  # bucket 0 should be bots
    for bucket, size in bucket_sizes.items():
        top_n_df.loc[top_n_df["bucket"] == bucket, "size"] = size

    return top_n_df
    
def draw_from_df(g, df, f_size=(50,50)):
    """Draw, with a spring layout, the subgraph of ``g`` induced by the nodes listed in ``df``.

    ``df`` supplies the columns ``node`` (which nodes to draw), ``label``
    (text shown per node), ``bucket`` (colour value) and ``size`` (marker size).
    ``f_size`` is the matplotlib figure size.
    """
    node_names = list(df["node"])
    captions = dict(zip(node_names, list(df["label"])))

    # Lay out and draw only the listed nodes, never the whole graph.
    drawn = nx.subgraph(g, nbunch=node_names)

    plt.figure(figsize=f_size)
    draw_options = dict(
        with_labels=True,
        labels=captions,
        font_size=20,
        font_weight='bold',
        nodelist=node_names,
        node_color=list(df["bucket"]),
        node_size=list(df["size"]),
        cmap="prism_r",
        width=.5,
    )
    nx.draw_spring(drawn, **draw_options)
    plt.show()
    
    

In [None]:
def make_df_from_list(g, lst):
    """Build the node table used to draw the nodes in ``lst`` together with their neighbours.

    Parameters
    ----------
    g : graph to analyse. Membership and all degrees are taken from ``g``
        (previously the notebook-level ``G`` was consulted instead).
    lst : iterable of node names. Names that are not nodes of ``g`` are
        ignored; ``lst`` itself is not modified.

    Returns
    -------
    pandas.DataFrame with one row per node and the columns

    * ``node``    -- the listed nodes present in ``g`` first, then their neighbours.
    * ``degrees`` -- number of neighbours of the node in ``g``.
    * ``bucket``  -- 0 for listed nodes; 1-4 for the other nodes, from
      ``pd.cut(..., bins=4)`` over their degrees.
    * ``label``   -- ``"bot?"`` for listed nodes, the node name for buckets 3
      and 4, a single space otherwise.
    * ``size``    -- marker size chosen per bucket.

    A listed node that is also a neighbour of another listed node appears once.
    """
    # Filter into a new list. The earlier version popped from the list while
    # looping over it, which skipped the element after each missing node and
    # could leave missing nodes in the list.
    top_n_nodes = [node for node in lst if node in g]
    top_n_degrees = [sum(1 for _ in nx.all_neighbors(g, node)) for node in top_n_nodes]

    # Every neighbour of a listed node, minus the listed nodes themselves.
    neighbor_set = set()
    for node in top_n_nodes:
        neighbor_set.update(nx.all_neighbors(g, node))
    neighbor_set.difference_update(top_n_nodes)

    neighbor_nodes = list(neighbor_set)
    neighbor_degrees = [sum(1 for _ in nx.all_neighbors(g, node)) for node in neighbor_nodes]

    all_nodes = top_n_nodes + neighbor_nodes
    all_degrees = top_n_degrees + neighbor_degrees
    top_n_df = pd.DataFrame(list(zip(all_nodes, all_degrees)), columns=['node', 'degrees'])

    top_n_df['bucket'] = 0
    # Bucket everything that is not a listed node, so listed nodes that connect
    # to each other still stay in bucket 0.
    not_listed = ~top_n_df["node"].isin(top_n_nodes)
    if not_listed.any():  # pd.cut cannot bin an empty selection
        top_n_df.loc[not_listed, 'bucket'] = list(pd.cut(top_n_df.loc[not_listed, "degrees"],
                                                         bins=4,
                                                         labels=[1, 2, 3, 4]))

    top_n_df['label'] = ' '
    top_n_df.loc[top_n_df["node"].isin(top_n_nodes), "label"] = "bot?"
    # Only the two highest-degree neighbour buckets are labelled with their name.
    named = top_n_df["bucket"].isin([3, 4])
    top_n_df.loc[named, "label"] = top_n_df.loc[named, "node"]

    top_n_df['size'] = 1
    bucket_sizes = {4: 15000, 3: 7000, 2: 300, 1: 100, 0: 4000}  # bucket 0 should be bots
    for bucket, size in bucket_sizes.items():
        top_n_df.loc[top_n_df["bucket"] == bucket, "size"] = size

    return top_n_df

In [None]:

def range_of_degrees(g, list_not_range=False):
    """Summarise the node degrees of ``g``.

    A node's degree is the number of entries yielded by ``nx.all_neighbors``
    for it in ``g`` (previously the notebook-level ``G`` was queried, which
    gave wrong counts for any other graph).

    Parameters
    ----------
    g : graph to analyse.
    list_not_range : bool, default False. When true, return every degree
        instead of the extremes.

    Returns
    -------
    ``(max_degree, min_degree)`` by default, or the list of all degrees in
    ``nx.nodes(g)`` order when ``list_not_range`` is true.

    Raises
    ------
    ValueError
        If ``g`` has no nodes and the extremes are requested.
    """
    the_degrees = [sum(1 for _ in nx.all_neighbors(g, node)) for node in nx.nodes(g)]

    if list_not_range:
        return the_degrees
    return max(the_degrees), min(the_degrees)


def get_nodes_and_bucket_lists(g, buckets_wanted=5):
    """Return the nodes of ``g`` and, in the same order, a degree bucket for each.

    Degrees (number of neighbours) are split into ``buckets_wanted``
    equal-width bins with ``pd.cut``; bins are labelled 1..``buckets_wanted``
    from the lowest degrees to the highest.
    """
    nodes = list(nx.nodes(g))
    the_degrees = [len(list(nx.all_neighbors(g, member))) for member in nodes]

    bin_names = list(range(1, buckets_wanted + 1))
    bucket_labeled = list(pd.cut(the_degrees, bins=buckets_wanted, labels=bin_names))
    return nodes, bucket_labeled

def draw_network(g, buckets_needed=5, f_size=(100,100)):
    """Draw ``g`` with a spring layout, colouring and sizing nodes by degree bucket.

    Parameters
    ----------
    g : graph to draw.
    buckets_needed : int, number of degree buckets (see
        ``get_nodes_and_bucket_lists``).
    f_size : matplotlib figure size.
    """
    # Compute nodes and buckets once; the two results come from the same call.
    nodes, bucket_labels = get_nodes_and_bucket_lists(g, buckets_wanted=buckets_needed)

    # As the label gets bigger, the number of connections/degrees increases.
    map_dict = {1:100, 2:200, 3:300, 4:4000, 5:10000}
    # Labels above 5 (buckets_needed > 5) are not in the table; give them the
    # largest size instead of None, which matplotlib cannot draw.
    largest_size = max(map_dict.values())
    sizes = [map_dict.get(label, largest_size) for label in bucket_labels]

    plt.figure(figsize=f_size)
    nx.draw_spring(g,
                   with_labels=True,
                   font_weight='bold',
                   nodelist=nodes,
                   node_color=bucket_labels,
                   node_size=sizes,
                   cmap="prism_r",
                   width=.5)
    plt.show()

In [None]:
# Density of the top-n subgraph for n = 1, 11, 21, ... (below 1000).
num_top_nodes = list(range(1, 1000, 10))
densities = [top_n_density(G, count) for count in num_top_nodes]

plt.plot(num_top_nodes, densities)
plt.xlabel(xlabel="Number of top connected nodes in the graph")
plt.ylabel(ylabel="Network density")
plt.title(label="Sputnik")
plt.show()

In [None]:
# The 60 most connected nodes and the subgraph they induce.
most_edges = get_most_connected(G, 60)
suspect_nodes = [pair[0] for pair in most_edges]
suspect_subgraph = nx.subgraph(G, nbunch=suspect_nodes)

# Nodes that have no neighbour inside that subgraph.
no_edges = [
    member
    for member in nx.nodes(suspect_subgraph)
    if len(list(nx.all_neighbors(suspect_subgraph, member))) == 0
]

print(no_edges)
    
    


In [None]:
# Same sweep as before, restricted to n = 1, 11, 21, ... (below 150).
num_top_nodes = list(range(1, 150, 10))
densities = [top_n_density(G, count) for count in num_top_nodes]

plt.plot(num_top_nodes, densities)
plt.show()

In [None]:
# Hand-picked account names, later intersected with the graph's nodes.
high_freq = [
    "viriyabot",
    "notfarmerwife",
    "fotopak",
    "Ollissya",
    "kuppp005",
    "global79619367",
    "_ForeignService",
    "DerekMaher3",
    "MathersLig",
    "standtallnroar",
    "yuuji_K1",
    "Explorador_IT",
    "A1Patriot2020",
]

# (node, degree) pairs for the 150 most connected nodes.
most_edges = get_most_connected(G, 150)

# Entries from the 40th-from-last up to, but not including, the 20th-from-last.
pos_bots = most_edges[-40:-20]

pos_bots

In [None]:
# most_edges holds (node, degree) tuples, so compare the node names. The tuples
# themselves can never equal a name string, so intersecting them with
# high_freq always gave an empty set.
high_freq_present = {node for node, _ in most_edges}.intersection(high_freq)
high_freq_present

In [None]:
# Which of the hand-picked accounts appear anywhere in the graph.
sput_nodes = list(nx.nodes(G))
high_freq_present = set(high_freq).intersection(sput_nodes)

high_freq_present

In [None]:
# Tabulate the high_freq accounts found in G together with their neighbours,
# then draw the subgraph made of those nodes.
high_df = make_df_from_list(G, list(high_freq_present))
draw_from_df(G, high_df, f_size=(50,50))

In [None]:
# Subgraph of the 180 most connected nodes, drawn with 5 degree buckets.
top180 = get_top_n_graph(G, n=180)
draw_network(top180, buckets_needed=5, f_size=(100,100))

In [None]:
# Subgraph of the 150 most connected nodes, drawn with 5 degree buckets.
top150 = get_top_n_graph(G, n=150)
draw_network(top150, buckets_needed=5, f_size=(100,100))

In [None]:
# Node names of the pos_bots slice (each entry is a (node, degree) tuple).
pos_bots_list = [bot[0] for bot in pos_bots]

# Tabulate those nodes with their neighbours and draw the resulting subgraph.
high_df = make_df_from_list(G, pos_bots_list)
draw_from_df(G, high_df, f_size=(50,50))

In [None]:
# NOTE(review): this cell repeats the preceding pos_bots cell verbatim; it
# redraws the same subgraph (the spring layout may place nodes differently).
pos_bots_list = [bot[0] for bot in pos_bots]

high_df = make_df_from_list(G, pos_bots_list)
draw_from_df(G, high_df, f_size=(50,50))

In [None]:
# Tabulate the no_edges nodes with their neighbours in the full graph G, then
# draw the resulting subgraph.
high_df = make_df_from_list(G, no_edges)
draw_from_df(G, high_df, f_size=(50,50))

In [None]:
# Display the nodes that had no neighbour inside suspect_subgraph.
no_edges