In [1]:
import csv
import networkx as nx
import matplotlib.pyplot as plt
from networkx.algorithms import community

In [2]:
# Read every row of the semicolon-delimited cast file into memory as a list
# of string lists. newline='' is what the csv module documents for file
# objects handed to csv.reader, so that line breaks embedded in quoted fields
# are parsed by the reader rather than translated by the file object.
with open('casts.csv', newline='') as file:
    data = list(csv.reader(file, delimiter=';'))

In [3]:
# movieID, movieName, actorName, roleType, role

# role
# short description of the role prefixed by R:
# If the role is uncertain, RZ: is used as the prefix
# If the name used in the role is significant (as in Biographical Movies), this role name follows in "quotes", as R:king "Henry V"
# If only the role name is known, then the prefix is RN:
# If the role is unknown, then only RU: is entered.
    
# Group the cast by film: each key is the value in column 1 (movieName) and
# each value is the list of column 2 entries (actorName), in file order.
movies = {}

for record in data:
    title, performer = record[1], record[2]
    movies.setdefault(title, []).append(performer)

In [4]:
# Build the co-starring network: one node per actor, and an edge between
# every two distinct actors listed for the same movie.
graph = nx.Graph()
for cast in movies.values():
    # Nodes are added explicitly so that an actor who is alone in a cast
    # still ends up in the graph.
    graph.add_nodes_from(cast)
    graph.add_edges_from(
        (first, second)
        for first in cast
        for second in cast
        if first != second
    )

In [None]:
# Headline statistics of the graph: node count, edge count, density and
# number of connected components, printed one per line.
graph_stats = [
    ('Number of nodes: ', nx.number_of_nodes),
    ('Number of edges: ', nx.number_of_edges),
    ('Density : ', nx.density),
    ('Number of components: ', nx.number_connected_components),
]
for label, measure in graph_stats:
    print(label, measure(graph))


Number of nodes:  16615
Number of edges:  155660
Density :  0.0011278009862353722
Number of components:  637


In [None]:
# Centrality scores keyed by measure name; each value is whatever the
# networkx routine returns for `graph`.
# Candidate measures: degree, closeness, betweenness, eigenvector.
# Only betweenness is enabled; the others are left commented out.
cents = {}
cents['betweenness_centrality'] = nx.betweenness_centrality(graph)
# cents['closeness_centrality'] = nx.closeness_centrality(graph)
# cents['degree_centrality'] = nx.degree_centrality(graph)
# cents['eigenvector_centrality'] = nx.eigenvector_centrality_numpy(graph)

In [None]:
# For each computed measure, print its name followed by the ten
# (actor, score) pairs with the highest scores, then a blank line.
for measure_name, scores in cents.items():
    print(measure_name)
    ranked = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)
    for entry in ranked[:10]:
        print(entry)
    print()
    

In [None]:
# Create the Girvan–Newman community generator for the graph and pull only
# its first item. Per the NetworkX documentation each item is a tuple of node
# sets (one set per community) — confirm against the installed version.
# NOTE(review): this is likely slow on a graph of this size — confirm.
communities_generator = community.girvan_newman(graph)
top_level_communities = next(communities_generator)

In [None]:

# Show the first ten communities, each as a sorted list of its members,
# separated by blank lines.
for members in top_level_communities[:10]:
    print(sorted(members))
    print()

In [None]:
# Shortest-path length (number of edges) from the chosen source actor to the
# nodes networkx reports for that source.
source_actor = 'Kevin Bacon'
length = nx.single_source_shortest_path_length(graph, source_actor)

In [None]:
# Summarise the distances stored in `length`: entries are ordered from the
# largest distance to the smallest, so "Top ten" lists the ten largest
# values and "Last ten" the ten smallest; the mean covers every entry.
by_distance = sorted(length.items(), key=lambda pair: pair[1], reverse=True)

print('Top ten:')
for name, hops in by_distance[:10]:
    print(name, ' ', hops)

print()
print('Last ten:')
for name, hops in by_distance[-10:]:
    print(name, ' ', hops)

print()
print('Average: ', sum(length.values()) / len(length))

In [None]:
# Save the graph in GEXF format to a file in the working directory.
export_path = "exportSmall.gexf"
nx.write_gexf(graph, export_path)