In [1]:
import csv
import networkx as nx
import matplotlib.pyplot as plt
from networkx.algorithms import community

In [2]:
# Load every row of the semicolon-delimited cast table into memory as a list of
# string lists. newline='' is what the csv module requires of the file object so
# that line breaks embedded in quoted fields are parsed correctly.
with open('casts.csv', newline='') as handle:
    data = list(csv.reader(handle, delimiter=';'))

In [3]:
# movieID, movieName, actorName, roleType, role

# role
# short description of the role prefixed by R:
# If the role is uncertain, RZ: is used as the prefix
# If the name used in the role is significant (as in Biographical Movies), this role name follows in "quotes", as R:king "Henry V"
# If only the role name is known, then the prefix is RN:
# If the role is unknown, then only RU: is entered.
    
# Group the cast rows by movie: maps row[1] (movieName) to the list of row[2]
# (actorName) values, in the order the rows appear in `data`.
# NOTE(review): grouping uses the movie name, not the movieID in row[0], so
# rows of different movies that share a name end up in one cast list - confirm
# that this is intended.
movies = {}
for row in data:
    movies.setdefault(row[1], []).append(row[2])

In [4]:
# Build the co-star graph: one node per actor name and one undirected edge
# between every pair of different names credited on the same movie.
graph = nx.Graph()
for actors in movies.values():
    for position, actor in enumerate(actors):
        # Explicit add_node so that an actor who never shares a cast list with
        # anyone else still appears in the graph as an isolated node.
        graph.add_node(actor)
        # The graph is undirected, so pairing each actor only with the entries
        # that follow it in the cast list covers every pair exactly once
        # instead of adding each edge twice (once per direction).
        for co_star in actors[position + 1:]:
            if actor != co_star:  # same name listed twice: no self-loop
                graph.add_edge(actor, co_star)

# General statistics

In [5]:
# Headline figures for the co-star graph: size, density and the number of
# connected components.
summary = (
    ('Number of nodes: ', graph.number_of_nodes()),
    ('Number of edges: ', graph.number_of_edges()),
    ('Density : ', nx.density(graph)),
    ('Number of components: ', nx.number_connected_components(graph)),
)
for label, figure in summary:
    print(label, figure)


Number of nodes:  16615
Number of edges:  155660
Density :  0.0011278009862353722
Number of components:  637


# Centralities

In [6]:
# Centrality scores per actor, keyed by the name of the measure. Each value is
# the {actor: score} mapping returned by the corresponding NetworkX routine.
# Betweenness and closeness centrality are currently disabled; to enable them,
# add entries such as nx.betweenness_centrality(graph, k=...) and
# nx.closeness_centrality(graph).
# NOTE(review): the graph has more than one connected component; confirm that
# the installed NetworkX version accepts a disconnected graph in
# eigenvector_centrality_numpy.
cents = dict(
    degree_centrality=nx.degree_centrality(graph),
    eigenvector_centrality=nx.eigenvector_centrality_numpy(graph),
)

In [7]:
# For every centrality measure, print its name followed by the ten
# (actor, score) pairs with the highest score, then a blank separator line.
for cent, scores in cents.items():
    print(cent)
    sort = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)
    for entry in sort[:10]:
        print(entry)
    print()
    

degree_centrality
('s a', 0.19922956542674852)
('Humphrey Bogart', 0.025941976646201997)
('James Stewart', 0.022511135187191524)
('Gary Cooper', 0.022270374383050438)
('John Gielgud', 0.022270374383050438)
('John Carradine', 0.022089803779944624)
('Peter Lorre', 0.021668472372697724)
('C.Aubrey Smith', 0.02028409774888648)
('Henry Fonda', 0.01950162513542795)
('Burt Lancaster', 0.018779342723004695)

eigenvector_centrality
('s a', 0.32566004242875807)
('C.Aubrey Smith', 0.08708906128312012)
('John Carradine', 0.08560732265089027)
('James Stewart', 0.08366500255035038)
('John Gielgud', 0.08139785940349979)
('Peter Lorre', 0.07888583301737864)
('Gary Cooper', 0.07783469695716107)
('Basil Rathbone', 0.07530887475478036)
('Humphrey Bogart', 0.07458256884204434)
('Henry Fonda', 0.07436749327030842)



# Communities

In [8]:
# Count the k-clique communities of the co-star graph for k = 3..9.
# The maximal cliques of the graph do not depend on k, so they are enumerated
# once here and handed to every call through the `cliques` argument instead of
# being recomputed inside each of the seven calls.
maximal_cliques = list(nx.find_cliques(graph))
for i in range(3, 10):
    com = community.k_clique_communities(graph, i, cliques=maximal_cliques)
    # `com` is a generator; count its items without materialising them.
    print(sum(1 for _ in com), ' communities for clique: ', i)

1157  communities for clique:  3
2074  communities for clique:  4
2738  communities for clique:  5
2782  communities for clique:  6
2349  communities for clique:  7
1828  communities for clique:  8
1342  communities for clique:  9


# Kevin Bacon numbers

In [9]:
# Shortest-path length (number of co-star hops) from Kevin Bacon to every actor
# that can be reached from him in the graph; the source itself gets 0.
bacon = 'Kevin Bacon'
length = nx.single_source_shortest_path_length(graph, bacon)

In [10]:
# Totals over all reachable actors (the source actor, at distance 0, is part
# of `length` and therefore of the average).
suml = sum(length.values())
count = len(length)

# Descending by distance: the head of the list holds the actors farthest from
# the source, the tail holds the closest ones.
sort = sorted(length.items(), key=lambda pair: pair[1], reverse=True)

for heading, chunk in (('Top ten:', sort[:10]), ('Last ten:', sort[-10:])):
    print(heading)
    for name, hops in chunk:
        print(name, ' ', hops)
    print()

print('Average: ', suml/count)

Top ten:
Paredes   6
Antonia SanJuan   6
Elisa Touati   6
Marbel Verdu   6
Maria deMederios   6
Barbara Dennek   6
Jacqueline Lecomte   6
Henri Piccoli   6
Robert Castle   6
Hilmar Thate   5

Last ten:
Tom Cruise   1
Jack Nicholson   1
Demi Moore   1
J.A. Preston   1
Michael deLeon   1
Kiefer Sutherland   1
Evan Rachel   1
Mary Stuart Masterson   1
Cathy Moriarty   1
Kevin Bacon   0

Average:  2.813836143938388


In [11]:
# Write the co-star graph to a GEXF file in the current working directory.
export_path = "export.gexf"
nx.write_gexf(graph, export_path)