In [219]:
import networkx as nx
import random

# Path of the text file read by the loading cell below. Each line of it is
# split on "/": the first field becomes a dictionary key and the remaining
# fields become that key's list of values.
file = "top250movies.txt"

In [220]:
# Parse the data file into actorDict: {first "/"-separated field: [remaining fields]}.
# Judging by the sample output, the key is a movie title and the values are its
# cast list -- the file format itself is not visible here, so confirm if in doubt.
try:
    with open(file, "r", encoding="utf-8") as f:
        actorDict = {}
        # Iterate the file lazily instead of materialising it with readlines().
        for rawLine in f:
            rawLine = rawLine.strip()  # remove leading and trailing whitespace
            if not rawLine:
                # Blank lines (e.g. a trailing newline at end of file) would
                # otherwise create a bogus '' entry with an empty list.
                continue
            fields = rawLine.split("/")  # split line into list of strings
            actorDict[fields[0]] = fields[1:]
except FileNotFoundError as err:
    # Keep the original exception as __cause__ so the traceback stays useful.
    raise IOError(f"Error: Could not find file {file}.") from err
except ValueError as err:
    # UnicodeDecodeError is a ValueError subclass, so undecodable bytes in the
    # file end up here.
    raise ValueError(f"Error: Invalid data in file {file}.") from err

type(actorDict)

dict

In [221]:
# Show one randomly chosen (key, value-list) pair as a sanity check.
# random.choice is used because random.randint(0, len(actorDict)) includes its
# upper bound and would occasionally index one past the end (IndexError).
print("Example entry in actorDict:" + str(random.choice(list(actorDict.items()))))

Example entry in actorDict:('Platoon (1986)', ['Tom Berenger', 'Keith David', 'Willem Dafoe', 'Forest Whitaker', 'Francesco Quinn', 'Kevin Dillon', 'John C. McGinley', 'Reggie Johnson', 'Mark Moses', 'Corey Glover', 'Johnny Depp', 'Chris Pedersen', 'Bob Orwig', 'Corkey Ford', 'David Neidorf', 'Charlie Sheen', 'Richard Edson', 'Tony Todd', 'Kevin Eshelman', 'James Terry McIlvain', 'J. Adam Glover', 'Ivan Kane', 'Paul Sanchez', 'Dale Dye', 'Peter Hicks', 'Basile Achara', 'Steve Barredo', 'Chris Castillejo', 'Andrew B. Clark', 'Bernardo Manalili', 'Than Rogers', 'Li Thi Van', 'Clarisa Ortacio', 'Romy Sevilla', 'Mathew Westfall', 'Nick Nicholson', 'Warren McLean', 'Li Mai Thao', 'Ron Barracks', 'H. Gordon Boos', 'Brad Cassini', 'Mark Ebenhoch', "Robert 'Rock' Galotti", 'Eric Hahn', 'Berto Spoor', 'Oliver Stone', 'Henry Strzalkowski'])


In [222]:
# Directed graph
# We care about the direction: within each value list of actorDict, an edge
# points from the name listed later to every name listed earlier in that list.
# (Presumably the lists are in billing order, hence "cheaperActor" -- confirm
# against the data file.)
G = nx.DiGraph()

# Loop-invariant, so compute it once instead of once per edge.
# NOTE(review): every edge receives this same weight, and add_edge() on an
# already-existing edge replaces the attribute rather than accumulating it, so
# the weight carries no per-pair information. If the intent was to count how
# often a pair appears together, this needs to be accumulated instead -- confirm.
edgeWeight = len(actorDict)

for actors in actorDict.values():
    for i, actor in enumerate(actors):
        for cheaperActor in actors[i + 1:]:
            G.add_edge(cheaperActor, actor, weight=edgeWeight)

print(G)
# random.choice samples uniformly over the whole sequence. The earlier
# randint(0, len(G)) form included its upper bound (possible IndexError) and,
# for edges, used the node count as the bound on the edge list.
print("Example node in graph G: " + str(random.choice(list(G.nodes()))))
print("Example edge in graph G: " + str(random.choice(list(G.edges()))))

DiGraph with 14882 nodes and 880639 edges
Example node in graph G: Andy Skinner
Example edge in graph G: ('Cillian Murphy', 'Ben Mendelsohn')


In [223]:
# Compute PageRank
# netw maps each node of G to its PageRank score (alpha is the damping
# parameter and tol the convergence tolerance passed to networkx).
netw = nx.pagerank(G, alpha=0.80, tol=1e-15)

# random.choice avoids the off-by-one of random.randint(0, len(netw)), whose
# upper bound is inclusive and could index one past the end of the list.
print("Example PageRanked value: " + str(random.choice(list(netw.items()))))

Example PageRank value: ('Willy DeVille', 1.9600593291192686e-05)


In [224]:
# Order the (name, score) pairs from highest to lowest PageRank score.
from operator import itemgetter

sortedPageRank = sorted(netw.items(), key=itemgetter(1), reverse=True)

# Print every name, best-ranked first (scores themselves are not shown).
print("Ranked list of actors: ")
for name, _score in sortedPageRank:
    print(name)

Ranked list of actors: 
Leonardo DiCaprio
Jamie Foxx
Robert De Niro
Christoph Waltz
Tom Hanks
Al Pacino
Ben Kingsley
Brad Pitt
Christian Bale
Ralph Fiennes
Liam Neeson
Antonella Attili
Diahnne Abbott
Matt Damon
Morgan Freeman
Tom Hardy
Gary Oldman
Karen Gillan
Ryan Gosling
Kevin Spacey
Mark Ruffalo
Jack Nicholson
Michael Caine
Harrison Ford
Hugh Jackman
Joseph Gordon-Levitt
Thomas Mitchell
James Stewart
Marlon Brando
Clint Eastwood
Bruce Willis
F. Murray Abraham
Zoe Saldana
Eli Wallach
Sumi Shimamoto
Daniel Day-Lewis
Harvey Keitel
Enzo Cannavale
Frank Adu
Faye Dunaway
Matthew McConaughey
Ellen Burstyn
Tim Roth
Ben Affleck
Mélanie Laurent
Robert Duvall
Charles Chaplin
Chris Hemsworth
Chiwetel Ejiofor
Josh Brolin
Jonah Hill
Joe Pesci
Martin Sheen
Aamir Khan
William Holden
Mark Hamill
Elizabeth Olsen
Chieko Baishô
Darsheel Safary
Dave Bautista
Scarlett Johansson
Michael Berryman
Rumi Hiiragi
Alison Crosbie
Emma Stone
Matthew Modine
Isa Danieli
Claude Rains
Ryûnosuke Kamiki
Victor Argo
Ann