In [1]:
import requests
episodes = requests.get('https://raw.githubusercontent.com/jeffreylancaster/game-of-thrones/master/data/episodes.json').json()['episodes']

In [2]:
out = list()
for episode in episodes:
    episodeNum = episode['episodeNum']
    seasonNum = episode['seasonNum']
    for scene in episode['scenes']:
        out.append(((episodeNum, seasonNum),
                    (scene['sceneStart'], scene['sceneEnd']),
                    set(c['name'] for c in scene['characters'])))

In [3]:
from six import iteritems
cmap = {v: i for i, v in enumerate(sorted(list(set(c for o in out for c in o[2]))))}
cmap_inv = {v: i for i, v in iteritems(cmap)}

In [4]:
import time
import datetime
from itertools import combinations

df = []
def key(a):
    return (a[0][0], a[0][1], a[1][0], a[1][1])

def to_seconds(s):
    x = time.strptime(s.split(',')[0],'%H:%M:%S')
    return datetime.timedelta(hours=x.tm_hour,minutes=x.tm_min,seconds=x.tm_sec).total_seconds()

episode_map = dict()
se_previous, tf, offset_t = None, 0, 0
for element in sorted(out, key=key):
    if se_previous != element[0]:
        se_previous = element[0]
        offset_t += tf
    if len(element[2]):
        ts, tf = to_seconds(element[1][0]), to_seconds(element[1][1])
        for a, b in combinations(sorted([cmap[e] for e in element[2]]), 2):
            df.append((a, b, ts + offset_t, tf + offset_t))
        episode_map[ts + offset_t] = element[0] + (ts, )
        episode_map[tf + offset_t] = element[0] + (tf, )

In [5]:
from stream_graph import TemporalLinkSetDF
ls = TemporalLinkSetDF(df, False, sort_by=None)

In [6]:
import pandas as pd
DF = pd.DataFrame(list(ls.degree_of(direction='both')), columns=['user', 'degree'])

def map(a):
    return cmap_inv[a]

DF['names'] = DF.user.apply(map)
DF = DF.set_index('user')

In [7]:
DF = DF.sort_values(by=['degree'], ascending=False)
DF.head(20)

Unnamed: 0_level_0,degree,names
user,Unnamed: 1_level_1,Unnamed: 2_level_1
207,131196.0,Jon Snow
500,115846.0,Tyrion Lannister
88,103089.0,Daenerys Targaryen
74,102005.0,Cersei Lannister
421,88605.0,Sansa Stark
209,84269.0,Jorah Mormont
196,66634.0,Jaime Lannister
278,66003.0,Lord Varys
420,65031.0,Sandor Clegane
91,64785.0,Davos Seaworth


In [8]:
data = [(map(a), map(b), d) for ((a, b), d) in ls.duration_of(direction='both')]
DF = pd.DataFrame(data, columns=['actor-a', 'actor-b', 'duration'])

In [9]:
DF = DF.sort_values(by=['duration'], ascending=False)
DF.head(20)

Unnamed: 0,actor-a,actor-b,duration
659,Daenerys Targaryen,Jorah Mormont,11849.0
621,Davos Seaworth,Jon Snow,8661.0
1314,Daenerys Targaryen,Missandei,8467.0
3139,Jon Snow,Tormund Giantsbane,8439.0
3292,Lord Varys,Tyrion Lannister,7914.0
513,Cersei Lannister,Jaime Lannister,6920.0
3246,Daenerys Targaryen,Tyrion Lannister,6498.0
3242,Cersei Lannister,Tyrion Lannister,6485.0
2523,Petyr Baelish,Sansa Stark,6397.0
3305,Missandei,Tyrion Lannister,6360.0


In [10]:
# Inefficient because we already have cliques..
#mxc = ls.get_maximal_cliques(direction='both')

In [11]:
ls.minimal_temporal_nodeset.df

Unnamed: 0,u,ts,tf
0,0,203917.0,203993.0
1,1,115123.0,115159.0
2,2,173937.0,174127.0
3,3,42428.0,42515.0
4,3,109828.0,110176.0
5,3,110181.0,110185.0
6,3,110198.0,110205.0
7,3,110208.0,110219.0
8,3,110224.0,110368.0
9,4,131522.0,131529.0


In [12]:
cliques = [(t, set(map(n) for n in ns), ns.size) for t, ns in ls.minimal_temporal_nodeset.nodes_at()]

In [13]:
#from operator import itemgetter
#cliques_sorted sorted(cliques, key=itemgetter(2), reverse=True)
indexes = [i[0] for i in sorted(enumerate(cliques), key=lambda x:x[1][2], reverse=True)]

In [14]:
print((cliques[indexes[0]][0], cliques[indexes[0]+1][0]), cliques[indexes[0]][1:])

(1419.0, 1756.0) ({'Jon Snow', 'Theon Greyjoy', 'Sansa Stark', 'Rickon Stark', 'Joffrey Baratheon', 'Jaime Lannister', 'Hodor', 'Myrcella Baratheon', 'Eddard Stark', 'Rodrik Cassel', 'Bran Stark', 'Summer', 'Maester Luwin', 'Tommen Baratheon', 'Catelyn Stark', 'Cersei Lannister', 'Arya Stark', 'Robert Baratheon', 'Jory Cassel', 'Sandor Clegane'}, 20)
