In [9]:
import numpy as np
import pandas as pd
import networkx as nx
import tabulate

# Load Data

In [2]:
# Load reversed cover graph (i.e. edge (u, v) denotes u preceded v in the cover chain)
cover_graph_reversed = nx.read_edgelist('networks/cover_graph_reversed.txt', nodetype=int)

In [102]:
# Load cover songs data
covers = pd.read_csv('data/secondhandsongs/covers.csv')
covers = covers.dropna(subset=['release_date', 'performer', 'artists'])

In [120]:
cover_id_to_artist = {}

for artists_str, ids_str  in zip(covers['artists'].values, covers['artist_ids'].values):
    artists = artists_str.split(',')
    ids = ids_str.split(',') 
    
    # Try to match id to artist name
    if len(artists) == len(ids):
        for i in range(len(artists)):
            cover_id_to_artist[int(ids[i])] = artists[i].strip()

In [3]:
# Load allmusic influence graph
influence_graph = nx.read_edgelist('networks/allmusic_influence_graph.txt', create_using=nx.DiGraph(), nodetype=int)
# Get reversed version (i.e. edge (u, v) denotes artist u was influenced by v)
influence_graph_reversed = nx.reverse(influence_graph)

In [4]:
# Load allmusic artist information
artists = pd.read_csv('data/allmusic/artists_cleaned.csv')

In [5]:
# Create dictionary mapping from artist id to artist name
influence_id_to_artist = pd.Series(artists['name'].values, index=artists['id'].values).to_dict()

# Run PageRank & HITS on Reversed Cover Song Graph (Next Immediate Neighbor Assumption)

In [121]:
pagerank_dict = nx.pagerank(cover_graph_reversed)

In [122]:
# Sort by value, in descending order
sorted_pagerank = sorted(pagerank_dict.items(), key=lambda x: x[1], reverse=True)

In [123]:
for i, (id, pr) in enumerate(sorted_pagerank[:25]):
    print str(i+1) + '.', cover_id_to_artist[id], pr

1. Top of the Pops 0.00107235921235
2. Vitamin String Quartet 0.000759528602695
3. Willie Nelson 0.000594296734771
4. Glee Cast 0.000557786988933
5. Elvis Presley 0.000495188321966
6. Johnny Cash 0.000456302568276
7. James Last 0.00045548237444
8. Cliff Richard 0.000427189865568
9. Sweet Little Band 0.000401461321822
10. Johnny Mathis 0.000401130572528
11. Ella Fitzgerald 0.000393695998541
12. Ray Conniff 0.000384013541031
13. Tom Jones 0.00038244373999
14. Bob Dylan 0.000378421550853
15. Frank Sinatra 0.000372429369872
16. Beelzebubs 0.000370854782234
17. Geoff Love 0.000365205830214
18. Eugene Chadbourne 0.000358913005778
19. Bing Crosby 0.000356657105374
20. Depeche Mode 0.000354156331196
21. Jerry Lee Lewis 0.000354033161514
22. Mina 0.000352882844068
23. Raga Rockers 0.000349281921077
24. The Jordanaires 0.000348331456455
25. Tony Bennett 0.000345457139346


In [124]:
hubs, authorities = nx.hits(cover_graph_reversed)

In [125]:
sorted_hubs = sorted(hubs.items(), key=lambda x: x[1], reverse=True)
sorted_authorities = sorted(authorities.items(), key=lambda x: x[1], reverse=True)

In [126]:
for i, (id, val) in enumerate(sorted_hubs[:25]):
    print str(i+1) + '.', cover_id_to_artist[id], val

1. Ella Fitzgerald 0.00158321435893
2. Frank Sinatra 0.00146035030174
3. Oscar Peterson 0.00144958326096
4. Tony Bennett 0.00136777365966
5. Sarah Vaughan 0.00132686836563
6. Johnny Mathis 0.00130484197629
7. Ray Conniff 0.00126896758607
8. Peggy Lee 0.00123011881749
9. George Shearing 0.00120661488928
10. Mel Tormé 0.001192843191
11. Rosemary Clooney 0.00116013245412
12. Ray Brown 0.00111152044699
13. Sammy Davis Jr. 0.00107092845171
14. Carmen McRae 0.00106544599234
15. Geoff Love 0.00104736267089
16. Nelson Riddle 0.00102375498718
17. Hank Jones 0.00102358788005
18. Joe Pass 0.00101917520263
19. André Previn 0.00100392961037
20. Count Basie 0.000995606551973
21. Nancy Wilson 0.000992172627021
22. Dave McKenna 0.0009867048865
23. Bing Crosby 0.000976864673444
24. Chet Baker 0.000969477942094
25. Helen Merrill 0.000967052744633


In [127]:
for i, (id, val) in enumerate(sorted_authorities[:25]):
    print str(i+1) + '.', cover_id_to_artist[id], val

1. Ella Fitzgerald 0.00158321435893
2. Frank Sinatra 0.00146035030174
3. Oscar Peterson 0.00144958326096
4. Tony Bennett 0.00136777365966
5. Sarah Vaughan 0.00132686836563
6. Johnny Mathis 0.00130484197629
7. Ray Conniff 0.00126896758607
8. Peggy Lee 0.00123011881749
9. George Shearing 0.00120661488928
10. Mel Tormé 0.001192843191
11. Rosemary Clooney 0.00116013245412
12. Ray Brown 0.00111152044699
13. Sammy Davis Jr. 0.00107092845171
14. Carmen McRae 0.00106544599234
15. Geoff Love 0.00104736267089
16. Nelson Riddle 0.00102375498718
17. Hank Jones 0.00102358788005
18. Joe Pass 0.00101917520263
19. André Previn 0.00100392961037
20. Count Basie 0.000995606551971
21. Nancy Wilson 0.000992172627021
22. Dave McKenna 0.000986704886502
23. Bing Crosby 0.000976864673442
24. Chet Baker 0.000969477942094
25. Helen Merrill 0.000967052744633


# Run PageRank & HITS on Reversed Cover Song Graph (First Artist Assumption)

In [128]:
cover_graph_first_artist = nx.read_edgelist('networks/cover_graph_first_artist_assumption_reversed.txt', nodetype=int)

In [129]:
pagerank_dict_first = nx.pagerank(cover_graph_first_artist)

In [130]:
# Sort by value, in descending order
sorted_pagerank_first = sorted(pagerank_dict_first.items(), key=lambda x: x[1], reverse=True)

In [131]:
for i, (id, pr) in enumerate(sorted_pagerank_first[:25]):
    print str(i+1) + '.', cover_id_to_artist[id], pr

1. Depeche Mode 0.00396074991902
2. The Beatles 0.002351704653
3. Frank Sinatra 0.00171747659942
4. Ramones 0.00167773822291
5. Paul Whiteman 0.00158502448715
6. Bing Crosby 0.00155838287807
7. Bob Dylan 0.00150065346032
8. David Bowie 0.00146578857574
9. Duke Ellington 0.00134898885876
10. Top of the Pops 0.00132759996379
11. Joy Division 0.00127387148041
12. Helen Jepson 0.00127184002859
13. Kiss 0.0012040341883
14. New Order 0.00119613435544
15. The Cure 0.00118014680383
16. Elvis Presley 0.00116872950535
17. Bruce Springsteen 0.00104647778623
18. The Rolling Stones 0.00100432757706
19. Louis Armstrong 0.000998215980967
20. Guy Lombardo 0.000983772861698
21. AC/DC 0.000959007294547
22. Led Zeppelin 0.000934777187908
23. Pink Floyd 0.000933960159308
24. Paul Mickelson 0.000927527688219
25. Caravelli 0.000926967108266


# Run PageRank & HITS on AllMusic Influence Graph

In [6]:
influence_pagerank_dict = nx.pagerank(influence_graph_reversed)

In [7]:
# Sort by value, in descending order
influence_sorted_pagerank = sorted(influence_pagerank_dict.items(), key=lambda x: x[1], reverse=True)

In [8]:
for i, (id, pr) in enumerate(influence_sorted_pagerank[:25]):
    print str(i+1) + '.', influence_id_to_artist[id], pr

1. Louis Armstrong 0.00723579416235
2. Scott Joplin 0.0069225229649
3. The Beatles 0.00642019485134
4. Charley Patton 0.00484325175019
5. Jelly Roll Morton 0.00465115405069
6. Uncle Dave Macon 0.00447063070519
7. Fats Waller 0.00427570727595
8. Bob Dylan 0.00374972007327
9. Jimmie Rodgers 0.00357579092837
10. James Brown 0.00350958350175
11. King Oliver 0.00336848227306
12. James P. Johnson 0.00319789547229
13. Duke Ellington 0.00317864858045
14. Chuck Berry 0.00305074471994
15. Louis Jordan 0.00303283909014
16. W.C. Handy 0.00298178797717
17. Mike Walbridge 0.00292282573903
18. The Rolling Stones 0.00287099431177
19. Blind Lemon Jefferson 0.00276100426908
20. The Mills Brothers 0.00270951367635
21. The Velvet Underground 0.00265674847772
22. Bessie Smith 0.00249454086781
23. Little Richard 0.00246646882579
24. Hobart Smith 0.00245348444039
25. Jimi Hendrix 0.00242844811919


In [11]:
# Prepare for pretty printing for insertion into Latex
table_matrix = []

for i, (id, pr) in enumerate(influence_sorted_pagerank[:25]):
    table_matrix.append([influence_id_to_artist[id], pr])

In [18]:
print tabulate.tabulate(table_matrix, headers=['Artist', 'PageRank'],tablefmt='latex')

\begin{tabular}{lr}
\hline
 Artist                 &   PageRank \\
\hline
 Louis Armstrong        & 0.00723579 \\
 Scott Joplin           & 0.00692252 \\
 The Beatles            & 0.00642019 \\
 Charley Patton         & 0.00484325 \\
 Jelly Roll Morton      & 0.00465115 \\
 Uncle Dave Macon       & 0.00447063 \\
 Fats Waller            & 0.00427571 \\
 Bob Dylan              & 0.00374972 \\
 Jimmie Rodgers         & 0.00357579 \\
 James Brown            & 0.00350958 \\
 King Oliver            & 0.00336848 \\
 James P. Johnson       & 0.0031979  \\
 Duke Ellington         & 0.00317865 \\
 Chuck Berry            & 0.00305074 \\
 Louis Jordan           & 0.00303284 \\
 W.C. Handy             & 0.00298179 \\
 Mike Walbridge         & 0.00292283 \\
 The Rolling Stones     & 0.00287099 \\
 Blind Lemon Jefferson  & 0.002761   \\
 The Mills Brothers     & 0.00270951 \\
 The Velvet Underground & 0.00265675 \\
 Bessie Smith           & 0.00249454 \\
 Little Richard         & 0.00246647 \\
 Hobar

In [59]:
influence_hubs, influence_authorities = nx.hits(influence_graph_reversed)

In [60]:
influence_sorted_hubs = sorted(influence_hubs.items(), key=lambda x: x[1], reverse=True)
influence_sorted_authorities = sorted(influence_authorities.items(), key=lambda x: x[1], reverse=True)

In [76]:
for i, (id, val) in enumerate(influence_sorted_hubs[:25]):
    print str(i+1) + '.', influence_id_to_artist[id], val

1. Alice Cooper 0.00110348982188
2. Big Star 0.00109370733013
3. David Bowie 0.00108967887121
4. Pretenders 0.00105115555192
5. Kiss 0.00104831566171
6. Marc Bolan 0.00104660794901
7. Cheap Trick 0.00103559410147
8. R.E.M. 0.0010314313059
9. Elvis Costello 0.00102806831943
10. Matthew Sweet 0.00102232747235
11. The Smithereens 0.00102188840634
12. Nirvana 0.00101800204409
13. U2 0.00100942903629
14. The Feelies 0.00100240812138
15. Pixies 0.00100149804361
16. Guns N' Roses 0.000994724252373
17. Alex Chilton 0.000988965933535
18. Queen 0.00098342856001
19. Aerosmith 0.00097653598413
20. The Replacements 0.000970015466892
21. Mott the Hoople 0.000968772419745
22. Meat Puppets 0.000967947254137
23. Blur 0.000964394519206
24. Tom Petty 0.000963784215781
25. Ramones 0.000959161467634


# PageRank Intersection

In [137]:
set([influence_id_to_artist[id] for (id, pr) in influence_sorted_pagerank[:25]]).intersection(set([cover_id_to_artist[id] for (id, pr) in sorted_pagerank_first[:25]]))

{'Bob Dylan',
 'Duke Ellington',
 'Louis Armstrong',
 'The Beatles',
 'The Rolling Stones'}