In [17]:
import numpy as np
import pandas as pd
import networkx as nx

# Load Data

In [2]:
# Load reversed cover graph (i.e. edge (u, v) denotes u preceded v in the cover chain)
cover_graph_reversed = nx.read_edgelist('networks/cover_graph_reversed.txt')

In [18]:
# Load cover songs data
covers = pd.read_csv('data/secondhandsongs/covers.csv')

In [19]:
covers.head()

Unnamed: 0,work_id,work,adapted_work_id,adapted_work,performance_id,performance,performer_id,performer,artist_ids,artists,release_date,performance_date
0,1,Petite fleur,1,Petite fleur,1,Petite fleur,1.0,Sidney Bechet,1,Sidney Bechet,1952,1952-01-21
1,1,Petite fleur,1,Petite fleur,2,Petite fleur,3.0,Chris Barber's Jazz Band,113638,Chris Barber Band,1959,
2,1,Petite fleur,1,Petite fleur,39127,Petite fleur,494.0,Charlie Byrd,494,Charlie Byrd,2000-03-14,
3,1,Petite fleur,1,Petite fleur,40915,Petite fleur,12897.0,Laurindo Almeida & The Bossa Nova Allstars,12898,Laurindo Almeida,1962,
4,1,Petite fleur,1,Petite fleur,66022,Petite fleur,27422.0,"Bart Voet, Louis Debij, Sam Vloemans, Esmé Bos...","31845, 31849, 31846, 31847, 31848","Bart Voet, Gulli Gudmundsson, Louis Debij, Sam...",2001,


In [54]:
# Load allmusic influence graph
influence_graph = nx.read_edgelist('networks/allmusic_influence_graph.txt', create_using=nx.DiGraph(), nodetype=int)
# Get reversed version (i.e. edge (u, v) denotes artist u was influenced by v)
influence_graph_reversed = nx.reverse(influence_graph)

In [64]:
# Load allmusic artist information
artists = pd.read_csv('data/allmusic/artists_cleaned.csv')

In [67]:
# Create dictionary mapping from artist id to artist name
influence_id_to_artist = pd.Series(artists['name'].values, index=artists['id'].values).to_dict()

# Run PageRank & HITS on Reversed Cover Song Graph

In [78]:
pagerank_dict = nx.pagerank(cover_graph_reversed)

In [14]:
# Sort by value, in descending order
sorted_pagerank = sorted(pagerank_dict.items(), key=lambda x: x[1], reverse=True)

In [30]:
sorted_pagerank[:5]

[(u'78114', 0.0010723592123481627),
 (u'8008', 0.0007595286026947051),
 (u'148', 0.0005942967347706583),
 (u'55509', 0.0005577869889327653),
 (u'103', 0.0004951883219657681)]

In [28]:
hubs, authorities = nx.hits(cover_graph_reversed)

In [38]:
sorted_hubs = sorted(hubs.items(), key=lambda x: x[1], reverse=True)
sorted_authorities = sorted(authorities.items(), key=lambda x: x[1], reverse=True)

In [39]:
sorted_hubs[:10]

[(u'816', 0.0015832143589290075),
 (u'319', 0.001460350301741755),
 (u'12626', 0.0014495832609566267),
 (u'2682', 0.0013677736596585989),
 (u'979', 0.001326868365627356),
 (u'2193', 0.001304841976290107),
 (u'5308', 0.0012689675860727374),
 (u'519', 0.001230118817490824),
 (u'7335', 0.0012066148892763019),
 (u'1095', 0.0011928431910041674)]

In [40]:
sorted_authorities[:10]

[(u'816', 0.0015832143589270145),
 (u'319', 0.0014603503017396806),
 (u'12626', 0.0014495832609563474),
 (u'2682', 0.0013677736596582177),
 (u'979', 0.0013268683656257013),
 (u'2193', 0.0013048419762881572),
 (u'5308', 0.001268967586069557),
 (u'519', 0.0012301188174897789),
 (u'7335', 0.001206614889277478),
 (u'1095', 0.0011928431910039755)]

# Run PageRank & HITS on Reversed Cover Song Graph

In [56]:
influence_pagerank_dict = nx.pagerank(influence_graph_reversed)

In [57]:
# Sort by value, in descending order
influence_sorted_pagerank = sorted(influence_pagerank_dict.items(), key=lambda x: x[1], reverse=True)

In [75]:
for i, (id, pr) in enumerate(influence_sorted_pagerank[:25]):
    print str(i+1) + '.', influence_id_to_artist[id], pr

1. Louis Armstrong 0.00723579416235
2. Scott Joplin 0.0069225229649
3. The Beatles 0.00642019485134
4. Charley Patton 0.00484325175019
5. Jelly Roll Morton 0.00465115405069
6. Uncle Dave Macon 0.00447063070519
7. Fats Waller 0.00427570727595
8. Bob Dylan 0.00374972007327
9. Jimmie Rodgers 0.00357579092837
10. James Brown 0.00350958350175
11. King Oliver 0.00336848227306
12. James P. Johnson 0.00319789547229
13. Duke Ellington 0.00317864858045
14. Chuck Berry 0.00305074471994
15. Louis Jordan 0.00303283909014
16. W.C. Handy 0.00298178797717
17. Mike Walbridge 0.00292282573903
18. The Rolling Stones 0.00287099431177
19. Blind Lemon Jefferson 0.00276100426908
20. The Mills Brothers 0.00270951367635
21. The Velvet Underground 0.00265674847772
22. Bessie Smith 0.00249454086781
23. Little Richard 0.00246646882579
24. Hobart Smith 0.00245348444039
25. Jimi Hendrix 0.00242844811919


In [59]:
influence_hubs, influence_authorities = nx.hits(influence_graph_reversed)

In [60]:
influence_sorted_hubs = sorted(influence_hubs.items(), key=lambda x: x[1], reverse=True)
influence_sorted_authorities = sorted(influence_authorities.items(), key=lambda x: x[1], reverse=True)

In [76]:
for i, (id, val) in enumerate(influence_sorted_hubs[:25]):
    print str(i+1) + '.', influence_id_to_artist[id], val

1. Alice Cooper 0.00110348982188
2. Big Star 0.00109370733013
3. David Bowie 0.00108967887121
4. Pretenders 0.00105115555192
5. Kiss 0.00104831566171
6. Marc Bolan 0.00104660794901
7. Cheap Trick 0.00103559410147
8. R.E.M. 0.0010314313059
9. Elvis Costello 0.00102806831943
10. Matthew Sweet 0.00102232747235
11. The Smithereens 0.00102188840634
12. Nirvana 0.00101800204409
13. U2 0.00100942903629
14. The Feelies 0.00100240812138
15. Pixies 0.00100149804361
16. Guns N' Roses 0.000994724252373
17. Alex Chilton 0.000988965933535
18. Queen 0.00098342856001
19. Aerosmith 0.00097653598413
20. The Replacements 0.000970015466892
21. Mott the Hoople 0.000968772419745
22. Meat Puppets 0.000967947254137
23. Blur 0.000964394519206
24. Tom Petty 0.000963784215781
25. Ramones 0.000959161467634


In [77]:
for i, (id, val) in enumerate(influence_sorted_authorities[:25]):
    print str(i+1) + '.', influence_id_to_artist[id], val

1. The Beatles 0.0317775767464
2. The Rolling Stones 0.0160075987704
3. Bob Dylan 0.0156421704949
4. The Kinks 0.0113188012462
5. The Beach Boys 0.00927880102331
6. The Velvet Underground 0.00906398551774
7. The Byrds 0.00859470575419
8. The Who 0.0084090816982
9. David Bowie 0.00836046288874
10. Jimi Hendrix 0.00733849735106
11. Led Zeppelin 0.0067853388965
12. Neil Young 0.00662716521235
13. Chuck Berry 0.00592863930699
14. The Stooges 0.00569313144221
15. Pink Floyd 0.00564405026675
16. Sex Pistols 0.00522758290692
17. The Yardbirds 0.00470271466336
18. Elvis Presley 0.00458206764831
19. Big Star 0.00449945346147
20. Ramones 0.00439805379567
21. The Clash 0.00423098359496
22. Hank Williams 0.00389138214382
23. Cream 0.00368365489596
24. Buddy Holly 0.00364467760073
25. Black Sabbath 0.00353587360922
