In [17]:
import os,re
import requests as rq
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns
import spotipy
import json
from netwulf import visualize
import community

In [18]:
def get_rappers():
    """Load the rapper names from ``data/rappers.txt`` and clean each one.

    The file is read as UTF-8 and split on newlines. Every line is passed
    through the nested ``validate`` helper and then stripped of surrounding
    whitespace.

    Returns:
        list[str]: one cleaned name per line, in file order. Blank lines
        (including the one produced by a trailing newline) are returned as
        empty strings.
    """

    def validate(name):
        """Return ``name`` with every character in ``illegal_chars`` removed.

        A line shaped like ``left|right`` is first reduced to ``right``.
        NOTE(review): presumably the characters are stripped because the names
        are later used as file names -- confirm.
        """
        illegal_chars = "|?:^*\""

        # Keep only the part after the first pipe. A pipe in the very first
        # position has nothing meaningful before it, so it is simply dropped
        # by the removal step below.
        if name.find("|") > 0:
            name = name.split("|")[1]

        # Remove every remaining illegal character. The previous version only
        # removed double quotes when the first one was NOT at index 0 (and at
        # most two of them), so names such as '"Weird Al" Yankovic' kept their
        # quotes.
        return name.translate(str.maketrans('', '', illegal_chars))

    with open("data/rappers.txt", "r", encoding='utf-8') as f:
        rappers = f.read()

    return [validate(rapper).strip() for rapper in rappers.split("\n")]
# Notebook-level list of cleaned rapper names, as returned by get_rappers().
rappers = get_rappers()

In [19]:
def find_collaborators(r):
    """Return every collaborator credited on the albums stored for ``r``.

    Reads ``data/albums/<r>.txt`` (UTF-8) and evaluates its contents as a
    Python expression. The result is iterated as a sequence of albums, each
    with a ``'tracks'`` list whose items carry a ``'collaborators'`` list.

    Args:
        r: artist name; used verbatim as the file name stem.

    Returns:
        list: all collaborator entries in album/track order. Duplicates are
        kept, so a name appears once per track it is credited on.
    """
    album_path = "data/albums/%s.txt" % r
    with open(album_path, "r", encoding='utf-8') as album_file:
        # NOTE(review): eval() executes whatever expression the file holds.
        # If these files only contain plain literals, ast.literal_eval would
        # be a safer drop-in -- confirm the file format before switching.
        discography = eval(album_file.read())

    return [
        credited
        for record in discography
        for track in record['tracks']
        for credited in track['collaborators']
    ]

In [20]:
def get_collaborator_count(collaborators, r):
    """Tally how often each collaborator appears, ignoring the artist itself.

    The counts are used as edge weights when the collaboration network is
    built.

    Args:
        collaborators: iterable of collaborator names, duplicates allowed.
        r: the artist whose collaborators are being counted; occurrences of
            this name are skipped.

    Returns:
        dict: collaborator name -> number of occurrences, keyed in order of
        first appearance.
    """
    tally = {}
    for name in collaborators:
        # An artist is not a collaborator of themselves.
        if name != r:
            tally[name] = tally.get(name, 0) + 1
    return tally

In [86]:
def save_network(network, name=None):
    """Write ``network`` as node-link JSON to ``data/networks/<name>.json``.

    Args:
        network: graph object accepted by ``nx.node_link_data``.
        name: file name stem (without the ``.json`` suffix). When omitted, the
            graph's ``name`` attribute is used; if that is empty as well, the
            stem falls back to ``str(network)``, which is what the file name
            was always built from before this parameter existed.
    """
    if name is None:
        # Formatting the graph object itself does not yield a usable file
        # name, so prefer the explicit graph name when one is set.
        name = getattr(network, "name", "") or str(network)

    with open("data/networks/%s.json" % (name,), "w") as fp:
        json.dump(nx.node_link_data(network), fp)

    # Report success only once the file has been closed.
    print("network saved")

In [102]:
def get_senti():
    """Return rescaled aggression scores keyed by artist name.

    Reads ``data/senti.txt``, evaluates its contents as a Python expression
    (a mapping of artist name -> raw score) and maps every raw score ``s`` to
    ``(1 - s) ** 10``:

    * subtracting from one makes the more aggressive (more negative) raw
      scores the larger ones;
    * raising to the tenth power exaggerates the differences between scores so
      they are easier to see when used for visualisation.

    NOTE(review): a raw score of -4 becomes 5 ** 10 = 9765625, a value that
    shows up for many artists in the displayed output -- presumably a
    placeholder for missing data; confirm.

    Returns:
        dict: artist name -> rescaled score, in the order of the source data.
    """
    with open('data/senti.txt', "r") as score_file:
        # NOTE(review): eval() executes whatever expression the file holds;
        # consider ast.literal_eval if the file only contains literals.
        raw_scores = eval(score_file.read())
        return {artist: (1 - raw) ** 10 for artist, raw in raw_scores.items()}


In [96]:
def get_lim_artists():
    """Return the artists that have an entry in ``data/senti.txt``.

    This is the limited set of artists for which lyrics could be scraped; it
    is used to build a preliminary network.

    Returns:
        The ``keys()`` view of the mapping evaluated from the file.
    """
    with open('data/senti.txt', "r") as score_file:
        contents = score_file.read()
    # NOTE(review): eval() executes whatever expression the file holds;
    # consider ast.literal_eval if the file only contains literals.
    scored_artists = eval(contents)
    return scored_artists.keys()
# Artists with a sentiment entry; passed to create_network() further down.
lim_artists = get_lim_artists()        

In [103]:
# Display the rescaled scores returned by get_senti() as this cell's output.
get_senti()

{'Adam Saleh': 9765625.0,
 'K.A.A.N.': 2.439214067649521,
 'BewhY': 1.1167752601643648,
 'Buckshot': 3.1127221006947963,
 'C-Note': 2.1904911432040053,
 'Fiend': 3.0913043423808264,
 'Kutt Calhoun': 2.025227269550757,
 'Big Moe': 1.9914681589137302,
 'Greydon Square': 1.4467780910516743,
 'G-Eazy': 1.8620744031757492,
 'Dominique Young Unique': 9765625.0,
 'Kool Moe Dee': 1.3280305108063961,
 'Daz Dillinger': 4.690315351012273,
 'Eminem': 2.054798717513978,
 'Lil Debbie': 6.572517583238942,
 "K'naan": 1.0815550118766193,
 'Full Blooded': 9765625.0,
 'Kent Jones': 9765625.0,
 'Esham': 4.9105557659151176,
 'Apache': 1.2241977544301497,
 "D'Angelo": 0.8157206227120019,
 'A$AP Yams': 9765625.0,
 'Agallah': 9765625.0,
 'Killah Priest': 1.6321112243281701,
 'Criminal Manne': 4.50888819888727,
 'Ahmad Lewis': 9765625.0,
 'Koolade': 9765625.0,
 'DJ Drama': 2.404494217008267,
 '2Mex': 1.5345375760386701,
 'C-Bo': 5.228812928603104,
 'Flo Rida': 0.7382557972787647,
 'Jadakiss': 2.542714747711122

In [98]:
def create_network(rappers, network, name=None):
    """Fill ``network`` with weighted collaboration edges, annotate and save it.

    For every artist in ``rappers`` the collaborators are loaded with
    ``find_collaborators`` and counted with ``get_collaborator_count``. An
    edge ``artist -- collaborator`` weighted by the number of collaborations
    is added for each collaborator that is itself in ``rappers``. Afterwards
    every node gets a ``'size'`` attribute (scores from ``get_senti``) and a
    ``'group'`` attribute (python-louvain community), and the graph is written
    as node-link JSON to ``data/networks/<name>.json``.

    Artists whose album data raises ``IndexError`` or ``SyntaxError`` are
    skipped and reported at the end. If the notebook defines a global
    ``errors`` list, skipped artists are also appended to it, as before.

    Args:
        rappers: iterable of artist names to include in the network.
        network: graph to populate; it is modified in place.
        name: file name stem for the JSON output. When omitted, the graph's
            ``name`` attribute is used, falling back to ``str(network)``
            (the value the file name was previously always built from).
    """
    artist_order = list(rappers)
    # Build the lookup once; the membership test runs for every collaborator.
    known_artists = set(artist_order)

    # ``errors`` is not defined anywhere in this notebook, so referencing it
    # directly raised NameError on a fresh kernel. Track skipped artists
    # locally and mirror them into a global ``errors`` only if one exists.
    shared_errors = globals().get("errors")
    skipped = []

    for r in artist_order:
        try:
            # get the collaborators from the data file in a list
            collaborators = find_collaborators(r)
        except (IndexError, SyntaxError):
            skipped.append(r)
            if shared_errors is not None:
                shared_errors.append(r)
            continue

        # Maps collaborator -> number of collaborations; the artist itself is
        # already excluded. Iterating the dict (first-appearance order) rather
        # than a set keeps the processing order reproducible between runs.
        col_dict = get_collaborator_count(collaborators, r)

        for c, w in col_dict.items():
            if c not in known_artists:
                continue

            # add the weighted edge (weight = number of collaborations)
            network.add_edge(r, c, weight=w)

            # Drop a collaborator whose only connection is this single
            # collaboration.
            # NOTE(review): the check runs right after each insertion, so the
            # outcome depends on processing order, and ``r`` may be left
            # without edges -- confirm this pruning is intended.
            if network.degree(c) == 1 and w == 1:
                network.remove_node(c)

    # set the node sizes to the aggression scores
    node_sizes = get_senti()
    nx.set_node_attributes(network, node_sizes, 'size')

    # sort the network into communities using the python-louvain method
    bb = community.best_partition(network)  # dict of node-community pairs
    nx.set_node_attributes(network, bb, 'group')

    # Save the network so it can be inspected later. Formatting the graph
    # object itself does not give a usable file name, so prefer an explicit
    # name or the graph's own ``name`` attribute.
    if name is None:
        name = getattr(network, "name", "") or str(network)
    with open("data/networks/%s.json" % (name,), "w") as fp:
        json.dump(nx.node_link_data(network), fp)
    print("network saved")

    if skipped:
        print("skipped %d artists with unreadable album data: %s"
              % (len(skipped), skipped))
    
   

In [99]:
# Start from an empty undirected graph; create_network() adds the edges and
# node attributes to it in place, using the artists in lim_artists.
prelim_aggression_network_633 = nx.Graph()
create_network(lim_artists, prelim_aggression_network_633)

network saved


In [104]:
# Write the preliminary network as node-link JSON under an explicit file name.
with open('data/networks/prelim_aggression_network_633.json', 'w') as fp:
    json.dump(nx.node_link_data(prelim_aggression_network_633), fp)

In [92]:
# Stage every change under the current directory for the next commit.
!git add .

The file will have its original line endings in your working directory.


In [93]:
# Record the staged changes in a commit.
!git commit -m "node sizes added"

[master c9bb552] node sizes added
 12 files changed, 1012 insertions(+), 9 deletions(-)
 rename rappers_network_communities.json => data/networks/rappers_network_communities.json (100%)
 rename rappers_network_nodes.json => data/networks/rappers_network_nodes.json (100%)
 rename rappers_network_senti.json => data/networks/rappers_network_senti.json (100%)
 rename rappers_network_senti_reg.json => data/networks/rappers_network_senti_reg.json (100%)
 rename rappers_network_sentiment.json => data/networks/rappers_network_sentiment.json (100%)
 rename rappers_network_sentiment2.json => data/networks/rappers_network_sentiment2.json (100%)
 rename IMG-4472.JPG => images/IMG-4472.JPG (100%)
 rename collaboration_network_communities image.png => images/collaboration_network_communities image.png (100%)
 rename collaborators_network_image.png => images/collaborators_network_image.png (100%)
 rename large network image.png => images/large network image.png (100%)
 rename preliminary aggresion ne

In [94]:
# Fetch the remote master branch and merge it into the local branch.
!git pull origin master

Merge made by the 'recursive' strategy.
 Untitled.ipynb                               |   10 +-
 data/.ipynb_checkpoints/done-checkpoint.txt  |  633 +++++++++++
 data/.ipynb_checkpoints/senti-checkpoint.txt |    2 +-
 data/dictionay.txt                           |    1 +
 data/done.txt                                |  634 ++++++++++-
 data/senti.txt                               |    2 +-
 get_lyrics.ipynb                             |    4 +-
 lyrics_analysis.ipynb                        | 1524 ++------------------------
 8 files changed, 1370 insertions(+), 1440 deletions(-)
 create mode 100644 data/dictionay.txt


From https://github.com/allengueco/rap_aggression
 * branch            master     -> FETCH_HEAD
   c534609..89eee14  master     -> origin/master


In [95]:
# Publish the local master branch to the remote named origin.
!git push origin master

To https://github.com/allengueco/rap_aggression.git
   89eee14..5744406  master -> master
