In [17]:
import os,re
import requests as rq
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns
import spotipy
import json
from netwulf import visualize
import community

In [18]:
def get_rappers():
    """Load the rapper names listed in ``data/rappers.txt``, one per line.

    Each line is cleaned by ``validate`` (see below) and stripped of
    surrounding whitespace. Lines that end up empty (for example the empty
    string produced by a trailing newline at the end of the file) are dropped.

    Returns:
        list[str]: the cleaned names, in file order.
    """

    def validate(name):
        """Remove the characters ``| ? : ^ * "`` from ``name``.

        * If a pipe occurs after the first character, only the segment between
          the first and second pipe is kept (``"a|b"`` -> ``"b"``); any pipe
          left over is deleted.
        * ``?``, ``:``, ``^`` and ``*`` are deleted everywhere.
        * Up to two double quotes are deleted, wherever they occur. The
          original ``find(c) > 0`` test skipped names whose first character
          was a quote, leaving both quotes in place.
        """
        # NOTE(review): presumably these characters are stripped because they
        # are not allowed in file names -- confirm against whatever writes
        # data/albums/<name>.txt so the cleaned names still match the files.
        if name.find("|") > 0:
            name = name.split("|")[1]
        name = name.replace("|", "")

        for c in "?:^*":
            name = name.replace(c, "")

        # The limit of two replacements is kept from the original code.
        return name.replace('"', '', 2)

    with open("data/rappers.txt", "r", encoding='utf-8') as f:
        raw_lines = f.read().split("\n")

    cleaned = (validate(line).strip() for line in raw_lines)
    return [name for name in cleaned if name]
# Module-level list of cleaned rapper names read from data/rappers.txt.
rappers = get_rappers()

In [19]:
def find_collaborators(r):
    """Return every collaborator credited on any track of rapper ``r``.

    The contents of ``data/albums/<r>.txt`` are evaluated as a Python
    expression. The result is iterated as a sequence of albums; each album is
    indexed with ``'tracks'`` and each track with ``'collaborators'``.

    Returns:
        list: collaborator entries in file order, duplicates included (one
        entry per track appearance).
    """
    album_path = "data/albums/%s.txt" % r
    with open(album_path, "r", encoding='utf-8') as album_file:
        # NOTE(review): eval() executes whatever the file contains. If the
        # file only ever holds Python literals, ast.literal_eval would be a
        # safer drop-in -- confirm the data format before switching.
        albums = eval(album_file.read())

    return [
        collaborator
        for album in albums
        for track in album['tracks']
        for collaborator in track['collaborators']
    ]

In [20]:
def get_collaborator_count(collaborators, r):
    """Count how often each collaborator appears, ignoring ``r`` itself.

    The counts are used as edge weights when the network is built.

    Args:
        collaborators: iterable of collaborator names (repeats allowed).
        r: the rapper whose collaborators these are; entries equal to ``r``
            are not counted.

    Returns:
        dict: collaborator -> number of occurrences, keyed in order of first
        appearance.
    """
    tally = {}
    for name in collaborators:
        if name != r:
            # First sighting starts at 1, later ones increment.
            tally[name] = tally.get(name, 0) + 1
    return tally

In [86]:
def save_network(network, name=None):
    """Write ``network`` to ``data/networks/<name>.json`` in node-link form.

    The original built the file name by formatting the graph object itself
    into the path, which does not give a usable name (the repository ended up
    with a file called ``data/networks/.json``). The name is now explicit.

    Args:
        network: the networkx graph to serialise with ``nx.node_link_data``.
        name: file name without extension. When omitted, the graph's ``name``
            attribute is used if it is non-empty, otherwise ``"network"``.
    """
    if name is None:
        # NOTE(review): relies on networkx exposing an empty-string `name`
        # for unnamed graphs -- confirm for the installed version.
        name = getattr(network, "name", "") or "network"

    # Make sure the target directory exists so the first save does not fail.
    os.makedirs("data/networks", exist_ok=True)

    with open(("data/networks/%s.json" % name), "w") as fp:
        json.dump(nx.node_link_data(network), fp)
    # Report success only once the file has been written and closed.
    print("network saved")

In [110]:
def get_senti():
    """Return the transformed aggression scores keyed by artist name.

    The contents of ``data/senti.txt`` are evaluated as a Python expression
    and treated as a mapping of artist -> raw score. Every raw score ``s`` is
    replaced by ``(1 - s) ** 10``:

    * subtracting from one is meant to make the more aggressive (more
      negative) raw scores the larger values;
    * the tenth power exaggerates the differences so they are easier to see
      when the values are used for visualisation.
    """
    with open('data/senti.txt', "r") as senti_file:
        # NOTE(review): eval() executes whatever the file contains; consider
        # ast.literal_eval if the file only ever holds a dict literal. Unlike
        # the other readers in this notebook, no explicit encoding is given.
        raw_scores = eval(senti_file.read())

    return {artist: (1 - score) ** 10 for artist, score in raw_scores.items()}


In [109]:
def get_lim_artists():
    """Return the artist names that have an entry in ``data/senti.txt``.

    This is the limited set of artists for which lyrics could be scraped, used
    to build a preliminary network. The file contents are evaluated as a
    Python expression and the mapping's ``keys()`` view is returned.
    """
    with open('data/senti.txt', "r") as senti_file:
        # NOTE(review): eval() executes whatever the file contains.
        scores_by_artist = eval(senti_file.read())

    artist_names = scores_by_artist.keys()
    return artist_names
# Artists that have a sentiment score; used below as the node universe.
lim_artists = get_lim_artists()

In [114]:
# Sanity check: how many artists have a sentiment score.
len(lim_artists)

464

In [112]:
def create_network(rappers, network, errors=None):
    """Populate ``network`` with weighted collaboration edges between rappers.

    For every rapper in ``rappers`` the collaborators are loaded with
    ``find_collaborators`` and counted with ``get_collaborator_count``. An
    edge ``rapper -- collaborator`` with ``weight`` equal to the number of
    collaborations is added whenever the collaborator is itself in
    ``rappers``. Afterwards each node gets a ``'size'`` attribute from
    ``get_senti()`` and a ``'group'`` attribute from the python-louvain
    partition of the graph.

    Args:
        rappers: iterable of rapper names to include as nodes.
        network: networkx graph that is modified in place.
        errors: optional list that receives the names of rappers whose album
            file raised ``IndexError`` or ``SyntaxError``. A new list is
            created when omitted; the original appended to a global
            ``errors`` that this notebook never defines.

    Returns:
        list: the ``errors`` list (names of skipped rappers).
    """
    if errors is None:
        errors = []

    rapper_names = list(rappers)
    # Built once so each membership test is a set lookup, not a list rebuild.
    known_rappers = set(rapper_names)

    for r in rapper_names:
        try:
            # get the collaborators from the data file in a list
            collaborators = find_collaborators(r)
        except (IndexError, SyntaxError):
            errors.append(r)
            continue

        # col_dict already leaves out `r` itself and maps each collaborator
        # to its collaboration count. Iterating it (not a set) keeps the
        # order deterministic, which matters for the pruning below.
        col_dict = get_collaborator_count(collaborators, r)

        for c, w in col_dict.items():
            if c not in known_rappers:
                continue

            # add the weighted edge
            network.add_edge(r, c, weight=w)

            # NOTE(review): this drops a collaborator whose only edge so far
            # is a single collaboration. Because it runs while the graph is
            # still being built, the outcome depends on processing order and
            # the node can be re-added by a later rapper -- confirm that this
            # is the intended pruning.
            if network.degree(c) == 1 and w == 1:
                network.remove_node(c)

    # set the node sizes to the aggression scores
    node_sizes = get_senti()
    nx.set_node_attributes(network, node_sizes, 'size')

    # sort the network into communities using the python-louvain method
    bb = community.best_partition(network)  # dict of node-community pairs
    nx.set_node_attributes(network, bb, 'group')

    return errors
    
   

In [115]:
# Build the preliminary collaboration graph, restricted to the artists that
# have a sentiment score (lim_artists).
prelim_aggression_network_464 = nx.Graph()
create_network(lim_artists, prelim_aggression_network_464)

network saved


In [116]:
# Persist the graph in node-link JSON form under an explicit file name.
with open('data/networks/prelim_aggression_network_464.json', 'w') as fp:
    json.dump(nx.node_link_data(prelim_aggression_network_464), fp)

In [105]:
# Shell escape: stage every change under the current directory.
!git add .

The file will have its original line endings in your working directory.


In [106]:
# Shell escape: commit the staged changes.
!git commit -m "node sizes added"

[master d90b9bd] node sizes added
 3 files changed, 1003 insertions(+), 1239 deletions(-)
 rewrite create_network.ipynb (87%)
 rewrite data/networks/.json (96%)
 create mode 100644 data/networks/prelim_aggression_network_633.json


In [107]:
# Shell escape: fetch and merge the remote master branch.
!git pull origin master

Removing data/.ipynb_checkpoints/senti-checkpoint.txt
Merge made by the 'recursive' strategy.
 data/.ipynb_checkpoints/dictionay-checkpoint.txt |   1 +
 data/.ipynb_checkpoints/done-checkpoint.txt      | 169 -----------------------
 data/.ipynb_checkpoints/senti-checkpoint.txt     |   1 -
 data/done.txt                                    | 169 -----------------------
 data/senti.txt                                   |   2 +-
 lyrics_analysis.ipynb                            |  61 ++++----
 6 files changed, 38 insertions(+), 365 deletions(-)
 create mode 100644 data/.ipynb_checkpoints/dictionay-checkpoint.txt
 delete mode 100644 data/.ipynb_checkpoints/senti-checkpoint.txt


From https://github.com/allengueco/rap_aggression
 * branch            master     -> FETCH_HEAD
   5744406..dbd465e  master     -> origin/master


In [108]:
# Shell escape: publish the local master branch to origin.
!git push origin master

To https://github.com/allengueco/rap_aggression.git
   dbd465e..33e691b  master -> master
