In [17]:
import os,re
import requests as rq
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns
import spotipy
import json
from netwulf import visualize
import community

In [18]:
def get_rappers():
    """Read the rapper names from ``data/rappers.txt`` and clean them up.

    The file is read as UTF-8 and split on newlines, one name per line.
    Each name goes through ``validate`` and is stripped of surrounding
    whitespace; names that end up empty (for example the entry produced by
    a trailing newline) are dropped.

    Returns:
        list[str]: the cleaned names, in file order.

    Raises:
        OSError: if ``data/rappers.txt`` cannot be opened.
    """
    # Characters that are removed from every name: | ? : ^ * and the
    # double quote.
    drop_illegal = str.maketrans("", "", "|?:^*\"")

    def validate(name):
        """Return ``name`` with the pipe alias resolved and illegal characters removed."""
        # For "left|right" keep the text between the first and the second
        # pipe (the part after the first pipe when there is only one).
        if "|" in name:
            name = name.split("|")[1]
        # Remove every remaining illegal character, wherever it occurs.
        # (The earlier `find(c) > 0` test skipped a match at index 0, so a
        # name that started with a double quote kept its quotes.)
        return name.translate(drop_illegal)

    with open("data/rappers.txt", "r", encoding='utf-8') as f:
        lines = f.read().split("\n")

    cleaned = (validate(line).strip() for line in lines)
    return [name for name in cleaned if name]
# Module-level list of cleaned rapper names, loaded once when this cell runs.
rappers = get_rappers()

In [19]:
def find_collaborators(r):
    """Collect every collaborator listed in the album file stored for ``r``.

    Reads ``data/albums/<r>.txt`` as UTF-8 and evaluates its contents as a
    Python expression. The result is iterated as a sequence of albums; each
    album's ``'tracks'`` entries are iterated, and each track's
    ``'collaborators'`` entries are gathered.

    Args:
        r: name used to build the album file path.

    Returns:
        list: all collaborator entries in file order, duplicates included.
    """
    album_path = "data/albums/%s.txt" % r
    with open(album_path, "r", encoding='utf-8') as album_file:
        # NOTE(review): eval() executes whatever the file contains. If the
        # album files only ever hold plain literals, ast.literal_eval would
        # be the safer choice - confirm before switching.
        discography = eval(album_file.read())

    return [
        name
        for album in discography
        for track in album['tracks']
        for name in track['collaborators']
    ]

In [20]:
def get_collaborator_count(collaborators, r):
    """Count how often each collaborator appears, ignoring ``r`` itself.

    The counts are used as edge weights when the network is built.

    Args:
        collaborators: iterable of collaborator names (duplicates expected).
        r: the rapper whose own name must not be counted.

    Returns:
        dict: collaborator -> number of occurrences, in first-seen order.
    """
    tally = {}
    for name in collaborators:
        # A rapper is never counted as their own collaborator.
        if name != r:
            tally[name] = tally.get(name, 0) + 1
    return tally

In [21]:
def save_network(network, name=None):
    """Write ``network`` as node-link JSON to ``data/networks/<stem>.json``.

    Args:
        network: the networkx graph to serialise.
        name: optional file stem. When omitted, the graph's ``name``
            attribute is used, and ``"network"`` if that is empty too.

    Returns:
        None

    Raises:
        OSError: if the target file cannot be opened for writing.
    """
    # The graph object itself used to be formatted into the path. str(graph)
    # is not a usable file name: depending on the networkx version it is the
    # (often empty) graph name or a summary sentence, so an unnamed graph
    # could end up in "data/networks/.json".
    stem = name or getattr(network, 'name', '') or 'network'
    with open('data/networks/%s.json' % stem, 'w') as fp:
        json.dump(nx.node_link_data(network), fp)

In [50]:
def get_senti():
    """Load the sentiment scores stored in ``data/senti.txt``.

    The file's text is evaluated as a Python expression and the resulting
    object is returned unchanged. It is expected to be a dictionary with
    artist names as keys and sentiment scores as values; this function does
    not check that.
    """
    senti_path = 'data/senti.txt'
    with open(senti_path, "r") as senti_file:
        # NOTE(review): eval() runs arbitrary code from the file; if the file
        # holds only a literal dict, ast.literal_eval would be safer - confirm.
        return eval(senti_file.read())

In [49]:
def get_lim_artists():
    """Return the keys of the object stored in ``data/senti.txt``.

    This gives the limited set of artists for which lyric data was scraped,
    so that a preliminary network can be built from them. The file's text is
    evaluated as a Python expression and ``.keys()`` of the result is
    returned (a keys view, not a list).
    """
    with open('data/senti.txt', "r") as senti_file:
        raw_text = senti_file.read()
    # NOTE(review): eval() runs arbitrary code from the file; if the file
    # holds only a literal dict, ast.literal_eval would be safer - confirm.
    return eval(raw_text).keys()
# Keys view over the entries of data/senti.txt, loaded once when this cell runs.
lim_artists = get_lim_artists()        

In [25]:
def create_network(rappers, network, errors=None):
    """Fill ``network`` with weighted collaboration edges and save it.

    For every rapper in ``rappers`` the collaborators are read with
    ``find_collaborators`` and counted with ``get_collaborator_count``. An
    edge rapper-collaborator, weighted by the collaboration count, is added
    for each collaborator that is itself in ``rappers``. Afterwards every
    node gets a ``'size'`` attribute from ``get_senti()`` and a ``'group'``
    attribute from the python-louvain partition, and the graph is written
    out with ``save_network``.

    Args:
        rappers: iterable of rapper names (hashable) to include.
        network: networkx graph that is modified in place.
        errors: optional list that receives the names of rappers whose album
            data raised ``IndexError`` or ``SyntaxError`` while loading. When
            omitted, the names are collected internally and reported through
            a warning instead.

    Returns:
        Whatever ``save_network(network)`` returns.
    """
    import warnings

    # `errors` used to be an undefined global here, so the first unreadable
    # album file raised NameError instead of being skipped.
    report_skipped = errors is None
    if errors is None:
        errors = []

    # Materialise the names once: a list for ordered iteration and a set so
    # the membership test below does not rebuild a list per collaborator.
    rapper_list = list(rappers)
    known_rappers = set(rapper_list)

    for r in rapper_list:
        try:
            # get the collaborators from the data file in a list
            collaborators = find_collaborators(r)
        except (IndexError, SyntaxError):
            errors.append(r)
            continue

        # collaborator -> number of collaborations; `r` itself is excluded
        col_dict = get_collaborator_count(collaborators, r)

        for c, w in col_dict.items():
            # only link to collaborators that are part of the rapper list
            if c not in known_rappers:
                continue

            # add the weighted edge
            network.add_edge(r, c, weight=w)

            # Drop `c` again when this single-collaboration edge is its only
            # link so far; a later edge can bring the node back.
            if network.degree(c) == 1 and w == 1:
                network.remove_node(c)

    if report_skipped and errors:
        warnings.warn(
            "create_network skipped %d rapper(s) whose album data could not be loaded: %s"
            % (len(errors), errors)
        )

    # set the node sizes to the aggression scores
    node_sizes = get_senti()
    nx.set_node_attributes(network, node_sizes, 'size')

    # sort the network into communities using the python-louvain method
    bb = community.best_partition(network)  # dict of node-community pairs
    nx.set_node_attributes(network, bb, 'group')

    # save the network data in a file so that you can look at it later
    return save_network(network)
    
   

In [45]:
# Build the collaboration graph for the artists that have sentiment data.
# create_network fills the graph in place. It returns save_network's result,
# which is json.dump's return value (None), so `comms` does not hold the
# communities; they are stored on the nodes as the 'group' attribute.
collaboration_network_sentiment2 = nx.Graph()
comms = create_network(lim_artists, collaboration_network_sentiment2)

In [178]:
# Export a graph as node-link JSON to rappers_network.json.
# NOTE(review): `collaboration_network` is not created in any cell visible
# here, so on a fresh kernel this cell raises NameError - confirm where the
# graph is built, or remove the cell.
with open('rappers_network.json', 'w') as fp:
    json.dump(nx.node_link_data(collaboration_network), fp)

In [46]:
# Export the sentiment-sized collaboration graph as node-link JSON into the
# working directory (rappers_network_sentiment2.json).
with open('rappers_network_sentiment2.json', 'w') as fp:
    json.dump(nx.node_link_data(collaboration_network_sentiment2), fp)

In [51]:
# Quick visual check: draw the graph with networkx's default drawing settings
# on a 20 x 20 figure.
plt.figure(figsize=[20,20])
nx.draw(collaboration_network_sentiment2)
plt.show()

In [12]:
# Version-control housekeeping (not part of the analysis): stage all changes.
!git add .

The file will have its original line endings in your working directory.
The file will have its original line endings in your working directory.


In [13]:
# Version-control housekeeping (not part of the analysis): commit the staged changes.
!git commit -m "working on node size"

[master a66985e] working on node size
 10 files changed, 477 insertions(+), 34 deletions(-)
 create mode 100644 .ipynb_checkpoints/collaboration_network_communities image-checkpoint.png
 create mode 100644 .ipynb_checkpoints/large network image-checkpoint.png
 create mode 100644 data/.ipynb_checkpoints/senti-checkpoint.txt
 rewrite data/networks/.json (99%)
 create mode 100644 large network image.png
 create mode 100644 rappers_network_nodes.json
 create mode 100644 rappers_network_senti.json
 create mode 100644 rappers_network_senti_reg.json


In [14]:
# Version-control housekeeping (not part of the analysis): merge the remote master branch.
!git pull origin master

Merge made by the 'recursive' strategy.
 .../Casey Veggies-checkpoint.txt                   |    0
 .../.ipynb_checkpoints/Celph Titled-checkpoint.txt |    0
 data/lyrics/Canibus.txt                            |    1 +
 data/lyrics/Capital Steez.txt                      |    1 +
 data/lyrics/Capone.txt                             |    1 +
 data/lyrics/Cappadonna.txt                         |    1 +
 data/lyrics/Cardi B.txt                            |    1 +
 data/lyrics/Casey Veggies.txt                      |    1 +
 data/lyrics/Cash Out.txt                           |    1 +
 data/lyrics/Cashis.txt                             |    0
 data/lyrics/Caskey.txt                             |    1 +
 data/lyrics/Casper Nyovest.txt                     |    0
 data/lyrics/Cassidy.txt                            |    1 +
 data/lyrics/Cazwell.txt                            |    1 +
 data/lyrics/CeeLo Green.txt                        |    1 +
 data/lyrics/Cellski.txt                            |

From https://github.com/allengueco/rap_aggression
 * branch            master     -> FETCH_HEAD
   a599cb7..1103863  master     -> origin/master


In [15]:
# Version-control housekeeping (not part of the analysis): publish the local commits.
!git push origin master

To https://github.com/allengueco/rap_aggression.git
   1103863..750f944  master -> master
