In [7]:
import os,re
import requests as rq
import numpy as np
import pandas as pd
import networkx as nx
#from networkx.algorithms import community
import matplotlib.pyplot as plt
import seaborn as sns
import spotipy
import json
import community

In [2]:
def get_rappers():
    """Load the rapper names listed in ``data/rappers.txt``, one per line.

    Every line is cleaned with ``validate`` and stripped of surrounding
    whitespace. Lines that are empty after cleaning (for example the one
    produced by a trailing newline) are dropped.

    Returns:
        list: the cleaned names, in file order.
    """

    def validate(name):
        """Return ``name`` without any of the characters ``| ? : ^ * "``.

        When a pipe appears after the first character, only the segment
        between the first and the second pipe is kept; a pipe that is still
        present afterwards is deleted like the other characters.
        """
        illegal_chars = "|?:^*\""

        if name.find("|") > 0:
            name = name.split("|")[1]

        # Delete every occurrence, wherever it sits. The previous
        # ``find(c) > 0`` test skipped names that *start* with a quote, and
        # ``replace(..., 2)`` left any third or later quote in place.
        for c in illegal_chars:
            name = name.replace(c, "")
        return name

    # A plain string path behaves the same on Python 2 and also works on
    # Python 3, where ``str`` has no ``.decode()`` method.
    with open("data/rappers.txt", "r") as f:
        lines = f.read().split("\n")

    cleaned = (validate(line).strip() for line in lines)
    return [name for name in cleaned if name]
# Load the cleaned rapper names once; the network-building cell below reads this list.
rappers = get_rappers()

In [3]:
def find_collaborators(r):
    """Collect every collaborator credited on the tracks of rapper ``r``.

    Reads ``data/albums/<r>.txt`` and evaluates its contents as a Python
    expression. The result is iterated as a sequence of albums; each album's
    ``'tracks'`` entry is iterated as a sequence of tracks, and the items of
    each track's ``'collaborators'`` entry are gathered.

    Args:
        r: rapper name, either as text or as UTF-8 encoded bytes.

    Returns:
        list: the collaborators in file order, duplicates included.

    Raises:
        SyntaxError: if the file content is not a valid Python expression.
    """
    # Only byte strings need decoding. Calling .decode() unconditionally fails
    # on text input (Python 3 str, or non-ASCII Python 2 unicode).
    name = r.decode('utf-8') if isinstance(r, bytes) else r

    with open("data/albums/%s.txt" % name, "r") as fp:
        # SECURITY: eval() runs whatever code the file contains. Only use this
        # on album files you generated yourself; if they hold plain literals,
        # consider switching to ast.literal_eval (callers would then also need
        # to handle ValueError).
        albums = eval(fp.read())

    return [
        collaborator
        for album in albums
        for track in album['tracks']
        for collaborator in track['collaborators']
    ]

In [4]:
def get_collaborator_count(collaborators, r):
    """Tally how often each collaborator appears, ignoring the rapper ``r``.

    The resulting counts are used as edge weights when the network is built.

    Args:
        collaborators: iterable of collaborator names, duplicates allowed.
        r: the rapper whose own name must not be counted.

    Returns:
        dict: collaborator name -> number of occurrences in ``collaborators``.
    """
    tally = {}
    for name in collaborators:
        # a rapper is never their own collaborator
        if name != r:
            # first sighting starts from 0, later ones build on the stored count
            tally[name] = tally.get(name, 0) + 1
    return tally

In [19]:
def save_network(network, name=None):
    """Write ``network`` to ``data/networks/<name>.json`` in node-link format.

    Args:
        network: the networkx graph to serialise.
        name: base name of the output file (without extension). When omitted,
            the graph's ``name`` attribute is used; if that is empty the name
            falls back to ``str(network)``, which is what the file name was
            built from before this parameter existed.
    """
    if name is None:
        # NOTE(review): what str(network) yields for a graph depends on the
        # installed networkx version -- pass ``name`` explicitly (or set
        # ``network.name``) to get a predictable file name.
        name = getattr(network, 'name', '') or ('%s' % network)

    # Create the output folder on first use instead of failing in open().
    directory = 'data/networks'
    if not os.path.isdir(directory):
        os.makedirs(directory)

    with open(('data/networks/%s.json' % name), 'w') as fp:
        json.dump(nx.node_link_data(network), fp)

In [20]:
def create_network(rappers, network):
    """Fill ``network`` with weighted collaboration edges, then save it.

    For every rapper, the collaborators returned by ``find_collaborators``
    are counted. For each collaborator that is also one of ``rappers``, an
    edge ``rapper -- collaborator`` is added whose ``weight`` is the number
    of collaborations. Afterwards each node receives a ``'group'`` attribute
    taken from ``community.best_partition`` and the graph is passed to
    ``save_network``.

    Rappers whose album data raises ``IndexError`` or ``SyntaxError`` are
    skipped and reported with a printed message.

    Args:
        rappers: iterable of rapper names; only these names can become nodes.
        network: graph to populate; it is modified in place.

    Returns:
        Whatever ``save_network(network)`` returns.
    """
    # Materialise once: the list fixes the processing order, the set gives
    # constant-time membership tests instead of rebuilding and scanning a
    # list for every candidate edge.
    rapper_list = list(rappers)
    known_rappers = set(rapper_list)

    errors = []
    for r in rapper_list:
        try:
            #get the collaborators from the data file in a list
            collaborators = find_collaborators(r)
        except (IndexError, SyntaxError):
            errors.append(r)
            continue

        col_dict = get_collaborator_count(collaborators, r)

        for c in set(collaborators):
            #don't include self as a collaborator, nor anyone outside the rapper list
            if c == r or c not in known_rappers:
                continue

            #get the weight for the edge as number of collaborations from dict
            w = col_dict[c]
            #add the weighted edges
            network.add_edge(r, c, weight=w)

            # Drop a collaborator whose only edge so far is this
            # single-collaboration one.
            # NOTE(review): the check runs at insertion time, so the outcome
            # depends on the order in which rappers are processed -- confirm
            # this pruning is intended.
            if network.degree(c) == 1 and w == 1:
                network.remove_node(c)

    # Surface skipped rappers instead of discarding the list silently.
    if errors:
        print("Skipped %d rapper(s) whose album data could not be parsed: %r"
              % (len(errors), errors))

    #sort the network into communities using the python-louvain method
    bb = community.best_partition(network)  # dict of node-community pairs
    nx.set_node_attributes(network, bb, 'group')

    #save the network data in a file so that you can look at it later
    return save_network(network)
    
   

In [21]:
# Start from an empty undirected graph; create_network adds the edges to it in place.
collaboration_network_communities = nx.Graph()
# create_network returns the result of save_network, which has no return
# statement, so comms is None; the populated graph is the variable above.
comms = create_network(rappers, collaboration_network_communities)

In [178]:
# NOTE(review): collaboration_network is not defined in any cell shown here, so
# this cell raises NameError on a fresh kernel unless it is created elsewhere -- confirm.
with open('rappers_network.json', 'w') as fp:
    json.dump(nx.node_link_data(collaboration_network), fp)

In [16]:
# Export the graph built above (including its 'group' node attribute) as
# node-link JSON, written relative to the current working directory.
with open('rappers_network_communities.json', 'w') as fp:
    json.dump(nx.node_link_data(collaboration_network_communities), fp)