#### Two functions present ####
1  frequency_songs_together_by_name  
2  frequency_songs_together_by_id

These functions allow the user to input either the names or the Spotify ID numbers of two songs, and they return
the fraction of times those two songs occur together in the million playlists.

In [1]:
import numpy as np
import pandas as pd
import os

In [108]:
# Names of the playlist files to search: songs0.csv, songs1.csv, ..., songs9.csv.
# 10 files are listed (per the original note, this covers 100,000 playlists).
filelist = ['songs' + str(i) + '.csv' for i in range(10)]

In [80]:
def frequency_songs_together_by_name(song1, song2, filelist):
    '''Measure how often two songs, given by name, appear on the same playlist.

       Each file in filelist is read from RAW_DATA/songs/ and split into
       playlists by its 'pid' column. A playlist counts as containing a song
       when any of its 'track_name' values contains the given name as a
       case-insensitive, literal substring (the name is NOT treated as a
       regular expression). Rows with a missing track name never match.

        Input:
           song1 (string) name of one of the songs to search for
           song2 (string) name of the other song to search for
           filelist (list) list of file names to read playlists from

        Output (tuple, in this order):
           fraction_together (float), fraction of searched playlists that
               contain both songs; 0.0 when no playlists were searched
           num_playlists_occuring_together (int), number of playlists with
               both song1 and song2
           num_playlists_searched (int), number of playlists searched
           playlist_list (list), 'pid' values of the playlists that contain
               both songs
    '''
    # Lower-case the search terms once instead of once per playlist.
    needle1 = song1.lower()
    needle2 = song2.lower()

    playlist_list = []
    num_playlists_searched = 0
    for file in filelist:
        df = pd.read_csv('RAW_DATA/songs/' + file, encoding='utf-8')  # read file to df
        titles = df['track_name'].str.lower()
        # regex=False: song titles often contain regex metacharacters such as
        # "(", "?" or "."; na=False: a missing title is simply "no match".
        has_song1 = titles.str.contains(needle1, regex=False, na=False)
        has_song2 = titles.str.contains(needle2, regex=False, na=False)
        # Reduce the per-track flags to one flag per playlist (pid); a playlist
        # qualifies only when each song matched at least one of its tracks.
        pids = df['pid']
        has_both = has_song1.groupby(pids).any() & has_song2.groupby(pids).any()
        num_playlists_searched += len(has_both)
        playlist_list.extend(has_both[has_both].index.tolist())

    num_playlists_occuring_together = len(playlist_list)
    # Guard against an empty filelist (or files without any playlists).
    if num_playlists_searched:
        fraction_together = num_playlists_occuring_together / num_playlists_searched
    else:
        fraction_together = 0.0
    return fraction_together, num_playlists_occuring_together, num_playlists_searched, playlist_list

In [72]:
def frequency_songs_together_by_id(song1, song2, filelist):
    '''Measure how often two songs, given by Spotify id, appear on the same playlist.

       Each file in filelist is read from RAW_DATA/songs/ and split into
       playlists by its 'pid' column. A playlist counts as containing a song
       when any of its 'track_uri' values contains the given id as a
       case-insensitive, literal substring (the id is NOT treated as a
       regular expression), so either a bare id or a full URI can be passed.
       Rows with a missing track URI never match.

        Input:
           song1 (string) spotify id number of one of the songs to search for
           song2 (string) spotify id number of the other song to search for
           filelist (list) list of file names to read playlists from

        Output (tuple, in this order):
           fraction_together (float), fraction of searched playlists that
               contain both songs; 0.0 when no playlists were searched
           num_playlists_occuring_together (int), number of playlists with
               both song1 and song2
           num_playlists_searched (int), number of playlists searched
           playlist_list (list), 'pid' values of the playlists that contain
               both songs
    '''
    # Lower-case the search terms once instead of once per playlist.
    # NOTE(review): Spotify ids are case-sensitive, so case-folding could in
    # principle conflate two distinct ids; kept for backward compatibility.
    needle1 = song1.lower()
    needle2 = song2.lower()

    playlist_list = []
    num_playlists_searched = 0
    for file in filelist:
        df = pd.read_csv('RAW_DATA/songs/' + file, encoding='utf-8')  # read file to df
        uris = df['track_uri'].str.lower()
        # regex=False: match the id literally; na=False: a missing URI is
        # simply "no match".
        has_song1 = uris.str.contains(needle1, regex=False, na=False)
        has_song2 = uris.str.contains(needle2, regex=False, na=False)
        # Reduce the per-track flags to one flag per playlist (pid); a playlist
        # qualifies only when each song matched at least one of its tracks.
        pids = df['pid']
        has_both = has_song1.groupby(pids).any() & has_song2.groupby(pids).any()
        num_playlists_searched += len(has_both)
        playlist_list.extend(has_both[has_both].index.tolist())

    num_playlists_occuring_together = len(playlist_list)
    # Guard against an empty filelist (or files without any playlists).
    if num_playlists_searched:
        fraction_together = num_playlists_occuring_together / num_playlists_searched
    else:
        fraction_together = 0.0
    return fraction_together, num_playlists_occuring_together, num_playlists_searched, playlist_list