In this notebook I will wrangle the data for the flop songs (non-hits). I will do this using a random name generator that I will feed to a search function. I will order the results by album release date and then pick a song whose URI is NOT in the billboard dataset.

There are two conditions:
1. The song cannot be part of the billboard hit data
2. The song needs to have been released in the same time period (2001-2020)

Let's get started.

I have downloaded a a list of random english words from MIT that is stored in a csv called Random_Words.csv. 

I will randomly pick words from this list that will be the basis for the tracknames of the flop songs

In [112]:
# Import Modules

import pandas as pd
import random
import datetime
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from random import sample
import math

In [50]:
#import csvs with billboard data and random word list
random_words = pd.read_csv('Random_Words.csv')
billboard = pd.read_csv('BillboardWithURI.csv')

In [51]:
random_words


Unnamed: 0,words
0,a
1,aa
2,aaa
3,aaron
4,ab
...,...
9995,zope
9996,zshops
9997,zu
9998,zum


In [113]:
# get api key from your .env file

import os

from dotenv import load_dotenv

from pathlib import Path

 

dotenv_path = Path('./SpotifyAPIKey.env')

load_dotenv(dotenv_path=dotenv_path)

 

#load_dotenv()

API_CLIENT_ID = os.getenv('SPOTIFY_CLIENT_ID')
API_CLIENT_SECRET = os.getenv('SPOTIFY_CLIENT_SECRET')
 
print(API_CLIENT_ID)
print(API_CLIENT_SECRET)

3f7295d24d8041688f494196baeb607f
42896ec37b22446680c5a485858a4843


Methodology: 


create function to get the random uri from the random word. then get the title and artist from that random uri. store it in a list. the function should look back over the list to make sure there are no matching uris.



In [53]:
URIs = []

In [54]:
for i in range(2001):
    word = random_words.sample(i, replace= True, random_state=42)

type(word)

pandas.core.frame.DataFrame

In [114]:
#Get Flop song URIs

def get_uri(rand_word):
    client_credentials_manager = SpotifyClientCredentials(client_id=API_CLIENT_ID,client_secret=API_CLIENT_SECRET)
    sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)
    track = sp.search(q="track:" +f"{rand_word} + year: 2001-2020",type='track',limit=10)
    result = track['tracks']['items']
    
    for items in result:
        uri = items['id']
        dict1 = items['album'] 
        if uri not in billboard['URI']:
            URIs.append(uri)

In [45]:
get_uri('cheese')

In [56]:
URIs

[]

In [13]:
for lab,row in word.iterrows():
    get_uri(row['words'])

In [14]:
URIs

['3bT8vIfhau8ktNjo7scr9X',
 '2iiCDhPr51If2XCDQJJ0nC',
 '7EOGIqRUHjsnDFoerUgBWY',
 '0qIJPc8EF7EATnXbtNaVZP',
 '0kz35q6rsNDB6d6XHNfLO4',
 '1Jo3yW1tWC4JSjGS2s2DPy',
 '5o7dxKpuojix9gzcNE5DtE',
 '5tN5Y4VcX8K6k0WXpPI7LV',
 '37PoYGJMxkKsSRukMoU3Rc',
 '6DsP6w8eeyl1Hhef6NBVwC',
 '53BZ6XygAoubR5DU5w38Vq',
 '4VMiNOpnjRwSCwmHaUfOMM',
 '6MLsahMK3VvH9NUwXOktdS',
 '5qMcCmuqBDvE6iVxeuqunC',
 '4otQJBpb8okSeykALR3eCH',
 '4OmlsAT8r4q9vPFBvfYgyZ',
 '2WqTKOAUmv7hz9ZzGnXHrY',
 '74atKkOasLOVzvqB6mYgga',
 '5mzoBYJM5WKAMQEOHpcgsx',
 '7ifcJ5DDytGf8aITxmgsLN',
 '0xikWgPgYN9BEes0ieZ8Co',
 '4RIdKsqdglJ5JqTG8kTvw7',
 '3wGeNDu9Ed8qQecbaAJxv9',
 '0lMwaf3vDIAfv8ZOhnXCSG',
 '5cXne3fQ8Q2vbSMwGjunM8',
 '21ovsz7zVD2bbOivYshYDh',
 '3ELvknrrLz8QL36CeIb6Je',
 '356X9AHexTmUwTAz61nV0p',
 '0QEebnjzgRsHR0lO6gHsn0',
 '48ccRS16aHgWplSvvCYQ6y',
 '3ljr9ATeLs2BY9gNp7vm62',
 '2YRDTr0reanrGZOTsWXdek',
 '4iTSGS8Cqjw30SNyuGI92V',
 '4ITftSVpf4XkYoDqqpWjBX',
 '0jFGFUFKNuTTRE2j2gSI8Y',
 '44NiKhtsqtBNJuQLibpRBv',
 '4Sp4uON08GS4KYbB4UHLl5',
 

In [19]:
len(URIs)

18920

In [22]:
#number of unique values in URI flop list and remove duplicates
URI_set = set(URIs)
len(URI_set)

15706

In [24]:
#List of unique Flop song URIs

URI_list = list(URI_set)

In [47]:
# understanding how the sp.track function works
URI = '74atKkOasLOVzvqB6mYgga'
client_credentials_manager = SpotifyClientCredentials(client_id=API_CLIENT_ID,client_secret=API_CLIENT_SECRET)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)
track_name = sp.track(URI)

In [48]:
print(track_name)

{'album': {'album_type': 'album', 'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/3TVXtAsR1Inumwj472S9r4'}, 'href': 'https://api.spotify.com/v1/artists/3TVXtAsR1Inumwj472S9r4', 'id': '3TVXtAsR1Inumwj472S9r4', 'name': 'Drake', 'type': 'artist', 'uri': 'spotify:artist:3TVXtAsR1Inumwj472S9r4'}], 'available_markets': ['AD', 'AE', 'AG', 'AL', 'AM', 'AO', 'AR', 'AT', 'AU', 'AZ', 'BA', 'BB', 'BD', 'BE', 'BF', 'BG', 'BH', 'BI', 'BJ', 'BN', 'BO', 'BR', 'BS', 'BT', 'BW', 'BY', 'BZ', 'CA', 'CD', 'CG', 'CH', 'CI', 'CL', 'CM', 'CO', 'CR', 'CV', 'CW', 'CY', 'CZ', 'DE', 'DJ', 'DK', 'DM', 'DO', 'DZ', 'EC', 'EE', 'EG', 'ES', 'FI', 'FJ', 'FM', 'FR', 'GA', 'GB', 'GD', 'GE', 'GH', 'GM', 'GN', 'GQ', 'GR', 'GT', 'GW', 'GY', 'HK', 'HN', 'HR', 'HT', 'HU', 'ID', 'IE', 'IL', 'IN', 'IQ', 'IS', 'IT', 'JM', 'JO', 'JP', 'KE', 'KG', 'KH', 'KI', 'KM', 'KN', 'KR', 'KW', 'KZ', 'LA', 'LB', 'LC', 'LI', 'LK', 'LR', 'LS', 'LT', 'LU', 'LV', 'LY', 'MA', 'MC', 'MD', 'ME', 'MG', 'MH', 'MK', 'ML', 'MN

In [127]:
def get_track_name (uri):
    client_credentials_manager = SpotifyClientCredentials(client_id=API_CLIENT_ID,client_secret=API_CLIENT_SECRET)
    sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)
    tracks = sp.track(uri)
    
    track_name = tracks['name']
    return track_name

In [128]:
get_track_name('74atKkOasLOVzvqB6mYgga')

'The Real Her'

In [116]:
def get_artist_name(uri):
    client_credentials_manager = SpotifyClientCredentials(client_id=API_CLIENT_ID,client_secret=API_CLIENT_SECRET)
    sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)
    tracks = sp.track(uri)
    
    artist_name = tracks['artists']
    return artist_name[0]['name']

In [117]:
get_artist_name('74atKkOasLOVzvqB6mYgga')

'Drake'

In [118]:
df= pd.DataFrame(URI_list)

In [119]:
df.head()

Unnamed: 0,0
0,1o7S218Z4CG6IVCOudaPcT
1,0OAo7Std90wZ0D97aha6q4
2,0t4z0WaQomQqPONghWn8c2
3,1IyB9D543YtwYDIgSEURUW
4,06c4ArkPjDQbUnn9KAzk39


In [122]:
df = df.rename(columns={0 : 'URI'})

In [123]:
df.head()

Unnamed: 0,URI
0,1o7S218Z4CG6IVCOudaPcT
1,0OAo7Std90wZ0D97aha6q4
2,0t4z0WaQomQqPONghWn8c2
3,1IyB9D543YtwYDIgSEURUW
4,06c4ArkPjDQbUnn9KAzk39


In [130]:
for lab,row in df.iterrows():
    df.loc[lab, 'Title'] = get_track_name(row['URI'])

In [131]:
df.head()

Unnamed: 0,URI,Title
0,1o7S218Z4CG6IVCOudaPcT,Lucy's Love (feat. Lil Wayne)
1,0OAo7Std90wZ0D97aha6q4,"Chevy Music (feat. Don Cisco, Brown Boy & Ramero)"
2,0t4z0WaQomQqPONghWn8c2,Me and Michael
3,1IyB9D543YtwYDIgSEURUW,Tenth Avenue Freeze-Out - Live at the Auditori...
4,06c4ArkPjDQbUnn9KAzk39,Hat Hair (feat. Sky The Pilot & Oweshin)


In [132]:
for lab,row in df.iterrows():
    df.loc[lab, 'Artist'] = get_artist_name(row['URI'])

In [133]:
df.head()

Unnamed: 0,URI,Title,Artist
0,1o7S218Z4CG6IVCOudaPcT,Lucy's Love (feat. Lil Wayne),SiR
1,0OAo7Std90wZ0D97aha6q4,"Chevy Music (feat. Don Cisco, Brown Boy & Ramero)",Baby Bash
2,0t4z0WaQomQqPONghWn8c2,Me and Michael,MGMT
3,1IyB9D543YtwYDIgSEURUW,Tenth Avenue Freeze-Out - Live at the Auditori...,Bruce Springsteen
4,06c4ArkPjDQbUnn9KAzk39,Hat Hair (feat. Sky The Pilot & Oweshin),PEABOD


In [134]:
df= df[['Title', 'Artist', 'URI']]

In [135]:
df.head()

Unnamed: 0,Title,Artist,URI
0,Lucy's Love (feat. Lil Wayne),SiR,1o7S218Z4CG6IVCOudaPcT
1,"Chevy Music (feat. Don Cisco, Brown Boy & Ramero)",Baby Bash,0OAo7Std90wZ0D97aha6q4
2,Me and Michael,MGMT,0t4z0WaQomQqPONghWn8c2
3,Tenth Avenue Freeze-Out - Live at the Auditori...,Bruce Springsteen,1IyB9D543YtwYDIgSEURUW
4,Hat Hair (feat. Sky The Pilot & Oweshin),PEABOD,06c4ArkPjDQbUnn9KAzk39


In [136]:
df.to_csv('flop_songs.csv')