In [1]:
# This section of code merges the Top20byUSCity and Top20inUS dataframes and calculates each song's importance
# This code also creates a new dataframe containing only Important songs
# Important songs are those that have broken into at least one city's Top20 Charts but have not broken into the US Top20 Charts

import pandas as pd
import numpy as np

# Load the two Top20 Shazamed-song charts (per city and US-wide) from their CSV files
cityChartCsv = "/Users/zacharywong/github/zacharywong2023/AtlanticRecords/IntermediateDataFrames/Top20byUSCity.csv"
usChartCsv = "/Users/zacharywong/github/zacharywong2023/AtlanticRecords/IntermediateDataFrames/Top20inUS.csv"
dfCityTop20, dfUSTop20 = (pd.read_csv(csvPath) for csvPath in (cityChartCsv, usChartCsv))

# Preview the per-city chart
dfCityTop20.head()

Unnamed: 0,Song Name,Artist,New York City,Los Angeles,Chicago,Houston,Atlanta,Dallas,Miami,Phoenix,...,Philadelphia,Boston,Anaheim,Denver,Las Vegas,San Diego,Detroit,Seattle,San Antonio,Orlando
0,love nwantiti [Remix],CKay Feat. Axel & Dj Yo!,1.0,,,,8.0,,12.0,13.0,...,8.0,,,,,,,,,9.0
1,Cold Heart (PNAU Remix),Elton John & Dua Lipa,2.0,2.0,2.0,5.0,15.0,3.0,2.0,1.0,...,1.0,2.0,3.0,2.0,1.0,3.0,2.0,2.0,2.0,1.0
2,Chosen,Blxst & Tyga Feat. Ty Dolla $ign,3.0,,,,18.0,,,,...,10.0,11.0,,,,17.0,,,,
3,I Hate U,SZA,4.0,,,,19.0,,,,...,4.0,,,,,,,,,
4,Need To Know,Doja Cat,5.0,7.0,19.0,17.0,,19.0,13.0,10.0,...,13.0,,12.0,19.0,,13.0,19.0,12.0,11.0,15.0


In [2]:
# Identify every row of the City Chart dataframe by its (Song Name, Artist) pair

songKey = ['Song Name', 'Artist']
dfCityTop20 = dfCityTop20.set_index(keys = songKey)


In [3]:

# Importance Score Calculation
# 1. Add up a song's rankings across all cities (a city where the song did not chart counts as rank 21)
# 2. Divide by the number of cities
# 3. Thus, the most important songs have the lowest Importance Score

# Rank charged for every city in which the song has no Top20 ranking
UnrankedRank = 21

# Columns this notebook derives from the city rankings. They must never be treated as cities,
# otherwise re-running this cell would fold earlier results into the sum of rankings.
DerivedColumns = ['Sum of Rankings', 'Number of Cities without Rank',
                  'Importance Score', 'Number of Cities where Song Broke Top20']
CityColumns = [col for col in dfCityTop20.columns if col not in DerivedColumns]

# Derived from the data instead of hard-coded, so the score stays correct if cities are added or removed
NumberofCities = len(CityColumns)

def calculateImportance(row):
    """Return a copy of `row` with an 'Importance Score' entry added.

    `row` must provide 'Sum of Rankings' and 'Number of Cities without Rank'.
    Only label lookups and arithmetic are used, so `row` may be a single row
    (Series) or a whole DataFrame, in which case every song is scored at once.
    """
    # Work on a copy: functions passed to DataFrame.apply must not mutate their argument
    row = row.copy()
    row['Importance Score'] = (row['Sum of Rankings'] + (row['Number of Cities without Rank'] * UnrankedRank))/NumberofCities
    return row

# Helper columns are computed from the city columns only
cityRanks = dfCityTop20[CityColumns]
dfCityTop20['Sum of Rankings'] = cityRanks.sum(axis = 1)                        # NaN (unranked) is skipped by sum
dfCityTop20['Number of Cities without Rank'] = cityRanks.isna().sum(axis = 1)

# Score all songs in a single vectorized call instead of a row-by-row apply
dfCityTop20 = calculateImportance(dfCityTop20)

#Clean up dataframe: the helper columns were only needed for the calculation

dfCityTop20 = dfCityTop20.drop(columns = ['Sum of Rankings', 'Number of Cities without Rank'])

dfCityTop20.head()


Unnamed: 0_level_0,Unnamed: 1_level_0,New York City,Los Angeles,Chicago,Houston,Atlanta,Dallas,Miami,Phoenix,Riverside,"Washington, D.C.",...,Boston,Anaheim,Denver,Las Vegas,San Diego,Detroit,Seattle,San Antonio,Orlando,Importance Score
Song Name,Artist,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
love nwantiti [Remix],CKay Feat. Axel & Dj Yo!,1.0,,,,8.0,,12.0,13.0,,,...,,,,,,,,,9.0,17.25
Cold Heart (PNAU Remix),Elton John & Dua Lipa,2.0,2.0,2.0,5.0,15.0,3.0,2.0,1.0,3.0,2.0,...,2.0,3.0,2.0,1.0,3.0,2.0,2.0,2.0,1.0,2.8
Chosen,Blxst & Tyga Feat. Ty Dolla $ign,3.0,,,,18.0,,,,12.0,,...,11.0,,,,17.0,,,,,18.25
I Hate U,SZA,4.0,,,,19.0,,,,,,...,,,,,,,,,,19.2
Need To Know,Doja Cat,5.0,7.0,19.0,17.0,,19.0,13.0,10.0,7.0,,...,,12.0,19.0,,13.0,19.0,12.0,11.0,15.0,14.75


In [4]:
# Identify every row of the USTop20 DataFrame by its (Song Name, Artist) pair

usSongKey = ['Song Name', 'Artist']
dfUSTop20 = dfUSTop20.set_index(keys = usSongKey)
dfUSTop20.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Rank in USTop20 Chart
Song Name,Artist,Unnamed: 2_level_1
The Cowboy In Me (Yellowstone Edition),Tim McGraw,1
Cold Heart (PNAU Remix),Elton John & Dua Lipa,2
Big Energy,Latto,3
Super Gremlin,Kodak Black,4
Shivers,Ed Sheeran,5


In [5]:
# Add a column counting in how many cities the song broke into the Top 20 Charts

# Count the non-missing ranks in the city columns only. The derived columns are excluded by name
# (instead of counting every column and subtracting 1 for 'Importance Score'), so the count stays
# correct when this cell is re-run or when another non-city column is present.
nonCityColumns = ['Importance Score', 'Number of Cities where Song Broke Top20']
dfCityTop20['Number of Cities where Song Broke Top20'] = (
    dfCityTop20
        .drop(columns = nonCityColumns, errors = 'ignore')   # 'ignore': the count column is absent on the first run
        .count(axis = 'columns'))
dfCityTop20.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,New York City,Los Angeles,Chicago,Houston,Atlanta,Dallas,Miami,Phoenix,Riverside,"Washington, D.C.",...,Anaheim,Denver,Las Vegas,San Diego,Detroit,Seattle,San Antonio,Orlando,Importance Score,Number of Cities where Song Broke Top20
Song Name,Artist,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
love nwantiti [Remix],CKay Feat. Axel & Dj Yo!,1.0,,,,8.0,,12.0,13.0,,,...,,,,,,,,9.0,17.25,6
Cold Heart (PNAU Remix),Elton John & Dua Lipa,2.0,2.0,2.0,5.0,15.0,3.0,2.0,1.0,3.0,2.0,...,3.0,2.0,1.0,3.0,2.0,2.0,2.0,1.0,2.8,20
Chosen,Blxst & Tyga Feat. Ty Dolla $ign,3.0,,,,18.0,,,,12.0,,...,,,,17.0,,,,,18.25,6
I Hate U,SZA,4.0,,,,19.0,,,,,,...,,,,,,,,,19.2,3
Need To Know,Doja Cat,5.0,7.0,19.0,17.0,,19.0,13.0,10.0,7.0,,...,12.0,19.0,,13.0,19.0,12.0,11.0,15.0,14.75,16


In [6]:
# Place the US chart and the city chart side by side in one dataframe;
# rows are matched on the (Song Name, Artist) index both frames share

chartsToMerge = [dfUSTop20, dfCityTop20]
df = pd.concat(chartsToMerge, axis = 'columns')
df.head()



Unnamed: 0_level_0,Unnamed: 1_level_0,Rank in USTop20 Chart,New York City,Los Angeles,Chicago,Houston,Atlanta,Dallas,Miami,Phoenix,Riverside,...,Anaheim,Denver,Las Vegas,San Diego,Detroit,Seattle,San Antonio,Orlando,Importance Score,Number of Cities where Song Broke Top20
Song Name,Artist,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
323 Go Crazy,DOM KENNEDY,,,,,,,,,,,...,,,,,,,,,20.75,1
Ameno Amapiano (Remix),Goya Menor & Nektunez,,7.0,,,,17.0,,,,,...,,,,,,,,,19.6,3
Beautiful Lies,Yung Bleu & Kehlani,,,,,14.0,16.0,,,,,...,,,11.0,,14.0,,,,19.55,4
Beggin,Måneskin,20.0,16.0,,17.0,10.0,,,,19.0,,...,16.0,11.0,,14.0,,14.0,16.0,,18.05,10
Better Days,"NEIKED, Mae Muller & Polo G",13.0,,12.0,13.0,,,15.0,11.0,9.0,20.0,...,8.0,16.0,10.0,9.0,9.0,8.0,,19.0,14.4,15


In [7]:
# Flag each song with whether it broke the US Top 20 Chart, i.e. whether it has a US rank

hasUSRank = df['Rank in USTop20 Chart'].notna()
df['Broke USTop20 Charts'] = hasUSRank
df.head()


Unnamed: 0_level_0,Unnamed: 1_level_0,Rank in USTop20 Chart,New York City,Los Angeles,Chicago,Houston,Atlanta,Dallas,Miami,Phoenix,Riverside,...,Denver,Las Vegas,San Diego,Detroit,Seattle,San Antonio,Orlando,Importance Score,Number of Cities where Song Broke Top20,Broke USTop20 Charts
Song Name,Artist,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
323 Go Crazy,DOM KENNEDY,,,,,,,,,,,...,,,,,,,,20.75,1,False
Ameno Amapiano (Remix),Goya Menor & Nektunez,,7.0,,,,17.0,,,,,...,,,,,,,,19.6,3,False
Beautiful Lies,Yung Bleu & Kehlani,,,,,14.0,16.0,,,,,...,,11.0,,14.0,,,,19.55,4,False
Beggin,Måneskin,20.0,16.0,,17.0,10.0,,,,19.0,,...,11.0,,14.0,,14.0,16.0,,18.05,10,True
Better Days,"NEIKED, Mae Muller & Polo G",13.0,,12.0,13.0,,,15.0,11.0,9.0,20.0,...,16.0,10.0,9.0,9.0,8.0,,19.0,14.4,15,True


In [8]:
#Reorder columns and export final, merged dataframe as CSV

# Summary columns come first, in this order; every remaining (city) column follows in its existing order.
# Selecting by name rather than by position makes the reorder independent of the incoming column order
# and safe to re-run (a positional slice would scramble an already-reordered frame).
leadingColumns = ['Importance Score',
                  'Rank in USTop20 Chart',
                  'Number of Cities where Song Broke Top20',
                  'Broke USTop20 Charts']
trailingColumns = [col for col in df.columns if col not in leadingColumns]
columns = leadingColumns + trailingColumns
df = df[columns]

df.to_csv('/Users/zacharywong/github/zacharywong2023/AtlanticRecords/IntermediateDataFrames/Top20inUSandCity.csv')
df.head()



Unnamed: 0_level_0,Unnamed: 1_level_0,Importance Score,Rank in USTop20 Chart,Number of Cities where Song Broke Top20,Broke USTop20 Charts,New York City,Los Angeles,Chicago,Houston,Atlanta,Dallas,...,Philadelphia,Boston,Anaheim,Denver,Las Vegas,San Diego,Detroit,Seattle,San Antonio,Orlando
Song Name,Artist,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
323 Go Crazy,DOM KENNEDY,20.75,,1,False,,,,,,,...,,,,,,,,,,
Ameno Amapiano (Remix),Goya Menor & Nektunez,19.6,,3,False,7.0,,,,17.0,,...,,,,,,,,,,
Beautiful Lies,Yung Bleu & Kehlani,19.55,,4,False,,,,14.0,16.0,,...,,,,,11.0,,14.0,,,
Beggin,Måneskin,18.05,20.0,10,True,16.0,,17.0,10.0,,,...,,18.0,16.0,11.0,,14.0,,14.0,16.0,
Better Days,"NEIKED, Mae Muller & Polo G",14.4,13.0,15,True,,12.0,13.0,,,15.0,...,15.0,9.0,8.0,16.0,10.0,9.0,9.0,8.0,,19.0


In [9]:
# Keep only the important songs: charted in at least one city but not (yet) in the USTop20 Chart

chartedInACity = df['Number of Cities where Song Broke Top20'] > 0
missedUSChart = ~df['Broke USTop20 Charts']
# reset_index turns Song Name and Artist back into ordinary columns
important_df = df[chartedInACity & missedUSChart].reset_index()
important_df.head()

Unnamed: 0,Song Name,Artist,Importance Score,Rank in USTop20 Chart,Number of Cities where Song Broke Top20,Broke USTop20 Charts,New York City,Los Angeles,Chicago,Houston,...,Philadelphia,Boston,Anaheim,Denver,Las Vegas,San Diego,Detroit,Seattle,San Antonio,Orlando
0,323 Go Crazy,DOM KENNEDY,20.75,,1,False,,,,,...,,,,,,,,,,
1,Ameno Amapiano (Remix),Goya Menor & Nektunez,19.6,,3,False,7.0,,,,...,,,,,,,,,,
2,Beautiful Lies,Yung Bleu & Kehlani,19.55,,4,False,,,,14.0,...,,,,,11.0,,14.0,,,
3,Big Subwoofer,"MOUNT WESTMORE, Snoop Dogg, Ice Cube, E-40 & T...",17.0,,6,False,,8.0,,,...,,,13.0,4.0,3.0,10.0,,,,
4,Bubbly (with Drake & Travis Scott),Young Thug,19.6,,3,False,,,,19.0,...,,,,,,,,,,


In [10]:
# Rank the important songs (charted in at least one city, not in the US Top20 Chart):
# drop the 'Rank in USTop20 Chart' column, then index the rows by Importance Score
# and order them ascending so the lowest (most important) scores come first
important_df = important_df.drop(columns = ['Rank in USTop20 Chart'])
important_df = important_df.set_index(['Importance Score'])
important_df = important_df.sort_index(axis = 0, ascending = True)
important_df.head()


Unnamed: 0_level_0,Song Name,Artist,Number of Cities where Song Broke Top20,Broke USTop20 Charts,New York City,Los Angeles,Chicago,Houston,Atlanta,Dallas,...,Philadelphia,Boston,Anaheim,Denver,Las Vegas,San Diego,Detroit,Seattle,San Antonio,Orlando
Importance Score,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
15.55,You Problem,MONSTA X,6,False,,,1.0,,1.0,2.0,...,,,,,,,,,,5.0
16.0,Do It to It,Acraze Feat. Cherish,9,False,11.0,9.0,16.0,,,,...,,,6.0,,20.0,8.0,,,,6.0
16.4,Knife Talk [Mixed],Drake Feat. 21 Savage & Project Pat,11,False,,15.0,,6.0,14.0,10.0,...,18.0,,20.0,17.0,6.0,,,,7.0,
17.0,Big Subwoofer,"MOUNT WESTMORE, Snoop Dogg, Ice Cube, E-40 & T...",6,False,,8.0,,,,,...,,,13.0,4.0,3.0,10.0,,,,
17.25,love nwantiti [Remix],CKay Feat. Axel & Dj Yo!,6,False,1.0,,,,8.0,,...,8.0,,,,,,,,,9.0


In [11]:
# Write the ranked important songs out as a CSV file
importantSongsCsv = '/Users/zacharywong/github/zacharywong2023/AtlanticRecords/IntermediateDataFrames/MostImportantSongsinUS.csv'
important_df.to_csv(importantSongsCsv)
