In [288]:
# Importing necessary libraries
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import seaborn as sns
import plotly.express as px
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

In [289]:
# Accumulators for the scraped chart data: one entry per song, titles and artists kept in parallel
song_titles, song_artists = [], []

In [290]:
# Chart page to scrape
link = "https://www.billboard.com/charts/year-end/2022/pop-songs/"

# Fetch the page. The explicit timeout keeps the cell from hanging indefinitely
# on a stalled connection (requests applies no timeout unless one is given).
response = requests.get(link, timeout=30)

# Stop here on an HTTP error status instead of parsing an error page and
# silently ending up with empty song lists in the cells that follow.
response.raise_for_status()

# Parse the returned HTML so the chart rows can be located by tag name and CSS class
soup = BeautifulSoup(response.text, 'html.parser')

In [291]:
# Empty the lists in place first so that re-running this cell does not append
# the whole chart a second time.
song_titles.clear()
song_artists.clear()

# Every div with class "o-chart-results-list-row-container" is treated as one chart row
for song in soup.find_all("div", class_="o-chart-results-list-row-container"):
    # The title is read from the first h3 with class "c-title" inside the row
    title_tag = song.find("h3", class_="c-title")
    # The artist is read from the second span with class "c-label" inside the row
    label_tags = song.find_all("span", class_="c-label")

    # Skip rows that lack either element. Both lists are only appended to once
    # both values are known, so titles and artists can never get out of step.
    if title_tag is None or len(label_tags) < 2:
        continue

    title = title_tag.text.strip()
    artist = label_tags[1].text.strip()

    song_titles.append(title)
    song_artists.append(artist)

    # Echo the row as "title, artist"
    print(title, artist, sep=", ")

As It Was, Harry Styles
Thats What I Want, Lil Nas X
Heat Waves, Glass Animals
Stay, The Kid LAROI & Justin Bieber
About Damn Time, Lizzo
Big Energy, Latto
Ghost, Justin Bieber
Need To Know, Doja Cat
Sunroof, Nicky Youre & dazy
First Class, Jack Harlow
Easy On Me, Adele
Late Night Talking, Harry Styles
Woman, Doja Cat
I Like You (A Happier Song), Post Malone Featuring Doja Cat
Industry Baby, Lil Nas X & Jack Harlow
One Right Now, Post Malone & The Weeknd
Boyfriend, Dove Cameron
abcdefu, GAYLE
Shivers, Ed Sheeran
Enemy, Imagine Dragons X JID
Numb Little Bug, Em Beihold
Vegas, Doja Cat
Sweetest Pie, Megan Thee Stallion & Dua Lipa
Get Into It (Yuh), Doja Cat
I Ain't Worried, OneRepublic
Running Up That Hill (A Deal With God), Kate Bush
Bad Habit, Steve Lacy
Bam Bam, Camila Cabello Featuring Ed Sheeran
Better Days, NEIKED X Mae Muller X Polo G
Bad Habits, Ed Sheeran
Fingers Crossed, Lauren Spencer-Smith
Cold Heart (PNAU Remix), Elton John & Dua Lipa
Thousand Miles, The Kid LAROI
She's All 

In [292]:
# The scrape yields 50 chart rows, so cap both lists at 50. The previous limit
# of 51 was off by one: it would let one extra (e.g. duplicated) row through
# whenever the lists held more than a single pass over the chart.
CHART_SIZE = 50
song_titles = song_titles[:CHART_SIZE]
song_artists = song_artists[:CHART_SIZE]

In [293]:
# Report how many titles and how many artists were collected, one count per line
for collected in (song_titles, song_artists):
    print(len(collected))

50
50


In [294]:
# Pair each title with its artist, then build a two-column DataFrame from those rows
chart_rows = list(zip(song_titles, song_artists))
df = pd.DataFrame(chart_rows, columns=['Song Title', 'Artist'])

In [295]:
# Display the DataFrame: a bare expression on the last line of a cell is rendered as the cell's output
df

Unnamed: 0,Song Title,Artist
0,As It Was,Harry Styles
1,Thats What I Want,Lil Nas X
2,Heat Waves,Glass Animals
3,Stay,The Kid LAROI & Justin Bieber
4,About Damn Time,Lizzo
5,Big Energy,Latto
6,Ghost,Justin Bieber
7,Need To Know,Doja Cat
8,Sunroof,Nicky Youre & dazy
9,First Class,Jack Harlow


In [296]:
# Collaborators are listed after a separator: '&', 'Featuring', or a standalone 'X'.
# Remove the separator and everything after it so only the first credited artist remains.
#
# Two guards keep the pattern from cutting into an artist's own name:
#   * the separator must be followed by whitespace, so a word that merely starts
#     with 'X' (e.g. "Charli XCX") is not mistaken for a separator;
#   * an 'X' that is directly followed by '&' or 'Featuring' is treated as part
#     of the name, so "Lil Nas X & Jack Harlow" becomes "Lil Nas X" rather than "Lil Nas".
# A name ending in a standalone 'X' that is followed by an 'X' separator remains ambiguous.
pattern = r'\s+(?:&|Featuring|X(?!\s+(?:&|Featuring\b)))\s.*'

# Strip the collaborator suffix from every value in the 'Artist' column
df['Artist'] = df['Artist'].str.replace(pattern, '', regex=True)

# Display the updated DataFrame
df

Unnamed: 0,Song Title,Artist
0,As It Was,Harry Styles
1,Thats What I Want,Lil Nas X
2,Heat Waves,Glass Animals
3,Stay,The Kid LAROI
4,About Damn Time,Lizzo
5,Big Energy,Latto
6,Ghost,Justin Bieber
7,Need To Know,Doja Cat
8,Sunroof,Nicky Youre
9,First Class,Jack Harlow


In [298]:
# Read the credentials file; line 1 is expected to hold the client ID and
# line 2 the client secret, each written as "label: value".
with open("SpotifyAPI.txt", "r") as f:
    File = f.read()


def _credential_value(line, line_number):
    """Return the stripped text after the first ':' on a "label: value" line.

    Only the first ':' is treated as the separator, so a value that itself
    contains ':' is returned whole instead of being truncated.

    Args:
        line: One line of the credentials file.
        line_number: 1-based line number, used only in the error message.

    Returns:
        The value part of the line with surrounding whitespace removed.

    Raises:
        ValueError: If the line has no ':' or nothing after it. The line's
            content is deliberately left out of the message so a secret is
            never echoed into the notebook output.
    """
    _label, separator, value = line.partition(":")
    value = value.strip()
    if not separator or not value:
        raise ValueError(
            f"SpotifyAPI.txt line {line_number}: expected the form 'label: value'"
        )
    return value


credential_lines = File.splitlines()
if len(credential_lines) < 2:
    raise ValueError(
        "SpotifyAPI.txt must contain the client ID on line 1 and the client secret on line 2"
    )

# Extract the client ID from the first line and the client secret from the second
client_id = _credential_value(credential_lines[0], 1)
client_secret = _credential_value(credential_lines[1], 2)

In [299]:
# Build the credentials manager from the client ID and client secret
client_credentials_manager = SpotifyClientCredentials(
    client_id=client_id,
    client_secret=client_secret,
)

# Spotify client that authenticates through the credentials manager
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [300]:
# Sanity-check the client with a fixed query that filters on track name and artist
result = sp.search(q='track:sicko mode artist:travis scott', type='track')

# Show the Spotify URI of the first item in the search results
first_hit = result['tracks']['items'][0]
print(first_hit['uri'])

spotify:track:2xLMifQCjDGFmkHkpNLD9h


In [301]:
# Spotify URI for each chart entry, in the same order as the DataFrame rows
uri = []

# Walk the two columns in step; only the title and artist of each row are needed
for title, artist in zip(df["Song Title"], df["Artist"]):

    # Search filtered by track name and artist. Only the first hit is used,
    # so ask the API for a single result.
    results = sp.search(q='track:' + title + ' artist:' + artist, type='track', limit=1)
    items = results['tracks']['items']

    # Indexing an empty result list would raise an anonymous "list index out of
    # range"; raise the same exception type but name the song that had no match.
    if not items:
        raise IndexError(f"No Spotify track found for {title!r} by {artist!r}")

    uri.append(items[0]['uri'])

# Add the URI column to the DataFrame
df['URI'] = uri

# Display the updated DataFrame
df

Unnamed: 0,Song Title,Artist,URI
0,As It Was,Harry Styles,spotify:track:4LRPiXqCikLlN15c3yImP7
1,Thats What I Want,Lil Nas X,spotify:track:0e8nrvls4Qqv5Rfa2UhqmO
2,Heat Waves,Glass Animals,spotify:track:02MWAaffLxlfxAUY7c5dvx
3,Stay,The Kid LAROI,spotify:track:5HCyWlXZPP0y6Gqq8TgA20
4,About Damn Time,Lizzo,spotify:track:1PckUlxKqWQs3RlWXVBLw3
5,Big Energy,Latto,spotify:track:6Zu3aw7FfjAF9WA0fA81Oq
6,Ghost,Justin Bieber,spotify:track:6I3mqTwhRpn34SLVafSH7G
7,Need To Know,Doja Cat,spotify:track:3Vi5XqYrmQgOYBajMWSvCi
8,Sunroof,Nicky Youre,spotify:track:4h4QlmocP3IuwYEj2j14p8
9,First Class,Jack Harlow,spotify:track:0wHFktze2PHC5jDt3B17DC


In [302]:
# sp.audio_features accepts a list of tracks, so fetch the features in batches
# instead of making one HTTP request per song.
AUDIO_FEATURES_BATCH_SIZE = 100  # per the spotipy docs, at most 100 tracks per call

# One entry per URI, in the same order as 'uri'
audio_features = []
for start in range(0, len(uri), AUDIO_FEATURES_BATCH_SIZE):
    batch = uri[start:start + AUDIO_FEATURES_BATCH_SIZE]
    audio_features.extend(sp.audio_features(batch))

# An entry may be None when no features are available for a track; record a
# missing value for it rather than failing on the subscript.
tempo = [features['tempo'] if features else None for features in audio_features]
energy = [features['energy'] if features else None for features in audio_features]

# Add tempo and energy as columns of the DataFrame
df['Tempo'] = tempo
df['Energy'] = energy

# Display the resulting DataFrame
df

Unnamed: 0,Song Title,Artist,URI,Tempo,Energy
0,As It Was,Harry Styles,spotify:track:4LRPiXqCikLlN15c3yImP7,173.93,0.731
1,Thats What I Want,Lil Nas X,spotify:track:0e8nrvls4Qqv5Rfa2UhqmO,87.981,0.846
2,Heat Waves,Glass Animals,spotify:track:02MWAaffLxlfxAUY7c5dvx,80.87,0.525
3,Stay,The Kid LAROI,spotify:track:5HCyWlXZPP0y6Gqq8TgA20,169.928,0.764
4,About Damn Time,Lizzo,spotify:track:1PckUlxKqWQs3RlWXVBLw3,108.966,0.743
5,Big Energy,Latto,spotify:track:6Zu3aw7FfjAF9WA0fA81Oq,106.017,0.807
6,Ghost,Justin Bieber,spotify:track:6I3mqTwhRpn34SLVafSH7G,153.96,0.741
7,Need To Know,Doja Cat,spotify:track:3Vi5XqYrmQgOYBajMWSvCi,130.041,0.609
8,Sunroof,Nicky Youre,spotify:track:4h4QlmocP3IuwYEj2j14p8,131.443,0.714
9,First Class,Jack Harlow,spotify:track:0wHFktze2PHC5jDt3B17DC,107.005,0.582


In [308]:
# Write the DataFrame to a CSV file, leaving out the row index
output_path = 'pop_data.csv'
df.to_csv(output_path, index=False)