In [106]:
# Importing necessary libraries
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

In [87]:
# Creating empty lists to store song titles and artists
song_titles = []
song_artists = []

In [88]:
# Defining the URL to scrape
link = "https://www.billboard.com/charts/year-end/2022/hot-rap-songs/"

# Using requests library to make a GET request to the URL and store the response
response = requests.get(link)

# print(response.status_code)

# Using BeautifulSoup library to parse the HTML content of the response text and create a BeautifulSoup object
soup = BeautifulSoup(response.text, 'html.parser')

In [89]:
# Loop through all the HTML elements with class "o-chart-results-list-row-container"
for song in soup.find_all("div", class_="o-chart-results-list-row-container"):
    # Find the first h3 element with class "c-title" inside the current element
    song_name = song.find_all("h3", class_="c-title")
    # Append the text content of this element to the song_titles list
    song_titles.append(song_name[0].text.strip())
    # Print the song name (without leading/trailing whitespace) to the console
    print(song_name[0].text.strip(), end=", ")
    
    # Find the second span element with class "c-label" inside the current element
    song_artist = song.find_all("span", class_="c-label")
    # Append the text content of this element to the song_artists list
    song_artists.append(song_artist[1].text.strip())
    # Print the song artist (without leading/trailing whitespace) to the console
    print(song_artist[1].text.strip())

First Class, Jack Harlow
Wait For U, Future Featuring Drake & Tems
Big Energy, Latto
Super Gremlin, Kodak Black
Industry Baby, Lil Nas X & Jack Harlow
Jimmy Cooks, Drake Featuring 21 Savage
Vegas, Doja Cat
In A Minute, Lil Baby
Pushin P, Gunna & Future Featuring Young Thug
Sweetest Pie, Megan Thee Stallion & Dua Lipa
Super Freaky Girl, Nicki Minaj
Knife Talk, Drake Featuring 21 Savage & Project Pat
Way 2 Sexy, Drake Featuring Future & Young Thug
Broadway Girls, Lil Durk Featuring Morgan Wallen
What Happened To Virgil, Lil Durk Featuring Gunna
Puffin On Zootiez, Future
Girls Want Girls, Drake Featuring Lil Baby
To The Moon!, JNR CHOI & Sam Tompkins
Sticky, Drake
Who Want Smoke??, Nardo Wick Featuring G Herbo, Lil Durk & 21 Savage
Betty (Get Money), Yung Gravy
Cooped Up, Post Malone Featuring Roddy Ricch
F.N.F. (Let's Go), Hitkidd & GloRilla
Staying Alive, DJ Khaled Featuring Drake & Lil Baby
Sleazy Flow, SleazyWorld Go Featuring Lil Baby
Right On, Lil Baby
Get Into It (Yuh), Doja Cat
By

In [90]:
# Slice the lists to keep only the first 51 elements
song_titles = song_titles[0:51]
song_artists = song_artists[0:51]

In [91]:
# Print the number of songs and artists captured from the webpage
print(len(song_titles))
print(len(song_artists))

50
50


In [92]:
# Create a new pandas DataFrame from a list of song titles and artists
df = pd.DataFrame(list(zip(song_titles, song_artists)), columns=['Song Title', 'Artist'])

In [93]:
#printing the DataFrame
df

Unnamed: 0,Song Title,Artist
0,First Class,Jack Harlow
1,Wait For U,Future Featuring Drake & Tems
2,Big Energy,Latto
3,Super Gremlin,Kodak Black
4,Industry Baby,Lil Nas X & Jack Harlow
5,Jimmy Cooks,Drake Featuring 21 Savage
6,Vegas,Doja Cat
7,In A Minute,Lil Baby
8,Pushin P,Gunna & Future Featuring Young Thug
9,Sweetest Pie,Megan Thee Stallion & Dua Lipa


In [94]:
# Define a regular expression pattern to match strings containing '&', 'Featuring', or 'X'
pattern = r'\s&.+|\sFeaturing.+|\sX.+'

# Use the pattern to replace any matching substrings in the 'Artist' column of the DataFrame with an empty string
df['Artist'] = df['Artist'].str.replace(pattern, '', regex=True)

# Display the updated DataFrame
df

Unnamed: 0,Song Title,Artist
0,First Class,Jack Harlow
1,Wait For U,Future
2,Big Energy,Latto
3,Super Gremlin,Kodak Black
4,Industry Baby,Lil Nas
5,Jimmy Cooks,Drake
6,Vegas,Doja Cat
7,In A Minute,Lil Baby
8,Pushin P,Gunna
9,Sweetest Pie,Megan Thee Stallion


In [96]:
# Open the file "SpotifyAPI.txt" in read mode
with open("SpotifyAPI.txt", "r") as f:
    File = f.read()

# Extract the client ID from the first line of the file
client_id = File.split("\n")[0]
client_id = client_id.split(":")[1]
client_id = client_id.strip()

# Extract the client secret from the second line of the file
client_secret = File.split("\n")[1]
client_secret = client_secret.split(":")[1]
client_secret = client_secret.strip()

In [97]:
# Create a Spotify client credentials manager using the client ID and client secret
client_credentials_manager = SpotifyClientCredentials(client_id, client_secret)

# Create a new Spotify object with the client credentials manager
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [98]:
# Use the Spotify object to search for a track by name and artist
result = sp.search(q='track:sicko mode artist:travis scott', type='track')

# Print the Spotify URI of the first result
print(result['tracks']['items'][0]['uri'])

spotify:track:2xLMifQCjDGFmkHkpNLD9h


In [99]:
# Create an empty list to store the Spotify URIs
uri = []

# Loop through each row of the DataFrame
for i, row in df.iterrows():
    
    # Use the Spotify object to search for a track by name and artist
    results = sp.search(q=f'track:'+row["Song Title"]+' artist:'+row["Artist"], type='track')
    
    # Get the Spotify URI of the first result and append it to the URI list
    uri.append(results['tracks']['items'][0]['uri'])

# Add the URI column to the DataFrame
df['URI'] = uri

# Print the updated DataFrame
df

Unnamed: 0,Song Title,Artist,URI
0,First Class,Jack Harlow,spotify:track:0wHFktze2PHC5jDt3B17DC
1,Wait For U,Future,spotify:track:59nOXPmaKlBfGMDeOVGrIK
2,Big Energy,Latto,spotify:track:6Zu3aw7FfjAF9WA0fA81Oq
3,Super Gremlin,Kodak Black,spotify:track:4A8cWXxKfIL3lAyUDzXbCF
4,Industry Baby,Lil Nas,spotify:track:27NovPIUIRrOZoCHxABJwK
5,Jimmy Cooks,Drake,spotify:track:3F5CgOj3wFlRv51JsHbxhe
6,Vegas,Doja Cat,spotify:track:0hquQWY3xvYqN4qtiquniF
7,In A Minute,Lil Baby,spotify:track:31lopd32BkJBTSgYE16c5e
8,Pushin P,Gunna,spotify:track:3XOalgusokruzA5ZBA2Qcb
9,Sweetest Pie,Megan Thee Stallion,spotify:track:7mFj0LlWtEJaEigguaWqYh


In [110]:
#checked audio features for a particular songs
audio_features = sp.audio_features("spotify:track:0wHFktze2PHC5jDt3B17DC")
print(audio_features)

[{'danceability': 0.902, 'energy': 0.582, 'key': 5, 'loudness': -5.902, 'mode': 0, 'speechiness': 0.109, 'acousticness': 0.111, 'instrumentalness': 3.18e-06, 'liveness': 0.111, 'valence': 0.332, 'tempo': 107.005, 'type': 'audio_features', 'id': '0wHFktze2PHC5jDt3B17DC', 'uri': 'spotify:track:0wHFktze2PHC5jDt3B17DC', 'track_href': 'https://api.spotify.com/v1/tracks/0wHFktze2PHC5jDt3B17DC', 'analysis_url': 'https://api.spotify.com/v1/audio-analysis/0wHFktze2PHC5jDt3B17DC', 'duration_ms': 173948, 'time_signature': 4}]


In [117]:
# Create empty lists to store tempo and energy values
tempo = []
energy = []

# Loop through each track URI in the variable 'uri'
for i in uri:
    # Retrieve audio features of each track using Spotify API
    audio_features = sp.audio_features(i)
    # Extract tempo and energy values from the audio features and append them to their respective lists
    tempo.append(audio_features[0]['tempo'])
    energy.append(audio_features[0]['energy'])

# Add tempo and energy values as columns to an existing pandas DataFrame 'df'
df['Tempo'] = tempo
df['Energy'] = energy

# Print the resulting DataFrame 'df'
df

Unnamed: 0,Song Title,Artist,URI,Tempo,Energy
0,First Class,Jack Harlow,spotify:track:0wHFktze2PHC5jDt3B17DC,107.005,0.582
1,Wait For U,Future,spotify:track:59nOXPmaKlBfGMDeOVGrIK,83.389,0.642
2,Big Energy,Latto,spotify:track:6Zu3aw7FfjAF9WA0fA81Oq,106.017,0.807
3,Super Gremlin,Kodak Black,spotify:track:4A8cWXxKfIL3lAyUDzXbCF,72.993,0.414
4,Industry Baby,Lil Nas,spotify:track:27NovPIUIRrOZoCHxABJwK,149.995,0.704
5,Jimmy Cooks,Drake,spotify:track:3F5CgOj3wFlRv51JsHbxhe,165.921,0.673
6,Vegas,Doja Cat,spotify:track:0hquQWY3xvYqN4qtiquniF,159.969,0.601
7,In A Minute,Lil Baby,spotify:track:31lopd32BkJBTSgYE16c5e,113.42,0.583
8,Pushin P,Gunna,spotify:track:3XOalgusokruzA5ZBA2Qcb,77.502,0.422
9,Sweetest Pie,Megan Thee Stallion,spotify:track:7mFj0LlWtEJaEigguaWqYh,123.977,0.628


In [123]:
#Save the df as a csv file
df.to_csv('rap_data.csv', index=False)