In [6]:
# Import the necessary libraries
import lyricsgenius  # to access the Genius API
import requests      # to make HTTP requests
from bs4 import BeautifulSoup  # to parse HTML content
import pandas as pd  # to manipulate data in a tabular format
import time          # to introduce delays in the program


In [7]:
# Read the Genius API token from the local credentials file "GeniusAPI.txt".
# The token is taken from the third line of that file, which is expected to
# look like "<label>: <token>".
with open("GeniusAPI.txt", "r") as f:
    # Keep the raw contents available under the original name
    File = f.read()

# splitlines() handles both "\n" and "\r\n" line endings
credential_lines = File.splitlines()
if len(credential_lines) < 3:
    raise ValueError(
        'Expected at least three lines in "GeniusAPI.txt"; '
        "the Genius token is read from the third line"
    )

# Split on the FIRST colon only, so a value that itself contains ":" is not truncated
_, separator, token_value = credential_lines[2].partition(":")
if not separator:
    raise ValueError(
        'The third line of "GeniusAPI.txt" must have the form "<label>: <token>"'
    )

# Remove any leading or trailing white space characters from the token
Genius_token = token_value.strip()
if not Genius_token:
    raise ValueError('The third line of "GeniusAPI.txt" contains no token after the colon')

In [8]:
# Scrape the Billboard year-end "Hot Rap Songs" charts for 2013-2022 and collect
# one (title, artist, year) record per chart row into `billboard_list_df`.

# Parallel lists holding the scraped song titles, artists, and chart years
song_titles = []
song_artists = []
years = []

# Loop through the years 2013 to 2022 (inclusive)
for year in range(2013, 2023):

    # Construct the URL for the Billboard Hot Rap Songs chart for the current year
    url = f"https://www.billboard.com/charts/year-end/{year}/hot-rap-songs/"

    # A timeout keeps a stalled connection from hanging the notebook forever;
    # raise_for_status() stops an HTTP error page from being parsed as an empty chart.
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    # Create a BeautifulSoup object from the HTML content of the response
    soup = BeautifulSoup(response.content, "html.parser")

    # Find all the song elements on the page and extract their titles and artists
    for song in soup.find_all("div", class_="o-chart-results-list-row-container"):
        song_name = song.find_all("h3", class_="c-title")
        song_artist = song.find_all("span", class_="c-label")

        # The title is the first "c-title" heading and the artist is the second
        # "c-label" span. Validate both BEFORE appending anything so the three
        # lists can never get out of step with each other.
        if not song_name or len(song_artist) < 2:
            print(f"Skipping a {year} chart row with unexpected markup")
            continue

        song_titles.append(song_name[0].text.strip())
        song_artists.append(song_artist[1].text.strip())

        # Add the current year to the list of years
        years.append(year)

# Build the DataFrame and drop exact duplicate rows in one expression
# (the original index labels of the surviving rows are kept).
billboard_list_df = pd.DataFrame(
    {"title": song_titles, "artist": song_artists, "year": years}
).drop_duplicates()

In [9]:
# Look up the lyrics of every charted song on Genius and store them in a
# "lyrics" column of `lyrics_df` (None where no lyrics could be retrieved).

# Create a Genius object using the API token retrieved from the "GeniusAPI.txt" file
genius = lyricsgenius.Genius(Genius_token)

# Work on an independent copy, created once before the loop: the chart table
# from the previous cell is left untouched, re-running this cell starts from a
# clean state, and `lyrics_df` exists even when the chart table is empty.
lyrics_df = billboard_list_df.copy()

# Start with "no lyrics" everywhere; rows are filled in as lookups succeed, so
# partial results survive if the loop is interrupted.
lyrics_df["lyrics"] = None

# Loop through each row of the Billboard Hot Rap Songs DataFrame
for index, row in lyrics_df.iterrows():
    title = row["title"]
    artist = row["artist"]

    # Search for the lyrics of the current song using the Genius API.
    # Catch Exception rather than using a bare except so that Ctrl-C /
    # kernel interrupt still stops this long-running loop.
    try:
        song = genius.search_song(title, artist)
    except Exception as error:
        print(f'Lookup failed for "{title}" by {artist}: {error}')
        song = None

    # No match comes back as None; leave the lyrics empty for that row
    if song is not None:
        lyrics_df.at[index, "lyrics"] = song.lyrics

    # No extra pause is applied between requests; add a time.sleep(...) call
    # here if the Genius API starts rate-limiting.

Searching for "Thrift Shop" by Macklemore & Ryan Lewis Featuring Wanz...
Searching for "Can't Hold Us" by Macklemore & Ryan Lewis Featuring Ray Dalton...
Done.
Searching for "Holy Grail" by Jay Z Featuring Justin Timberlake...
Done.
Searching for "Started From The Bottom" by Drake...
Searching for "Feel This Moment" by Pitbull Featuring Christina Aguilera...
Searching for "F**kin Problems" by A$AP Rocky Featuring Drake, 2 Chainz & Kendrick Lamar...
Done.
Searching for "Love Me" by Lil Wayne Featuring Drake & Future...
Done.
Searching for "Power Trip" by J. Cole Featuring Miguel...
Done.
Searching for "Same Love" by Macklemore & Ryan Lewis Featuring Mary Lambert...
Searching for "Bad" by Wale Featuring Tiara Thomas Or Rihanna...
No results found for: 'Bad Wale Featuring Tiara Thomas Or Rihanna'
Searching for "Gangnam Style" by PSY...
Searching for "I Cry" by Flo Rida...
Done.
Searching for "Swimming Pools (Drank)" by Kendrick Lamar...
Searching for "Berzerk" by Eminem...
Done.
Searching

In [10]:
# Persist the collected lyrics table to disk as "lyrics.csv".
# The DataFrame's row index is left out of the file.
output_csv = "lyrics.csv"
lyrics_df.to_csv(output_csv, index=False)