In [1]:
import requests
from bs4 import BeautifulSoup
import os
import re
import csv
import config

In [2]:
def request_artist_info(artist_name, page):
    """
    Requests one page of Genius search results for the given search text.

    Parameters:
    - artist_name: Text sent as the ``q`` query parameter of the search.
    - page: Page number of the search results to fetch (the caller in this
      notebook starts at 1).

    Returns:
    The ``requests`` response object when the API answers with HTTP 200.
    ``None`` (after printing the problem) when the request fails or the API
    answers with any other status code.
    """
    base_url = 'https://api.genius.com'
    headers = {"Authorization": "Bearer " + config.api_key}
    search_url = base_url + "/search"
    params = {"q": artist_name, "per_page": 10, "page": page}

    try:
        # requests never times out on its own; without this a stalled
        # connection would block the notebook indefinitely.
        response = requests.get(search_url, params=params, headers=headers, timeout=10)
    except requests.RequestException as exc:
        # Report network failures the same way as HTTP errors: print and return None.
        print(f"Error: request to {search_url} failed- {exc}")
        return None

    if response.status_code != 200:
        print(f"Error: {response.status_code}- {response.text}")
        return None
    return response


In [3]:
def reques_song_url(artist_name, song_cap):
    """
    Collects Genius song-page URLs for an artist through the search API.

    Pages through ``request_artist_info`` and keeps the hits whose primary
    artist name contains ``artist_name`` (case-insensitive). Stops when
    ``song_cap`` URLs were collected, when a request fails, or when the search
    results run out.

    Parameters:
    - artist_name: The name of the artist to search for.
    - song_cap: The maximum number of song URLs to return.

    Returns:
    A list of at most ``song_cap`` URLs, in the order the API returned them.
    """
    wanted_artist = artist_name.lower()
    page = 1
    songs = []

    # ``<`` instead of an ``==`` exit test: a zero or negative cap ends at once
    # rather than looping forever.
    while len(songs) < song_cap:
        response = request_artist_info(artist_name, page)
        if response is None:
            break

        try:
            json_data = response.json()
        except ValueError:
            # A body that is not JSON cannot be paged through.
            print("Error: response body is not valid JSON")
            break

        if "response" not in json_data or "hits" not in json_data["response"]:
            print("Error: 'response' or 'hits' key not found in the response data")
            break

        hits = json_data["response"]["hits"]
        if not hits:
            # An empty page means there is nothing left to fetch. Without this
            # check an artist with fewer than song_cap songs never terminates.
            break

        for hit in hits:
            # Tolerate hits that lack the expected keys instead of raising KeyError.
            result = hit.get("result") or {}
            primary_artist = (result.get("primary_artist") or {}).get("name") or ""
            url = result.get("url")
            if url and wanted_artist in primary_artist.lower():
                songs.append(url)
                if len(songs) >= song_cap:
                    break

        page += 1

    print("Found {} songs by {}".format(len(songs), artist_name))

    return songs
    

In [4]:
def scrape_song_lyrics(url):
    """
    Downloads a song page and returns its lyrics as cleaned plain text.

    The lyrics are read from every ``<div data-lyrics-container="true">`` on
    the page. Bracketed annotations such as ``[Chorus]`` and parenthesised
    asides such as ``(yeah)`` are removed, then blank lines are dropped.

    Parameters:
    - url: The URL of the song page to scrape.

    Returns:
    The lyrics with lines separated by ``"\\n"``, or an empty string (after
    printing the reason) when the page cannot be fetched or has no lyrics.
    """
    try:
        # A timeout keeps one unresponsive page from stalling a whole batch.
        page = requests.get(url, timeout=10)
    except requests.RequestException as exc:
        print(f"Could not fetch {url}: {exc}")
        return ""

    if page.status_code != 200:
        # An error page has no lyrics; report the real cause instead of parsing it.
        print(f"Could not fetch {url}: HTTP {page.status_code}")
        return ""

    # Parse the page content using BeautifulSoup
    html = BeautifulSoup(page.text, 'html.parser')

    # Find all <div> elements with the 'data-lyrics-container' attribute which contains the lyrics
    lyrics_divs = html.find_all('div', attrs={'data-lyrics-container': 'true'})

    # If no lyrics are found, print an error message and return an empty string
    if not lyrics_divs:
        print(f"Could not find lyrics for {url}")
        return ""

    # Extract the text from each lyrics <div> and join them with a newline separator
    lyrics = '\n'.join(div.get_text(separator="\n") for div in lyrics_divs)

    # get_text(separator="\n") puts a newline at every nested tag, so a header
    # like "[Verse 1: <a>Artist</a>]" arrives spread over several lines. The
    # bracket pattern therefore may cross newlines; the parenthesis pattern
    # stays on one line so a stray "(" cannot swallow following lines.
    lyrics = re.sub(r'\[[^\]]*\]|\([^)\n]*\)', '', lyrics)

    # Drop empty and whitespace-only lines. Join with "\n" rather than
    # os.linesep so the result is identical on every platform.
    return "\n".join(line for line in lyrics.splitlines() if line.strip())

In [5]:

def write_lyrics_to_csv(artist_name, song_count):
    """
    Writes the lyrics of songs by a given artist to a CSV file, one row per song.

    The file is ``lyrics/<artist name, lower-cased, spaces as underscores>.csv``
    with the columns ``Song`` and ``Lyrics``; the ``Lyrics`` cell holds the whole
    song with its lines separated by ``"\\n"``. Songs for which no lyrics were
    scraped are skipped. An existing file of the same name is overwritten.

    Parameters:
    - artist_name: The name of the artist.
    - song_count: The maximum number of songs to retrieve and write to the file.
    """
    # Retrieve the song URLs before opening the file: opening with 'w' truncates
    # it, so a failing lookup must not be able to wipe a CSV from an earlier run.
    urls = reques_song_url(artist_name, song_count)

    # Create the 'lyrics' directory if it doesn't exist (no exists/makedirs race)
    os.makedirs('lyrics', exist_ok=True)

    # Generate the file path for the CSV file, replacing spaces with underscores
    file_path = 'lyrics/' + artist_name.lower().replace(' ', '_') + '.csv'

    # Open the CSV file for writing
    with open(file_path, 'w', newline='', encoding='utf-8') as csvfile:
        # Define the column names for the CSV file
        fieldnames = ['Song', 'Lyrics']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

        # Write the header row
        writer.writeheader()

        # Loop through each song URL
        for url in urls:
            # Extract song name from the URL by replacing '-' with spaces and title-casing it
            song_name = url.split('/')[-1].replace('-', ' ').title()

            # Remove the artist's name and the trailing "Lyrics" from the song title
            song_name = song_name.replace(artist_name.title() + ' ', '').replace(' Lyrics', '')

            # Scrape the lyrics for the current song
            lyrics = scrape_song_lyrics(url)

            # Only write to the CSV file if lyrics are found
            if lyrics:
                # Normalise any line-ending style to "\n" and keep the whole
                # song in a single cell.
                lyrics_text = "\n".join(lyrics.splitlines())
                writer.writerow({"Song": song_name, "Lyrics": lyrics_text})

    # Print a message indicating success
    print(f'Lyrics written to {file_path}')

In [10]:
# Scrape up to 100 Sabrina Carpenter songs into lyrics/sabrina_carpenter.csv.
write_lyrics_to_csv(artist_name="Sabrina Carpenter", song_count=100)

Found 100 songs by Sabrina Carpenter
Could not find lyrics for https://genius.com/Sabrina-carpenter-espresso-on-vacation-version-lyrics
Lyrics written to lyrics/sabrina_carpenter.csv
