In [43]:
import os
import time

import pandas as pd
import requests
import spotipy
from bs4 import BeautifulSoup
from langdetect import detect
from spotipy.oauth2 import SpotifyClientCredentials

# SECURITY NOTE(review): the literal fallbacks below are credentials committed to
# source. They are kept only so existing runs keep working unchanged; they should be
# rotated and removed, with the values supplied through the environment instead.

# Spotify API Configuration (environment variables take precedence over the fallbacks)
SPOTIFY_CLIENT_ID = os.environ.get("SPOTIPY_CLIENT_ID", "c8bc7b62276649389440f24232c1a3c3")
SPOTIFY_CLIENT_SECRET = os.environ.get("SPOTIPY_CLIENT_SECRET", "51b71da773004e96920e8998da1f399c")

# Genius API Configuration (GENIUS_API_KEY in the environment takes precedence)
GENIUS_API_KEY = os.environ.get(
    "GENIUS_API_KEY",
    "X76tPHFY7psNTiAbLTVPkz54h5kqHsl1K8pvN_gTb7RedI9ISs79AbQJ-9TsfH78",
)
GENIUS_BASE_URL = "https://api.genius.com/"

# Authenticate with Spotify using the client-credentials flow
sp = spotipy.Spotify(
    auth_manager=SpotifyClientCredentials(
        client_id=SPOTIFY_CLIENT_ID,
        client_secret=SPOTIFY_CLIENT_SECRET,
    )
)

# Function to fetch songs by genre with multiple queries
def search_songs_by_genre(genre, total_limit=3000):
    """
    Fetch up to ``total_limit`` songs by genre using Spotify API with multiple queries.

    The search is issued once per year range (2020-2024, 2015-2019, 2010-2014), paging
    through each range 50 tracks at a time, so that more results can be collected than
    a single query's offset window allows.

    Args:
        genre: Genre name; used in the search query and copied into every result row.
        total_limit: Maximum number of songs to return. Values <= 0 yield an empty list.

    Returns:
        A list of dicts with keys "Genre", "Title" and "Artist" (artist names joined
        with ", "). The list never contains more than ``total_limit`` entries.
        Spotify errors are printed and end paging for the current year range only.
    """
    limit_per_request = 50  # Max allowed per request by Spotify
    max_offset = 1000  # Spotify restricts limit + offset to 1000
    query_splits = [f"{genre} year:2020-2024", f"{genre} year:2015-2019", f"{genre} year:2010-2014"]  # Example splits
    songs = []

    for query in query_splits:
        if len(songs) >= total_limit:
            break  # Nothing left to collect; skip the remaining year ranges
        for offset in range(0, max_offset, limit_per_request):
            if len(songs) >= total_limit:
                break  # Stop if we reach the desired total
            try:
                results = sp.search(q=f"genre:{query}", type="track", limit=limit_per_request, offset=offset)
                tracks = results['tracks']['items']
                for track in tracks:
                    song_title = track['name']
                    artist_name = ', '.join(artist['name'] for artist in track['artists'])
                    songs.append({"Genre": genre, "Title": song_title, "Artist": artist_name})
                # Stop if fewer tracks are returned
                if len(tracks) < limit_per_request:
                    break
            except Exception as e:
                # Best effort: report the failure and move on to the next year range.
                print(f"Spotify API error for query '{query}' at offset {offset}: {e}")
                break
            time.sleep(0.1)  # Short delay to avoid overwhelming the API

    # A whole page is appended at a time, so the last page may overshoot the limit.
    return songs[:total_limit]

# Function to search for a song on Genius
def search_song_on_genius(title, artist, timeout=10):
    """
    Search for a song on Genius using the API.

    Args:
        title: Song title.
        artist: Artist name(s); combined with the title to form the search query.
        timeout: Seconds to wait for the Genius API before giving up, so that a
            stalled connection cannot block the caller indefinitely.

    Returns:
        The URL of the first search hit, or None when there are no hits, the API
        answers with a non-200 status, or the request/response handling fails
        (failures are printed, not raised).
    """
    headers = {"Authorization": f"Bearer {GENIUS_API_KEY}"}
    query = f"{title} {artist}"
    try:
        response = requests.get(
            GENIUS_BASE_URL + "search",
            params={"q": query},
            headers=headers,
            timeout=timeout,
        )
        if response.status_code == 200:
            hits = response.json().get("response", {}).get("hits", [])
            if hits:
                # Only the top-ranked hit is used; it is not checked against title/artist.
                return hits[0]["result"]["url"]
        else:
            print(f"Genius API error: {response.status_code}")
    except Exception as e:
        # Best effort: network errors, timeouts and malformed payloads all end up here.
        print(f"Error querying Genius for {title} by {artist}: {e}")
    return None

# Function to scrape lyrics from Genius
def get_lyrics_from_genius(genius_url, timeout=10):
    """
    Scrape lyrics from a Genius song URL.

    Args:
        genius_url: URL of the Genius song page.
        timeout: Seconds to wait for the page before giving up, so that a stalled
            connection cannot block the caller indefinitely.

    Returns:
        The text of every ``data-lyrics-container="true"`` div on the page, joined
        with newlines, or None when the page cannot be fetched, no such div exists,
        or scraping fails (failures are printed, not raised).
    """
    try:
        response = requests.get(genius_url, timeout=timeout)
        if response.status_code != 200:
            print(f"Failed to fetch Genius URL: {genius_url} (Status: {response.status_code})")
            return None
        soup = BeautifulSoup(response.text, "html.parser")
        # A page may hold several lyrics containers (lyrics can be split across them);
        # reading only the first one would truncate the song.
        lyrics_divs = soup.find_all("div", {"data-lyrics-container": "true"})
        if lyrics_divs:
            parts = [div.get_text(separator="\n").strip() for div in lyrics_divs]
            return "\n".join(parts).strip()
    except Exception as e:
        # Best effort: network errors, timeouts and parse failures all end up here.
        print(f"Error scraping lyrics from Genius: {e}")
    return None

# Function to detect and filter English lyrics
def is_english(text):
    """
    Check if the given text is in English.

    Args:
        text: Text to classify.

    Returns:
        True when language detection reports "en"; False for empty, whitespace-only
        or non-string input, and whenever detection raises.
    """
    # Nothing to classify: answer directly instead of relying on the detector failing.
    if not isinstance(text, str) or not text.strip():
        return False
    try:
        return detect(text) == "en"
    except Exception:
        # Deliberately not a bare ``except``: KeyboardInterrupt/SystemExit must still
        # propagate so a long-running scrape can be interrupted.
        return False

# Entry point: collect Spotify tracks per genre, attach Genius lyrics, write CSVs
def main():
    """
    Build one ``<genre>_spotify_lyrics.csv`` file per genre.

    For every genre the Spotify tracks are fetched, each track is looked up on
    Genius, and a "Lyrics" entry is added to the track's dict: the scraped lyrics
    when they are available and detected as English, otherwise a placeholder
    message. The rows are then written to CSV without the index column.
    """
    for genre in ("Pop", "Rock", "Hip-Hop", "Country", "Electronic", "Jazz"):
        print(f"Fetching songs for genre: {genre}")
        spotify_songs = search_songs_by_genre(genre)
        print(f"Fetched {len(spotify_songs)} songs for genre {genre}.")

        rows = []
        for song in spotify_songs:
            title, artist = song["Title"], song["Artist"]
            print(f"Fetching lyrics for: {title} by {artist}")

            genius_url = search_song_on_genius(title, artist)
            if not genius_url:
                # No Genius page for this track.
                song["Lyrics"] = "Lyrics not found"
            else:
                lyrics = get_lyrics_from_genius(genius_url)
                if lyrics and is_english(lyrics):
                    song["Lyrics"] = lyrics
                else:
                    song["Lyrics"] = "Lyrics not in English or unavailable"
            rows.append(song)

        # Persist this genre's rows before moving on to the next genre.
        csv_file = f"{genre}_spotify_lyrics.csv"
        pd.DataFrame(rows).to_csv(csv_file, index=False)
        print(f"Saved {genre} data to {csv_file}")


if __name__ == "__main__":
    main()


Fetching songs for genre: Pop
Fetched 3000 songs for genre Pop.
Fetching lyrics for: Die With A Smile by Lady Gaga, Bruno Mars
Fetching lyrics for: BIRDS OF A FEATHER by Billie Eilish
Fetching lyrics for: Sailor Song by Gigi Perez
Fetching lyrics for: Good Luck, Babe! by Chappell Roan
Fetching lyrics for: Taste by Sabrina Carpenter
Fetching lyrics for: Timeless (with Playboi Carti) by The Weeknd, Playboi Carti
Fetching lyrics for: WILDFLOWER by Billie Eilish
Fetching lyrics for: HOT TO GO! by Chappell Roan
Fetching lyrics for: Bed Chem by Sabrina Carpenter
Fetching lyrics for: Pink Pony Club by Chappell Roan
Fetching lyrics for: Stick Season by Noah Kahan
Fetching lyrics for: No One Noticed by The Marías
Fetching lyrics for: Juno by Sabrina Carpenter
Fetching lyrics for: Who by Jimin
Fetching lyrics for: I Had Some Help (Feat. Morgan Wallen) by Post Malone, Morgan Wallen
Fetching lyrics for: Espresso by Sabrina Carpenter
Fetching lyrics for: Good Graces by Sabrina Carpenter
Fetching ly