In [1]:
# adapted from https://towardsdatascience.com/become-a-lyrical-genius-4362e7710e43?gi=d438aadd83e9
# and https://github.com/christianlomboy/MIR-Genre-Predictor/blob/master/MIR_data_collection.ipynb

import json
import os

import requests
import spotipy
from bs4 import BeautifulSoup
from spotipy.oauth2 import SpotifyClientCredentials

class GetLyrics():
    """Collect track metadata for a Spotify playlist and scrape matching lyrics from Genius.

    Typical use is ``GetLyrics(...).get_lyrics()``, which loads the playlist,
    searches Genius for every track and returns one lyrics string per track.
    The ``get_track_*`` helpers load the playlist on first use if it has not
    been fetched yet.
    """

    def __init__(self, spotify_client_id, spotify_client_secret, user_id, playlist_id, genius_key):
        """Store the credentials and playlist identifiers; no network access happens here.

        Args:
            spotify_client_id: Spotify application client id.
            spotify_client_secret: Spotify application client secret.
            user_id: Owner of the playlist, passed through to spotipy.
            playlist_id: Playlist to read, passed through to spotipy.
            genius_key: Genius API token, sent as a Bearer token on search requests.
        """
        self.spotify_client_id = spotify_client_id
        self.spotify_client_secret = spotify_client_secret
        self.user_id = user_id
        self.playlist_id = playlist_id
        self.genius_key = genius_key
        # Both are filled in lazily: the Spotify client on first API use and
        # the playlist by get_playlist_info().
        self.sp = None
        self.playlist = None

    def _get_client(self):
        """Return the shared spotipy client, creating it on first use."""
        if self.sp is None:
            credentials = SpotifyClientCredentials(
                client_id=self.spotify_client_id,
                client_secret=self.spotify_client_secret,
            )
            # Handing spotipy the credentials manager (rather than a token
            # obtained by hand) keeps this independent of the return type of
            # get_access_token().
            self.sp = spotipy.Spotify(client_credentials_manager=credentials)
        return self.sp

    def _playlist_tracks(self):
        """Return the 'track' object of every playlist item, fetching the playlist if needed."""
        if self.playlist is None:
            self.get_playlist_info()
        return [item['track'] for item in self.playlist['items']]

    def get_playlist_info(self):
        """Fetch the playlist's tracks from Spotify and cache them on ``self.playlist``.

        Every result page is followed, so ``playlist['items']`` holds all
        tracks rather than only the first page.

        Returns:
            The paging dict returned by spotipy, with ``items`` extended by the
            items of all following pages and ``next`` cleared.
        """
        sp = self._get_client()
        playlist = sp.user_playlist_tracks(self.user_id, self.playlist_id)
        page = playlist
        while page.get('next'):
            page = sp.next(page)
            playlist['items'].extend(page['items'])
        # All pages are merged, so the dict should no longer advertise a next page.
        playlist['next'] = None
        self.playlist = playlist
        return self.playlist

    def get_track_names(self):
        """Return (and store on ``self.track_names``) the name of every playlist track."""
        self.track_names = [track['name'] for track in self._playlist_tracks()]
        return self.track_names

    def get_track_features(self):
        """Return (and store on ``self.track_features``) the audio features of every track.

        One ``audio_features`` request is made per track, so the result is a
        list holding one spotipy response per track.
        """
        sp = self._get_client()
        self.track_features = [
            sp.audio_features(tracks=track['id']) for track in self._playlist_tracks()
        ]
        return self.track_features

    def get_track_artists(self):
        """Return (and store on ``self.track_artists``) the first-listed artist name of every track."""
        self.track_artists = [
            track['artists'][0]['name'] for track in self._playlist_tracks()
        ]
        return self.track_artists

    def get_track_genres(self):
        """Return (and store on ``self.track_genres``) the genre list of each track's first artist."""
        sp = self._get_client()
        self.track_genres = [
            sp.artist(track['artists'][0]['id'])['genres']
            for track in self._playlist_tracks()
        ]
        return self.track_genres

    def request_song_info(self, track_name, track_artist):
        """Search Genius for ``"<track_name> <track_artist>"``.

        The name and artist are remembered on the instance because
        check_hits() compares against ``self.track_artist``.

        Returns:
            The ``requests`` response of the search call (also kept on ``self.response``).
        """
        self.track_name = track_name
        self.track_artist = track_artist
        base_url = 'https://api.genius.com'
        headers = {'Authorization': 'Bearer ' + self.genius_key}
        search_url = base_url + '/search'
        query = {'q': track_name + ' ' + track_artist}
        # A GET search term belongs in the query string (params=), not in a
        # request body (data=).
        response = requests.get(search_url, params=query, headers=headers)
        self.response = response
        return self.response

    def check_hits(self):
        """Pick the first search hit whose primary artist name contains the requested artist.

        The comparison is a case-insensitive substring test.

        Returns:
            The matching hit dict, or ``None`` when no hit matches (also kept
            on ``self.remote_song_info``).
        """
        payload = self.response.json()
        wanted_artist = self.track_artist.lower()
        remote_song_info = None
        for hit in payload['response']['hits']:
            if wanted_artist in hit['result']['primary_artist']['name'].lower():
                remote_song_info = hit
                break
        self.remote_song_info = remote_song_info
        return self.remote_song_info

    def get_url(self):
        """Return (and store on ``self.song_url``) the page URL of the hit chosen by check_hits()."""
        self.song_url = self.remote_song_info['result']['url']
        return self.song_url

    def scrape_lyrics(self):
        """Download ``self.song_url`` and extract the lyrics text from the page.

        Returns:
            The lyrics text, or ``None`` when no lyrics container is found.
        """
        page = requests.get(self.song_url)
        html = BeautifulSoup(page.text, 'html.parser')

        legacy = html.find("div", class_="lyrics")
        if legacy:
            return legacy.get_text()

        exact = html.find("div", class_="Lyrics__Container-sc-1ynbvzw-2 jgQsqn")
        if exact:
            return exact.get_text()

        # Fallback for pages where the exact class string above does not match.
        # NOTE(review): assumes lyric blocks keep a class starting with
        # "Lyrics__Container" while the hashed suffix varies -- confirm
        # against the current Genius markup.
        containers = html.find_all(
            "div",
            class_=lambda css: bool(css) and css.startswith("Lyrics__Container"),
        )
        if containers:
            return "\n".join(block.get_text() for block in containers)
        return None

    def get_lyrics(self):
        """Fetch the playlist and return one lyrics string per track, in playlist order.

        Progress is printed for every track. Tracks with no matching Genius
        hit, or whose page yields no lyrics, are represented by the string
        ``'None'`` so that every element of the result is a ``str``.
        """
        self.get_playlist_info()
        self.get_track_names()
        self.get_track_artists()
        song_lyrics = []
        for i in range(len(self.track_names)):
            print("\n")
            print(f"Working on track {i}.")
            self.request_song_info(self.track_names[i], self.track_artists[i])
            remote_song_info = self.check_hits()
            if remote_song_info is None:
                lyrics = 'None'
                print(f"Track {i} is not in the Genius database.")
            else:
                self.get_url()
                lyrics = self.scrape_lyrics()
                if lyrics is None:
                    lyrics = 'None'
                    print(f"Track {i} is not in the Genius database.")
                else:
                    print(f"Retrieved track {i} lyrics!")
            song_lyrics.append(lyrics)
        return song_lyrics

In [2]:
# Vpop 2021
# NOTE: the two following cells assign the same two names again, so on a
# top-to-bottom run these values are overwritten before GetLyrics is built.
# NOTE(review): both values are full share URLs (including a ?si= query), not
# bare IDs -- confirm the installed spotipy version accepts URLs here.
spotify_user_id = 'https://open.spotify.com/user/31dcvftglnr6poeipfn5fhh6er4y?si=323d9529cac2464c'
spotify_playlist_id = 'https://open.spotify.com/playlist/0mrqaVOe6KYEEcaSYhVK4a?si=68dd2cc60d494360'

In [3]:
# Vpop 2020
# NOTE: this overwrites the names assigned by the previous cell and is itself
# overwritten by the next cell when the notebook is run top to bottom.
# NOTE(review): both values are full share URLs (including a ?si= query), not
# bare IDs -- confirm the installed spotipy version accepts URLs here.
spotify_user_id = 'https://open.spotify.com/user/7x629eaa5yen7rnd15bnz5fum?si=b6b84aabbfc04a8a'
spotify_playlist_id = 'https://open.spotify.com/playlist/77jIiSDGEkHlUFv2HH6ATA?si=e5c3e6031e334387'

In [4]:
# Vpop in 2015-2019
# NOTE: this is the last of the three playlist cells, so on a top-to-bottom
# run these are the values that reach GetLyrics.
# NOTE(review): both values are full URLs, not bare IDs -- confirm the
# installed spotipy version accepts URLs here.
spotify_user_id = 'https://open.spotify.com/user/31dcvftglnr6poeipfn5fhh6er4y'
spotify_playlist_id = 'https://open.spotify.com/playlist/3Rso58hNRq63GTIFTiHLW7'

In [5]:
# client_id, client_secret, genius_key
# Read the secrets from environment variables so real keys are never saved in
# the notebook; the original placeholders are kept as the fallback values used
# when a variable is not set.
spotify_client_id = os.environ.get('SPOTIPY_CLIENT_ID', '__')
spotify_client_secret = os.environ.get('SPOTIPY_CLIENT_SECRET', '___')
# spotify_user_id = '__'
# spotify_playlist_id = '__'
genius_key = os.environ.get('GENIUS_ACCESS_TOKEN', '___')

In [6]:
# get lyrics from genius.com
# Builds the collector from whichever playlist/credential values were assigned
# last, then fetches the playlist and scrapes lyrics track by track.
# get_lyrics() prints progress per track and returns one string per track,
# using the string 'None' for tracks without lyrics.
songs = GetLyrics(spotify_client_id, spotify_client_secret, spotify_user_id, spotify_playlist_id, genius_key)
song_lyrics = songs.get_lyrics()

In [7]:
# Remove every double newline from each lyric, then append the text form of
# the resulting list to lyrics.txt (append mode: earlier content is kept).
song_lyrics_filtered = [text.replace('\n\n', '') for text in song_lyrics]
with open("lyrics.txt", "a+", encoding='utf-8') as output:
    print(song_lyrics_filtered, file=output, end='')

In [8]:
# Bare expression: renders the whole cleaned lyrics list as this cell's output.
song_lyrics_filtered

In [9]:
# Collect the first-listed artist of every track and append the text form of
# that list to artists.txt (append mode: earlier content is kept).
song_artists = songs.get_track_artists()
with open("artists.txt", mode="a+", encoding='utf-8') as output:
    print(song_artists, end='', file=output)

In [10]:
# Bare expression: renders the whole artist list as this cell's output.
song_artists

In [11]:
# Look up the genres of each track's first artist and append them to
# genres.txt, prefixed with a '.' written ahead of the list's text form.
song_genres = songs.get_track_genres()
with open("genres.txt", "a+", encoding='utf-8') as output:
    output.write('.' + str(song_genres))

In [12]:
# Bare expression: renders the whole genre list as this cell's output.
song_genres

In [13]:
# get and write features to a json file
# The file is opened in append mode, so every run adds another JSON document.
# Ending each document with a newline keeps the file readable one document
# per line (JSON Lines) instead of fusing runs into "[...][...]".
song_features = songs.get_track_features()
with open("features.json", "a+", encoding='utf-8') as output:
    json.dump(song_features, output)
    output.write('\n')

In [14]:
# Bare expression: renders the whole audio-features list as this cell's output.
song_features

In [15]:
# Collect every track name and append the text form of that list to
# names.txt (append mode: earlier content is kept).
song_names = songs.get_track_names()
with open("names.txt", mode="a+", encoding='utf-8') as output:
    output.write(repr(song_names))

In [16]:
# Bare expression: renders the whole track-name list as this cell's output.
song_names