# Data collection - *a lyric's history and usage*

In [37]:
import os
import time
from re import sub
from urllib.parse import quote

import lyricsgenius as genius
import pandas as pd
import requests

Go to https://docs.genius.com/#/authentication-h1 to obtain the client access token used below - no sign-in required.

In [2]:
# Read the Genius client access token from the environment instead of
# committing it to the notebook, where it leaks to anyone who sees the file.
# Set GENIUS_ACCESS_TOKEN before starting the kernel; when it is unset this
# falls back to an empty string, which the API will not accept.
client_access_token = os.environ.get('GENIUS_ACCESS_TOKEN', '')

## Functions

In [3]:
def prep_sdk(auth_code):
    '''
    Build the lyricsgenius client used for the per-song lookups.

    auth_code is handed unchanged to genius.Genius.
    '''
    sdk_client = genius.Genius(auth_code)
    return sdk_client

In [41]:
def get_artists_and_songs(response):
    '''
    Convert a search response into (title, artist, pageviews) tuples.

    Parameters
    ----------
    response : object
        Anything exposing ``.json()`` that returns a mapping shaped like
        ``{'response': {'hits': [{'result': {...}}, ...]}}``.

    Returns
    -------
    list of tuple
        One ``(title, primary artist name, pageviews)`` tuple per hit, in
        response order. Hits lacking any of those fields are skipped.
    '''
    output = []
    for hit in response.json()['response']['hits']:
        try:
            result = hit['result']
            output.append((result['title'],
                           result['primary_artist']['name'],
                           result['stats']['pageviews']))
        except (KeyError, TypeError):
            # Best-effort: skip hits with a missing or null field, but let
            # anything else (e.g. KeyboardInterrupt) propagate to the caller.
            continue

    return output

In [93]:
def extract_song(title, artist, sdk_object):
    '''
    Look a song up through the SDK and return it only on an exact match.

    Parameters
    ----------
    title : str
        Song title to search for.
    artist : str
        Artist name to search for.
    sdk_object : object
        Object exposing ``search_song(title=..., artist=...)``.

    Returns
    -------
    object or None
        The object returned by ``search_song`` when its ``artist`` and
        ``title`` attributes equal the requested values exactly; ``None``
        when the search returns nothing or returns a different song.
    '''
    song = sdk_object.search_song(title=title, artist=artist)

    # Guard the attribute access below against a search that found nothing.
    if song is None:
        return None

    if song.artist == artist and song.title == title:
        return song
    return None

In [76]:
def find_surrounding_lyrics(query, lyrics):
    '''
    Find every lyric line containing ``query``, with its neighbouring lines.

    Parameters
    ----------
    query : str
        Text to look for; matching is case-insensitive.
    lyrics : str or None
        Lyrics with lines separated by ``'\\n'``.

    Returns
    -------
    list of list of str
        One ``[previous line, matching line, next line]`` entry per matching
        line. When the match is the first or last line, the missing
        neighbour is ``''``. Empty or ``None`` lyrics give ``[]``.
    '''
    if not lyrics:
        return []

    lines = lyrics.split('\n')  # split once instead of on every access
    needle = query.lower()
    last_index = len(lines) - 1

    output_lyric_blocks = []
    for i, line in enumerate(lines):
        if needle in line.lower():
            # i - 1 would wrap round to the final line when i == 0, and
            # i + 1 would run off the end on the final line.
            previous_line = lines[i - 1] if i > 0 else ''
            next_line = lines[i + 1] if i < last_index else ''
            output_lyric_blocks.append([previous_line, line, next_line])
    return output_lyric_blocks


In [108]:
def collect_lyrics(raw_lyric, auth_code, max_pages, sdk_object):
    '''
    Search Genius for a lyric and return the matching songs as a DataFrame.

    Parameters
    ----------
    raw_lyric : str
        Free-text lyric to search for.
    auth_code : str
        Access token, sent as ``Authorization: Bearer <auth_code>``.
    max_pages : int or str
        Maximum number of result pages to request; anything ``int()``
        accepts (``input()`` hands over a string).
    sdk_object : object
        Passed through to ``extract_song`` for the per-song lookups.

    Returns
    -------
    pandas.DataFrame
        Columns ``title``, ``artist``, ``pageviews``, ``lyrics`` and ``year``.
        Hits whose artist name contains the lyric are dropped. For rows where
        ``extract_song`` returns ``None``, ``lyrics`` is ``[]`` and ``year``
        is ``None``.
    '''
    # Collapse whitespace runs, then percent-encode the whole query so
    # characters such as '&', '#' or '+' cannot corrupt the URL.
    normalised = sub(r'\s+', ' ', raw_lyric.strip()).lower()
    request_uri = 'https://api.genius.com/search/?q={}'.format(quote(normalised, safe=''))
    headers = {'Authorization': 'Bearer {}'.format(auth_code)}

    page_limit = int(max_pages)  # convert once; a bad value fails before any request
    songs = []
    for current_page in range(1, page_limit + 1):
        print('Searching page {}....'.format(current_page))
        response = requests.get(request_uri, headers=headers,
                                params={'page': current_page})
        result = get_artists_and_songs(response)
        if not result:
            # A page with no usable hits ends the search.
            break
        songs += result

    # Drop hits whose artist name (second tuple field) contains the lyric.
    needle = raw_lyric.lower()
    data = [song for song in songs if needle not in song[1].lower()]
    dataframe = pd.DataFrame(data, columns=['title', 'artist', 'pageviews'])

    # A plain list keeps this working for an empty frame, where
    # DataFrame.apply(axis=1) would hand back a DataFrame instead of a Series.
    song_objects = [extract_song(title, artist, sdk_object)
                    for title, artist in zip(dataframe['title'], dataframe['artist'])]

    # extract_song yields None when the lookup does not match exactly.
    dataframe['lyrics'] = [
        find_surrounding_lyrics(raw_lyric, song.lyrics) if song is not None else []
        for song in song_objects
    ]
    dataframe['year'] = [song.year if song is not None else None
                         for song in song_objects]

    return dataframe

## Execute

In [111]:
# Prompt for the search inputs in the same order as before, then run the search.
lyric_to_find = input('What lyric would you like to look for?\n')
pages_to_search = input('How many pages do you want to search through?\n')
genius_sdk = prep_sdk(client_access_token)

output = collect_lyrics(lyric_to_find, client_access_token, pages_to_search, genius_sdk)


What lyric would you like to look for?
megatron
How many pages do you want to search through?
5
Searching page 1....
Searching page 2....
                     title       artist  pageviews
0                 MEGATRON  Nicki Minaj     333907
1                 MEGATRON       Laylow      23729
2  Outro (Planet Megatron)          RIN      11994
3                Megatron*   The Weeknd      14785
Searching for "MEGATRON" by Nicki Minaj...
Done.
Searching for "MEGATRON" by Laylow...
Done.
Searching for "Outro (Planet Megatron)" by RIN...
Done.
Searching for "Megatron*" by The Weeknd...
Done.


In [112]:
# Show the DataFrame returned by collect_lyrics as this cell's output.
output

Unnamed: 0,title,artist,pageviews,lyrics,year
0,MEGATRON,Nicki Minaj,333907,"[[[Verse 1], They call me Megatron, just did a...",2019-06-21
1,MEGATRON,Laylow,23729,[],2019-12-11
2,Outro (Planet Megatron),RIN,11994,"[[Hur'nsöhne!, [Songtext zu „Outro (Planet Meg...",2018-06-22
3,Megatron*,The Weeknd,14785,[],
