In [1]:
import re
from pathlib import Path

import pandas as pd
import torch
from transformers import AutoModel, AutoTokenizer

## Loading the BERT model

In [2]:
# Checkpoint identifier shared by the tokenizer and the model so the two always match.
model_name = "bert-base-uncased"
# Tokenizer used later by tokenize_lyrics (read there as a notebook-level global).
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Model used later by generate_embeddings (read there as a notebook-level global).
# The "weights ... were not used" notice printed by this cell lists only cls.* weights
# that this model class does not load.
model = AutoModel.from_pretrained(model_name)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


## Importing data, preparing the lyrics

In [3]:
# Load the song table; the path is relative to the notebook's working directory.
# NOTE(review): the 'Unnamed: 0' / 'Unnamed: 0.1' columns in the displayed frame look like
# indexes written by earlier to_csv calls -- confirm before relying on them.
df_music = pd.read_csv("data/music_dataset_final_hopefully.csv")
# Bare last expression: shows the frame as the cell output.
df_music

Unnamed: 0.1,Unnamed: 0,track,artist,number_of_emotion_tags,valence_tags,arousal_tags,dominance_tags,spotify_id,seeds_array,artist_name,...,Country,Opera,Movie,Children's Music,A Capella,lyrics,artist_track,genius_url,new_lyrics,sentiment
0,0,'Till I Collapse,Eminem,6,4.550000,5.273125,5.690625,4xkOaSrkexMciUUogZKVTS,['aggressive'],Eminem,...,0,0,0,0,0,[Intro: Eminem]\n'Cause sometimes you just fee...,Eminem 'Till I Collapse,https://genius.com/Eminem-till-i-collapse-lyrics,[Intro: Eminem]\n'Cause sometimes you just fee...,intense_aggressive
1,1,St. Anger,Metallica,8,3.710000,5.833000,5.427250,3fOc9x06lKJBhz435mInlH,['aggressive'],Metallica,...,0,0,0,0,0,[Verse]\nSaint Anger 'round my neck\nSaint Ang...,Metallica St. Anger,https://genius.com/Metallica-st-anger-lyrics,[Verse]\nSaint Anger 'round my neck\nSaint Ang...,intense_aggressive
2,2,Die MF Die,Dope,7,3.771176,5.348235,5.441765,5bU4KX47KqtDKKaLM4QCzh,['aggressive'],Dope,...,0,0,0,0,0,[Intro]\nDie!\n[Verse 1]\nI don't need your fo...,Dope Die MF Die,https://genius.com/Dope-die-mf-die-lyrics,[Intro]\nDie!\n[Verse 1]\nI don't need your fo...,intense_aggressive
3,3,Step Up,Drowning Pool,9,2.971389,5.537500,4.726389,4Q1w4Ryyi8KNxxaFlOQClK,['aggressive'],Drowning Pool,...,0,0,0,0,0,"[Intro]\nOne, two, three, go!\n[Verse 1]\nBrok...",Drowning Pool Step Up,https://genius.com/Drowning-pool-step-up-lyrics,"[Intro]\nOne, two, three, go!\n[Verse 1]\nBrok...",intense_aggressive
4,4,Feedback,Kanye West,1,3.080000,5.870000,5.490000,49fT6owWuknekShh9utsjv,['aggressive'],Kanye West,...,0,0,0,0,0,"[Chorus]\nAyy, ya heard about the good news?\n...",Kanye West Feedback,https://genius.com/Kanye-west-feedback-lyrics,"[Chorus]\nAyy, ya heard about the good news?\n...",intense_aggressive
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4950,5624,Wild Hope,Mandy Moore,8,5.422895,3.040737,5.013316,2WCQbmF3pZH5u2R0OaEXxY,['quiet'],Mandy Moore,...,0,0,1,0,0,[Verse 1]\nIn the crazy world\nAnything can ha...,Mandy Moore Wild Hope,https://genius.com/Mandy-moore-wild-hope-lyrics,[Verse 1]\nIn the crazy world\nAnything can ha...,sad_melancholic
4951,5625,A Life (1895 - 1915),Mark Hollis,4,4.927500,2.735000,4.297500,7dXjq4Wp7eEPsGVsOhRQDQ,['quiet'],Mark Hollis,...,0,0,0,0,0,[Verse]\nUniform\nDream cites freedom\nAvow\nR...,Mark Hollis A Life (1895 - 1915),https://genius.com/Mark-hollis-a-life-1895-191...,[Verse]\nUniform\nDream cites freedom\nAvow\nR...,sad_melancholic
4952,5626,My Shit's Fucked Up,Warren Zevon,6,2.758333,3.813333,3.856667,26douMAqNELour6sKd2oR7,['cynical'],Warren Zevon,...,0,0,0,0,0,"[Verse 1]\nWell, I went to the doctor\nI said,...",Warren Zevon My Shit's Fucked Up,https://genius.com/Warren-zevon-my-shits-fucke...,"[Verse 1]\nWell, I went to the doctor\nI said,...",dark_gritty
4953,5627,Do I Have To?,Pet Shop Boys,6,3.680274,3.877534,4.406781,5Knl7BTPCME3Kh5WxctWLU,['cynical'],Pet Shop Boys,...,0,0,0,0,0,[Verse 1]\nI don't care what you've said\nAnd ...,Pet Shop Boys Do I Have To?,https://genius.com/Pet-shop-boys-do-i-have-to-...,[Verse 1]\nI don't care what you've said\nAnd ...,dark_gritty


### Define the lyrics transformation function

In [4]:
def transform_lyrics(row):
    """Build the text passed to the tokenizer for one song.

    Bracketed section tags such as ``[Verse 1]`` are removed, the lyrics are
    lower-cased, whitespace inside each line is normalised, blank lines are
    skipped and repeated lines are dropped (first occurrence kept, original
    order preserved). The result is prefixed with
    ``"Lyrics of the song <track> by <artist>"``.

    Parameters
    ----------
    row : mapping (e.g. a DataFrame row)
        Must provide the keys ``'new_lyrics'``, ``'artist'`` and ``'track'``.

    Returns
    -------
    str
        The prefix followed by the unique lyric lines, joined by single spaces.
        If there are no lyrics, only the prefix is returned.
    """
    base_lyrics = row['new_lyrics']
    artist_name = row['artist']
    track_title = row['track']

    # A missing value (e.g. NaN for an empty CSV cell) is not a string; treat it
    # as "no lyrics" instead of failing on the string methods below.
    if not isinstance(base_lyrics, str):
        base_lyrics = ""

    lyrics = re.sub(r"\[([^\]]+)\]", "", base_lyrics)
    lyrics = lyrics.lower()

    # Normalise whitespace *before* deduplicating: str.split() also treats "\r"
    # as whitespace, so "hello\r" and "hello" count as the same line, and the
    # blank lines left behind by removed section tags are skipped.
    normalised_lines = (" ".join(line.split()) for line in lyrics.split('\n'))
    # dict.fromkeys keeps the first occurrence of each line in order (a set would not).
    unique_lines = list(dict.fromkeys(line for line in normalised_lines if line))

    prefix = f"Lyrics of the song {track_title} by {artist_name}"
    return " ".join([prefix, *unique_lines])
    

In [5]:
# NOTE: 'new_lyrics' is overwritten in place, so re-running this cell without reloading
# df_music applies transform_lyrics to text that has already been transformed.
df_music['new_lyrics'] = df_music.apply(transform_lyrics, axis=1)
# Preview only the first rows; printing the whole column floods the notebook output.
print(df_music['new_lyrics'].head(20).to_string(index=False))

Lyrics of the song 'Till I Collapse by Eminem  ...
Lyrics of the song St. Anger by Metallica  sain...
Lyrics of the song Die MF Die by Dope  die! i d...
Lyrics of the song Step Up by Drowning Pool  on...
Lyrics of the song Feedback by Kanye West  ayy,...
Lyrics of the song 7 Words by Deftones  i'll ne...
Lyrics of the song Comprachicos by Pendulum  ma...
Lyrics of the song Two Words by Kanye West  we ...
Lyrics of the song What I See by Black Flag thi...
Lyrics of the song Room 13 by Black Flag keep m...
Lyrics of the song Shake Ya Ass by Mystikal  mm...
Lyrics of the song Boom Boom by The Animals  bo...
Lyrics of the song Boys Wanna Be Her by Peaches...
Lyrics of the song Death Trend Setta by Crossfa...
Lyrics of the song Angel Duster by Run the Jewe...
Lyrics of the song Custer by Slipknot  how 'bou...
Lyrics of the song Welcome To The Fold by Filte...
Lyrics of the song Black by Sevendust  voices c...
Lyrics of the song Hush Hush; Hush Hush by The ...
Lyrics of the song Invaders Mus

### Tokenize lyrics

In [6]:
def tokenize_lyrics(row):
    """Encode a row's 'new_lyrics' text with the notebook-level ``tokenizer``.

    Special tokens are added, and ``max_length=512`` with truncation enabled is
    passed to the encoder. Returns the result of ``tokenizer.encode`` unchanged.
    """
    encode_options = {
        "add_special_tokens": True,
        "max_length": 512,
        "truncation": True,
    }
    text = row['new_lyrics']
    return tokenizer.encode(text, **encode_options)

In [7]:
# One list of token ids per song, stored as an object column.
df_music['tokenized_lyrics'] = df_music.apply(tokenize_lyrics, axis=1)
# Preview only the first rows; printing the whole column floods the notebook output.
print(df_music['tokenized_lyrics'].head(20).to_string(index=False))

[101, 4581, 1997, 1996, 2299, 1005, 6229, 1045,...
[101, 4581, 1997, 1996, 2299, 2358, 1012, 4963,...
[101, 4581, 1997, 1996, 2299, 3280, 1049, 2546,...
[101, 4581, 1997, 1996, 2299, 3357, 2039, 2011,...
[101, 4581, 1997, 1996, 2299, 12247, 2011, 2927...
[101, 4581, 1997, 1996, 2299, 1021, 2616, 2011,...
[101, 4581, 1997, 1996, 2299, 4012, 18098, 2104...
[101, 4581, 1997, 1996, 2299, 2048, 2616, 2011,...
[101, 4581, 1997, 1996, 2299, 2054, 1045, 2156,...
[101, 4581, 1997, 1996, 2299, 2282, 2410, 2011,...
[101, 4581, 1997, 1996, 2299, 6073, 8038, 4632,...
[101, 4581, 1997, 1996, 2299, 8797, 8797, 2011,...
[101, 4581, 1997, 1996, 2299, 3337, 10587, 2022...
[101, 4581, 1997, 1996, 2299, 2331, 9874, 2275,...
[101, 4581, 1997, 1996, 2299, 4850, 6497, 2121,...
[101, 4581, 1997, 1996, 2299, 28888, 2011, 7540...
[101, 4581, 1997, 1996, 2299, 6160, 2000, 1996,...
[101, 4581, 1997, 1996, 2299, 2304, 2011, 2698,...
[101, 4581, 1997, 1996, 2299, 20261, 20261, 102...
[101, 4581, 1997, 1996, 2299, 1

### Generate the embeddings

In [8]:
def generate_embeddings(row):
    """Compute one embedding vector for a song from its token ids.

    The ids in ``row['tokenized_lyrics']`` are run through the notebook-level
    ``model`` as a batch of one, and ``last_hidden_state`` is averaged over
    dim 1 (the token axis of BERT's (batch, sequence, hidden) output).

    Parameters
    ----------
    row : mapping (e.g. a DataFrame row)
        Must provide ``'tokenized_lyrics'`` (a list of token ids) and
        ``'track'`` (a string, used only for the progress message).

    Returns
    -------
    numpy.ndarray
        The mean-pooled final-layer output with the batch axis removed.
    """
    # Wrap the id list in another list to add the leading batch dimension of 1.
    tokens_tensor = torch.tensor([row['tokenized_lyrics']])

    # Inference only: without no_grad every forward pass records an autograd graph
    # that is never used, costing memory and time for each song.
    with torch.no_grad():
        model_output = model(tokens_tensor)
        # squeeze(0) removes only the batch axis.
        pooled = model_output.last_hidden_state.mean(dim=1).squeeze(0)

    # Progress message text kept unchanged; note this step embeds rather than tokenizes.
    print('Tokenizing: ' + row['track'])

    return pooled.numpy()

In [9]:
# One model forward pass per row; generate_embeddings prints a progress line for each song.
df_music['embedded_lyrics'] = df_music.apply(generate_embeddings, axis=1)
# Preview only the first rows; printing the whole column floods the notebook output.
print(df_music['embedded_lyrics'].head(20).to_string(index=False))

Tokenizing: 'Till I Collapse
Tokenizing: St. Anger
Tokenizing: Die MF Die
Tokenizing: Step Up
Tokenizing: Feedback
Tokenizing: 7 Words
Tokenizing: Comprachicos
Tokenizing: Two Words
Tokenizing: What I See
Tokenizing: Room 13
Tokenizing: Shake Ya Ass
Tokenizing: Boom Boom
Tokenizing: Boys Wanna Be Her
Tokenizing: Death Trend Setta
Tokenizing: Angel Duster
Tokenizing: Custer
Tokenizing: Welcome To The Fold
Tokenizing: Black
Tokenizing: Hush Hush; Hush Hush
Tokenizing: Invaders Must Die
Tokenizing: Requiem
Tokenizing: Self vs Self
Tokenizing: Shameful Metaphors
Tokenizing: Black Me Out
Tokenizing: Quick Death
Tokenizing: Motorcycle Ride
Tokenizing: Lockdown
Tokenizing: I Put a Spell on You
Tokenizing: A Passage In Time
Tokenizing: A Better Son-Daughter
Tokenizing: Tobacco Road
Tokenizing: Howwhywuz, Howwhyam
Tokenizing: Super Bitch
Tokenizing: Waste Of Time
Tokenizing: F _ _ _ tha Police
Tokenizing: Homecoming
Tokenizing: Cheating
Tokenizing: Triumph (Feat Cappadonna)
Tokenizing: Final So

Tokenizing: Swing
Tokenizing: Ride Through The Country f/ John Michael Montgomery
Tokenizing: Requiem
Tokenizing: the dead flag blues
Tokenizing: The End of All Things
Tokenizing: Angel
Tokenizing: Lullaby
Tokenizing: Climbing Up the Walls
Tokenizing: A Forest
Tokenizing: My Body Is a Cage
Tokenizing: Change (In the House of Flies)
Tokenizing: Untitled
Tokenizing: The Hills
Tokenizing: If I Had a Heart
Tokenizing: Mezzanine
Tokenizing: Sky Might Fall
Tokenizing: The Noose
Tokenizing: Fascination Street
Tokenizing: Stella Was a Diver and She Was Always Down
Tokenizing: Passenger
Tokenizing: Yonkers
Tokenizing: Coldest Winter
Tokenizing: Down by the Water
Tokenizing: Swim Good
Tokenizing: Splitting the Atom
Tokenizing: Threads
Tokenizing: A Wolf at the Door
Tokenizing: Afraid
Tokenizing: Skip Divided
Tokenizing: Tear You Apart
Tokenizing: Often
Tokenizing: Clap Hands
Tokenizing: Beggin for Thread
Tokenizing: Mask Off
Tokenizing: Red Right Hand
Tokenizing: Bloody Mary
Tokenizing: These Wa

Tokenizing: A Rose for Emily
Tokenizing: Swim Until You Can't See Land
Tokenizing: Cold Little Heart
Tokenizing: I Get Lonely
Tokenizing: Talkin’ 2 Myself
Tokenizing: Love Me In Whatever Way
Tokenizing: I Would Be Sad
Tokenizing: William It Was Really Nothing
Tokenizing: Lonely
Tokenizing: To Be Alone
Tokenizing: Fallin' Out
Tokenizing: The Only
Tokenizing: Wouldnt it be nice
Tokenizing: Bullet Proof... I Wish I Was
Tokenizing: Brian and Robert
Tokenizing: Lonely
Tokenizing: Whenever You're Around
Tokenizing: Out of My Mind
Tokenizing: Se me olvidó otra vez
Tokenizing: What's It Like
Tokenizing: Unstable
Tokenizing: Sitting Home
Tokenizing: Cable Car
Tokenizing: Hotel California (Spanish).
Tokenizing: Death Theme
Tokenizing: Wild Horses (Feat. Adam Levine)
Tokenizing: Autum Leaves
Tokenizing: Sympathy
Tokenizing: Home
Tokenizing: The Who - The Seeker
Tokenizing: Exit Music For a Film
Tokenizing: Lonely Stranger (unplugged)
Tokenizing: Stuck In  A Moment You Can't Get Out Of
Tokenizing:

Tokenizing: Inside Looking Out
Tokenizing: A New Jerusalem
Tokenizing: Peace of Mind
Tokenizing: Offertoire
Tokenizing: Full Moon
Tokenizing: Thais: Meditation
Tokenizing: Symphony No. 3: III. Lento - Cantablile Semplice
Tokenizing: Pink Moon
Tokenizing: History Eraser
Tokenizing: Are You In
Tokenizing: Dusk You & Me
Tokenizing: The Things That I Used To Do
Tokenizing: We Have All The Time In The World
Tokenizing: Love Unlimited
Tokenizing: Passerby
Tokenizing: Booty Call
Tokenizing: Refractions in the Plastic Pulse
Tokenizing: La Vida
Tokenizing: Ocean Wide
Tokenizing: Light Up the Sky
Tokenizing: Prince Caspian
Tokenizing: Two Figures by a Fountain
Tokenizing: Souls for Sale
Tokenizing: 7 days in sunny june
Tokenizing: Feels Like It Should
Tokenizing: (Dont) Give Hate A Chance
Tokenizing: On The 4th of July
Tokenizing: I Didn't Know I Was Looking for Love
Tokenizing: Thank You Reggae
Tokenizing: Ordinary People - Live
Tokenizing: Ol'55
Tokenizing: Ye Ye Ye
Tokenizing: Home On Monday


Tokenizing: Zzyzx Rd.
Tokenizing: Motorcycle Drive By
Tokenizing: Moment 4 Life
Tokenizing: Cherry
Tokenizing: Bros
Tokenizing: Itchin' On A Photograph
Tokenizing: Emperor's New Clothes
Tokenizing: Duel of the Fates
Tokenizing: Queen of Peace
Tokenizing: State of Grace
Tokenizing: The Bleeding Heart Show
Tokenizing: Liar
Tokenizing: Disloyal Order of Water Buffaloes
Tokenizing: Walking on Air
Tokenizing: Legendary Lovers
Tokenizing: The Wicked End
Tokenizing: Aura
Tokenizing: Small Things
Tokenizing: Floods
Tokenizing: ARTPOP
Tokenizing: Cortez The Killer
Tokenizing: Screen Shot
Tokenizing: Hips Don't Lie (feat. Wyclef Jean)
Tokenizing: Secret Crowds
Tokenizing: Strength of the World
Tokenizing: God Made Man
Tokenizing: Horsepower
Tokenizing: Donatella
Tokenizing: Scenes from an Italian Restaurant
Tokenizing: Encoder
Tokenizing: In Keeping Secrets Of Silent Earth: 3
Tokenizing: The Royal We
Tokenizing: MANiCURE
Tokenizing: Goin' Against Your Mind
Tokenizing: Lily, Rosemary and the Jack

Tokenizing: As We Go Along
Tokenizing: Give It to Me
Tokenizing: New Day
Tokenizing: God With Us
Tokenizing: Get Back Up
Tokenizing: Just for Today
Tokenizing: Revelation
Tokenizing: Never Sleep Alone
Tokenizing: Do You Love Me
Tokenizing: Fall
Tokenizing: Perfecta
Tokenizing: High Society
Tokenizing: Carried to The Table
Tokenizing: Show Me
Tokenizing: Washed By The Water
Tokenizing: How Wonderful
Tokenizing: W.A.Y.S.
Tokenizing: No Plan B
Tokenizing: I Want To See The Bright Lights Tonight
Tokenizing: Cheerleader (Felix Jaehn Remix)
Tokenizing: Shed A Little Light
Tokenizing: Hold Your Head High
Tokenizing: Alive (feat. Coucheron)
Tokenizing: Modern Love (Single Version)
Tokenizing: Do Your Thing
Tokenizing: Young, Wild & Free
Tokenizing: Tell the World
Tokenizing: Relentless
Tokenizing: Summer Time
Tokenizing: Salvation Is Here
Tokenizing: Dandelions
Tokenizing: Dark & Long (Dark Train)
Tokenizing: Hazy Shade of Winter
Tokenizing: Around the World/Harder Better Faster Stronger
Token

Tokenizing: Hey Mama (feat. Nicki Minaj & Afrojack)
Tokenizing: Class Historian
Tokenizing: High School Never Ends
Tokenizing: Pablo Picasso
Tokenizing: Short Skirt, Long Jacket
Tokenizing: Me and You
Tokenizing: It's All Been Done
Tokenizing: Bills
Tokenizing: Ohio (Come Back to Texas)
Tokenizing: Changing
Tokenizing: Do It Again
Tokenizing: Baby's Got Sauce
Tokenizing: Toop Toop
Tokenizing: Tell Me Something I Don't Know
Tokenizing: Girlfriend
Tokenizing: Jump Then Fall
Tokenizing: Must Have Done Something Right
Tokenizing: Heartbreaker
Tokenizing: Cooler Than Me (Single Mix)
Tokenizing: I Can Hear Music
Tokenizing: Particle Man
Tokenizing: Block After Block
Tokenizing: Touchin on My
Tokenizing: The Old Apartment
Tokenizing: Cameo Lover
Tokenizing: Say Hey (I Love You)
Tokenizing: Why Not
Tokenizing: Shark In The Water
Tokenizing: I Don't Know How To Love
Tokenizing: Rain Man
Tokenizing: Double Vision
Tokenizing: Escapade
Tokenizing: Mary
Tokenizing: Let's Go
Tokenizing: Cold Beverag

Tokenizing: Everyone
Tokenizing: But I Feel Good
Tokenizing: Dance Apocalyptic
Tokenizing: Cybele's Reverie
Tokenizing: The Lion Sleeps Tonight (Wimoweh)
Tokenizing: Happy
Tokenizing: Happy
Tokenizing: Wash Away
Tokenizing: Somebody Loves You
Tokenizing: Clean Living
Tokenizing: HULA HOOP
Tokenizing: Love Like Woe
Tokenizing: I Like It Like That
Tokenizing: Geek USA
Tokenizing: I Choose You
Tokenizing: Love Don't Die
Tokenizing: Collar Full
Tokenizing: Darlin'
Tokenizing: Here I Am
Tokenizing: Uncharted
Tokenizing: Ride A White Swan
Tokenizing: Día de Enero
Tokenizing: Les Yper-sound
Tokenizing: Cecilia And The Satellite
Tokenizing: Feel So Good
Tokenizing: Barrytown
Tokenizing: Back in Judy's Jungle
Tokenizing: Another F.U. Song
Tokenizing: Right On
Tokenizing: In the Middle of the Night
Tokenizing: Uneasy Rider
Tokenizing: Suzy Greenberg
Tokenizing: Pre-Ex-Girlfriend
Tokenizing: Wikipedia
Tokenizing: Yes, We Have No Bananas
Tokenizing: Right and Wrong
Tokenizing: Yo, Ho (A Pirate's L

Tokenizing: Go to Sleep.
Tokenizing: Something There
Tokenizing: Stop
Tokenizing: Healer
Tokenizing: Zihuatanejo
Tokenizing: Everything I Do, I Do It For You
Tokenizing: Obsession
Tokenizing: Wassup With It
Tokenizing: Sunday Bloody Sunday (Remastered)
Tokenizing: Tourette’s
Tokenizing: Holy
Tokenizing: Lookin' For My Rainbow
Tokenizing: Never Had A Dream Come True
Tokenizing: Hey There Delilah
Tokenizing: I Don't Want to Miss a Thing
Tokenizing: Your Song
Tokenizing: Wherever You Will Go
Tokenizing: Love Story
Tokenizing: Your Body Is a Wonderland
Tokenizing: Wonderful Tonight
Tokenizing: Goodbye My Lover
Tokenizing: Flightless Bird, American Mouth
Tokenizing: Kiss from a Rose
Tokenizing: Careless Whisper
Tokenizing: You Give Me Something
Tokenizing: Slow Dancing in a Burning Room
Tokenizing: Thinking of You
Tokenizing: More Than Words
Tokenizing: Look After You
Tokenizing: All My Loving
Tokenizing: At Last
Tokenizing: And I Love Her
Tokenizing: Always
Tokenizing: Heaven
Tokenizing: C

Tokenizing: Kaleidoscope Dream
Tokenizing: Sweet November
Tokenizing: Temptation
Tokenizing: Arch & Point
Tokenizing: Love Is The Answer
Tokenizing: So Into You
Tokenizing: Into You
Tokenizing: Lemme See (feat. Rick Ross)
Tokenizing: Landing
Tokenizing: In A Mellow Tone
Tokenizing: Pink Cloud
Tokenizing: Cross My Heart
Tokenizing: The Daily Planet
Tokenizing: 2 Wicky
Tokenizing: Pumpkin (Edit)
Tokenizing: Turn My Swag On
Tokenizing: Loose Change
Tokenizing: #GETITRIGHT
Tokenizing: Making the Most of the Night
Tokenizing: Favourite Colour
Tokenizing: Lovers in the Parking Lot
Tokenizing: Only For The Night
Tokenizing: Feel It Boy
Tokenizing: 89 Vision
Tokenizing: Do Me
Tokenizing: Lip & Hip
Tokenizing: Wherever Whenever
Tokenizing: Invented Sex
Tokenizing: Cyclone f/ T-Pain
Tokenizing: Give It Up (Radio Edit)
Tokenizing: Be With You
Tokenizing: Its All Over
Tokenizing: Aint No Way
Tokenizing: Ten Miles Back
Tokenizing: Bailamos
Tokenizing: Sex on Fire
Tokenizing: Supermassive Black Hole

Tokenizing: Love You to Death
Tokenizing: Versace on the Floor
Tokenizing: Shuffle A Dream
Tokenizing: O
Tokenizing: Say Yes
Tokenizing: Breathe Again
Tokenizing: Let's Get Married
Tokenizing: No Angel
Tokenizing: Best Mistake
Tokenizing: So Anxious
Tokenizing: Touch
Tokenizing: Oh
Tokenizing: Hazey
Tokenizing: Brown Skin
Tokenizing: U Know What's Up
Tokenizing: Nothing Can Come Between Us
Tokenizing: Papi
Tokenizing: DNA
Tokenizing: Infatuation
Tokenizing: Mambo Sun
Tokenizing: Never As Good As the First Time
Tokenizing: Moog Island
Tokenizing: Do Somethin'
Tokenizing: You Look Like Rain
Tokenizing: Him & I (with Halsey)
Tokenizing: Move Ya Body
Tokenizing: Work Bitch
Tokenizing: Play
Tokenizing: Falsetto
Tokenizing: BED (feat. Ariana Grande)
Tokenizing: Alright
Tokenizing: Right There
Tokenizing: Nice & Slow
Tokenizing: The Anthem
Tokenizing: Over Our Heads
Tokenizing: Tough Love
Tokenizing: My Lovin' (You're Never Gonna Get It)
Tokenizing: Dirt
Tokenizing: Chantaje
Tokenizing: Wakin

Tokenizing: Northern Sky
Tokenizing: Start a War
Tokenizing: Swimming In The Flood
Tokenizing: The Headmaster Ritual
Tokenizing: World Spins Madly On
Tokenizing: Imagine
Tokenizing: Small
Tokenizing: Lemonworld
Tokenizing: Brainy
Tokenizing: No Distance Left to Run
Tokenizing: I Think Ur a Contra
Tokenizing: Little Motel
Tokenizing: The Golden Age
Tokenizing: Eet
Tokenizing: Let It Die
Tokenizing: To Forgive
Tokenizing: Jolene
Tokenizing: Something I Can Never Have
Tokenizing: Cheers Darlin'
Tokenizing: The Penalty
Tokenizing: Sisters of Mercy
Tokenizing: Fever Dream
Tokenizing: Never Meant
Tokenizing: Gospel
Tokenizing: Secret Meeting
Tokenizing: Mexico
Tokenizing: Vanderlyle Crybaby Geeks
Tokenizing: Hey, That's No Way to Say Goodbye
Tokenizing: Poison Oak
Tokenizing: I And Love And You
Tokenizing: Racing Like a Pro
Tokenizing: Laughing With
Tokenizing: Reel Around the Fountain
Tokenizing: Parasite
Tokenizing: I Hope That I Don't Fall in Love With You
Tokenizing: Together
Tokenizing:

Tokenizing: Tomorrow
Tokenizing: Closer
Tokenizing: How's It Going To Be
Tokenizing: Cruel World
Tokenizing: Changes
Tokenizing: Yellow Light
Tokenizing: Doll Parts
Tokenizing: Take the Box
Tokenizing: Right Where It Belongs
Tokenizing: Landing in London
Tokenizing: Back to December
Tokenizing: Working Class Hero
Tokenizing: Nothing
Tokenizing: Skyscraper
Tokenizing: SAD!
Tokenizing: Pretty When You Cry
Tokenizing: Song for Zula
Tokenizing: Spaceboy
Tokenizing: Teardrops on My Guitar
Tokenizing: You Know What I Mean
Tokenizing: The Pieces Don't Fit Anymore
Tokenizing: Rain
Tokenizing: Too Much Love Will Kill You
Tokenizing: Show Me the Meaning of Being Lonely
Tokenizing: Viva Forever
Tokenizing: The Freshmen
Tokenizing: Crying
Tokenizing: Breathe
Tokenizing: Broken
Tokenizing: You Really Got a Hold on Me
Tokenizing: Sometime Around Midnight
Tokenizing: Sorry Seems to Be the Hardest Word
Tokenizing: Hello, I'm In Delaware
Tokenizing: I Hate U, I Love U (feat. Olivia O'Brien)
Tokenizing:

Tokenizing: Good Luck Charm
Tokenizing: Lovers Rock
Tokenizing: Babylon Sisters
Tokenizing: Diagonals
Tokenizing: In My Bed
Tokenizing: Distant Land
Tokenizing: She Said
Tokenizing: Closer
Tokenizing: Lilly
Tokenizing: Lions, Tigers & Bears
Tokenizing: Heaven
Tokenizing: Sugar Hill
Tokenizing: If I Have My Way
Tokenizing: Stingy
Tokenizing: Can We Talk
Tokenizing: Got 'Til It's Gone
Tokenizing: Same Ol' G
Tokenizing: Heartbreak Hotel
Tokenizing: TOGETHER
Tokenizing: Slow Down
Tokenizing: Get It On Tonite
Tokenizing: Come Here
Tokenizing: Animal
Tokenizing: Feel No Pain
Tokenizing: Age Ain't Nothing but a Number
Tokenizing: A.D. 2000
Tokenizing: If I Was Your Man
Tokenizing: Feenin'
Tokenizing: Dress On
Tokenizing: Cross My Mind
Tokenizing: You
Tokenizing: Hey Lover
Tokenizing: I Can Change
Tokenizing: Gimme What I Don't Know (I Want)
Tokenizing: Hello Good Morning (Remix)
Tokenizing: Mothership Reconnection (Feat Parliament/Funkadelic) (Daft Punk Remix)
Tokenizing: Tell Him
Tokenizing:

Tokenizing: Lionheart
Tokenizing: Keep It to Yourself
Tokenizing: Blood
Tokenizing: Crazy
Tokenizing: Make It Funky, Pt. 1
Tokenizing: Wine After Whiskey
Tokenizing: Lemon Drop
Tokenizing: Tough
Tokenizing: Apologize
Tokenizing: Bad Example
Tokenizing: Trailer For Rent
Tokenizing: Housewife's Prayer
Tokenizing: Flaws & All
Tokenizing: Worth It
Tokenizing: Human After All (Sebastian Remix )
Tokenizing: Second Wind
Tokenizing: I Dare You
Tokenizing: Celebrated Walkin' Blues
Tokenizing: Lose Yourself (8 Mile)
Tokenizing: Space
Tokenizing: I Am Invincible
Tokenizing: Interstate Prelude
Tokenizing: When I Was His Wife
Tokenizing: This Too Shall Pass
Tokenizing: Laying Low
Tokenizing: Rough And Ready (Single Edit)
Tokenizing: Because Of You
Tokenizing: Cold War
Tokenizing: Ah Mary
Tokenizing: Scream
Tokenizing: Ecstasy
Tokenizing: Taking Tiger Mountain
Tokenizing: Walk The Walk
Tokenizing: Chorus Of One
Tokenizing: Blaze
Tokenizing: New Architects
Tokenizing: Extinguish
Tokenizing: Resignati

Tokenizing: Closer
Tokenizing: Music To Walk Home By
Tokenizing: The Way I Live
Tokenizing: Everything in Its Right Place
Tokenizing: I Might Be Wrong
Tokenizing: Feels Like We Only Go Backwards
Tokenizing: Zodiac Shit
Tokenizing: I Never Learnt to Share
Tokenizing: Gyroscope
Tokenizing: It's What I Want
Tokenizing: Somebody's Calling Me
Tokenizing: Hailin From the Edge
Tokenizing: Boys Latin
Tokenizing: River
Tokenizing: The Humbling River
Tokenizing: Harlem River
Tokenizing: The Sniper at the Gates of Heaven
Tokenizing: Rainbo Conversation
Tokenizing: I Dreamed I Dream
Tokenizing: Shiverman
Tokenizing: Comfortably Numb
Tokenizing: Fast
Tokenizing: Yeah
Tokenizing: Feel good Hit of Summer
Tokenizing: The Continuous Life
Tokenizing: Day and night
Tokenizing: Addicted
Tokenizing: 1983
Tokenizing: I Am the Walrus
Tokenizing: Within
Tokenizing: Tomorrow Never Knows
Tokenizing: Space Song
Tokenizing: L$D
Tokenizing: The Color of the Fire
Tokenizing: Half Full Glass Of Wine
Tokenizing: Mass

Tokenizing: Boy With a Coin
Tokenizing: Homesick
Tokenizing: One of These Things First
Tokenizing: I Could Die for You
Tokenizing: Bitch, Don’t Kill My Vibe
Tokenizing: Stable Song
Tokenizing: Electric Relaxation
Tokenizing: Pink Bullets
Tokenizing: I Miss You
Tokenizing: Like a Star
Tokenizing: Girl Inform Me
Tokenizing: Seven Years
Tokenizing: Beach Baby
Tokenizing: You Make It Easy
Tokenizing: No Such Thing
Tokenizing: Harvest Moon
Tokenizing: Jacksonville
Tokenizing: The Heart of Life
Tokenizing: Young Pilgrims
Tokenizing: Shoot the Moon
Tokenizing: Ordinary People
Tokenizing: We Are Nowhere and It's Now
Tokenizing: Belief
Tokenizing: Orange Sky
Tokenizing: A Comet Appears
Tokenizing: Venus
Tokenizing: Road
Tokenizing: Why Georgia
Tokenizing: The Suburbs (Continued)
Tokenizing: Name
Tokenizing: Other Side of the World
Tokenizing: Make Love
Tokenizing: Blame It On The Tetons
Tokenizing: Gravity Rides Everything
Tokenizing: Mother Nature's Son
Tokenizing: Let It Go
Tokenizing: Song f

In [10]:
# Export to CSV
# errors='ignore' keeps this cell re-runnable: a second run no longer raises KeyError
# because 'tokenized_lyrics' was already dropped by the first run.
df_music = df_music.drop(columns='tokenized_lyrics', errors='ignore')

output_path = Path("processed data/music_bert_2.csv")
# Create the target folder up front so the export cannot fail on a missing directory
# after the long embedding run.
output_path.parent.mkdir(parents=True, exist_ok=True)

# NOTE(review): 'embedded_lyrics' holds numpy arrays, which are written as their string
# form -- presumably the consumer of this file parses that format; confirm before changing.
df_music.to_csv(output_path)
