In [11]:
import pandas as pd
import torch
import re
from transformers import AutoModel, AutoTokenizer

## Loading the BERT model

In [12]:
model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


## Importing data, preparing the lyrics

In [13]:
df_music = pd.read_csv("processed data/music_dataset.csv")
df_music

Unnamed: 0.1,Unnamed: 0,track,artist,number_of_emotion_tags,valence_tags,arousal_tags,dominance_tags,spotify_id,artist_name,track_name,...,Reggae,Dance,Country,Opera,Movie,Children's Music,A Capella,lyrics,seeds_array,sentiment
0,0,'Till I Collapse,Eminem,6,4.550000,5.273125,5.690625,4xkOaSrkexMciUUogZKVTS,Eminem,'Till I Collapse,...,0,0,0,0,0,0,0,[Intro: Eminem]\r\n'Cause sometimes you just f...,['aggressive'],intense_aggressive
1,1,St. Anger,Metallica,8,3.710000,5.833000,5.427250,3fOc9x06lKJBhz435mInlH,Metallica,St. Anger,...,0,0,0,0,0,0,0,[Verse]\r\nSaint Anger 'round my neck\r\nSaint...,['aggressive'],intense_aggressive
2,2,Die MF Die,Dope,7,3.771176,5.348235,5.441765,5bU4KX47KqtDKKaLM4QCzh,Dope,Die MF Die,...,0,0,0,0,0,0,0,[Intro]\r\nDie!\r\n[Verse 1]\r\nI don't need y...,['aggressive'],intense_aggressive
3,3,Step Up,Drowning Pool,9,2.971389,5.537500,4.726389,4Q1w4Ryyi8KNxxaFlOQClK,Drowning Pool,Step Up,...,0,0,0,0,0,0,0,"[Intro]\r\nOne, two, three, go!\r\n[Verse 1]\r...",['aggressive'],intense_aggressive
4,4,Feedback,Kanye West,1,3.080000,5.870000,5.490000,49fT6owWuknekShh9utsjv,Kanye West,Feedback,...,0,0,0,0,0,0,0,"[Chorus]\r\nAyy, ya heard about the good news?...",['aggressive'],intense_aggressive
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4971,6547,Wild Hope,Mandy Moore,8,5.422895,3.040737,5.013316,2WCQbmF3pZH5u2R0OaEXxY,Mandy Moore,Wild Hope,...,0,0,0,0,1,0,0,[Verse 1]\r\nIn the crazy world\r\nAnything ca...,['quiet'],sad_melancholic
4972,6548,A Life (1895 - 1915),Mark Hollis,4,4.927500,2.735000,4.297500,7dXjq4Wp7eEPsGVsOhRQDQ,Mark Hollis,A Life (1895 - 1915),...,0,0,0,0,0,0,0,[Verse]\r\nUniform\r\nDream cites freedom\r\nA...,['quiet'],sad_melancholic
4973,6551,My Shit's Fucked Up,Warren Zevon,6,2.758333,3.813333,3.856667,26douMAqNELour6sKd2oR7,Warren Zevon,My Shit's Fucked Up,...,0,0,0,0,0,0,0,"[Verse 1]\r\nWell, I went to the doctor\r\nI s...",['cynical'],dark_gritty
4974,6552,Do I Have To?,Pet Shop Boys,6,3.680274,3.877534,4.406781,5Knl7BTPCME3Kh5WxctWLU,Pet Shop Boys,What Have I Done To Deserve This? (With Dusty ...,...,0,1,0,0,0,0,0,[Verse 1]\r\nI don't care what you've said\r\n...,['cynical'],dark_gritty


### Define a transforming function

In [14]:
def transform_lyrics(row):
    base_lyrics = row['lyrics']
    artist_name = row['artist']
    track_title = row['track']
    
    lyrics = base_lyrics.replace("\r", " ")
    lyrics = re.sub(r"\[([^\]]+)\]", "", lyrics)
    lyrics = lyrics.lower()
    
    lines = lyrics.split('\n')
    #lines = list(set(lines))
    lines = list(dict.fromkeys(lines))    
    lyrics = ' '.join(lines)
      
    return "Lyrics of the song " + track_title + " by " + artist_name + " " + lyrics 
    

In [15]:
df_music['lyrics'] = df_music.apply(transform_lyrics, axis=1)
print(df_music['lyrics'].to_string(index=False))

Lyrics of the song 'Till I Collapse by Eminem  ...
Lyrics of the song St. Anger by Metallica   sai...
Lyrics of the song Die MF Die by Dope   die!  i...
Lyrics of the song Step Up by Drowning Pool   o...
Lyrics of the song Feedback by Kanye West   ayy...
Lyrics of the song 7 Words by Deftones   i'll n...
Lyrics of the song Comprachicos by Pendulum   m...
Lyrics of the song Two Words by Kanye West   we...
Lyrics of the song What I See by Black Flag thi...
Lyrics of the song Room 13 by Black Flag keep m...
Lyrics of the song Shake Ya Ass by Mystikal   m...
Lyrics of the song Boom Boom by The Animals   b...
Lyrics of the song Boys Wanna Be Her by Peaches...
Lyrics of the song Death Trend Setta by Crossfa...
Lyrics of the song Angel Duster by Run the Jewe...
Lyrics of the song Custer by Slipknot   how 'bo...
Lyrics of the song Welcome To The Fold by Filte...
Lyrics of the song Black by Sevendust   voices ...
Lyrics of the song Hush Hush; Hush Hush by The ...
Lyrics of the song Wollt Ihr Da

### Tokenize lyrics

In [16]:
def tokenize_lyrics(row):
    return tokenizer.encode(row['lyrics'], add_special_tokens=True, max_length=512, truncation=True)

In [17]:
df_music['tokenized_lyrics'] = df_music.apply(tokenize_lyrics, axis=1)
print(df_music['tokenized_lyrics'].to_string(index=False))

[101, 4581, 1997, 1996, 2299, 1005, 6229, 1045,...
[101, 4581, 1997, 1996, 2299, 2358, 1012, 4963,...
[101, 4581, 1997, 1996, 2299, 3280, 1049, 2546,...
[101, 4581, 1997, 1996, 2299, 3357, 2039, 2011,...
[101, 4581, 1997, 1996, 2299, 12247, 2011, 2927...
[101, 4581, 1997, 1996, 2299, 1021, 2616, 2011,...
[101, 4581, 1997, 1996, 2299, 4012, 18098, 2104...
[101, 4581, 1997, 1996, 2299, 2048, 2616, 2011,...
[101, 4581, 1997, 1996, 2299, 2054, 1045, 2156,...
[101, 4581, 1997, 1996, 2299, 2282, 2410, 2011,...
[101, 4581, 1997, 1996, 2299, 6073, 8038, 4632,...
[101, 4581, 1997, 1996, 2299, 8797, 8797, 2011,...
[101, 4581, 1997, 1996, 2299, 3337, 10587, 2022...
[101, 4581, 1997, 1996, 2299, 2331, 9874, 2275,...
[101, 4581, 1997, 1996, 2299, 4850, 6497, 2121,...
[101, 4581, 1997, 1996, 2299, 28888, 2011, 7540...
[101, 4581, 1997, 1996, 2299, 6160, 2000, 1996,...
[101, 4581, 1997, 1996, 2299, 2304, 2011, 2698,...
[101, 4581, 1997, 1996, 2299, 20261, 20261, 102...
[101, 4581, 1997, 1996, 2299, 2

### Generate the embeddings

In [18]:
def generate_embeddings(row):
    # Convert the token IDs to a PyTorch tensor
    tokens_tensor = torch.tensor([row['tokenized_lyrics']])

    # Feed the tokens through the model
    model_output = model(tokens_tensor)
    
    print('Tokenizing: ' + row['track'])

    # Extract the final layer's output as the embedding
    return model_output.last_hidden_state.mean(dim=1).squeeze().detach().numpy()

In [19]:
df_music['embedded_lyrics'] = df_music.apply(generate_embeddings, axis=1)
print(df_music['embedded_lyrics'].to_string(index=False))

Tokenizing: 'Till I Collapse
Tokenizing: St. Anger
Tokenizing: Die MF Die
Tokenizing: Step Up
Tokenizing: Feedback
Tokenizing: 7 Words
Tokenizing: Comprachicos
Tokenizing: Two Words
Tokenizing: What I See
Tokenizing: Room 13
Tokenizing: Shake Ya Ass
Tokenizing: Boom Boom
Tokenizing: Boys Wanna Be Her
Tokenizing: Death Trend Setta
Tokenizing: Angel Duster
Tokenizing: Custer
Tokenizing: Welcome To The Fold
Tokenizing: Black
Tokenizing: Hush Hush; Hush Hush
Tokenizing: Wollt Ihr Das Bett In Flammen
Tokenizing: Invaders Must Die
Tokenizing: Requiem
Tokenizing: Self vs Self
Tokenizing: Shameful Metaphors
Tokenizing: Black Me Out
Tokenizing: Quick Death
Tokenizing: Motorcycle Ride
Tokenizing: Lockdown
Tokenizing: I Put a Spell on You
Tokenizing: Reise Reise
Tokenizing: A Passage In Time
Tokenizing: A Better Son-Daughter
Tokenizing: Tobacco Road
Tokenizing: Asleep
Tokenizing: Howwhywuz, Howwhyam
Tokenizing: Super Bitch
Tokenizing: Waste Of Time
Tokenizing: F _ _ _ tha Police
Tokenizing: Homec

Tokenizing: the dead flag blues
Tokenizing: The End of All Things
Tokenizing: Angel
Tokenizing: Lullaby
Tokenizing: Climbing Up the Walls
Tokenizing: A Forest
Tokenizing: My Body Is a Cage
Tokenizing: Change (In the House of Flies)
Tokenizing: Untitled
Tokenizing: The Hills
Tokenizing: If I Had a Heart
Tokenizing: Mezzanine
Tokenizing: Sky Might Fall
Tokenizing: The Noose
Tokenizing: Fascination Street
Tokenizing: Stella Was a Diver and She Was Always Down
Tokenizing: Passenger
Tokenizing: Yonkers
Tokenizing: Coldest Winter
Tokenizing: Down by the Water
Tokenizing: Swim Good
Tokenizing: Splitting the Atom
Tokenizing: Threads
Tokenizing: A Wolf at the Door
Tokenizing: Afraid
Tokenizing: Skip Divided
Tokenizing: Tear You Apart
Tokenizing: Often
Tokenizing: Clap Hands
Tokenizing: Beggin for Thread
Tokenizing: Mask Off
Tokenizing: Bloody Mary
Tokenizing: These Walls
Tokenizing: idontwannabeyouanymore
Tokenizing: This Is Halloween
Tokenizing: The Rifle's Spiral
Tokenizing: Medicine
Tokenizi

Tokenizing: Perfectly Lonely
Tokenizing: A Rose for Emily
Tokenizing: Swim Until You Can't See Land
Tokenizing: Cold Little Heart
Tokenizing: I Get Lonely
Tokenizing: Talkin’ 2 Myself
Tokenizing: Love Me In Whatever Way
Tokenizing: William It Was Really Nothing
Tokenizing: Lonely
Tokenizing: To Be Alone
Tokenizing: Fallin' Out
Tokenizing: The Only
Tokenizing: Wouldnt it be nice
Tokenizing: Bullet Proof... I Wish I Was
Tokenizing: Brian and Robert
Tokenizing: Lonely
Tokenizing: Whenever You're Around
Tokenizing: Out of My Mind
Tokenizing: Se me olvidó otra vez
Tokenizing: What's It Like
Tokenizing: Unstable
Tokenizing: Sitting Home
Tokenizing: Cable Car
Tokenizing: Hotel California (Spanish).
Tokenizing: Death Theme
Tokenizing: Wild Horses (Feat. Adam Levine)
Tokenizing: Autum Leaves
Tokenizing: Sympathy
Tokenizing: Home
Tokenizing: The Who - The Seeker
Tokenizing: Exit Music For a Film
Tokenizing: Lonely Stranger (unplugged)
Tokenizing: Stuck In  A Moment You Can't Get Out Of
Tokenizin

Tokenizing: Hide
Tokenizing: Mirrors
Tokenizing: Salvation
Tokenizing: Nose Grows Some
Tokenizing: Time = Cause
Tokenizing: There Is No Ice (For My Drink)
Tokenizing: I Am the Antichrist to You
Tokenizing: Crush
Tokenizing: Numb
Tokenizing: Especially Me
Tokenizing: Two
Tokenizing: Cubism Dream
Tokenizing: Fancy Clown
Tokenizing: Come Home
Tokenizing: Colour Me In
Tokenizing: Concerning the UFO Sighting Near Highland, IL
Tokenizing: All I Wanna Do
Tokenizing: ...and the Rain
Tokenizing: Tear Down the House
Tokenizing: Diary
Tokenizing: Climax (Girl Shit)
Tokenizing: Pi's Lullaby
Tokenizing: Trampled Rose
Tokenizing: (You Don't Know) How Glad I Am
Tokenizing: Hazy (feat. William Fitzsimmons)
Tokenizing: Take Your Time
Tokenizing: Dog Faced Boy
Tokenizing: Speak Softly Love
Tokenizing: No Woman No Cry [Live]
Tokenizing: Daylight Katy
Tokenizing: (Sittin' on The) Dock of the Bay
Tokenizing: Cantique de Jean Racine Op. 11
Tokenizing: When You Wish Upon A Star - From "Pinocchio"/Soundtrack 

Tokenizing: Seasons (Waiting on You)
Tokenizing: Marquee Moon
Tokenizing: Partition
Tokenizing: Firestone
Tokenizing: Shine on You Crazy Diamond
Tokenizing: Run Boy Run
Tokenizing: Bixby Canyon Bridge
Tokenizing: Storm
Tokenizing: Estranged
Tokenizing: New Person, Same Old Mistakes
Tokenizing: Warriors
Tokenizing: Desolation Row
Tokenizing: Iridescent
Tokenizing: Wildest Dreams
Tokenizing: I Am the Resurrection
Tokenizing: While You Wait for the Others
Tokenizing: Monsters
Tokenizing: Money Power Glory
Tokenizing: On My Mind
Tokenizing: Birthday Cake
Tokenizing: Hair
Tokenizing: Scheiße
Tokenizing: Galway Girl
Tokenizing: Superheroes
Tokenizing: Bat Out of Hell
Tokenizing: On Hold
Tokenizing: Spitting Venom
Tokenizing: Big Girls Cry
Tokenizing: The Shrine / An Argument
Tokenizing: He's a Pirate
Tokenizing: Dry the Rain
Tokenizing: Mr. November
Tokenizing: Fast Lane
Tokenizing: Kill Your Heroes
Tokenizing: Caught By The River
Tokenizing: Twilight of the Thunder God
Tokenizing: Pusher Lo

Tokenizing: My Sweet Lord
Tokenizing: Hallelujah
Tokenizing: Ultralight Beam
Tokenizing: River
Tokenizing: Rivers Of Babylon
Tokenizing: All Things Must Pass
Tokenizing: Andúril
Tokenizing: Lightning
Tokenizing: Glory (From the Motion Picture Selma)
Tokenizing: Magnificat
Tokenizing: Whenever God Shines His Light
Tokenizing: The MC
Tokenizing: The Waters Of March
Tokenizing: You Are More
Tokenizing: Our Last Moment in Song
Tokenizing: Amazing Grace
Tokenizing: Lonesome Lover
Tokenizing: Different Times
Tokenizing: I Will Be Here For You
Tokenizing: That's No Way to Get Along
Tokenizing: Time's A Wastin'
Tokenizing: Redeemed
Tokenizing: Spirit Fall
Tokenizing: Lord Of Lords
Tokenizing: Shout to the Lord
Tokenizing: Silent Ganges
Tokenizing: Whole World In His Hands (Sing Over Me Album Version)
Tokenizing: King Without a Crown (live)
Tokenizing: Down by the Riverside
Tokenizing: symphonia
Tokenizing: Ol' Man River
Tokenizing: Pie Jesu
Tokenizing: Let Your Mercy Rain
Tokenizing: Secret Am

Tokenizing: We Are Golden
Tokenizing: Flagpole Sitta
Tokenizing: Lips Are Movin
Tokenizing: It's Not Unusual
Tokenizing: Bandages
Tokenizing: Gone Daddy Gone
Tokenizing: Pumpin Blood
Tokenizing: Archie, Marry Me
Tokenizing: Danger! High Voltage
Tokenizing: Ooh La La
Tokenizing: Fun, Fun, Fun
Tokenizing: Ways To Go
Tokenizing: One Pure Thought
Tokenizing: Give It To Me
Tokenizing: Qué Onda Guero
Tokenizing: Istanbul (Not Constantinople)
Tokenizing: Karate Schnitzel
Tokenizing: Objection (Tango)
Tokenizing: Coin-Operated Boy
Tokenizing: When I'm With You
Tokenizing: Good Girls Go Bad (feat. Leighton Meester)
Tokenizing: All Night
Tokenizing: The Yeah Yeah Yeah Song
Tokenizing: Tightrope
Tokenizing: Undone (The Sweater Song)
Tokenizing: Tous les mêmes
Tokenizing: Yoshimi Battles the Pink Robots Pt. 1
Tokenizing: Catch My Disease
Tokenizing: Pinch Me
Tokenizing: Rollercoaster
Tokenizing: The Future Freaks Me Out
Tokenizing: Ocean Man
Tokenizing: Good Times Roll
Tokenizing: Little Miss Can'

Tokenizing: Folding Chair
Tokenizing: I Can See Clearly Now
Tokenizing: Take Me Over
Tokenizing: I Wanna Get Better
Tokenizing: Best Friend
Tokenizing: Soak Up the Sun
Tokenizing: A.M. 180
Tokenizing: Bridges
Tokenizing: End of the Line
Tokenizing: The Man In Me
Tokenizing: Feel Again
Tokenizing: A Sentence Of Sorts In Kongsvinger
Tokenizing: Beach Comber
Tokenizing: In Transit
Tokenizing: Daydream
Tokenizing: The Curse Of Curves
Tokenizing: Oh No
Tokenizing: The River of Dreams
Tokenizing: Swim and Sleep (Like A Shark)
Tokenizing: Little Deuce Coupe
Tokenizing: Rocket
Tokenizing: Care of Cell 44
Tokenizing: There's More to Life Than This
Tokenizing: Let's Kill Tonight
Tokenizing: Ordinary Day
Tokenizing: I Know What I Know
Tokenizing: Rhythm of Love
Tokenizing: The Black Hawk War, or, How to Demolish an Entire Civilization and Still Feel Good About Yourself in the Morning, or, We Apologize for the Inconvenience But You're Going to Have to Leave Now, or...
Tokenizing: MoneyGrabber
Toke

Tokenizing: One by One
Tokenizing: La-La for You
Tokenizing: Just Wanna Be With You
Tokenizing: I Heard Wonders
Tokenizing: We Walk the Same Line
Tokenizing: Nightlite (Demo Version)
Tokenizing: Earth To Heaven
Tokenizing: I Can't Help It
Tokenizing: Judas
Tokenizing: go
Tokenizing: Rene And Georgette Magritte With Their Dog After The War
Tokenizing: Here There and Everywhere
Tokenizing: Everything I Do Gohn Be Funky (From Now On)
Tokenizing: Unconditional Love
Tokenizing: Miracle
Tokenizing: Pier 57
Tokenizing: One of Those Days
Tokenizing: Your Hands
Tokenizing: A Night to Remember
Tokenizing: Chocolate High
Tokenizing: Sweet Touch Of Love
Tokenizing: Monday Night
Tokenizing: Moonshadow
Tokenizing: Anticipation
Tokenizing: Neighborhood 1 (Tunnels)
Tokenizing: Help Me
Tokenizing: Shark Attack
Tokenizing: Sum
Tokenizing: Jump N'shout
Tokenizing: Head
Tokenizing: Violently Happy
Tokenizing: Justify My Love
Tokenizing: Go to Hell, for Heaven's Sake
Tokenizing: The House of Wolves
Tokeniz

Tokenizing: Biggest Part of Me
Tokenizing: Even the Nights Are Better
Tokenizing: SOMETHING ABOUT HIM
Tokenizing: Lady
Tokenizing: Goodnight Moon
Tokenizing: Vivir sin aire
Tokenizing: My Love
Tokenizing: That's Us/Wild Combination
Tokenizing: (I Love You) for Sentimental Reasons
Tokenizing: River
Tokenizing: You
Tokenizing: Forget
Tokenizing: World So Cold
Tokenizing: Love on the Rocks
Tokenizing: Coleccionista De Canciones
Tokenizing: Breaking the Law
Tokenizing: And Then You
Tokenizing: Still
Tokenizing: Algo está cambiando
Tokenizing: Cut
Tokenizing: Waiting in Vain
Tokenizing: She's a Mystery to Me
Tokenizing: Still
Tokenizing: Underneath the Stars
Tokenizing: Noviembre sin ti
Tokenizing: It Had To Be You (Big Band and Vocals)
Tokenizing: Besame Mucho
Tokenizing: Beauty and the Beast
Tokenizing: Pretend
Tokenizing: Make Someone Happy
Tokenizing: I Don't Want To Talk About It
Tokenizing: Me Duele Amarte
Tokenizing: I Need You Tonight
Tokenizing: Carmen - March of the Toreador
Token

Tokenizing: Get Me Bodied
Tokenizing: Focus
Tokenizing: Lady Marmalade
Tokenizing: Your Body
Tokenizing: Sweet Lady
Tokenizing: Paradise
Tokenizing: Pull Out
Tokenizing: I Just Want to Make Love to You
Tokenizing: Say OK
Tokenizing: Slow Down
Tokenizing: Prototype
Tokenizing: Crush
Tokenizing: Dreams
Tokenizing: Get You (feat. Kali Uchis)
Tokenizing: It's Like That
Tokenizing: Thrills
Tokenizing: Honey
Tokenizing: Black & Gold
Tokenizing: Closer To The Sun
Tokenizing: Sweat (Snoop Dogg vs. David Guetta) [Remix]
Tokenizing: U.R.A. Fever
Tokenizing: You Would Know
Tokenizing: Do the Rump
Tokenizing: Paradise Circus (Gui Boratto Remix)
Tokenizing: Berlin
Tokenizing: Too Close
Tokenizing: I Can't Hear You
Tokenizing: Soldier of Love
Tokenizing: Secrets
Tokenizing: How You Gonna Act Like That
Tokenizing: You Gonna Want Me
Tokenizing: Bloodflows
Tokenizing: Blow
Tokenizing: I Want You
Tokenizing: Fetish (feat. Gucci Mane)
Tokenizing: Jasmine (Demo)
Tokenizing: Neon Lights
Tokenizing: Upgrade

Tokenizing: Knives Out
Tokenizing: In Limbo
Tokenizing: Crown of Love
Tokenizing: Fast Car
Tokenizing: Lump Sum
Tokenizing: Hometown Glory
Tokenizing: 9 Crimes
Tokenizing: Blood Bank
Tokenizing: The Greatest
Tokenizing: No One's Gonna Love You
Tokenizing: Nantes
Tokenizing: What Sarah Said
Tokenizing: Brothers on a Hotel Bed
Tokenizing: Delicate
Tokenizing: The Rip
Tokenizing: The Long and Winding Road
Tokenizing: Suzanne
Tokenizing: You and Whose Army?
Tokenizing: Heaven Knows I'm Miserable Now
Tokenizing: This Place Is a Prison
Tokenizing: (Nice Dream)
Tokenizing: Fake Empire
Tokenizing: The Boy With The Thorn In His Side
Tokenizing: Colorblind
Tokenizing: Sea of Love
Tokenizing: NYC
Tokenizing: Volcano
Tokenizing: Bother
Tokenizing: Lua
Tokenizing: She's Leaving Home
Tokenizing: Look What You've Done
Tokenizing: Glycerine
Tokenizing: Communist Daughter
Tokenizing: Codex
Tokenizing: Lost Cause
Tokenizing: Nobody Home
Tokenizing: Vera
Tokenizing: Julia
Tokenizing: Place to Be
Tokenizi

Tokenizing: Unfaithful
Tokenizing: Samson
Tokenizing: Mockingbird
Tokenizing: Because of You
Tokenizing: The A Team
Tokenizing: Stan
Tokenizing: The Only Exception
Tokenizing: You Found Me
Tokenizing: Jar of Hearts
Tokenizing: Nothing Compares 2 U
Tokenizing: Hurt
Tokenizing: The Ghost of You
Tokenizing: Russian Roulette
Tokenizing: Everything Will Be Alright
Tokenizing: Knockin' on Heaven's Door
Tokenizing: Porcelain
Tokenizing: Soldier Side
Tokenizing: Last Kiss
Tokenizing: Roulette
Tokenizing: Hello
Tokenizing: Outside
Tokenizing: Cancer
Tokenizing: For No One
Tokenizing: Valentine's Day
Tokenizing: Angel
Tokenizing: Say Something
Tokenizing: Ohne dich
Tokenizing: I Need My Girl
Tokenizing: Better Than Me
Tokenizing: When I'm Gone
Tokenizing: Only One
Tokenizing: Boston
Tokenizing: The Fool on the Hill
Tokenizing: Disenchanted
Tokenizing: Stop This Train
Tokenizing: Pink + White
Tokenizing: Harrowdown Hill
Tokenizing: Somebody That I Used To Know
Tokenizing: Death of a Martian
Token

Tokenizing: Put That Woman First
Tokenizing: Corona And Lime
Tokenizing: Lowdown
Tokenizing: Be (Intro)
Tokenizing: Show Me
Tokenizing: Must Be Nice
Tokenizing: Bartender
Tokenizing: Flow
Tokenizing: Daddy's Lambo
Tokenizing: Diary
Tokenizing: Soon As I Get Home
Tokenizing: Other Side Of The Game
Tokenizing: Neighbors Know My Name
Tokenizing: This Woman's Work
Tokenizing: If I Ever Fall In Love
Tokenizing: Just In Case
Tokenizing: XO / The Host
Tokenizing: Me
Tokenizing: Make Love
Tokenizing: Number One
Tokenizing: Nirvana
Tokenizing: Gettin' Up
Tokenizing: Cupid
Tokenizing: Heaven or Las Vegas
Tokenizing: In Love With You
Tokenizing: Another Again
Tokenizing: Rollercoaster
Tokenizing: Shorty (Got Her Eyes On Me)
Tokenizing: Quickie
Tokenizing: Come Get to This
Tokenizing: These Are The Times
Tokenizing: Love
Tokenizing: Someone That Loves You
Tokenizing: The Way
Tokenizing: Mrs. Officer
Tokenizing: The Sweetest Gift
Tokenizing: Fly Like An Eagle
Tokenizing: Beauty
Tokenizing: Sumthin'

Tokenizing: Mind Games
Tokenizing: Forty Days
Tokenizing: Let It Burn
Tokenizing: Crushed
Tokenizing: What a Wicked Gang Are We
Tokenizing: "X"
Tokenizing: Never Been Hurt
Tokenizing: Hallelujah
Tokenizing: Last One Standing
Tokenizing: At My Best (feat. Hailee Steinfeld)
Tokenizing: The Antidote
Tokenizing: Chasing the Sun
Tokenizing: Flowers
Tokenizing: Has Anyone Ever Written Anything for You
Tokenizing: Never Get to Know
Tokenizing: Funeral
Tokenizing: It's Quiet Uptown
Tokenizing: Collide
Tokenizing: One Night Only
Tokenizing: Say Love
Tokenizing: Father, Spirit, Jesus
Tokenizing: Born Again
Tokenizing: Which Witch (demo)
Tokenizing: Blow Wind Blow
Tokenizing: Love Comes to Everyone
Tokenizing: To Our Unborn Daughters
Tokenizing: Bright
Tokenizing: There Is Nothing Like
Tokenizing: Awesome God
Tokenizing: B.O.B
Tokenizing: Out Loud
Tokenizing: Govinda Jai Jai
Tokenizing: Great Gig in the Sky
Tokenizing: Until The Morning (Rewound By Thievery Corporation)
Tokenizing: What Now
Token

Tokenizing: Amo Bishop Roden
Tokenizing: The Vowels Pt. 2
Tokenizing: Dream
Tokenizing: London Thunder
Tokenizing: Hurt Me
Tokenizing: Left Side Drive
Tokenizing: The Man Who Told Everything
Tokenizing: Pay No Mind
Tokenizing: Nightwalker
Tokenizing: Most of the Time
Tokenizing: Season Song
Tokenizing: Be the One
Tokenizing: Rise
Tokenizing: Heartbeats (Live)
Tokenizing: Let Me Be Him
Tokenizing: Maybes
Tokenizing: Cross Eyed Mary
Tokenizing: Loner
Tokenizing: Wind Up
Tokenizing: This Time
Tokenizing: Alan
Tokenizing: Learn
Tokenizing: Last Time
Tokenizing: Bedroom Floor
Tokenizing: Organs
Tokenizing: Who Is It (Carry My Joy on the Left, Carry My Pain on the Right)
Tokenizing: Sea Song
Tokenizing: Comatose
Tokenizing: The Spoils
Tokenizing: A Song For Jeffrey
Tokenizing: Little Dreamer
Tokenizing: Closer
Tokenizing: Music To Walk Home By
Tokenizing: The Way I Live
Tokenizing: Everything in Its Right Place
Tokenizing: I Might Be Wrong
Tokenizing: Feels Like We Only Go Backwards
Tokenizi

Tokenizing: Burning
Tokenizing: Lebanese Blonde
Tokenizing: Daughters
Tokenizing: Moondance
Tokenizing: It Ends Tonight
Tokenizing: Homesick
Tokenizing: One of These Things First
Tokenizing: I Could Die for You
Tokenizing: Bitch, Don’t Kill My Vibe
Tokenizing: Stable Song
Tokenizing: Electric Relaxation
Tokenizing: Pink Bullets
Tokenizing: I Miss You
Tokenizing: Like a Star
Tokenizing: Girl Inform Me
Tokenizing: Seven Years
Tokenizing: Beach Baby
Tokenizing: You Make It Easy
Tokenizing: No Such Thing
Tokenizing: Harvest Moon
Tokenizing: Jacksonville
Tokenizing: The Heart of Life
Tokenizing: Young Pilgrims
Tokenizing: Shoot the Moon
Tokenizing: Meadowlarks
Tokenizing: Ordinary People
Tokenizing: We Are Nowhere and It's Now
Tokenizing: Belief
Tokenizing: Orange Sky
Tokenizing: A Comet Appears
Tokenizing: Venus
Tokenizing: Road
Tokenizing: Why Georgia
Tokenizing: The Suburbs (Continued)
Tokenizing: Name
Tokenizing: Other Side of the World
Tokenizing: Make Love
Tokenizing: Blame It On The 

In [10]:
# Export to CSV
df_music = df_music.drop('tokenized_lyrics', axis=1)
df_music.to_csv("processed data/music_bert.csv")


Unnamed: 0.1,Unnamed: 0,track,artist,number_of_emotion_tags,valence_tags,arousal_tags,dominance_tags,spotify_id,artist_name,track_name,...,Dance,Country,Opera,Movie,Children's Music,A Capella,lyrics,seeds_array,sentiment,embedded_lyrics
0,0,'Till I Collapse,Eminem,6,4.550000,5.273125,5.690625,4xkOaSrkexMciUUogZKVTS,Eminem,'Till I Collapse,...,0,0,0,0,0,0,Lyrics of the song 'Till I Collapse by Eminem ...,['aggressive'],intense_aggressive,"[0.16638432, 0.03817972, 0.55852205, -0.190560..."
1,1,St. Anger,Metallica,8,3.710000,5.833000,5.427250,3fOc9x06lKJBhz435mInlH,Metallica,St. Anger,...,0,0,0,0,0,0,Lyrics of the song St. Anger by Metallica i ne...,['aggressive'],intense_aggressive,"[0.15030225, 0.24022837, 0.5402645, -0.1539074..."
2,2,Die MF Die,Dope,7,3.771176,5.348235,5.441765,5bU4KX47KqtDKKaLM4QCzh,Dope,Die MF Die,...,0,0,0,0,0,0,Lyrics of the song Die MF Die by Dope i don't ...,['aggressive'],intense_aggressive,"[0.44278228, 0.26310068, 0.38261956, -0.104149..."
3,3,Step Up,Drowning Pool,9,2.971389,5.537500,4.726389,4Q1w4Ryyi8KNxxaFlOQClK,Drowning Pool,Step Up,...,0,0,0,0,0,0,Lyrics of the song Step Up by Drowning Pool yo...,['aggressive'],intense_aggressive,"[0.17807148, -0.09108832, 0.4345166, -0.176492..."
4,4,Feedback,Kanye West,1,3.080000,5.870000,5.490000,49fT6owWuknekShh9utsjv,Kanye West,Feedback,...,0,0,0,0,0,0,Lyrics of the song Feedback by Kanye West even...,['aggressive'],intense_aggressive,"[0.37301844, 0.021944059, 0.40659544, -0.08707..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4971,6547,Wild Hope,Mandy Moore,8,5.422895,3.040737,5.013316,2WCQbmF3pZH5u2R0OaEXxY,Mandy Moore,Wild Hope,...,0,0,0,1,0,0,Lyrics of the song Wild Hope by Mandy Moore ev...,['quiet'],sad_melancholic,"[0.26744282, -0.14680435, 0.5437876, 0.0458194..."
4972,6548,A Life (1895 - 1915),Mark Hollis,4,4.927500,2.735000,4.297500,7dXjq4Wp7eEPsGVsOhRQDQ,Mark Hollis,A Life (1895 - 1915),...,0,0,0,0,0,0,Lyrics of the song A Life (1895 - 1915) by Mar...,['quiet'],sad_melancholic,"[0.10143524, 0.2869345, 0.3007569, -0.35665578..."
4973,6551,My Shit's Fucked Up,Warren Zevon,6,2.758333,3.813333,3.856667,26douMAqNELour6sKd2oR7,Warren Zevon,My Shit's Fucked Up,...,0,0,0,0,0,0,Lyrics of the song My Shit's Fucked Up by Warr...,['cynical'],dark_gritty,"[0.28827754, 0.123752534, 0.42537773, -0.09139..."
4974,6552,Do I Have To?,Pet Shop Boys,6,3.680274,3.877534,4.406781,5Knl7BTPCME3Kh5WxctWLU,Pet Shop Boys,What Have I Done To Deserve This? (With Dusty ...,...,1,0,0,0,0,0,Lyrics of the song Do I Have To? by Pet Shop B...,['cynical'],dark_gritty,"[0.26369944, 0.11783783, 0.28638732, -0.291826..."
