In [1]:
import pandas as pd
import os

# arousal = emotional intensity
# valence = positivity (higher is more positive)

# Annotation CSV file names; other cells select entries by position (files[3]
# is the static annotations table), so the order of this list matters.
files = [
    # each row is a song; each column is the average arousal over the 15-45 second window
    'arousal_cont_average.csv',
    # NOTE(review): presumably the standard deviations matching the file above -- confirm
    'arousal_cont_std.csv',
    # metadata = file, artist, title, genre
    'songs_info.csv',
    # overall mean and std of arousal and valence; use as source of truth for each song
    'static_annotations.csv',
    # each row is a song; each column is the average valence over the 15-45 second window
    'valence_cont_average.csv',
    # NOTE(review): presumably the standard deviations matching the file above -- confirm
    'valence_cont_std.csv',
]

In [2]:
# Load the per-song static annotations (files[3] is 'static_annotations.csv').
df = pd.read_csv(f'data/annotations/{files[3]}')
# DataFrame.info() writes its summary to stdout and returns None, so wrapping
# it in print() only appended a stray "None" line to the cell output.
df.info()
display(df.head(3))

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 744 entries, 0 to 743
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   song_id       744 non-null    int64  
 1   mean_arousal  744 non-null    float64
 2   std_arousal   744 non-null    float64
 3   mean_valence  744 non-null    float64
 4   std_valence   744 non-null    float64
dtypes: float64(4), int64(1)
memory usage: 29.2 KB
None


Unnamed: 0,song_id,mean_arousal,std_arousal,mean_valence,std_valence
0,2,3.1,0.99443,3.0,0.66667
1,3,3.5,1.8409,3.3,1.7029
2,4,5.7,1.4944,5.5,1.7159


In [3]:
def remove_missing_mp3(annotations_path=None, clips_dir="data/clips_45seconds",
                       candidate_ids=range(1, 1001)):
    """Delete mp3 clips whose song id has no row in the annotations table.

    Parameters
    ----------
    annotations_path : str or None
        CSV file with a ``song_id`` column. ``None`` (the default) resolves to
        ``data/annotations/<files[3]>``, the path the notebook has always used.
    clips_dir : str
        Folder holding the ``<song_id>.mp3`` clips.
    candidate_ids : iterable of int
        Ids that may exist on disk; only ids from this collection are ever
        considered for deletion. Defaults to 1..1000 inclusive.

    Returns
    -------
    None
        The function works purely through its side effect on ``clips_dir``.
    """
    if annotations_path is None:
        annotations_path = f'data/annotations/{files[3]}'
    song_truth = pd.read_csv(annotations_path)

    # Plain set difference: candidates that are NOT annotated. The previous
    # symmetric difference (^) also picked up annotated ids lying outside the
    # candidate range, which would have deleted clips that must be kept.
    exclude = set(candidate_ids) - set(song_truth['song_id'])

    for n in exclude:
        file = f"{clips_dir}/{n}.mp3"
        # Ids without a clip on disk are simply skipped.
        if os.path.exists(file):
            os.remove(file)

#remove_missing_mp3()

In [4]:
# TODO: convert mp3 to wav using pydub package
# REQUIRES FFMPEG

def wav_conversion():
    """Convert every annotated mp3 clip to a wav file under ``data/wav45``.

    Reads the song ids from ``data/annotations/<files[3]>`` and, for each id,
    decodes ``data/clips_45seconds/<id>.mp3`` and writes
    ``data/wav45/<id>.wav``. Returns None.
    """
    # Imported here so the rest of the notebook runs without pydub installed.
    from pydub import AudioSegment
    song_truth = pd.read_csv(f'data/annotations/{files[3]}')

    # The export below opens its target path for writing, so make sure the
    # output folder exists before the loop starts.
    os.makedirs("data/wav45", exist_ok=True)

    for n in song_truth['song_id']:
        file = f"data/clips_45seconds/{n}.mp3"
        wav_output = f"data/wav45/{n}.wav"
        audio = AudioSegment.from_mp3(file)
        # export() hands back the open output file; close it right away so
        # handles do not pile up over the whole song list.
        audio.export(wav_output, format="wav").close()

In [5]:
from audiologic.utils import transcribe_audio
import whisper

def lyricize():
    """Transcribe every annotated clip and collect the results in a DataFrame.

    Song ids come from ``data/annotations/<files[3]>``; each id is mapped to
    ``data/clips_45seconds/<id>.mp3`` and passed to ``transcribe_audio``
    together with a single pre-loaded whisper 'base' model.

    Returns
    -------
    pandas.DataFrame
        Columns ``song_id`` and ``lyrics``. ``lyrics`` holds whatever
        ``transcribe_audio`` returns, unmodified.
    """
    annotations = pd.read_csv(f'data/annotations/{files[3]}')
    # Load the model once and reuse it for every clip.
    whisper_model = whisper.load_model('base')

    song_ids = list(annotations['song_id'])
    transcripts = [
        transcribe_audio(f"data/clips_45seconds/{song_id}.mp3", preloaded_model=whisper_model)
        for song_id in song_ids
    ]

    return pd.DataFrame({'song_id': song_ids, 'lyrics': transcripts})

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
# Transcribe all annotated clips; the result has one row per song id.
df_lyric = lyricize()

# Preview the first rows of the transcription table.
df_lyric.head()

Unnamed: 0,song_id,lyrics
0,2,DecodingResult(audio_features=tensor([[-0.6818...
1,3,DecodingResult(audio_features=tensor([[-0.5704...
2,4,DecodingResult(audio_features=tensor([[-0.4866...
3,5,DecodingResult(audio_features=tensor([[-0.5512...
4,7,DecodingResult(audio_features=tensor([[-0.5190...


In [8]:
# Pull the `.text` attribute of each transcription result into its own column.
df_lyric['text'] = df_lyric['lyrics'].map(lambda result: result.text)
df_lyric.head()

Unnamed: 0,song_id,lyrics,text
0,2,DecodingResult(audio_features=tensor([[-0.6818...,Can't find their friends to make just mine Pro...
1,3,DecodingResult(audio_features=tensor([[-0.5704...,Music
2,4,DecodingResult(audio_features=tensor([[-0.4866...,Music
3,5,DecodingResult(audio_features=tensor([[-0.5512...,"I ought to leave my main, I ought to leave my ..."
4,7,DecodingResult(audio_features=tensor([[-0.5190...,Music


In [11]:
# Show the full transcription result object stored for the second row.
df_lyric['lyrics'].iloc[1]

DecodingResult(audio_features=tensor([[-0.5704, -0.1351,  0.7946,  ..., -0.1442,  1.1092,  0.2652],
        [-0.1802,  0.4886,  0.9549,  ...,  0.2980,  0.8849,  0.1513],
        [-0.2676,  1.4002,  0.8311,  ..., -0.0368,  0.7257,  0.0360],
        ...,
        [ 0.6215, -1.0229, -0.6634,  ..., -0.1429,  1.1305,  1.3437],
        [-0.4202,  0.0605, -0.2755,  ...,  0.2335,  0.6126,  0.7600],
        [-0.7873,  0.6563,  0.0455,  ..., -0.1563,  0.3479,  0.0976]]), language='en', language_probs=None, tokens=[50364, 7609, 50664], text='Music', avg_logprob=-1.5689871311187744, no_speech_prob=0.4768819510936737, temperature=0.0, compression_ratio=0.38461538461538464)

In [10]:
# Persist the transcription table in two formats.
# The pickle keeps every column as Python objects, including the raw result
# objects in 'lyrics'.
# NOTE(review): reading it back presumably needs the library that defines those
# result objects to be importable -- confirm before sharing the file.
df_lyric.to_pickle('data/lyrics.pkl')
# NOTE(review): the CSV stores only the string form of each 'lyrics' object and
# also writes the row index as an unnamed first column -- confirm that
# downstream readers expect this layout.
df_lyric.to_csv('data/lyrics.csv')