# **Spotify Analysis: Building a Simple Mood-Based Recommendation System**


In [17]:
pip install spotipy

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [18]:
# Run the project's setup scripts inside this kernel. The captured output below
# shows pip activity and NLTK corpus downloads (vader_lexicon, punkt, and the
# 'all' collection) being triggered by these scripts.
# NOTE(review): CLIENT_ID / CLIENT_SECRET used by a later cell are not defined
# in any visible cell - presumably one of these scripts provides them; confirm.
%run pip_installs.py
%run packages.py

Defaulting to user installation because normal site-packages is not writeable


[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/varshis./nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package punkt to /Users/varshis./nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading collection 'all'
[nltk_data]    | 
[nltk_data]    | Downloading package abc to
[nltk_data]    |     /Users/varshis./nltk_data...
[nltk_data]    |   Package abc is already up-to-date!
[nltk_data]    | Downloading package alpino to
[nltk_data]    |     /Users/varshis./nltk_data...
[nltk_data]    |   Package alpino is already up-to-date!
[nltk_data]    | Downloading package averaged_perceptron_tagger to
[nltk_data]    |     /Users/varshis./nltk_data...
[nltk_data]    |   Package averaged_perceptron_tagger is already up-
[nltk_data]    |       to-date!
[nltk_data]    | Downloading package averaged_perceptron_tagger_eng to
[nltk_data]    |     /Users/varshis./nltk_data...
[nltk_data]    |   Package

In [19]:
import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyOAuth
import requests
import base64
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from datetime import datetime
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
from datetime import datetime
from sklearn.preprocessing import MinMaxScaler
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import ipywidgets as widgets
from IPython.display import display, clear_output

In [20]:
import pandas as pd

def read_music_data_from_csv(csv_file_path='processed_csv/music_df.csv'):
    """Load the processed music dataset from a CSV file.

    Parameters
    ----------
    csv_file_path : str or path-like, optional
        Location of the CSV to read. Defaults to the notebook's processed
        dataset, ``'processed_csv/music_df.csv'`` (relative to the working
        directory), so existing zero-argument calls behave as before.

    Returns
    -------
    pandas.DataFrame
        The file contents as parsed by ``pandas.read_csv`` with default options.
    """
    return pd.read_csv(csv_file_path)

# Load the processed dataset once. `music_df` is the module-level frame that the
# later cells reassign (missing-value fill) and that the recommendation
# functions read as a global.
music_df = read_music_data_from_csv()


In [21]:
# Count missing values per column to decide how they should be filled.
print(music_df.isna().sum())

Spotify ID           21
Artist IDs           21
Track Name            0
Album Name            2
Artist Name(s)        2
Release Date         21
Duration (ms)         0
Popularity            0
Added By              0
Added At              0
Genres              322
Danceability         21
Energy               21
Key                  21
Loudness             21
Mode                 21
Speechiness          21
Acousticness         21
Instrumentalness     21
Liveness             21
Valence              21
Tempo                21
Time Signature       21
Playlist              0
Season                0
Lyrics_Clean          0
Words                 0
Sentiment_Label       0
Sentiment_Score       0
dtype: int64


In [22]:
# Fill gaps according to column type. A blanket fillna(0) would put the integer 0
# into text columns (e.g. 'Release Date', 'Genres', 'Artist Name(s)'), leaving
# them with mixed str/int values that break string and date handling later.
numeric_columns = music_df.select_dtypes(include='number').columns
music_df = (
    music_df
    .fillna({column: 0 for column in numeric_columns})  # numeric features -> 0
    .fillna('')                                         # remaining (text) columns -> empty string
)

In [24]:
# `music_df` must already be loaded and contain 'Sentiment_Score' and 'Sentiment_Label'.
# Note: this binds a second name to the same DataFrame object (no copy is made).
# NOTE(review): `data` is not referenced again in the visible cells - confirm it is still needed.
data = music_df

# Function to calculate weighted popularity scores based on release date
def calculate_weighted_popularity(release_date):
    """Return a recency weight for a release date: ``1 / (age_in_days + 1)``.

    Newer releases get a weight close to 1; older releases approach 0.

    Parameters
    ----------
    release_date : str
        Release date formatted as ``'YYYY-MM-DD'``. Reduced-precision dates
        (``'YYYY-MM'`` or ``'YYYY'``) are also accepted and are interpreted as
        the first day of that month / year.

    Returns
    -------
    float
        The recency weight. ``0.0`` is returned when ``release_date`` is not a
        string (for example the ``0`` left behind by ``fillna(0)``, or NaN) or
        cannot be parsed, so such rows sort last instead of raising.
    """
    if not isinstance(release_date, str):
        # Missing dates carry no recency information.
        return 0.0

    date_text = release_date.strip()
    for date_format in ('%Y-%m-%d', '%Y-%m', '%Y'):
        try:
            released_on = datetime.strptime(date_text, date_format)
            break
        except ValueError:
            continue
    else:
        # None of the supported formats matched.
        return 0.0

    # Clamp at zero: a date in the future would otherwise give a negative age
    # and, at exactly -1 day, a division by zero.
    age_in_days = max((datetime.now() - released_on).days, 0)
    return 1 / (age_in_days + 1)

# Min-Max scale the audio features together with the lyric sentiment score
feature_columns = [
    'Danceability', 'Energy', 'Key', 'Loudness', 'Mode', 'Speechiness',
    'Acousticness', 'Instrumentalness', 'Liveness', 'Valence', 'Tempo',
    'Sentiment_Score',
]
scaler = MinMaxScaler()
music_features = music_df[feature_columns].values
music_features_scaled = scaler.fit_transform(music_features)

# Authenticate with the Spotify API using the client-credentials manager and
# build the module-level client `sp` that get_song_details() uses for searches.
# NOTE(review): CLIENT_ID and CLIENT_SECRET are not defined in any visible cell -
# presumably they are provided by a script run earlier; confirm, and make sure
# the secret is never written into the notebook itself.
credentials = SpotifyClientCredentials(client_id=CLIENT_ID, client_secret=CLIENT_SECRET)
sp = spotipy.Spotify(client_credentials_manager=credentials)

# Function to get song details from Spotify
def get_song_details(track_name, artist_name):
    """Search Spotify for a track and return display metadata for the first hit.

    Uses the module-level client ``sp`` and issues one search request with the
    query ``track:<track_name> artist:<artist_name>``.

    Parameters
    ----------
    track_name : str
        Track title to search for.
    artist_name : str
        Artist name to search for.

    Returns
    -------
    dict or None
        ``None`` when the search returns no tracks. Otherwise a dict with the
        keys ``'Song Name'``, ``'Artist'`` (first listed artist),
        ``'Spotify URL'``, ``'Preview URL'`` and ``'Album Art URL'``.
        ``'Preview URL'`` is passed through as returned by the API and may be
        ``None``; ``'Album Art URL'`` is ``None`` when the album has no images.
    """
    query = f'track:{track_name} artist:{artist_name}'
    results = sp.search(q=query, type='track', limit=1)

    items = results['tracks']['items']
    if not items:
        return None

    track = items[0]
    # Guard against an empty image list instead of raising IndexError.
    album_images = track['album']['images']
    album_art_url = album_images[0]['url'] if album_images else None

    return {
        'Song Name': track['name'],
        'Artist': track['artists'][0]['name'],
        'Spotify URL': track['external_urls']['spotify'],
        'Preview URL': track['preview_url'],
        'Album Art URL': album_art_url,
    }

# Function to get content-based recommendations based on sentiment label
def content_based_recommendations_by_sentiment(sentiment_label, num_recommendations=5, random_state=None):
    """Pick random tracks from ``music_df`` that carry the given sentiment label.

    Parameters
    ----------
    sentiment_label : str
        Value to match exactly against the ``'Sentiment_Label'`` column.
    num_recommendations : int, optional
        Maximum number of tracks to return (default 5). Fewer rows are
        returned when fewer tracks match.
    random_state : int or numpy random generator, optional
        Seed forwarded to ``DataFrame.sample``. Leave as ``None`` (default) for
        a different shuffle on every call; pass a fixed value to make the
        selection reproducible.

    Returns
    -------
    pandas.DataFrame
        An independent copy with a fresh 0-based index and the columns
        ``'Track Name'``, ``'Artist Name(s)'``, ``'Album Name'``,
        ``'Release Date'``, ``'Popularity'`` and ``'Sentiment_Label'``.
        An empty DataFrame (after printing a message) when no track matches.
    """
    matching_tracks = music_df[music_df['Sentiment_Label'] == sentiment_label]

    if matching_tracks.empty:
        print(f"No songs found with sentiment label '{sentiment_label}'. Please enter a valid sentiment label.")
        return pd.DataFrame()  # Return an empty DataFrame

    shuffled_tracks = (
        matching_tracks
        .sample(frac=1, random_state=random_state)
        .reset_index(drop=True)
    )
    output_columns = ['Track Name', 'Artist Name(s)', 'Album Name',
                      'Release Date', 'Popularity', 'Sentiment_Label']

    # Return a copy so callers can add columns without touching (or getting
    # chained-assignment warnings about) the intermediate frames.
    return shuffled_tracks[output_columns].head(num_recommendations).copy()

# Function to get hybrid recommendations based on weighted popularity for a sentiment label
def hybrid_recommendations_by_sentiment(sentiment_label, num_recommendations=5, alpha=0.5):
    """Rank sentiment-matched tracks by recency-weighted popularity and enrich them via Spotify.

    Candidates come from ``content_based_recommendations_by_sentiment``. Each
    candidate's score is ``calculate_weighted_popularity(Release Date) *
    Popularity``; candidates are sorted by that score (highest first) and then
    looked up with ``get_song_details`` (one Spotify search per candidate).
    Candidates for which the lookup returns nothing are dropped.

    Parameters
    ----------
    sentiment_label : str
        Sentiment label to recommend for.
    num_recommendations : int, optional
        Number of candidates to draw (default 5); the result can be shorter
        when lookups fail.
    alpha : float, optional
        Currently unused; kept so existing callers keep working.

    Returns
    -------
    pandas.DataFrame
        One row per resolved track with the ``get_song_details`` fields plus
        ``'Sentiment'`` and ``'Weighted Popularity'``. Empty when there are no
        candidates or none could be resolved.
    """
    content_based_rec = content_based_recommendations_by_sentiment(sentiment_label, num_recommendations)

    if content_based_rec.empty:
        return pd.DataFrame()  # Return an empty DataFrame

    recency_weight = content_based_rec['Release Date'].apply(calculate_weighted_popularity)
    # assign() builds a new frame, so the candidate frame is not mutated and no
    # chained-assignment warning is triggered.
    hybrid_rec = content_based_rec.assign(
        **{'Weighted Popularity': recency_weight * content_based_rec['Popularity']}
    ).sort_values(by='Weighted Popularity', ascending=False)

    detailed_recommendations = []
    for _, row in hybrid_rec.iterrows():
        song_details = get_song_details(row['Track Name'], row['Artist Name(s)'])
        if not song_details:
            continue  # no Spotify match for this candidate
        song_details['Sentiment'] = row['Sentiment_Label']
        song_details['Weighted Popularity'] = float(row['Weighted Popularity'])
        detailed_recommendations.append(song_details)

    return pd.DataFrame(detailed_recommendations)

# Function to handle dropdown selection and display recommendations
def on_sentiment_change(change):
    """Widget callback: refresh the displayed recommendations for the chosen sentiment.

    ``change`` is a mapping whose ``'new'`` entry holds the selected sentiment
    label. The cell output is cleared, the dropdown is shown again, and then
    either the recommendation table or a "nothing found" message is rendered.
    """
    selected_sentiment = change['new']

    # Wipe the previous table, then put the selector back on screen
    clear_output(wait=True)
    display(sentiment_dropdown)

    results = hybrid_recommendations_by_sentiment(selected_sentiment)
    if results.empty:
        print(f"No recommendations found for sentiment: {selected_sentiment}")
        return

    display(results)

# Build the sentiment selector from the labels present in the data
sentiment_labels = music_df['Sentiment_Label'].unique().tolist()
default_sentiment = sentiment_labels[0]  # first label doubles as the initial selection

sentiment_dropdown = widgets.Dropdown(
    description='Sentiment:',
    options=sentiment_labels,
    value=default_sentiment,
)

# Refresh the recommendations whenever the selected value changes
sentiment_dropdown.observe(on_sentiment_change, names='value')

# Show the selector, then render recommendations for the initial selection
display(sentiment_dropdown)
on_sentiment_change({'new': default_sentiment})


Dropdown(description='Sentiment:', index=9, options=('neutral', 'love', 'amusement', 'admiration', 'sadness', …

Unnamed: 0,Song Name,Artist,Spotify URL,Preview URL,Album Art URL,Sentiment,Weighted Popularity
0,All Ur Luv,Wavedash,https://open.spotify.com/track/3K1RnBxBkrMwoBE...,,https://i.scdn.co/image/ab67616d0000b273a85915...,optimism,0.18
1,Me Up (feat. Chateau),Chocolate Puma,https://open.spotify.com/track/4FYiIggovSUI7BI...,https://p.scdn.co/mp3-preview/556e9caabee4cef5...,https://i.scdn.co/image/ab67616d0000b273fd2b58...,optimism,0.02
2,Gotta Go My Own Way,Zac Efron,https://open.spotify.com/track/5rWIO1uP0GwAoU4...,,https://i.scdn.co/image/ab67616d0000b273342a94...,optimism,0.01
3,Jump into the Fog,The Wombats,https://open.spotify.com/track/3l8dM1wjgFh98jp...,https://p.scdn.co/mp3-preview/8676059923114735...,https://i.scdn.co/image/ab67616d0000b273ee47a7...,optimism,0.01
4,Find You There,We The Kings,https://open.spotify.com/track/7FVA0PjjYqjYR7z...,https://p.scdn.co/mp3-preview/899aef1288158dff...,https://i.scdn.co/image/ab67616d0000b273734dbf...,optimism,0.0


In [26]:
import json 
import os

# Function to get recommendations for all sentiments and output as JSON
def get_all_sentiment_recommendations():
    """Export hybrid recommendations for every sentiment label to a JSON file.

    For each distinct value of ``music_df['Sentiment_Label']`` the hybrid
    recommender is run and its rows are stored as a list of records under that
    label. The resulting mapping is written, indented by 2 spaces, to
    ``deliverables/recommendations.json`` beneath the current working
    directory; the folder is created when missing. Nothing is returned.
    """
    labels = music_df['Sentiment_Label'].unique().tolist()

    # label -> list of row dicts (JSON-serialisable form of each DataFrame)
    all_recommendations = {
        label: hybrid_recommendations_by_sentiment(label).to_dict(orient='records')
        for label in labels
    }

    # Make sure the output folder exists before writing into it
    output_dir = os.path.join(os.getcwd(), 'deliverables')
    os.makedirs(output_dir, exist_ok=True)

    output_path = os.path.join(output_dir, 'recommendations.json')
    with open(output_path, 'w') as json_file:
        json.dump(all_recommendations, json_file, indent=2)

# Generate recommendations for every sentiment label and save them to
# deliverables/recommendations.json. This runs the hybrid recommender once per
# label, which in turn performs Spotify searches through get_song_details().
get_all_sentiment_recommendations()


---