In [2]:
import requests
import base64

CLIENT_ID = "f9761c2fec534cb1a5cff8397d9884e6"
CLIENT_SECRET = "d7219c2ed840468e87d0bc21a0101ec6"

client_credentials = f"{CLIENT_ID}:{CLIENT_SECRET}"
client_credentials_base64 = base64.b64encode(client_credentials.encode())

token_url = "https://accounts.spotify.com/api/token"
headers = {
    "Authorization" : f"Basic {client_credentials_base64.decode()}"
}
data = {
    "grant_type" : "client_credentials"
}
response = requests.post(token_url, data=data, headers=headers)

if response.status_code == 200:
    access_token = response.json()["access_token"]
    print("Access token obtained successfully.")
else:
    print("Error obtaining access token")
    exit()

Access token obtained successfully.


In [2]:
pip install spotipy

Collecting spotipy
  Downloading spotipy-2.24.0-py3-none-any.whl.metadata (4.9 kB)
Collecting redis>=3.5.3 (from spotipy)
  Downloading redis-5.2.0-py3-none-any.whl.metadata (9.1 kB)
Downloading spotipy-2.24.0-py3-none-any.whl (30 kB)
Downloading redis-5.2.0-py3-none-any.whl (261 kB)
   ---------------------------------------- 0.0/261.4 kB ? eta -:--:--
   ----------------- ---------------------- 112.6/261.4 kB 3.2 MB/s eta 0:00:01
   ---------------------------------------  256.0/261.4 kB 3.9 MB/s eta 0:00:01
   ---------------------------------------- 261.4/261.4 kB 3.2 MB/s eta 0:00:00
Installing collected packages: redis, spotipy
Successfully installed redis-5.2.0 spotipy-2.24.0
Note: you may need to restart the kernel to use updated packages.


In [3]:
import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyOAuth

def get_trending_playlist_data(playlist_ids, access_token):
    sp = spotipy.Spotify(auth=access_token)
    music_data = []
    for playlist_id in playlist_ids:
        print(f"Fetching data from playlist: {playlist_id}")
        playlist_tracks = sp.playlist_tracks(playlist_id, fields="items(track(id, name, artists, album(id, name)))")
        for track_info in playlist_tracks["items"]:
            track = track_info["track"]
            track_name = track["name"]
            artists = ", ".join([artist["name"] for artist in track["artists"]])
            album_name = track["album"]["name"]
            album_id = track["album"]["id"]
            track_id = track["id"]
    
            audio_features = sp.audio_features(track_id)[0] if track_id != "Not available" else None
            try:
                album_info = sp.album(album_id) if album_id != "Not available" else None
                release_date = album_info["release_date"] if album_info else None
            except:
                release_date = None
            try:
                track_info = sp.track(track_id) if track_id != "Not available" else None
                popularity = track_info["popularity"] if track_info else None
            except:
                popularity = None

            track_data = {
                "Track Name" : track_name,
                "Artist" : artists,
                "Album Name" : album_name,
                "Album ID" : album_id,
                "Track ID" : track_id,
                "Popularity" : popularity,
                "Release Date" : release_date,
                "Duration (ms) " : audio_features["duration_ms"] if audio_features else None,
                "Explicit" : track_info.get("explicit",None),
                "External URLs" : track_info.get("external_urls",{}).get("spotify",None),
                "Danceability" : audio_features["danceability"] if audio_features else None,
                "Energy" : audio_features["energy"] if audio_features else None,
                "Key" : audio_features["key"] if audio_features else None,
                "Loudness" : audio_features["loudness"] if audio_features else None,
                "Mode" : audio_features["mode"] if audio_features else None,
                "Speechiness" : audio_features["speechiness"] if audio_features else None,
                "Acousticness" : audio_features["acousticness"] if audio_features else None,
                "Instrumentalness" : audio_features["instrumentalness"] if audio_features else None,
                "Liveness" : audio_features["liveness"] if audio_features else None,
                "Valence" : audio_features["valence"] if audio_features else None,
                "Tempo" : audio_features["tempo"] if audio_features else None,
            }
            music_data.append(track_data)
    df = pd.DataFrame(music_data)
    return df

In [4]:
playlist_ids = [
    "37i9dQZF1DX76Wlfdnj7AP",  # Add more playlist IDs here
    "37i9dQZF1DXcBWIGoYBM5M",  # Example additional playlist
    "37i9dQZF1DX4JAvHpjipBk"   # Another playlistL
]
music_df = get_trending_playlist_data(playlist_ids, access_token)
print(f"Total unique songs fetched: {len(music_df)}")
print(music_df["Track Name"])
music_df.to_csv("music_data.csv", index=False)


Fetching data from playlist: 37i9dQZF1DX76Wlfdnj7AP
Fetching data from playlist: 37i9dQZF1DXcBWIGoYBM5M
Fetching data from playlist: 37i9dQZF1DX4JAvHpjipBk
Total unique songs fetched: 250
0                               APT.
1              The Emptiness Machine
2                        Not Like Us
3                        Rah Tah Tah
4                            Disease
                   ...              
245                       Good to Me
246                     APOLOGY SONG
247                       Recorrerte
248    Benefits (feat. Tee Grizzley)
249                        BLUEBERRY
Name: Track Name, Length: 250, dtype: object


In [7]:
print(music_df.isnull().sum())

Track Name          0
Artist              0
Album Name          0
Album ID            0
Track ID            0
Popularity          0
Release Date        0
Duration (ms)       0
Explicit            0
External URLs       0
Danceability        0
Energy              0
Key                 0
Loudness            0
Mode                0
Speechiness         0
Acousticness        0
Instrumentalness    0
Liveness            0
Valence             0
Tempo               0
dtype: int64


In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from datetime import datetime
from sklearn.metrics.pairwise import cosine_similarity

data = pd.read_csv("music_data.csv")

In [6]:
def calculate_weighted_popularity(release_date):
    release_date = datetime.strptime(release_date, "%Y-%m-%d")
    time_span = datetime.now() - release_date
    weight = 1 / (time_span.days +1 )
    return weight

In [7]:
scaler = MinMaxScaler()
music_features = music_df[["Danceability","Energy","Key","Loudness","Mode","Speechiness","Acousticness","Instrumentalness","Liveness","Valence","Tempo"]].values
music_features_scaled = scaler.fit_transform(music_features)

In [8]:
def content_based_recommendations(input_song_name, num_recommendations=5):
    if input_song_name not in music_df["Track Name"].values:
        print(f"'{input_song_name}' not found in dataset. Please enter a valid song name.")
        return
    input_song_index = music_df[music_df["Track Name"] == input_song_name].index[0]
    similarity_scores = cosine_similarity([music_features_scaled[input_song_index]], music_features_scaled)
    similar_song_indices = similarity_scores.argsort()[0][::-1][1:num_recommendations + 1]
    content_based_recommendations = music_df.iloc[similar_song_indices][["Track Name", "Artist", "Album Name", "Release Date", "Popularity"]]
    return content_based_recommendations

In [9]:
import pandas as pd

def hybrid_recommendations(input_song_name, num_recommendations=5, alpha=0.5):
    if input_song_name not in music_df["Track Name"].values:
         print(f"'{input_song_name}' not found in dataset. Please enter a valid song name.")
         return
    content_based_rec = content_based_recommendations(input_song_name, num_recommendations)
    popularity_score = music_df.loc[music_df["Track Name"] == input_song_name, "Popularity"].values[0]
    weighted_popularity_score = popularity_score * calculate_weighted_popularity(
        music_df.loc[music_df["Track Name"] == input_song_name, "Release Date"].values[0]
    )
    new_entry = pd.DataFrame({
        "Track Name" : [input_song_name],
        "Artist" : [music_df.loc[music_df["Track Name"] == input_song_name, "Artist"].values[0]],
        "Album Name" : [music_df.loc[music_df["Track Name"] == input_song_name, "Album Name"].values[0]],
        "Release Date" : [music_df.loc[music_df["Track Name"] == input_song_name, "Release Date"].values[0]],
        "Popularity" : [weighted_popularity_score]
    })
    hybrid_recommendations = pd.concat([content_based_rec, new_entry], ignore_index=True)
    hybrid_recommendations = hybrid_recommendations.sort_values(by="Popularity", ascending=False)
    hybrid_recommendations = hybrid_recommendations[hybrid_recommendations["Track Name"] != input_song_name]
    return hybrid_recommendations

In [None]:
input_song_name = input(print("enter song"))
recommendations = hybrid_recommendations(input_song_name, num_recommendations=5)
print(f"Hybrid recommended songs for '{input_song_name}' :")
print(recommendations)

enter song
