In [80]:
import pandas as pd
import numpy as np

In [81]:
df=pd.read_csv('../Data/Spotify_with_genre_clustered.csv')

In [82]:
df.head()

Unnamed: 0,artist_name,track_name,artist_pop,popularity,genre,track_uri,artist_uri,year,cluster
0,34,Preachin' To The Choir,0.37,0.0,1,6JKj9seJCshLVCfBLFp7dS,55RI2GNCfyXr0f14uIdhwd,2017,0
1,2911,You Work Days I Work Nights,0.29,0.108696,2,7L7u4PrNt5WMtVI9lHWlLm,6k3UpifDbb2ox25edM5j3P,2012,0
2,485,Sera,0.44,0.336957,1,12EyOqMPMyAKlKSynICAGN,2qhLqZ1pkiUl5HNrU2Nz0R,2007,0
3,1268,Big Shit Poppin,0.69,0.206522,1,3tCDjKseLQJhShfbeg4m7K,4yBK75WVCQXej1p04GWqxH,2007,0
4,1525,No La Voy A Engañar,0.74,0.586957,1,0UICwSsSpUt98XkeCfupFn,5bSfBBCxY8QAk4Pifveisz,2012,0


In [83]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from spotipy.client import SpotifyException

In [84]:
client_id = '6444952d25014134affbddb6854f27c7'
client_secret = '49c9c42e4b54426785ae5856b45b719e'
client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [85]:
# Function to fetch track info
def get_track_info(track_name):
    results = sp.search(q=track_name, limit=1)
    if results['tracks']['items']:
        track = results['tracks']['items'][0]
        track_uri = track['uri'].split(':')[-1]  # Extracting the track URI
        artist_name = track['artists'][0]['name']  # Extracting the artist name
        release_date = track['album']['release_date'][:4]  # Extracting the year of release
        popularity = track['popularity']  # Track popularity
        artist_pop = sp.artist(track['artists'][0]['id'])['popularity']  # Artist popularity
        return track_uri, artist_name, release_date, popularity, artist_pop
    else:
        return None, None, None, None, None

In [86]:
# Function to fetch specific audio features
def get_audio_features(track_url):
    audio_features = sp.audio_features([track_url])
    if audio_features:
        # Extracting only desired audio features
        selected_features = {
            'danceability': audio_features[0]['danceability'],
            'energy': audio_features[0]['energy'],
            'key': audio_features[0]['key'],
            'loudness': audio_features[0]['loudness'],
            'mode': audio_features[0]['mode'],
            'speechiness': audio_features[0]['speechiness'],
            'acousticness': audio_features[0]['acousticness'],
            'instrumentalness': audio_features[0]['instrumentalness'],
            'liveness': audio_features[0]['liveness'],
            'valence': audio_features[0]['valence'],
            'tempo': audio_features[0]['tempo']
        }
        return selected_features
    else:
        return None

In [87]:
# Example usage
track_name = input("Enter the track name: ")
track_uri, artist_name, release_date, popularity, artist_pop = get_track_info(track_name)
if track_uri:
    print("Track URL:", track_uri)
    print("Artist:", artist_name)
    print("Release Year:", release_date)
    print("Track Popularity:", popularity)
    print("Artist Popularity:", artist_pop)
    audio_features = get_audio_features(track_uri)
    if audio_features:
        print("Audio Features:", audio_features)
    else:
        print("Failed to fetch audio features.")
else:
    print("Track not found.")

Track URL: 2LKOHdMsL0K9KwcPRlJK2v
Artist: Mr.Kitty
Release Year: 2014
Track Popularity: 83
Artist Popularity: 65
Audio Features: {'danceability': 0.585, 'energy': 0.595, 'key': 8, 'loudness': -10.444, 'mode': 1, 'speechiness': 0.0328, 'acousticness': 0.0696, 'instrumentalness': 0.266, 'liveness': 0.0837, 'valence': 0.039, 'tempo': 140.037}


In [88]:
import pickle

In [89]:
# Load scaler and XGBoost model
base_path = '../Models/'
with open(base_path+'scaler.pkl', 'rb') as f:
    scaler = pickle.load(f)

with open(base_path+'xgb.pkl', 'rb') as f:
    xgb_model = pickle.load(f)

In [90]:
pd.DataFrame(audio_features, index=[0]).head()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,0.585,0.595,8,-10.444,1,0.0328,0.0696,0.266,0.0837,0.039,140.037


In [91]:
# Function to preprocess input features using scaler
def preprocess_input(audio_features):
    # Convert audio features to numpy array
    # features_array = np.array(list(audio_features.values())).reshape(1, -1)
    scaled_features = scaler.transform(pd.DataFrame(audio_features, index=[0]))
    return scaled_features

In [92]:
# Function to make prediction using XGBoost model
def predict(features):
    return xgb_model.predict(features)[0]

In [93]:
# Preprocess input features
scaled_features = preprocess_input(audio_features)

In [94]:
# Make prediction
genre = predict(scaled_features)

In [95]:


with open(base_path+'encoder.pkl', 'rb') as f:
    genre_map = pickle.load(f)
print("Predicted Genre:", genre_map.inverse_transform([int(genre)])[0])

Predicted Genre: Rap


In [96]:
with open(base_path+'label_encoder.pkl', 'rb') as f:
    artist_name_map = pickle.load(f)
    
artists = df['artist_name'].unique()

print(artist_name_map.inverse_transform(artists))

['A Thousand Horses' 'Water Liars' 'Chambao' ... 'Steve Angello'
 'Sebastian Ingrosso' 'Man Man']


In [97]:
# Create DataFrame
data_df = pd.DataFrame({
    "artist_pop": [artist_pop],
    "popularity": [popularity],
    "year": [release_date],
    "genre": [genre],
    # "artist_name": [artist_name_map.inverse_transform([artist_name])[0]]
})

In [98]:
# Load the model
with open(base_path+'birch.pkl', 'rb') as f:
    birch_model = pickle.load(f)

In [105]:
# Use the model to predict clusters
cluster = birch_model.predict(data_df)
print("Birch Clusters:", cluster)

Birch Clusters: [0]


In [108]:
#recommendation system
from sklearn.metrics.pairwise import cosine_similarity

#find nearest neighbors using cosine similarity
def find_nearest_neighbors(data, index, n_neighbors=5):
    similarities = cosine_similarity(data, data[index:index+1]).flatten()
    similar_indices = similarities.argsort()[-n_neighbors-1:-1]
    return similar_indices

# Function to recommend tracks
def recommend_tracks(data, cluster, n_recommendations=5):
    # Filter data for the same cluster
    cluster_data = data[data['cluster'] == cluster]
    # Find nearest neighbors
    similar_indices = find_nearest_neighbors(cluster_data.drop('cluster', axis=1), 0, n_recommendations)
    # Return recommendations
    return cluster_data.iloc[similar_indices]

# Example usage
recommendations = recommend_tracks(df, cluster[0])
print("Recommendations for the track:")
print(recommendations[['track_name', 'artist_name', 'popularity', 'genre']])







ValueError: could not convert string to float: "Preachin' To The Choir"