<a href="https://colab.research.google.com/github/vsswethaa/Projects/blob/main/Spotify.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
import pandas as pd
from sklearn.decomposition import TruncatedSVD
from sklearn.preprocessing import LabelEncoder
import numpy as np

# Step 1: Load Dataset
# Colab-style absolute path: the CSV must already exist at this location.
file_path = "/content/generated_music_dataset.csv"
df = pd.read_csv(file_path)

# Step 2: Preprocess Data
# Keep only the identifier and feedback columns. The explicit .copy() makes
# df_filtered an independent frame, so the column assignments below never
# write into a view of df.
df_filtered = df[['user_id', 'song', 'listen_count', 'liked', 'added_to_playlist']].copy()

# Step 3: Create an interaction score
# Weighted sum of the three feedback signals: listen_count counts as-is,
# liked is weighted x3 and added_to_playlist x2.
# (presumably liked / added_to_playlist are 0/1 flags -- TODO confirm against the dataset)
df_filtered['interaction_score'] = (
    df_filtered['listen_count']
    + df_filtered['liked'] * 3
    + df_filtered['added_to_playlist'] * 2
)

# Step 4: Encode Categorical Variables
# LabelEncoder maps each distinct user_id / song to an integer label; the
# fitted encoders are kept so labels can be translated back later.
user_encoder = LabelEncoder()
song_encoder = LabelEncoder()

df_filtered['user_encoded'] = user_encoder.fit_transform(df_filtered['user_id'])
df_filtered['song_encoded'] = song_encoder.fit_transform(df_filtered['song'])

# Step 5: Create User-Song Interaction Matrix
# Rows are encoded users, columns are encoded songs. pivot_table aggregates
# repeated (user, song) rows with its default aggfunc (mean), and pairs with
# no interaction are filled with 0.
interaction_matrix = df_filtered.pivot_table(index='user_encoded', columns='song_encoded', values='interaction_score', fill_value=0)

# Step 6: Train SVD Model
# TruncatedSVD cannot extract more components than the matrix has columns
# (songs), so cap the requested 50 instead of failing on a small catalogue.
n_components = min(50, interaction_matrix.shape[1])
svd = TruncatedSVD(n_components=n_components, random_state=42)
# user_factors: one latent vector per matrix row (user);
# song_factors: one column per matrix column (song).
user_factors = svd.fit_transform(interaction_matrix)
song_factors = svd.components_

# Step 7: Define Recommendation Function
def recommend_songs(user_id, n=3):
    """Recommend the top N songs for a given user_id.

    Scores every song by the dot product of the user's latent vector with
    the song factors and returns the N highest-scoring song names. Songs the
    user has already interacted with are NOT filtered out.

    Parameters
    ----------
    user_id : value from the original 'user_id' column
    n : int, default 3
        Maximum number of songs to return.

    Returns
    -------
    list
        Song names ordered from highest to lowest score. Empty when the user
        is unknown, has no row in the interaction matrix, or n <= 0.
    """
    if n <= 0:
        return []  # A negative n would otherwise slice "all but the last |n|"
    if user_id not in user_encoder.classes_:
        return []  # If the user is new, we can't recommend based on past data

    user_label = user_encoder.transform([user_id])[0]

    # user_factors rows follow the row order of interaction_matrix, which is
    # not guaranteed to equal the encoder label: pivot_table (dropna=True by
    # default) can omit users/songs whose scores are all missing. Translate
    # label -> position explicitly instead of assuming they coincide.
    if user_label not in interaction_matrix.index:
        return []
    user_pos = interaction_matrix.index.get_loc(user_label)
    scores = np.dot(user_factors[user_pos], song_factors)

    # Positions (matrix columns) of the N best scores, best first
    top_positions = np.argsort(scores)[::-1][:n]

    # Column position -> encoded song label -> song name
    top_labels = interaction_matrix.columns[top_positions].to_numpy()
    top_songs = song_encoder.inverse_transform(top_labels)

    return list(top_songs)

# Step 8: Get Recommendations for a User
# recommend_songs is called with its default n=3; it returns an empty list
# for a user_id the encoder has never seen.
user_id_example = 114
top_3_songs = recommend_songs(user_id_example)

# Output the recommended songs
# Ranks are printed starting at 1; if the list is empty only the header
# line below is shown.
print(f"Top 3 Recommended Songs for User {user_id_example}:")
for idx, song in enumerate(top_3_songs, 1):
    print(f"{idx}. {song}")


Top 3 Recommended Songs for User 114:
1. In the End
2. Hound Dog
3. Uptown Funk


In [10]:
from sklearn.metrics import precision_score

def precision_at_k(user_id, k=3):
    """Precision@K: share of the K recommended slots filled by songs the
    user already has in df_filtered.

    Returns 0 for an unknown user or when no recommendations come back;
    otherwise hits / k.
    """
    is_known_user = user_id in user_encoder.classes_
    if not is_known_user:
        return 0  # Nothing can be recommended for an unseen user

    # Every song that appears in this user's rows
    user_rows = df_filtered['user_id'] == user_id
    listened = df_filtered[user_rows]['song'].tolist()

    recs = recommend_songs(user_id, k)
    if len(recs) == 0:
        return 0  # Empty recommendation list

    # Count recommended titles that the user has interacted with
    hits = 0
    for title in recs:
        if title in listened:
            hits += 1

    # The denominator is always k, even if fewer than k songs were returned
    return hits / k

# Example usage
# NOTE: this rebinds user_id_example (it was 114 in the recommendation demo),
# so the metric printed here is for user 113. Later cells that read
# user_id_example will pick up this value.
user_id_example = 113
precision = precision_at_k(user_id_example, k=3)
print(f"Precision@3: {precision:.2f}")


Precision@3: 1.00


In [11]:
def recall_at_k(user_id, k=3):
    """Calculates Recall@K for a given user_id.

    Recall@K = (recommended songs the user has interacted with)
               / (number of distinct songs the user has interacted with).

    Parameters
    ----------
    user_id : value from the 'user_id' column of df_filtered
    k : int, default 3
        Number of recommendations to request from recommend_songs.

    Returns
    -------
    float or int
        Value in [0, 1]; 0 when the user is unknown or has no songs.
    """
    if user_id not in user_encoder.classes_:
        return 0  # No recommendations possible

    # Use a set: if a user has several rows for the same song, that song must
    # still count once as a relevant item. Counting rows would inflate the
    # denominator and understate recall.
    actual_songs = set(df_filtered.loc[df_filtered['user_id'] == user_id, 'song'])

    if not actual_songs:
        return 0  # No ground truth available

    recommended_songs = recommend_songs(user_id, k)

    relevant_count = sum(1 for song in recommended_songs if song in actual_songs)
    return relevant_count / len(actual_songs)

# Example usage
# Reads user_id_example from the notebook's global state; it is not assigned
# in this cell, so the cell that sets it must have been run first.
recall = recall_at_k(user_id_example, k=3)
print(f"Recall@3: {recall:.2f}")


Recall@3: 0.30


In [12]:
def dcg_at_k(scores, k):
    """Discounted Cumulative Gain over the first k entries of ``scores``.

    Each gain is divided by log2(rank + 1), with ranks starting at 1, so the
    first item is undiscounted. Returns 0 for an empty input.
    """
    total = 0
    for position, gain in enumerate(scores[:k]):
        # position is 0-based, hence +2: rank 1 -> log2(2) = 1
        total += gain / np.log2(position + 2)
    return total

def ndcg_at_k(user_id, k=3):
    """Computes Normalized DCG@K with binary relevance.

    A recommended song is relevant (gain 1) when it appears among the user's
    songs in df_filtered, otherwise its gain is 0. The DCG of the
    recommendation list is divided by the DCG of the best achievable top-k
    list for this user.

    Parameters
    ----------
    user_id : value from the 'user_id' column of df_filtered
    k : int, default 3
        Cut-off rank.

    Returns
    -------
    float or int
        Value in [0, 1]; 0 when the user is unknown or has no relevant songs.
    """
    if user_id not in user_encoder.classes_:
        return 0  # No recommendations possible

    # Distinct songs the user interacted with (set -> each song counted once)
    actual_songs = set(df_filtered.loc[df_filtered['user_id'] == user_id, 'song'])
    recommended_songs = recommend_songs(user_id, k)

    relevance = [1 if song in actual_songs else 0 for song in recommended_songs]

    # The ideal ranking fills as many of the k slots as possible with relevant
    # songs. It must be derived from the user's full set of relevant songs,
    # not from re-sorting the recommended list: re-sorting would score a list
    # with one hit among k slots against an "ideal" that also has one hit.
    ideal_relevance = [1] * min(k, len(actual_songs))

    dcg = dcg_at_k(relevance, k)
    idcg = dcg_at_k(ideal_relevance, k)

    return dcg / idcg if idcg > 0 else 0  # Normalize

# Example usage
# Reads user_id_example from the notebook's global state; it is not assigned
# in this cell, so the cell that sets it must have been run first.
ndcg = ndcg_at_k(user_id_example, k=3)
print(f"NDCG@3: {ndcg:.2f}")


NDCG@3: 1.00
