# Music Recommendation System - Demo Notebook

This notebook demonstrates how to use the Music Recommendation System to get personalized song recommendations.

## Overview

The system uses **collaborative filtering** with k-nearest neighbors (k-NN) to recommend songs based on:
- **User-based recommendations**: Find users with similar music tastes and recommend songs they like
- **Item-based recommendations**: Find songs similar to a given track based on user listening patterns


In [None]:
# Import required libraries (standard library first, then third-party)
import warnings

import numpy as np
import pandas as pd

# Silence warnings so the demo output stays readable. The filter is installed
# before the project modules are imported, so warnings emitted while importing
# them are hidden as well.
# NOTE(review): this suppresses *every* warning category; narrow or remove the
# filter when debugging.
warnings.filterwarnings('ignore')

# Import our modules (project-local; they must be importable from the
# notebook's working directory)
from data_loader import load_and_clean_data
from preprocess import build_user_item_matrix
from model import MusicRecommender
from recommend import RecommendationEngine

print("✅ All imports successful!")


## Step 1: Load and Explore Data

First, let's load the music data and explore its structure.


In [None]:
# Load and clean the data
# Replace 'data/music_data.csv' with your actual data file path
data_path = "data/music_data.csv"

try:
    df = load_and_clean_data(data_path, min_interactions=1)
    print(f"✅ Loaded {len(df)} interactions")
    print(f"\nData shape: {df.shape}")
    print(f"\nColumns: {list(df.columns)}")
    print("\nFirst few rows:")
    print(df.head())
except FileNotFoundError:
    # Report the path that was actually tried instead of a hard-coded copy.
    print(f"❌ Data file not found. Please ensure '{data_path}' exists.")
    print("\nCreating sample data for demonstration...")

    # Create sample data (seeded so every run produces the same frame)
    np.random.seed(42)
    n_users = 100
    n_tracks = 50
    n_interactions = 500
    n_artists = 19  # artists are numbered 1..19

    # Draw users, then tracks, then ratings - in that order - so the seeded
    # random stream yields one fixed set of (user, track, rating) rows.
    sampled_users = [np.random.randint(1, n_users + 1) for _ in range(n_interactions)]
    sampled_tracks = [np.random.randint(1, n_tracks + 1) for _ in range(n_interactions)]
    sampled_ratings = np.random.randint(1, 6, n_interactions)

    # One artist per track: track metadata must be a function of the track,
    # not of the interaction row, otherwise the same track_id would show up
    # with conflicting song/artist names.
    artist_of_track = {
        track_no: np.random.randint(1, n_artists + 1)
        for track_no in range(1, n_tracks + 1)
    }

    sample_data = {
        'user_id': [f'user_{user_no}' for user_no in sampled_users],
        'track_id': [f'track_{track_no}' for track_no in sampled_tracks],
        'rating': sampled_ratings,
        'song_name': [f'Song {track_no}' for track_no in sampled_tracks],
        'artist_name': [f'Artist {artist_of_track[track_no]}' for track_no in sampled_tracks]
    }

    # Keep a single interaction per (user, track) pair.
    df = pd.DataFrame(sample_data).drop_duplicates(subset=['user_id', 'track_id'], keep='first')
    print(f"✅ Created sample data with {len(df)} interactions")
    print(df.head())


In [None]:
# Explore data statistics
# Compute the counts once; they are reused for the per-user/per-track averages.
total_interactions = len(df)
unique_users = df['user_id'].nunique()
unique_tracks = df['track_id'].nunique()

# Guard the divisions so an empty frame reports 0.00 instead of raising
# ZeroDivisionError.
avg_per_user = total_interactions / unique_users if unique_users else 0.0
avg_per_track = total_interactions / unique_tracks if unique_tracks else 0.0

print("📊 Data Statistics:")
print(f"  Unique Users: {unique_users}")
print(f"  Unique Tracks: {unique_tracks}")
print(f"  Total Interactions: {total_interactions}")
print(f"  Average interactions per user: {avg_per_user:.2f}")
print(f"  Average interactions per track: {avg_per_track:.2f}")

# Rating statistics are only shown when the data carries a 'rating' column.
if 'rating' in df.columns:
    ratings = df['rating']
    print("\n  Rating Statistics:")
    print(f"    Min: {ratings.min()}")
    print(f"    Max: {ratings.max()}")
    print(f"    Mean: {ratings.mean():.2f}")
    print(f"    Median: {ratings.median():.2f}")


## Step 2: Build User-Item Matrix

Convert the interaction data into a sparse matrix format for efficient computation.


In [None]:
# Build user-item matrix
user_item_matrix, user_to_idx, track_to_idx, metadata = build_user_item_matrix(df)

# Sparsity = share of matrix cells that hold no stored entry.
total_cells = user_item_matrix.shape[0] * user_item_matrix.shape[1]
stored_entries = user_item_matrix.nnz
# Guard the division so an empty matrix reports 0.00% instead of raising
# ZeroDivisionError.
sparsity_pct = (1 - stored_entries / total_cells) * 100 if total_cells else 0.0

print("✅ User-Item Matrix created!")
print(f"  Matrix shape: {user_item_matrix.shape} (users × tracks)")
print(f"  Non-zero entries: {stored_entries}")
print(f"  Sparsity: {sparsity_pct:.2f}%")
print(f"\n  User mapping: {len(user_to_idx)} users")
print(f"  Track mapping: {len(track_to_idx)} tracks")


## Step 3: Train the Recommendation Model

Train a k-NN model using cosine similarity to find similar users and tracks.


In [None]:
# Initialize and train the recommender
# The model is configured with 10 neighbors and the cosine metric, then fitted
# on the matrix, ID mappings and metadata produced in Step 2.
recommender = MusicRecommender(n_neighbors=10, metric='cosine')
recommender.fit(user_item_matrix, user_to_idx, track_to_idx, metadata)

# Echo the configuration back from the fitted object rather than repeating
# the literals above.
print("✅ Model trained successfully!")
print(f"  Using {recommender.n_neighbors} nearest neighbors")
print(f"  Similarity metric: {recommender.metric}")


## Step 4: Get User-Based Recommendations

Recommend songs for a specific user based on similar users' preferences.


In [None]:
# Get recommendations for a user
# Use the first user as the example; None when the mapping is empty, so the
# cell reports "no recommendations" instead of failing with IndexError.
test_user = next(iter(user_to_idx.keys()), None)
print(f"Getting recommendations for user: {test_user}")

recommendations = (
    recommender.recommend_for_user(test_user, n_recommendations=10)
    if test_user is not None
    else []
)

# These checks do not depend on the individual track, so do them once.
has_metadata = metadata is not None and not metadata.empty
show_song = has_metadata and 'song_name' in metadata.columns
show_artist = has_metadata and 'artist_name' in metadata.columns

if recommendations:
    print(f"\n✅ Found {len(recommendations)} recommendations:")
    print("\nTop Recommendations:")
    for rank, (track_id, score) in enumerate(recommendations, 1):
        # Append song/artist details when this track has a metadata row.
        track_info = ""
        if has_metadata and track_id in metadata.index:
            if show_song:
                track_info += f" - {metadata.loc[track_id, 'song_name']}"
            if show_artist:
                track_info += f" by {metadata.loc[track_id, 'artist_name']}"
        print(f"  {rank}. {track_id}{track_info} (similarity: {score:.4f})")
else:
    print("❌ No recommendations found for this user.")


## Step 5: Get Item-Based Recommendations

Find tracks similar to a given track based on user listening patterns.


In [None]:
# Get similar tracks for a given track
# Use the first track as the example; None when the mapping is empty, so the
# cell reports "no similar tracks" instead of failing with IndexError.
test_track = next(iter(track_to_idx.keys()), None)
print(f"Finding tracks similar to: {test_track}")

# These checks do not depend on the individual track, so do them once.
has_metadata = metadata is not None and not metadata.empty
show_song = has_metadata and 'song_name' in metadata.columns
show_artist = has_metadata and 'artist_name' in metadata.columns

# Show track info if available
if has_metadata and test_track in metadata.index:
    if show_song:
        print(f"  Song: {metadata.loc[test_track, 'song_name']}")
    if show_artist:
        print(f"  Artist: {metadata.loc[test_track, 'artist_name']}")

similar_tracks = (
    recommender.recommend_similar_tracks(test_track, n_recommendations=10)
    if test_track is not None
    else []
)

if similar_tracks:
    print(f"\n✅ Found {len(similar_tracks)} similar tracks:")
    print("\nSimilar Tracks:")
    for rank, (track_id, score) in enumerate(similar_tracks, 1):
        # Append song/artist details when this track has a metadata row.
        track_info = ""
        if has_metadata and track_id in metadata.index:
            if show_song:
                track_info += f" - {metadata.loc[track_id, 'song_name']}"
            if show_artist:
                track_info += f" by {metadata.loc[track_id, 'artist_name']}"
        print(f"  {rank}. {track_id}{track_info} (similarity: {score:.4f})")
else:
    print("❌ No similar tracks found.")


## Step 6: Using the High-Level Recommendation Engine

For easier usage, use the `RecommendationEngine` class, which loads the data, preprocesses it, and trains the model in a single step.


In [None]:
# Initialize the recommendation engine
# This automatically loads data, preprocesses, and trains the model
try:
    # Only the constructor sits inside the try: a FileNotFoundError here means
    # the data file is missing, which is the one case the fallback handles.
    engine = RecommendationEngine(data_path, n_neighbors=10, min_interactions=1)
except FileNotFoundError:
    print("⚠️  Using sample data from previous steps...")
    # Create engine from existing data: bypass __init__ (which would try to
    # read the file again) and attach the objects built in Steps 1-3.
    # NOTE(review): this depends on the engine's internal attribute names -
    # confirm against the RecommendationEngine implementation.
    engine = RecommendationEngine.__new__(RecommendationEngine)
    engine.df = df
    engine.matrix = user_item_matrix
    engine.user_map = user_to_idx
    engine.track_map = track_to_idx
    engine.metadata = metadata
    engine.recommender = recommender
else:
    # Get system statistics (only for an engine that was fully initialized
    # from the data file)
    stats = engine.get_user_stats()
    print("📊 System Statistics:")
    for key, value in stats.items():
        print(f"  {key}: {value}")

# Get recommendations using the engine - shared by both paths, and skipped
# when there are no users to recommend for.
if len(engine.user_map) > 0:
    example_user = next(iter(engine.user_map.keys()))
    print(f"\n🎵 Recommendations for user '{example_user}':")
    user_recs = engine.recommend_for_user(example_user, n_recommendations=5)
    print(user_recs.to_string(index=False))


## Step 7: Batch Recommendations

Get recommendations for multiple users at once.


In [None]:
# Get recommendations for multiple users
sample_users = list(user_to_idx.keys())[:5]  # First 5 users

# Map each sampled user to their top-5 recommendation list.
all_recommendations = {
    user_id: recommender.recommend_for_user(user_id, n_recommendations=5)
    for user_id in sample_users
}

print("📋 Batch Recommendations:")
for user_id, recs in all_recommendations.items():
    print(f"\n  User: {user_id}")
    if recs:
        # Show only the track IDs of the three best-scored entries.
        print(f"    Top 3: {[track_id for track_id, _ in recs[:3]]}")
    else:
        print("    No recommendations available")


## Summary

This notebook demonstrated:

1. ✅ Loading and cleaning music interaction data
2. ✅ Building a sparse user-item matrix
3. ✅ Training a k-NN collaborative filtering model
4. ✅ Getting user-based recommendations
5. ✅ Getting item-based (similar tracks) recommendations
6. ✅ Using the high-level RecommendationEngine interface
7. ✅ Getting batch recommendations for multiple users

### Next Steps

- Try the Streamlit web app: `streamlit run app.py`
- Experiment with different `n_neighbors` values
- Try different similarity metrics (cosine, euclidean, etc.)
- Use your own music data!

### Tips

- **More data = Better recommendations**: The more user interactions you have, the better the recommendations
- **Tune k**: Experiment with different values of `n_neighbors` — a small k relies on only the closest matches, while a large k also draws on less similar neighbors
- **Handle cold start**: New users or tracks with no interactions won't get recommendations - consider hybrid approaches
