# Primavera Sound Discovery: Generate a playlist made up of the most popular songs by the artists in the lineup

In [1]:
# pip install -r requirements.txt

In [1]:
import os
import sys
import json
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials, SpotifyOAuth
import pandas as pd
from time import sleep

## Define Functions

### Credentials Management

In [2]:
def load_spotify_credentials():
    """Load the Spotify client ID and secret from the first complete source.

    Sources are tried in this order:

    1. The ``SPOTIFY_CLIENT_ID`` / ``SPOTIFY_CLIENT_SECRET`` environment
       variables.
    2. A ``spotify_credentials.json`` file (keys ``client_id`` and
       ``client_secret``) in ``secrets/``, ``../secrets/`` or the current
       directory.
    3. A ``spotify_client_id.txt`` / ``spotify_client_secret.txt`` pair in
       ``secrets/``, ``../secrets/`` or the current directory.

    A source only counts when it provides BOTH values non-empty; an
    incomplete or unreadable source is reported (when unreadable) and
    skipped so that later sources still get a chance.

    Returns:
        A ``(client_id, client_secret)`` tuple, or ``(None, None)`` when no
        source provides both values.
    """
    # First try environment variables
    client_id = os.environ.get('SPOTIFY_CLIENT_ID')
    client_secret = os.environ.get('SPOTIFY_CLIENT_SECRET')

    if client_id and client_secret:
        return client_id, client_secret

    # Try to find the secrets folder in current or parent directory
    possible_paths = [
        'secrets/spotify_credentials.json',
        '../secrets/spotify_credentials.json',
        'spotify_credentials.json',
    ]

    # Try JSON file first
    for json_path in possible_paths:
        if not os.path.exists(json_path):
            continue
        try:
            # utf-8-sig also accepts files saved with a byte-order mark,
            # which json.load would otherwise reject.
            with open(json_path, 'r', encoding='utf-8-sig') as f:
                creds = json.load(f)
        except (OSError, ValueError) as e:
            print(f"Error loading JSON credentials from {json_path}: {e}")
            continue

        if not isinstance(creds, dict):
            print(f"Error loading JSON credentials from {json_path}: expected a JSON object")
            continue

        json_id = creds.get('client_id')
        json_secret = creds.get('client_secret')
        # Only accept a complete pair; otherwise keep looking.
        if json_id and json_secret:
            return json_id, json_secret

    # Try individual text files
    for base_path in ['secrets', '../secrets', '.']:
        client_id_path = os.path.join(base_path, 'spotify_client_id.txt')
        client_secret_path = os.path.join(base_path, 'spotify_client_secret.txt')

        if not (os.path.exists(client_id_path) and os.path.exists(client_secret_path)):
            continue
        try:
            with open(client_id_path, 'r', encoding='utf-8-sig') as f:
                file_id = f.read().strip()
            with open(client_secret_path, 'r', encoding='utf-8-sig') as f:
                file_secret = f.read().strip()
        except (OSError, ValueError) as e:
            print(f"Error loading credentials from text files in {base_path}: {e}")
            continue

        # Empty files are treated as "not configured here".
        if file_id and file_secret:
            return file_id, file_secret

    return None, None

def get_spotify_client_with_auth_flow():
    """Build a Spotify client authorized through the Authorization Code flow.

    Credentials come from ``load_spotify_credentials()``. A token cached in
    ``.spotify_cache`` is reused when one is present and not expired;
    otherwise the user is asked to open the authorization URL and paste back
    the URL they were redirected to, from which the code is parsed and
    exchanged for a token.

    Returns:
        A ``spotipy.Spotify`` client created with the access token, or
        ``None`` when either credential is missing.
    """
    app_id, app_secret = load_spotify_credentials()
    if not (app_id and app_secret):
        print("Error: Spotify API credentials not found.")
        return None

    oauth_manager = SpotifyOAuth(
        client_id=app_id,
        client_secret=app_secret,
        # Must match the redirect URI set in the Spotify dashboard.
        redirect_uri="http://127.0.0.1:8888/callback",
        # Scopes requested for the token.
        scope="user-library-read playlist-read-private",
        # The token is cached in this file.
        cache_path=".spotify_cache",
    )

    token = oauth_manager.get_cached_token()
    must_authorize = not token or oauth_manager.is_token_expired(token)

    if must_authorize:
        print("No token found or token expired. Please authorize Spotify access.")
        login_url = oauth_manager.get_authorize_url()
        print(f"Please navigate to this URL in your browser: {login_url}")

        # The pasted redirect URL carries the authorization code.
        redirected = input("Enter the full URL you were redirected to: ")
        auth_code = oauth_manager.parse_response_code(redirected)

        # Trade the code for a token.
        token = oauth_manager.get_access_token(auth_code)

    return spotipy.Spotify(auth=token['access_token'])

### Primavera Playlist

In [3]:
# Configuration - artists lists
#
# Three lists of artist names, each a plain list of strings. They are
# combined and looked up one by one via get_all_artist_top_tracks().
#
# Notes for maintainers:
# - A name may carry a parenthetical note (e.g. "underscores (2nd of June)");
#   search_artist() drops everything from the first "(" before querying.
# - Some names appear in more than one list (e.g. "Beach House",
#   "The Jesus Lizard", "Kim Deal").

# NOTE(review): presumably the main festival lineup — confirm against the
# official programme.
main_artists = [
    "Charli XCX", "Chappell Roan", "Sabrina Carpenter", "LCD Soundsystem", "Sturgill Simpson",
    "HAIM", "FKA twigs", "TV on the Radio", "Clairo", "Fontaines D.C.", "IDLES", "Turnstile",
    "Spiritualized", "Stereolab", "Beach House", "Denzel Curry", "Jamie xx", "ANOHNI and the Johnsons",
    "The Mars Volta", "Kim Deal", "Waxahatchee", "MJ Lenderman", "Wet Leg", "Beabadoobee",
    "Black Country, New Road", "The Jesus Lizard", "Amelie Lens", "Caribou", "Cat Power",
    "Destroyer", "Cap'n Jazz", "Aminé", "Brutalismus 3000", "Hinds", "Squid", "Alan Sparhawk",
    "Floating Points", "Michael Bibi", "Magdalena Bay", "Feeble Little Horse", "Julie",
    "Christian Lee Hutson", "Parcels", "Carolina Durante", "Amaia", "Young Beef", "Armand Van Helden"
]

# NOTE(review): presumably artists playing city ("ciutat") shows that are
# already sold out — confirm.
ciutat_artists_sold_out = [
    "Dummy", "underscores (2nd of June)", "Christopher Owens", "Huir", "Nilüfer Yanya", "Youth Lagoon",
    "Allie X", "Lambrini Girls", "Pipiolas", "Big Special", "Good Looks", "Dave P", "Fcukers",
    "Kelly Lee Owens", "Nathan Shepherd", "Beach House", "Momma", "Sailor Honeymoon", "Chat Pile",
    "Dehd", "Horsegirl", "Los Campesinos", "Sal de Cocheeterna", "Jane Remover", "Kneecap", "Machine Girl"
]

# NOTE(review): presumably artists playing city shows that still have
# tickets available — confirm.
ciutat_artists_available = [
    "Pete Doherty", "The Jesus Lizard", "Kim Deal", "Nadah El Shazly", "Seefeel", "Real Farmer", 
    "Warmduscher", "Damoridemort", "Tristwch y Fenywod", "Aka Hex", "Frost Children Djs", "Jawnino", 
    "Salem", "Mohama Saz", "Soul Jazz"
]


In [4]:
def search_artist(sp, artist_name, max_candidates=5):
    """Search Spotify for an artist by name and return the best match.

    Any parenthetical suffix in ``artist_name`` (e.g. ``"(2nd of June)"``)
    is dropped before searching. Up to ``max_candidates`` results are
    requested; a candidate whose name equals the cleaned name
    (case-insensitively) is preferred, and the first result is used as a
    fallback when no candidate matches exactly.

    Args:
        sp: A Spotify client exposing ``search(q=..., type=..., limit=...)``.
        artist_name: The artist name as written in the lineup.
        max_candidates: How many search results to consider (at least 1).

    Returns:
        The chosen artist object (a dict from the search response), or
        ``None`` when the client is missing, nothing is found, or the
        search raises.
    """
    if sp is None:
        print("Error: Spotify client not initialized")
        return None

    # Clean up artist name - remove any parenthetical text
    clean_name = artist_name.split('(')[0].strip()
    if not clean_name:
        # Nothing left to search for (e.g. the name was only a parenthetical).
        print(f"Could not find artist: {artist_name}")
        return None

    # A double quote inside the name would end the quoted query term early.
    query_name = clean_name.replace('"', '')

    try:
        results = sp.search(
            q=f'artist:"{query_name}"',
            type='artist',
            limit=max(1, max_candidates),
        )
        candidates = [item for item in results['artists']['items'] if item]
    except Exception as e:
        # API boundary: report and let the caller skip this artist.
        print(f"Error searching for artist {artist_name}: {e}")
        return None

    if not candidates:
        print(f"Could not find artist: {artist_name}")
        return None

    # Prefer an exact (case-insensitive) name match over the top hit, which
    # for short names is often a different, longer-named artist.
    wanted = clean_name.casefold()
    for candidate in candidates:
        if str(candidate.get('name', '')).casefold() == wanted:
            return candidate

    return candidates[0]
    
def get_artist_top_tracks(sp, artist_id, country='ES', limit=10):
    """Fetch at most ``limit`` of an artist's top tracks for one market.

    The market defaults to Spain (``'ES'``) for Primavera Sound.

    Returns:
        The first ``limit`` entries of the ``'tracks'`` list returned by
        ``sp.artist_top_tracks``; an empty list when the client is ``None``
        or the lookup raises.
    """
    if sp is not None:
        try:
            response = sp.artist_top_tracks(artist_id, country=country)
            ranked = response['tracks']
            return ranked[:limit]
        except Exception as err:
            print(f"Error getting top tracks for artist {artist_id}: {err}")
    else:
        print("Error: Spotify client not initialized")

    # Both failure paths end with "no tracks".
    return []

def get_all_artist_top_tracks(sp, artist_lists):
    """Collect the top tracks of every artist in the given lists.

    The lists are combined in order. An artist that appears more than once
    (compared case-insensitively, ignoring surrounding whitespace) is only
    processed the first time, and a track whose Spotify link was already
    collected is not added again.

    Args:
        sp: The Spotify client passed on to ``search_artist`` and
            ``get_artist_top_tracks``.
        artist_lists: An iterable of lists of artist names.

    Returns:
        A list of dicts with the keys ``'artist'`` (the name as given in
        the lists), ``'track_name'`` and ``'spotify_link'``. Empty when the
        client is ``None`` or nothing could be retrieved.
    """
    if sp is None:
        # Fail once here instead of once per artist.
        print("Error: Spotify client not initialized")
        return []

    # Combine all artist lists, keeping only the first occurrence of a name
    # so an artist listed twice is not fetched (and added) twice.
    all_artists = []
    seen_artists = set()
    for artist_list in artist_lists:
        for artist_name in artist_list:
            key = artist_name.strip().casefold()
            if key in seen_artists:
                continue
            seen_artists.add(key)
            all_artists.append(artist_name)

    print(f"Processing {len(all_artists)} artists...")

    all_tracks = []
    seen_links = set()

    for position, artist_name in enumerate(all_artists, start=1):
        print(f"[{position}/{len(all_artists)}] Processing: {artist_name}")

        # Search for the artist
        artist = search_artist(sp, artist_name)
        if not artist:
            continue

        # Get top tracks and add their links, skipping tracks already
        # collected for another artist.
        for track in get_artist_top_tracks(sp, artist['id']):
            link = track['external_urls']['spotify']
            if link in seen_links:
                continue
            seen_links.add(link)
            all_tracks.append({
                'artist': artist_name,
                'track_name': track['name'],
                'spotify_link': link,
            })

        # Add a small delay to avoid rate limiting
        sleep(0.5)

    return all_tracks


### Structured Playlist Data

In [5]:
# (output column, audio-features key, default used when the value is missing)
_AUDIO_FEATURE_COLUMNS = (
    ('Danceability', 'danceability', 0),
    ('Energy', 'energy', 0),
    ('Key', 'key', 0),
    ('Loudness', 'loudness', 0),
    ('Mode', 'mode', 0),
    ('Speechiness', 'speechiness', 0),
    ('Acousticness', 'acousticness', 0),
    ('Instrumentalness', 'instrumentalness', 0),
    ('Liveness', 'liveness', 0),
    ('Valence', 'valence', 0),
    ('Tempo', 'tempo', 0),
    ('Time Signature', 'time_signature', 4),
)


def _audio_feature_columns(af):
    """Map one audio-features dict (or None) to the output columns."""
    af = af or {}
    return {column: af.get(key, default) for column, key, default in _AUDIO_FEATURE_COLUMNS}


def _fetch_audio_features(sp, track_ids):
    """Fetch audio features for several track IDs in a single request.

    Returns ``(features_by_id, error)``: a dict of the features that were
    returned, and the exception if the request failed (otherwise None).
    """
    if not track_ids:
        return {}, None
    try:
        fetched = sp.audio_features(track_ids) or []
    except Exception as e:
        return {}, e
    # The response list is positionally aligned with the requested IDs.
    return {tid: af for tid, af in zip(track_ids, fetched) if af}, None


def get_playlist_tracks(sp, playlist_url, batch_size=20, pause_seconds=2):
    """Get all tracks and their attributes from a Spotify playlist URL.

    Args:
        sp: A Spotify client exposing ``playlist_tracks``, ``next`` and
            ``audio_features``.
        playlist_url: A playlist URL containing ``spotify.com/playlist/``;
            anything else is passed on unchanged as the playlist ID.
        batch_size: Tracks processed per batch. Audio features are fetched
            with ONE request per batch; the value is clamped to 1..100
            (100 is the per-request maximum of the audio-features endpoint).
        pause_seconds: Delay between batches to avoid rate limiting; no
            pause is made when this is 0 or negative.

    Returns:
        A ``pandas.DataFrame`` with one row per track and the columns
        ``Track Name``, ``Artist Name(s)``, ``Popularity``,
        ``Spotify Link`` plus the audio-feature columns. Tracks without
        audio features get 0 for every feature (4 for ``Time Signature``).
        An empty DataFrame is returned when the client is ``None`` or the
        playlist cannot be retrieved.
    """
    if sp is None:
        print("Error: Spotify client not initialized")
        return pd.DataFrame()

    # Extract playlist ID from URL (dropping a query string or trailing path)
    if 'spotify.com/playlist/' in playlist_url:
        playlist_id = playlist_url.split('playlist/')[1].split('?')[0].split('/')[0]
    else:
        playlist_id = playlist_url  # Assume it's already an ID

    print(f"Fetching playlist with ID: {playlist_id}")

    # Get playlist tracks
    try:
        results = sp.playlist_tracks(playlist_id)
        tracks = results['items']

        # Handle pagination if playlist has more than 100 tracks
        while results['next']:
            results = sp.next(results)
            tracks.extend(results['items'])

        print(f"Found {len(tracks)} tracks in playlist")
    except Exception as e:
        print(f"Error retrieving playlist: {e}")
        return pd.DataFrame()

    batch_size = max(1, min(int(batch_size), 100))
    total = len(tracks)
    total_batches = (total - 1) // batch_size + 1
    track_data = []

    for start in range(0, total, batch_size):
        stop = min(start + batch_size, total)
        print(f"Processing batch {start // batch_size + 1}/{total_batches} ({start + 1}-{stop}/{total} tracks)")

        # Skip playlist entries whose track is missing.
        batch_tracks = [
            item['track'] for item in tracks[start:stop]
            if item and item.get('track')
        ]

        # One request for the whole batch; entries without an ID (e.g.
        # local files) cannot be looked up and fall back to defaults.
        track_ids = [track['id'] for track in batch_tracks if track.get('id')]
        features_by_id, fetch_error = _fetch_audio_features(sp, track_ids)

        for track in batch_tracks:
            name = track.get('name')
            artists_str = ", ".join(
                artist.get('name') or '' for artist in (track.get('artists') or [])
            )

            track_info = {
                'Track Name': name,
                'Artist Name(s)': artists_str,
                'Popularity': track.get('popularity', 0),
                'Spotify Link': (track.get('external_urls') or {}).get('spotify', ''),
            }

            af = features_by_id.get(track.get('id'))
            if af is None:
                if fetch_error is not None and track.get('id'):
                    print(f"  Error getting audio features for {name}: {fetch_error}")
                else:
                    print(f"  No audio features available for: {name}")

            track_info.update(_audio_feature_columns(af))
            track_data.append(track_info)

        # Add a delay between batches to avoid rate limiting
        if stop < total and pause_seconds > 0:
            print("\nPausing to avoid rate limiting...")
            sleep(pause_seconds)

    print("\nFinished processing all tracks")

    # Convert to DataFrame
    return pd.DataFrame(track_data)

## Initialize

In [None]:
# Initialize Spotify client with Authorization Code flow
print("Initializing Spotify client with Authorization Code flow...")
sp = get_spotify_client_with_auth_flow()

# get_spotify_client_with_auth_flow() returns None when credentials are
# missing, so report exactly one outcome instead of always claiming success.
if sp is None:
    print("Failed to initialize Spotify client.")
else:
    print("Successfully authenticated with Spotify!")

## Make Primavera Playlist

In [None]:
# Part 1: Create a playlist of top tracks for Primavera artists
print("\n--- Part 1: Get top tracks for Primavera artists ---")
answer = input("Do you want to create a playlist of top tracks for Primavera artists? (y/n): ")
# Tolerate surrounding whitespace and a spelled-out "yes".
create_primavera_tracks = answer.strip().lower() in ('y', 'yes')

if create_primavera_tracks:
    # Get all artist lists
    artist_lists = [main_artists, ciutat_artists_sold_out, ciutat_artists_available]

    # Get all top tracks
    all_tracks = get_all_artist_top_tracks(sp, artist_lists)

    all_tracks_df = pd.DataFrame(all_tracks)
    output_file = 'primavera_top_tracks.csv'

    # Save to CSV — but only when something was retrieved, so a failed run
    # does not overwrite an earlier export with an empty file.
    if all_tracks:
        all_tracks_df.to_csv(output_file, index=False)
        print(f"Saved {len(all_tracks)} tracks to {output_file}")
    else:
        print(f"No tracks were retrieved; {output_file} was not written.")