# In [None]:
import os
from collections import Counter, deque
from datetime import datetime, timedelta, timezone

import pandas as pd
import requests

# Load API Key
# The environment variable name must be passed as a string literal; the bare
# name is not defined yet at this point and would raise NameError.
# os.getenv returns None when the variable is unset.
LASTFM_API_KEY = os.getenv('LASTFM_API_KEY')

# Endpoint and headers shared by every request made through lastfm_get().
BASE_URL = 'http://ws.audioscrobbler.com/2.0/'
HEADERS = {'user-agent': 'DataCollector'}

def lastfm_get(params):
    """Send a GET request to the Last.fm API and return the decoded JSON body.

    Args:
        params: Method-specific query parameters (for example ``method`` and
            ``user``). The mapping is copied before the API key and format
            are added, so the caller's dict is not modified.

    Returns:
        The parsed JSON response. The HTTP status is not checked here, so
        whatever JSON body the server sends is returned as-is.

    Raises:
        requests.RequestException: If the request fails or times out.
        ValueError: If the response body cannot be decoded as JSON.
    """
    query = {**params, 'api_key': LASTFM_API_KEY, 'format': 'json'}
    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.get(BASE_URL, headers=HEADERS, params=query, timeout=30)
    return response.json()

def get_recent_tracks(user, from_ts):
    """Fetch every track ``user`` scrobbled since ``from_ts``, across all pages.

    Args:
        user: Last.fm username.
        from_ts: Start of the time range, sent as the ``from`` parameter
            (the callers in this file pass integer epoch seconds).

    Returns:
        A list of track dicts as returned under ``recenttracks.track``.
        Entries flagged as "now playing" are left out. The list is empty
        when the response contains no ``recenttracks`` data.
    """
    payload = {
        'method': 'user.getrecenttracks',
        'user': user,
        'from': from_ts,
        'limit': 200
    }
    tracks = []
    page = 1
    while True:
        payload['page'] = page
        recent = lastfm_get(payload).get('recenttracks', {})
        track_data = recent.get('track', [])
        # A page holding exactly one track may arrive as a bare object
        # instead of a one-element list; extending with a dict would add
        # its keys rather than the track itself.
        if isinstance(track_data, dict):
            track_data = [track_data]
        if not track_data:
            break
        # An entry flagged nowplaying is the track currently being listened
        # to, not a completed scrobble inside the requested range, so it
        # must not count as a play.
        tracks.extend(
            track for track in track_data
            if track.get('@attr', {}).get('nowplaying') != 'true'
        )
        attr = recent.get('@attr', {})
        if int(attr.get('page', 1)) >= int(attr.get('totalPages', 1)):
            break
        page += 1
    return tracks

def collect_active_users(seed_user, min_tracks=100, max_users=500):
    """Crawl the friend graph breadth-first and collect recently active users.

    Starting from ``seed_user``, each dequeued user's friends are fetched
    with a single ``user.getfriends`` call (no paging is requested). Every
    newly discovered friend is queued for later expansion and qualifies
    when they have at least ``min_tracks`` recent tracks from the last
    seven days. The seed user is expanded but never evaluated.

    Args:
        seed_user: Username the crawl starts from.
        min_tracks: Minimum number of recent tracks needed to qualify.
        max_users: Stop once this many qualifying users have been found.

    Returns:
        A list of qualifying usernames in discovery order.
    """
    discovered = {seed_user}
    queue = deque([seed_user])
    qualifying_users = []
    # Use an aware UTC datetime: calling .timestamp() on the naive result of
    # datetime.utcnow() interprets it as local time and shifts the cutoff by
    # the machine's UTC offset.
    one_week_ago = int((datetime.now(timezone.utc) - timedelta(days=7)).timestamp())

    while queue and len(qualifying_users) < max_users:
        current_user = queue.popleft()
        try:
            friends_resp = lastfm_get({'method': 'user.getfriends', 'user': current_user})
            friends = friends_resp.get('friends', {}).get('user', [])
        except Exception as e:
            print(f"Error processing {current_user}: {e}")
            continue
        # A single friend may arrive as a bare object instead of a list.
        if isinstance(friends, dict):
            friends = [friends]

        for friend in friends:
            friend_name = friend.get('name') if isinstance(friend, dict) else None
            if not friend_name or friend_name in discovered:
                continue
            discovered.add(friend_name)
            queue.append(friend_name)

            # Handle failures per friend so one bad profile does not discard
            # the rest of the current user's friend list.
            try:
                recent_tracks = get_recent_tracks(friend_name, from_ts=one_week_ago)
            except Exception as e:
                print(f"Error processing {friend_name}: {e}")
                continue

            if len(recent_tracks) >= min_tracks:
                qualifying_users.append(friend_name)
                print(f"{len(qualifying_users)}/{max_users}: {friend_name} has {len(recent_tracks)} recent tracks")
                if len(qualifying_users) >= max_users:
                    break

    return qualifying_users

def get_top_10_tracks_last_week(user):
    """Return the user's ten most played tracks from the last seven days.

    Args:
        user: Last.fm username.

    Returns:
        A list of up to ten ``(track_name, artist_name, play_count)`` tuples
        ordered by descending play count; ties keep first-seen order. Tracks
        missing a name or artist are ignored. Any error while fetching or
        counting is printed and an empty list is returned (best effort).
    """
    # Use an aware UTC datetime: calling .timestamp() on the naive result of
    # datetime.utcnow() interprets it as local time and shifts the cutoff by
    # the machine's UTC offset.
    one_week_ago = int((datetime.now(timezone.utc) - timedelta(days=7)).timestamp())
    try:
        recent_tracks = get_recent_tracks(user, from_ts=one_week_ago)
        play_counts = Counter()
        for track in recent_tracks:
            name = track.get('name')
            artist = track.get('artist', {}).get('#text', '')
            if name and artist:
                play_counts[(name, artist)] += 1
        return [
            (name, artist, count)
            for (name, artist), count in play_counts.most_common(10)
        ]
    except Exception as e:
        print(f"Error fetching top tracks for {user}: {e}")
        return []

# === RUN SCRAPER ===
# Crawl outward from the seed account and gather the qualifying usernames.
seed = "chippy_boi"
usernames = collect_active_users(seed_user=seed)

# Save to CSV
# One column named "username", one row per collected user, no index column.
df = pd.DataFrame(data=usernames, columns=["username"])
df.to_csv(path_or_buf="lastfm_active_users.csv", index=False)
print("Saved active usernames to lastfm_active_users.csv")
