# Preparation

## Imports

In [None]:
import json
import os
import random
from urllib.parse import quote_plus

import dotenv
import spotipy
from faker import Faker
from pymongo import MongoClient
from pymongo.errors import CollectionInvalid, DuplicateKeyError
from pymongo.server_api import ServerApi
from spotipy.oauth2 import SpotifyClientCredentials

## Useful Function

In [None]:
def remove_available_markets(obj):
    """Return ``obj`` with every ``"available_markets"`` key stripped out.

    Dicts and lists are rebuilt recursively, so the input is never mutated.
    Any other value (strings, numbers, ``None``, tuples, ...) is returned
    unchanged and is not descended into.
    """
    if isinstance(obj, list):
        return [remove_available_markets(element) for element in obj]

    if isinstance(obj, dict):
        cleaned = {}
        for key, value in obj.items():
            if key == "available_markets":
                continue
            cleaned[key] = remove_available_markets(value)
        return cleaned

    return obj

## Connect to MongoDB

In [None]:
# Load variables from a local .env file (if any) into the environment.
dotenv.load_dotenv()

mongo_user = os.getenv("MONGODB_USERNAME")
mongo_pass = os.getenv("MONGODB_PASSWORD")

# Fail fast with a clear message instead of building a URI that contains the
# literal text "None" and failing later during authentication.
if not mongo_user or not mongo_pass:
    raise RuntimeError(
        "MONGODB_USERNAME and MONGODB_PASSWORD must be set in the environment or in .env"
    )

# Credentials embedded in a MongoDB URI must be percent-escaped; characters
# such as "@", ":" or "/" would otherwise corrupt the connection string.
# NOTE(review): assumes the values in .env are stored raw (not pre-escaped).
mongo_client = MongoClient(
    f"mongodb+srv://{quote_plus(mongo_user)}:{quote_plus(mongo_pass)}"
    "@projeto-bd.9scqvyv.mongodb.net/?retryWrites=true&w=majority&appName=projeto-bd",
    server_api=ServerApi(
        version="1",
        strict=True,
        deprecation_errors=True,
    ),
)

# Database handle used by every cell below.
mongo_db = mongo_client["music_catalog"]

## Connect to Spotify API

In [None]:
# Load .env again so this cell can be run independently of the MongoDB one.
dotenv.load_dotenv()

# The credentials manager is built without explicit arguments
# (presumably it reads the Spotify client ID/secret from the environment - confirm).
credentials_manager = SpotifyClientCredentials()
spotify = spotipy.Spotify(auth_manager=credentials_manager)

## Init Faker

In [None]:
# Shared Faker instance used below to generate bios, names, usernames and password hashes.
fake = Faker()

# Population - Artists

## Create Collection

In [None]:
try:
    mongo_db.create_collection("artists")
except CollectionInvalid as e:
    # The collection already exists; report it and carry on.
    print(e)

# Creating an index that already exists is a no-op, so do it unconditionally:
# a collection left over from an earlier run still ends up with the index
# on the embedded release IDs.
mongo_db.artists.create_index("releases.id")

## Retrieve Artists by Genre

### Response Preview

In [None]:
# Fetch a single page (starting at offset 0) of artists matching the "djent" genre filter.
response = spotify.search(q="genre:djent", type="artist", offset=0)

# Pretty-print the raw payload to inspect its structure.
print(json.dumps(response, indent=4))

### Do It

### From Spotify

In [None]:
# Collect the IDs of every artist matching the genre filter, following the
# "next" link of each result page until there is none left.
response = spotify.search(q="genre:djent", type="artist")
artist_ids = []

while True:
    page = response["artists"]
    artist_ids.extend(artist["id"] for artist in page["items"])
    if not page["next"]:
        break
    response = spotify.next(page)

artist_ids

### From JSON

In [None]:
# Alternative source for artist_ids: a JSON file shipped with the project.
# The encoding is passed explicitly so decoding does not depend on the
# platform's default locale encoding.
with open("../resources/my_guys.json", "r", encoding="utf-8") as f:
    artist_ids = json.load(f)

artist_ids

## Insert into MongoDB

### Response Preview

In [None]:
# Inspect the raw artist payload for one sample entry (index 7) of artist_ids.
response = spotify.artist(artist_ids[7])
print(json.dumps(response, indent=4))

In [None]:
# Inspect the album listing of one hard-coded artist ID; "available_markets"
# keys are stripped from the printed copy only.
response = spotify.artist_albums("4yRSUmhuSJ3KcIMljdh4fH", album_type="album")
print(json.dumps(remove_available_markets(response), indent=4))

In [None]:
# Inspect the track listing of one hard-coded album ID; "available_markets"
# keys are stripped from the printed copy only.
response = spotify.album_tracks("0T0XW3kREbbHdNyqbZAd99")
print(json.dumps(remove_available_markets(response), indent=4))

In [None]:
response = spotify.album_tracks("0T0XW3kREbbHdNyqbZAd99")

# Reduce each track in the returned page to the three fields that get stored:
# track number, name and duration (taken from "duration_ms").
tracks = [
    {
        "track_number": item["track_number"],
        "name": item["name"],
        "duration": item["duration_ms"],
    }
    for item in response["items"]
]

print(json.dumps(tracks, indent=4))

In [None]:
# Preview a generated biography, using the same nb_sentences=25 as the artist insert loop.
fake_bio = fake.paragraph(nb_sentences=25)
print(fake_bio)

### Do It

In [None]:
def artist_releases(artist_id: str) -> list:
    """Return every album of ``artist_id`` as a list of release documents.

    Requests the artist's albums (``album_type="album"``) and follows the
    ``next`` link of each result page until there is none left. Each release
    document holds the album's ``id``, ``name`` and ``release_date``, its
    ``tracks`` (fetched through ``release_tracks``) and an empty ``ratings``
    list.
    """
    releases = []
    page = spotify.artist_albums(artist_id, album_type="album")

    while True:
        releases.extend(
            {
                "id": album["id"],
                "name": album["name"],
                "release_date": album["release_date"],
                "tracks": release_tracks(album["id"]),
                "ratings": [],
            }
            for album in page["items"]
        )
        if not page["next"]:
            break
        page = spotify.next(page)

    return releases

def release_tracks(release_id: str) -> list:
    """Return every track of the album ``release_id`` as a list of dicts.

    Requests the album's tracks and follows the ``next`` link of each result
    page until there is none left. Each dict holds the track's
    ``track_number`` and ``name``, plus ``duration`` copied from the
    ``duration_ms`` field.
    """
    tracks = []
    page = spotify.album_tracks(release_id)

    while True:
        tracks.extend(
            {
                "track_number": item["track_number"],
                "name": item["name"],
                "duration": item["duration_ms"],
            }
            for item in page["items"]
        )
        if not page["next"]:
            break
        page = spotify.next(page)

    return tracks

# Build one document per artist and store it in the "artists" collection.
for artist_id in artist_ids:
    response = spotify.artist(artist_id)

    # Keys are added in the order they should appear in the stored document.
    artist = {
        "_id": artist_id,
        "name": response["name"],
        "genres": response["genres"],
        "popularity": response["popularity"],
    }
    # Roughly 90% of the artists get a generated biography; the rest have none.
    if random.random() < 0.9:
        artist["bio"] = fake.paragraph(nb_sentences=25)
    artist["qt_followers"] = 0
    artist["releases"] = artist_releases(artist_id)

    # Artists without any album are not stored.
    if not artist["releases"]:
        continue

    try:
        mongo_db.artists.insert_one(artist)
    except DuplicateKeyError:
        # The artist ID is the document _id, so re-running this cell (or a
        # repeated ID in artist_ids) would otherwise abort the whole loop.
        print(f"Artist {artist_id} is already in the collection; skipped.")


# Population - Users

## Create Collection

In [None]:
try:
    mongo_db.create_collection("users")
except CollectionInvalid as e:
    # The collection already exists; report it and carry on.
    print(e)

# Creating an index that already exists is a no-op, so do it unconditionally:
# a collection left over from an earlier run still gets the unique username
# constraint that the insert loop relies on to reject duplicates.
mongo_db.users.create_index("username", unique=True)

## Insert into MongoDB

### Faker Output Preview

In [None]:
# Preview a full name assembled from a fake first and last name.
f"{fake.first_name()} {fake.last_name()}"

In [None]:
# Preview the generator used for the "password" field of the user documents.
fake.sha256()

In [None]:
# Preview a generated catch phrase (not used by the insert loop in this section).
fake.catch_phrase()

In [None]:
# Preview a generated paragraph; the insert loop uses paragraphs for user bios.
fake.paragraph()

In [None]:
# Preview a generated username (the insert loop builds usernames from names instead).
fake.user_name()

### Do It

In [None]:
# Number of insert attempts; attempts that hit an existing username are
# counted in qt_duplicates instead of being stored.
QT_INSERTIONS = 10_000
qt_duplicates = 0

for _ in range(QT_INSERTIONS):
    first_name = fake.first_name()
    last_name = fake.last_name()
    number = random.randint(0, 99)

    # Keys are added in the order they should appear in the stored document.
    user = {
        "username": f"{first_name.lower()}_{last_name.lower()}{number:02}",
        "password": fake.sha256(),
    }
    # About 75% of the users expose a display name; only those users may also
    # get a bio (about half of them).
    if random.random() < 0.75:
        user["name"] = f"{first_name} {last_name}"
        if random.random() < 0.5:
            user["bio"] = fake.paragraph(nb_sentences=10)
    user.update(friends=[], artists_followed=[], ratings=[])

    try:
        mongo_db.users.insert_one(user)
    except DuplicateKeyError:
        # Raised when the insert violates a unique index (the username index
        # created for this collection).
        qt_duplicates += 1

print(f"Finished with {QT_INSERTIONS-qt_duplicates} users inserted.")