# Preparation

## Imports

In [1]:
import os
import json
import dotenv
from pymongo import MongoClient
from pymongo.server_api import ServerApi
from neo4j import GraphDatabase
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from faker import Faker
from google import genai
from google.genai.errors import ClientError

## Useful Functions

In [4]:
def remove_available_markets(obj):
    """Return a copy of ``obj`` with every ``"available_markets"`` key removed.

    Dicts and lists are rebuilt recursively at any nesting depth, so the
    input is never mutated. Any other value is returned unchanged.
    """
    if isinstance(obj, dict):
        return {
            key: remove_available_markets(value)
            for key, value in obj.items()
            if key != "available_markets"
        }
    if isinstance(obj, list):
        return [remove_available_markets(element) for element in obj]
    # Scalars (and any other non-container value) pass straight through.
    return obj

## Connect to MongoDB

In [2]:
# Populate the process environment from the project's .env file.
dotenv.load_dotenv()

# Index the environment directly so that a missing variable fails right here
# with a KeyError naming it, instead of silently becoming the string "None".
mongo_user = os.environ["MONGODB_USERNAME"]
mongo_pass = os.environ["MONGODB_PASSWORD"]

# Credentials are passed as keyword arguments rather than interpolated into
# the URI: reserved characters in a password (such as "@", ":", "/" or "%")
# would otherwise need percent-escaping or they corrupt the connection string.
mongo_client = MongoClient(
    "mongodb+srv://projeto-bd.9scqvyv.mongodb.net/?retryWrites=true&w=majority&appName=projeto-bd",
    username=mongo_user,
    password=mongo_pass,
    server_api=ServerApi(
        version="1",
        strict=True,
        deprecation_errors=True,
    ),
)

# Database handle used by the aggregation and update cells further down.
mongo_db = mongo_client["music_catalog"]

## Connect to Neo4j

In [3]:
# Reload .env so this cell can be run on its own.
dotenv.load_dotenv()

# Driver for the hosted Neo4j instance; username and password come from the environment.
neo4j = GraphDatabase.driver(
    "neo4j+s://10ab7e50.databases.neo4j.io",
    auth=(os.getenv("NEO4J_USERNAME"), os.getenv("NEO4J_PASSWORD")),
)

# Check the connection now instead of at the first query.
neo4j.verify_connectivity()

## Connect to Spotify API

In [None]:
# Reload .env so this cell can be run on its own.
dotenv.load_dotenv()

# No credentials are passed explicitly; presumably SpotifyClientCredentials
# picks the client id/secret up from the environment loaded above — TODO confirm
# which variable names it expects.
spotify = spotipy.Spotify(auth_manager=SpotifyClientCredentials())

## Init Faker

In [None]:
# Shared Faker instance used by the sample-data cells below.
# NOTE(review): no seed is set, so generated values presumably differ on every run.
fake = Faker()

## Connect to Gemini API

In [5]:
# Reload .env so this cell can be run on its own.
dotenv.load_dotenv()

# NOTE: `gemini` is the chat session returned by chats.create(), not the
# client itself; later cells call send_message() on this one object.
gemini = (
    genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
    .chats
    .create(model="gemini-2.5-flash")
)

# Artists

## Data Sources

In [None]:
# Exploratory search: artists matching the "genre:djent" query, starting at offset 0.
response = spotify.search(q="genre:djent", type="artist", offset=0)

# Pretty-print the raw payload to inspect its structure.
print(json.dumps(response, indent=4))

In [None]:
# Load the artist ids from the JSON resource file. The encoding is stated
# explicitly because open() otherwise falls back to a platform-dependent
# default, which can mis-decode non-ASCII content.
with open("resources/my_guys.json", "r", encoding="utf-8") as f:
    artist_ids = json.load(f)

artist_ids

In [None]:
# Fetch one artist from the loaded id list (index 7 is an arbitrary sample)
# and pretty-print the raw payload.
response = spotify.artist(artist_ids[7])
print(json.dumps(response, indent=4))

In [None]:
# List the albums of one hard-coded artist id; the payload is printed with
# every "available_markets" key stripped out.
response = spotify.artist_albums("4yRSUmhuSJ3KcIMljdh4fH", album_type="album")
print(json.dumps(remove_available_markets(response), indent=4))

In [None]:
# Fetch the tracks of one hard-coded album id and print the payload with
# every "available_markets" key stripped out.
response = spotify.album_tracks("0T0XW3kREbbHdNyqbZAd99")
print(json.dumps(remove_available_markets(response), indent=4))

In [None]:
response = spotify.album_tracks("0T0XW3kREbbHdNyqbZAd99")

# Keep only three fields per track; "duration" carries the API's
# `duration_ms` value unchanged.
tracks = [
    {
        "track_number": item["track_number"],
        "name": item["name"],
        "duration": item["duration_ms"],
    }
    for item in response["items"]
]

print(json.dumps(tracks, indent=4))

In [None]:
# Generate a placeholder bio with Faker, requesting 25 sentences, and show it.
fake_bio = fake.paragraph(nb_sentences=25)
print(fake_bio)

In [1]:
# Ask the Gemini chat session for a bio of one artist. This needs the
# "Connect to Gemini API" cell to have run first, otherwise `gemini` is undefined.
artist = "VOLA"

# The stripped response text is the cell's displayed value.
gemini.send_message(
    f"Give me a bio for the music artist {artist}. Respond with only the bio.",
).text.strip()

NameError: name 'gemini' is not defined

In [7]:
# Deliberately send requests back-to-back until the API rejects one, so the
# resulting ClientError can be inspected in the following cells.
# The attempt cap keeps the cell from spinning forever (and burning quota)
# if no ClientError is ever raised; in that case `error` stays None.
MAX_ATTEMPTS = 50

error = None
for _ in range(MAX_ATTEMPTS):
    try:
        bio = gemini.send_message(
            "Give me a bio for the music artist Chat Pile. Respond with only a paragraph-long the bio.",
        ).text.strip()
    except ClientError as e:
        # Keep the exception for inspection and stop sending requests.
        error = e
        break

error

google.genai.errors.ClientError("429 RESOURCE_EXHAUSTED. {'error': {'code': 429, 'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits.', 'status': 'RESOURCE_EXHAUSTED', 'details': [{'@type': 'type.googleapis.com/google.rpc.QuotaFailure', 'violations': [{'quotaMetric': 'generativelanguage.googleapis.com/generate_content_free_tier_requests', 'quotaId': 'GenerateRequestsPerMinutePerProjectPerModel-FreeTier', 'quotaDimensions': {'model': 'gemini-2.5-flash', 'location': 'global'}, 'quotaValue': '10'}]}, {'@type': 'type.googleapis.com/google.rpc.Help', 'links': [{'description': 'Learn more about Gemini API quotas', 'url': 'https://ai.google.dev/gemini-api/docs/rate-limits'}]}, {'@type': 'type.googleapis.com/google.rpc.RetryInfo', 'retryDelay': '3s'}]}}")

In [10]:
# Show the status code carried by the ClientError captured in the previous cell.
error.code

429

## Databases

In [None]:
# Average number of releases per artist.
pipeline = [
    # Stage 1: count each artist's releases. $size rejects a non-array
    # argument, so a missing or null `releases` field is treated as an empty
    # list (count 0), mirroring the $ifNull guard used for `tracks` below.
    {
        "$project": {
            "num_releases": {
                "$size": {
                    "$ifNull": ["$releases", []],
                },
            },
        },
    },
    # Stage 2: collapse all artists into one document holding the mean.
    {
        "$group": {
            "_id": None,
            "avg_releases": {
                "$avg": "$num_releases",
            },
        },
    },
]

result = list(mongo_db.artists.aggregate(pipeline))
# An empty collection produces no group document; print null instead of
# failing with an IndexError.
print(json.dumps(result[0] if result else None, indent=4))

In [None]:
# Average number of tracks per artist, summed over all of an artist's releases.
pipeline = [
    # Per artist: map each release to its track count (a release without a
    # `tracks` field counts as 0 via $ifNull), then sum those counts.
    {"$project": {
        "total_tracks": {"$sum": {"$map": {
            "input": "$releases",
            "as": "release",
            "in": {"$size": {"$ifNull": ["$$release.tracks", []]}},
        }}},
    }},
    # Collapse every artist into a single document holding the mean.
    {"$group": {"_id": None, "avg_tracks": {"$avg": "$total_tracks"}}},
]

result = list(mongo_db.artists.aggregate(pipeline))
print(json.dumps(result[0], indent=4))

In [None]:
# Artists ranked by popularity, highest first, keeping only name and popularity.
pipeline = [
    {"$sort": {"popularity": -1}},
    # Drop `_id` so each result document has just the two projected fields.
    {"$project": {"_id": 0, "name": 1, "popularity": 1}},
]

result = list(mongo_db.artists.aggregate(pipeline))
print(json.dumps(result, indent=4))

In [None]:
# Number of artists per genre, most common genre first.
pipeline = [
    # One document per (artist, genre) pair.
    {"$unwind": "$genres"},
    # Count how many of those pairs fall under each genre.
    {"$group": {"_id": "$genres", "count": {"$sum": 1}}},
    {"$sort": {"count": -1}},
]

result = list(mongo_db.artists.aggregate(pipeline))
print(json.dumps(result, indent=4))

In [5]:
# Top 10 artists (id and popularity) linked to a given genre through a
# BELONGS_TO relationship, most popular first. The genre is supplied as the
# `$genre` query parameter rather than being formatted into the Cypher text.
records, summary, keys = neo4j.execute_query(
    """
    MATCH (a:Artist)-[:BELONGS_TO]->(g:Genre {name: $genre})
    RETURN a.id AS artist_id, a.popularity AS popularity
    ORDER BY a.popularity DESC
    LIMIT 10
    """,
    genre="metalcore"
)
# Show all three parts of the result: the rows, the execution summary, and
# the returned column names.
print(records)
print(summary)
print(keys)

[<Record artist_id='1Ffb6ejR6Fe5IamqA5oRUF' popularity=80>, <Record artist_id='3Ri4H12KFyu98LMjSoij5V' popularity=74>, <Record artist_id='6MwPCCR936cYfM1dLsGVnl' popularity=71>, <Record artist_id='3Uobr6LgQpBbk6k4QGAb3V' popularity=71>, <Record artist_id='3ZztVuWxHzNpl0THurTFCv' popularity=70>, <Record artist_id='4MzJMcHQBl9SIYSjwWn8QW' popularity=69>, <Record artist_id='6vwjIs0tbIiseJMR3pqwiL' popularity=63>, <Record artist_id='7rqJQQxuUOCk052MK5kLsH' popularity=61>, <Record artist_id='0v2e0oznnTaIXKLGKHphfX' popularity=61>, <Record artist_id='0NbQe5CNgh4YApOCDuHSjb' popularity=60>]
<neo4j._work.summary.ResultSummary object at 0x7b727c11c9e0>
['artist_id', 'popularity']


# Users

In [None]:
# Sample full name: a fake first name and last name joined by a space.
f"{fake.first_name()} {fake.last_name()}"

In [None]:
# Sample SHA-256 value from Faker — presumably a stand-in for a stored
# password hash; TODO confirm the intended field.
fake.sha256()

In [None]:
# Sample catch phrase from Faker (candidate short text for a user profile —
# TODO confirm where it is used).
fake.catch_phrase()

In [None]:
# Sample paragraph from Faker using its default length.
fake.paragraph()

In [None]:
# Sample user name from Faker.
fake.user_name()

In [11]:
# Ask the Gemini chat session for a first-person bio of a named person.
name = "Ryan Sakurai"

# The stripped response text is the cell's displayed value.
gemini.send_message(
    f"Create a random bio in first person for a person called {name}. Respond with only the bio",
).text.strip()

"Hey there, I'm Ryan Sakurai. I'm a passionate designer based out of Portland, always seeking new ways to blend aesthetics with functionality, whether that's through my work in user experience or my weekend pottery projects. When I'm not sketching or throwing clay, you'll probably find me exploring a new hiking trail, getting lost in a good sci-fi novel, or trying out a new recipe in the kitchen. I thrive on creativity and connection, always eager to learn and share new perspectives."

In [4]:
# One-off schema migration: rename the `artists_followed` field to `follows`
# on every user document (the empty filter matches all of them).
mongo_db.users.update_many({}, {"$rename": {"artists_followed": "follows"}})

UpdateResult({'n': 1000, 'electionId': ObjectId('7fffffff0000000000000019'), 'opTime': {'ts': Timestamp(1751658754, 1004), 't': 25}, 'nModified': 1000, 'ok': 1.0, '$clusterTime': {'clusterTime': Timestamp(1751658754, 1004), 'signature': {'hash': b'\xa2\x8f\xf6\xec\xfa\xf0~\xebS\xf7\xb9\x89\xbf\xa7\xb9%>v\xda\x8a', 'keyId': 7477973821127917570}}, 'operationTime': Timestamp(1751658754, 1004), 'updatedExisting': True}, acknowledged=True)