In [0]:
import json
import os
import time
from typing import Optional

import pandas as pd
import pyspark.pandas as ps
import requests



def normalize_text(text):
    """Return ``text`` lower-cased with leading and trailing whitespace removed."""
    lowered = text.lower()
    return lowered.strip()

def match_title(user_input, anime):
    """Report whether ``user_input`` appears inside any of the anime's titles.

    The comparison is a substring test on text normalized by
    ``normalize_text`` (lower-cased, surrounding whitespace removed).

    Args:
        user_input: Title text to look for.
        anime: Mapping for one anime entry; its ``"titles"`` value is read as
            a list of mappings that each carry a ``"title"`` string.

    Returns:
        True when at least one title contains the normalized input, otherwise
        False. A missing, null or empty ``"titles"`` value yields False, and
        entries without a string ``"title"`` are skipped.
    """
    # `or []` also covers an explicit null "titles" value, which the default
    # argument of .get() would not.
    titles = anime.get("titles") or []
    if not titles:
        return False

    # Normalize the query once instead of once per candidate title.
    wanted = normalize_text(user_input)
    return any(
        wanted in normalize_text(entry["title"])
        for entry in titles
        if isinstance(entry, dict) and isinstance(entry.get("title"), str)
    )

def fetch_anime_id_by_name(anime_name: str, timeout: float = 30.0) -> Optional[int]:
    """Search the Jikan anime endpoint by name and return the chosen ``mal_id``.

    Up to 10 search results are requested. The first result for which
    ``match_title`` reports a match is used; when none matches, the first
    result returned by the API is used instead.

    Args:
        anime_name: Text sent as the ``q`` search parameter.
        timeout: Seconds to wait for the HTTP response before ``requests``
            gives up, so a stalled connection cannot block the notebook
            forever.

    Returns:
        The ``mal_id`` of the selected result, or None when the search
        returns no results.

    Raises:
        RuntimeError: The API answered with HTTP 429 (rate limit).
        requests.HTTPError: The API answered with any other error status.
    """
    # Short pause to respect the API between consecutive id lookups.
    time.sleep(1)
    resp = requests.get(
        "https://api.jikan.moe/v4/anime",
        params={"q": anime_name, "limit": 10},
        timeout=timeout,
    )

    if resp.status_code == 429:
        raise RuntimeError("Erro 429: Rate Limit atingido. Diminua a concorrência.")

    resp.raise_for_status()
    # `or []` treats an explicit null "data" value like a missing one.
    data = resp.json().get("data") or []

    if not data:
        print(f"Anime {anime_name} não encontrado, pulando...")
        return None

    for anime in data:
        if match_title(anime_name, anime):
            return anime["mal_id"]
    # No title contained the query: fall back to the API's first result.
    return data[0]["mal_id"]

def fetch_jikan_characters_logic(anime_name: str, path: str, max_characters: Optional[int] = None) -> Optional[str]:
    """Download an anime's character list from Jikan and save it as a CSV.

    The anime id is resolved with ``fetch_anime_id_by_name``; the characters
    endpoint response is flattened with ``pd.json_normalize`` and written to
    ``{path}/{anime_name}_characters.csv``.

    Args:
        anime_name: Name used for the id lookup and for the output file name.
        path: Directory that receives the CSV; it is created when missing.
        max_characters: Keep at most this many entries. None or 0 keeps all
            of them.

    Returns:
        The path of the written CSV, or None when no anime id was found.

    Raises:
        requests.HTTPError: The characters endpoint answered with an error
            status.
    """
    anime_id = fetch_anime_id_by_name(anime_name)

    if not anime_id:
        return None

    print(f"Buscando personagens para ID: {anime_id}...")
    time.sleep(1)  # Safety pause between API calls.
    resp = requests.get(
        f"https://api.jikan.moe/v4/anime/{anime_id}/characters",
        timeout=30,  # Never wait forever on a stalled connection.
    )
    resp.raise_for_status()

    # `or []` treats an explicit null "data" value like a missing one, so the
    # slice and json_normalize below always receive a list.
    items = resp.json().get("data") or []
    if max_characters:
        items = items[:max_characters]

    # to_csv does not create directories, so make sure the target exists.
    if path:
        os.makedirs(path, exist_ok=True)

    # Output file name is derived from the anime name; a "/" inside the name
    # would otherwise be interpreted as a sub-directory.
    safe_name = anime_name.replace("/", "_")
    file_path = f"{path}/{safe_name}_characters.csv"

    df = pd.json_normalize(items)
    # NOTE(review): json_normalize only builds dotted column names from nested
    # dicts. If the API returns voice_actors as a list, this column never
    # exists and the branch is a no-op — confirm against the response schema.
    if 'voice_actors.name' in df.columns:
        df['voice_actors.name'] = df['voice_actors.name'].apply(str)
    df.to_csv(file_path, index=False)
    return file_path



In [0]:
import numpy as np
# Candidate titles; a random subset of them is used by the cells below.
animes_list_names = [
    "naruto",
    "bleach",
    "one piece",
    "dragon ball z",
    "dragon ball gt",
    "dragon ball super",
    "boruto",
    "death note",
    "full metal alchemist brotherhood",
    "attack on titan",
    "my hero academia",
    "one punch man",
    "jojo's bizarre adventure",
    "jojo's bizarre adventure the odd couple",
    "jojo's bizarre adventure stone ocean",
    "jojo's bizarre adventure diamond is unbreakable"
]

# Fixed seed so a fresh "Restart & Run All" picks the same subset every time.
SAMPLE_SEED = 42
# Number of titles to sample; capped below so a shorter candidate list does
# not make choice(..., replace=False) raise ValueError.
SAMPLE_SIZE = 10

animes_list = np.random.default_rng(SAMPLE_SEED).choice(
    animes_list_names,
    size=min(SAMPLE_SIZE, len(animes_list_names)),
    replace=False,
)

# One id per sampled title (None when the search finds nothing).
dados_animes = [
    fetch_anime_id_by_name(anime_name)
    for anime_name in animes_list
]

In [0]:
# Fetch the characters of every sampled anime. Each entry is whatever
# fetch_jikan_characters_logic returns: the CSV path, or None when the
# anime could not be found.
datasets = [
    fetch_jikan_characters_logic(
        anime_name=title,
        path="/Workspace/Users/estevaolins94@gmail.com/animes_databricks_pipeline/bronze",
        max_characters=1000,
    )
    for title in animes_list
]

In [0]:
def read_data():
    """Load every CSV listed in ``datasets`` and display it under its name.

    Falsy entries in ``datasets`` are skipped. A CSV with no columns to parse
    (for example one written from an empty character list) is reported and
    skipped instead of aborting the remaining files.
    """
    for dataset in datasets:
        if not dataset:
            continue
        try:
            df = pd.read_csv(dataset)
        except pd.errors.EmptyDataError:
            # An empty DataFrame is saved as a file without a header row,
            # which read_csv refuses to parse.
            print(f"Arquivo {dataset} vazio, pulando...")
            continue
        # File name without directory and extension, e.g. "naruto_characters".
        name = os.path.splitext(os.path.basename(dataset))[0]
        print(f"nome: {name}")
        display(df)
read_data()