# Notebook Refined API Coingecko

In [1]:
# Glue interactive-session settings: 10-minute idle timeout, Glue 5.0, two G.1X workers.
%idle_timeout 10
%glue_version 5.0
%worker_type G.1X
%number_of_workers 2

import sys
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.job import Job
from datetime import datetime, timezone
from pyspark.sql.functions import col, concat_ws, format_string
  
# Reuse the active SparkContext if one exists, otherwise create it.
sc = SparkContext.getOrCreate()
# Wrap it in a GlueContext and take the SparkSession that the later cells read with.
glueContext = GlueContext(sc)
spark = glueContext.spark_session
# Glue Job handle built on the same context; the cells visible here never call it.
job = Job(glueContext)

Welcome to the Glue Interactive Sessions Kernel
For more information on available magic commands, please type %help in any new cell.

Please view our Getting Started page to access the most up-to-date information on the Interactive Sessions kernel: https://docs.aws.amazon.com/glue/latest/dg/interactive-sessions.html
Installed kernel version: 1.0.8 
Current idle_timeout is None minutes.
idle_timeout has been set to 10 minutes.
Setting Glue version to: 5.0
Previous worker type: None
Setting new worker type to: G.1X
Previous number of workers: None
Setting new number of workers to: 2
Trying to create a Glue session for the kernel.
Session Type: glueetl
Worker Type: G.1X
Number of Workers: 2
Idle Timeout: 10
Session ID: 5b12d592-3308-401f-8d89-c17b3467320c
Applying the following default arguments:
--glue_kernel_version 1.0.8
--enable-glue-datacatalog true
Waiting for session 5b12d592-3308-401f-8d89-c17b3467320c to get into ready status...
Session 5b12d592-3308-401f-8d89-c17b3467320c has be

In [3]:
# Current UTC time; its year/month/day/hour pick the hourly partition to process.
# datetime.now(timezone.utc) is used instead of datetime.utcnow(), which is
# deprecated as of Python 3.12 and returns a naive (timezone-less) datetime.
now = datetime.now(timezone.utc)

# Trusted-layer input path, laid out as year/month/day/hour.
trusted_path = f"s3://arquitetura-software-datalake/trusted/coingecko/{now.year}/{now.month:02d}/{now.day:02d}/{now.hour:02d}/"

# Load the Parquet files stored under that hourly partition.
df = spark.read.parquet(trusted_path)




# Criação da tabela fato de preços das moedas (fato_precos)

In [6]:
# Measure columns copied as-is from the trusted DataFrame into the fact table.
fato_precos_measures = [
    "preco_atual",
    "preco_maximo_24h",
    "preco_minimo_24h",
    "valor_mercado",
    "ranking_mercado",
    "volume_total",
    "variacao_preco_24h",
    "percentual_variacao_preco_24h",
    "variacao_valor_mercado_24h",
    "percentual_variacao_valor_mercado_24h",
    "valor_total_diluido",
]

# Fact rows: coin key, load timestamp exposed as data_hora_id, then the measures.
# NOTE(review): the original comment states dat_carga is already in YYYYMMDDHH
# format — not verifiable from this notebook, confirm against the trusted job.
df_fato_precos = df.select(
    "id_moeda",
    col("dat_carga").alias("data_hora_id"),
    *fato_precos_measures,
)

# Write the fact table to the refined layer, replacing the current hourly partition.
fato_precos_path = f"s3://arquitetura-software-datalake/refined/coingecko/fato_precos/{now.year}/{now.month:02d}/{now.day:02d}/{now.hour:02d}/"
df_fato_precos.write.mode("overwrite").parquet(fato_precos_path)




# Criação da dimensão de moedas (dim_moeda)

In [7]:
# Columns that make up the coin dimension, in output order.
dim_moeda_columns = [
    "id_moeda",
    "simbolo",
    "nome",
    "fornecimento_total",
    "fornecimento_maximo",
    "circulacao",
    "preco_maximo_historico",
    "data_maximo_historico",
    "preco_minimo_historico",
    "data_minimo_historico",
    "variacao_desde_maximo",
    "variacao_desde_minimo",
]

# Coin dimension: project the descriptive columns and keep one row per id_moeda.
df_dim_moeda = df.select(*dim_moeda_columns).dropDuplicates(["id_moeda"])

# Write the dimension to the refined layer, replacing the current hourly partition.
dim_moeda_path = f"s3://arquitetura-software-datalake/refined/coingecko/dim_moeda/{now.year}/{now.month:02d}/{now.day:02d}/{now.hour:02d}/"
df_dim_moeda.write.mode("overwrite").parquet(dim_moeda_path)


