In [1]:
from pyspark.sql import SparkSession
# Build (or reuse) the Spark session, registering the Delta Lake SQL extension
# and the Delta catalog as the session catalog.
spark = (
    SparkSession.builder
    .appName('User Raw to Trusted')
    .config('spark.sql.extensions', 'io.delta.sql.DeltaSparkSessionExtension')
    .config('spark.sql.catalog.spark_catalog', 'org.apache.spark.sql.delta.catalog.DeltaCatalog')
    .getOrCreate()
)

In [3]:
!pip install boto3

Collecting boto3
  Downloading boto3-1.35.24-py3-none-any.whl (139 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.2/139.2 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hCollecting s3transfer<0.11.0,>=0.10.0
  Downloading s3transfer-0.10.2-py3-none-any.whl (82 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m82.7/82.7 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting botocore<1.36.0,>=1.35.24
  Downloading botocore-1.35.24-py3-none-any.whl (12.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.6/12.6 MB[0m [31m26.3 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting jmespath<2.0.0,>=0.7.1
  Downloading jmespath-1.0.1-py3-none-any.whl (20 kB)
Installing collected packages: jmespath, botocore, s3transfer, boto3
Successfully installed boto3-1.35.24 botocore-1.35.24 jmespath-1.0.1 s3transfer-0.10.2


In [24]:
import json
import boto3
from botocore.client import Config
from botocore.exceptions import NoCredentialsError
from io import BytesIO
import time


def upload_json_to_minio(data, bucket, object_name, path_save):
    """Serialize ``data`` as JSON and upload it to a MinIO bucket.

    The endpoint and credentials default to the values previously hard-coded
    here and can be overridden through the ``MINIO_ENDPOINT``,
    ``MINIO_ACCESS_KEY`` and ``MINIO_SECRET_KEY`` environment variables.

    Args:
        data: JSON-serializable object to upload.
        bucket: Name of the destination bucket.
        object_name: Name reported in the success message; it is not used
            as the storage key.
        path_save: Object key under which the payload is stored.

    Returns:
        True when the upload completed, False when boto3 reported missing
        credentials.

    Raises:
        TypeError: If ``data`` cannot be serialized by ``json.dumps``.
        Any other error raised by the upload call is propagated unchanged.
    """
    import os  # local import so this cell does not depend on another cell's imports

    # Configure the boto3 client for MinIO (environment overrides, same defaults).
    s3_client = boto3.client(
        's3',
        endpoint_url=os.environ.get('MINIO_ENDPOINT', 'http://minio:9000'),
        aws_access_key_id=os.environ.get('MINIO_ACCESS_KEY', 'datalake'),
        aws_secret_access_key=os.environ.get('MINIO_SECRET_KEY', 'datalake'),
    )

    try:
        print("Serializar o conteúdo JSON e convertê-lo para bytes")
        json_bytes = json.dumps(data).encode('utf-8')

        print("Criar um buffer de bytes para simular um arquivo")
        json_buffer = BytesIO(json_bytes)

        # Upload the in-memory "file"; path_save is the object key.
        s3_client.upload_fileobj(json_buffer, bucket, path_save)
        print(f'Arquivo JSON foi carregado com sucesso no bucket {bucket}, objeto: {object_name}')
        # Short pause after each upload — presumably throttling; TODO confirm it is still needed.
        time.sleep(0.4)
        return True
    except NoCredentialsError:
        print("Credenciais não encontradas")
        # Report the failure instead of returning None like the success path used to.
        return False

In [25]:
import requests
from pyspark.sql.functions import udf
from http.cookiejar import LWPCookieJar
import datetime

# NOTE(review): the API token is embedded in this URL; consider loading it from
# an environment variable or a secrets store before sharing the notebook.
post_auth = "http://api.olhovivo.sptrans.com.br/v2.1/Login/Autenticar?token=d3f026bb74699c88e75ef5a7e71cf1181e70cd26cb93b8b31038c9df15ba2f61"
# Notebook-level session; fetch_data opens its own session, so this one is not reused there.
session = requests.Session()
# A timeout keeps the cell from hanging indefinitely if the API is unreachable.
session.post(post_auth, timeout=30)

def fetch_data(id_linha):
    """Fetch the arrival forecast for one line and store the raw JSON in MinIO.

    Authenticates against the Olho Vivo API, requests ``Previsao/Linha`` for
    ``id_linha`` and passes the decoded JSON body to ``upload_json_to_minio``,
    targeting the ``raw`` bucket under
    ``/previsao_chegada/previsao_chegada_<timestamp>.json``.

    Args:
        id_linha: Line code sent as the ``codigoLinha`` query parameter.

    Returns:
        True when the forecast was fetched and handed to the uploader;
        False when the login or the forecast request did not answer with
        HTTP 200, or when the forecast body was not valid JSON.

    Raises:
        requests.exceptions.RequestException: On connection errors or when
            a request exceeds the timeout.
    """
    # NOTE(review): the API token is embedded in this URL; consider loading it
    # from configuration instead of keeping it in the notebook.
    url = "http://api.olhovivo.sptrans.com.br/v2.1/Login/Autenticar?token=d3f026bb74699c88e75ef5a7e71cf1181e70cd26cb93b8b31038c9df15ba2f61"
    endpoint = "http://api.olhovivo.sptrans.com.br/v2.1/Previsao/Linha?"
    request_timeout = 30  # seconds; avoids a worker hanging forever on a stalled request

    # The context manager releases the pooled connections when the call is done.
    with requests.Session() as session:
        auth_response = session.post(url, timeout=request_timeout)
        if auth_response.status_code != 200:
            print("NAODEUCERTO")
            return False

        params = {
            "codigoLinha": id_linha,
        }
        response = session.get(
            endpoint,
            params=params,
            cookies=auth_response.cookies,
            timeout=request_timeout,
        )
        # Do not persist error pages: only a successful forecast response is uploaded.
        if response.status_code != 200:
            print("NAODEUCERTO")
            return False

        try:
            payload = response.json()
        except ValueError:
            # Body was not valid JSON, so there is nothing meaningful to store.
            print("NAODEUCERTO")
            return False

    # Named fetched_at (not `time`) so the name does not shadow the `time` module.
    fetched_at = datetime.datetime.now()
    bucket = "raw"
    name_file = f"previsao_chegada_{fetched_at}.json"
    caminho_de_salvar = f"/previsao_chegada/{name_file}"

    upload_json_to_minio(
        data=payload,
        bucket=bucket,
        object_name=name_file,
        path_save=caminho_de_salvar
    )

    return True

In [26]:
# s3a location of the vehicle-position table that this notebook loads as input.
trusted_path_posicao_veiculo = 's3a://trusted/posicao_veiculo'

In [33]:
# Load the table stored at trusted_path_posicao_veiculo using the Delta format.
df_posicao_to_previsao = spark.read.format('delta').load(trusted_path_posicao_veiculo)

In [34]:
# Print a preview of the loaded rows to inspect the columns and sample values.
df_posicao_to_previsao.show()

+--------------+--------+----------------------+---------------------+-----------------+---------------+-----------------+----------+-------------------+-------------------+
|LETREIRO_LINHA|ID_LINHA|LETREIRO_DESTINO_LINHA|LETREIRO_ORIGEM_LINHA|QTD_VEICULO_LINHA|SENTIDO_O_LINHA|ACESSIVEL_VEICULO|ID_VEICULO|  LONGITUDE_VEICULO|   LATITUDE_VEICULO|
+--------------+--------+----------------------+---------------------+-----------------+---------------+-----------------+----------+-------------------+-------------------+
|       1765-10|   34550|        METRÔ TUCURUVI|           JD. CABUÇU|                4|              2|             true|     22847|       -46.56898525|        -23.4661845|
|       1765-10|   34550|        METRÔ TUCURUVI|           JD. CABUÇU|                4|              2|             true|     22830|         -46.602609|         -23.480383|
|       1765-10|   34550|        METRÔ TUCURUVI|           JD. CABUÇU|                4|              2|             true|     223

In [41]:
from pyspark.sql.functions import col, udf
from pyspark.sql.types import BooleanType

# fetch_data performs an API call and an upload, so the UDF is marked
# non-deterministic: Spark treats UDFs as deterministic by default and may
# otherwise eliminate or duplicate invocations during optimization.
# BooleanType is declared explicitly instead of relying on the default string return type.
convertUDF = udf(fetch_data, BooleanType()).asNondeterministic()
# One row per distinct line id, so each line is requested once per evaluation.
df_previsao_responde2 = df_posicao_to_previsao.select("ID_LINHA").distinct()
df_previsao_responde = df_previsao_responde2.withColumn("response", convertUDF(col("id_linha")))

In [42]:
# Displaying the frame evaluates the UDF, so this cell is what performs the API calls and uploads.
# NOTE(review): show() only prints the first rows, so presumably not every ID_LINHA is
# processed here — confirm, and use an action over the full frame if all lines must be fetched.
df_previsao_responde.show()

+--------+--------+
|ID_LINHA|response|
+--------+--------+
|   32954|    true|
|    2453|    true|
|   32912|    true|
|     474|    true|
|      26|    true|
|    2250|    true|
|      29|    true|
|    2173|    true|
|   33600|    true|
|   33831|    true|
|    1258|    true|
|    1277|    true|
|     418|    true|
|    1224|    true|
|   33090|    true|
|     558|    true|
|    1127|    true|
|   34832|    true|
|   33342|    true|
|   33234|    true|
+--------+--------+
only showing top 20 rows

