In [1]:
import json
import os

import pyspark
import requests
from delta import *

# Spark session for Delta Lake tables stored in MinIO through the S3A connector.
# The S3A credentials are taken from the MINIO_ACCESS_KEY / MINIO_SECRET_KEY
# environment variables so they do not have to live in the notebook; the
# fallbacks keep the previous hard-coded values, so behaviour is unchanged
# when the variables are not set.
builder = (
    pyspark.sql.SparkSession.builder
    .appName("delta")
    .master("spark://spark-master:7077")
    # Register Delta's SQL extension and make Delta the session catalog implementation.
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
    # S3A connection settings pointing at the MinIO endpoint.
    .config("spark.hadoop.fs.s3a.access.key", os.environ.get("MINIO_ACCESS_KEY", "datalake"))
    .config("spark.hadoop.fs.s3a.secret.key", os.environ.get("MINIO_SECRET_KEY", "datalake"))
    .config("spark.hadoop.fs.s3a.endpoint", "http://minio:9000")
    # Path-style URLs (endpoint/bucket/key) instead of bucket-as-subdomain addressing.
    .config("spark.hadoop.fs.s3a.path.style.access", "true")
    .config("spark.hadoop.fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem")
)

# configure_spark_with_delta_pip comes from the `delta` package; per the Delta Lake
# docs it adds the matching Delta artifacts to the builder before the session is created.
spark = configure_spark_with_delta_pip(builder).enableHiveSupport().getOrCreate()


In [17]:
# Turn on Spark's REPL eager-evaluation setting; per the Spark SQL configuration
# docs, a supporting REPL/notebook then displays the top rows of a DataFrame
# when it is the value of a cell, without an explicit .show().
spark.conf.set("spark.sql.repl.eagerEval.enabled", True)


In [24]:
import requests
import json

def loadData(qtde):
    """Fetch ``qtde`` fake persons from fakerapi.it and return them as a Spark DataFrame.

    One HTTP request is issued per person. Each person's nested ``address``
    object is removed from the record and its fields are merged into the
    person dict, so every row is a flat mapping of person + address fields.

    Args:
        qtde: Number of persons to fetch (one API call each).

    Returns:
        The result of ``spark.createDataFrame`` applied to the list of
        flattened person dicts.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        KeyError: If a response lacks the expected ``data`` / ``address`` keys.
    """
    items = []
    for _ in range(qtde):
        # A timeout keeps the cell from hanging forever on a stalled connection.
        r = requests.get('https://fakerapi.it/api/v1/persons?_quantity=1', timeout=30)
        # Fail loudly on HTTP errors instead of tripping over an unexpected body later.
        r.raise_for_status()

        # Parse the body once; pop() both removes the nested address from the
        # person record and hands it back for flattening.
        pessoa = r.json()['data'][0]
        endereco = pessoa.pop('address')

        # On key collisions the address value wins (right-hand operand of `|`).
        # NOTE(review): the sample address shown in this notebook has its own
        # `id`, which would replace the person's `id` here - confirm that is intended.
        items.append(pessoa | endereco)

    return spark.createDataFrame(items)

# Pull a small sample of 10 fake persons; `df` is the input of the column
# selection performed in a later cell.
df = loadData(10)



In [37]:
from pyspark.sql.functions import year, month, dayofmonth, current_date
# Keep only the person and location fields of interest and stamp every row with
# the current date, split into ano (year) / mes (month) / dia (day of month)
# columns appended after the selected fields.
df_raw = df.select(
    'birthday',
    'firstname',
    'lastname',
    'gender',
    'phone',
    'city',
    'country',
    'latitude',
    'longitude',
    year(current_date()).alias("ano"),
    month(current_date()).alias("mes"),
    dayofmonth(current_date()).alias("dia"),
)
          
          

In [38]:
# Persist the raw person snapshot as a Delta table under the `raw` bucket,
# partitioned by the ano/mes/dia date columns.
# NOTE(review): with mode('overwrite') Delta by default replaces the table
# contents on every run, so partitions from earlier days will not accumulate -
# confirm this is intended (otherwise consider append or a replaceWhere-scoped
# overwrite).
path = 's3a://raw/user'
df_raw.write.format("delta").mode('overwrite').partitionBy("ano", "mes", "dia").save(path)

{'id': 1,
 'street': '55636 Emory Parks',
 'streetName': 'Rosenbaum Mill',
 'buildingNumber': '2849',
 'city': 'Eichmannmouth',
 'zipcode': '60543-1442',
 'country': 'Equatorial Guinea',
 'country_code': 'GQ',
 'latitude': 14.705867,
 'longitude': 72.560943}