In [0]:
# Carga de Datos RAW a Tabla Delta BRONZE
import os
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, IntegerType, DoubleType, StringType
from pyspark.sql.functions import current_timestamp, lit, input_file_name, monotonically_increasing_id, current_timestamp, col

In [0]:
# Inspect the files currently stored in the credit_risk volume.
volume_contents = dbutils.fs.ls("/Volumes/workspace/credit_risk/credit_risk_volume")
display(volume_contents)


In [0]:
# 1. DEFINE THE READ SCHEMA
# Every raw column is declared as a nullable string; no typing is applied
# at this stage.
# NOTE(review): "cb_preson_cred_hist_length" looks like a typo of "person".
# The rename step references the same spelling, so change both together.
schema_raw = StructType([
    StructField(column_name, StringType(), True)
    for column_name in (
        "person_age",
        "person_income",
        "person_home_ownership",
        "person_emp_length",
        "loan_intent",
        "loan_grade",
        "loan_amnt",
        "loan_int_rate",
        "loan_status",
        "loan_percent_income",
        "cb_person_default_on_file",
        "cb_preson_cred_hist_length",
    )
])

In [0]:
# 2. READ THE DATA USING THE EXPLICIT SCHEMA
volume_path = "/Volumes/workspace/credit_risk/credit_risk_volume"  # adjust the name
# Builder-style reader: apply the schema, treat the first CSV row as a header,
# then load the dataset file from the volume.
df_raw = (
    spark.read
    .schema(schema_raw)
    .option("header", True)
    .csv(f"{volume_path}/credit_risk_dataset.csv")
)



In [0]:
# 3. RENAME COLUMNS TO SPANISH AND ADD METADATA
df_bronze = (
    df_raw
    # Rename every raw column in one pass (raw name -> Spanish name).
    .withColumnsRenamed({
        "person_age": "edad",
        "person_income": "ingreso_anual",
        "person_home_ownership": "tipo_vivienda",
        "person_emp_length": "anios_empleo",
        "loan_intent": "proposito",
        "loan_grade": "calificacion",
        "loan_amnt": "monto",
        "loan_int_rate": "tasa_interes",
        "loan_status": "estado_pago",
        "loan_percent_income": "pct_ingreso",
        "cb_person_default_on_file": "historial_default",
        "cb_preson_cred_hist_length": "anios_hist_credito",
    })
    # Metadata: ingestion timestamp and the layer label.
    .withColumn("fecha_ingesta", current_timestamp())
    # Source-file tracking is currently disabled:
    #    .withColumn("archivo_origen", input_file_name())
    .withColumn("capa", lit("bronze"))
)

In [0]:
try:
    # 4. SAVE TO THE BRONZE TABLE (AS-IS, NO TRANSFORMATIONS)
    # Overwrites the Delta table and its schema on every run.
    (
        df_bronze.write
        .format("delta")
        .mode("overwrite")
        .option("overwriteSchema", "true")
        .saveAsTable("workspace.credit_risk.bronze_credit_risk")
    )

except Exception as e:
    import traceback

    # Error type
    error_type = type(e).__name__
    # Error description
    error_summary = str(e)
    # Full stack trace of the failure
    error_trace = traceback.format_exc()

    # Complete error message. This must be a real f-string: the previous
    # version had the "f" inside the quotes, a misspelled placeholder and
    # "/n" instead of a newline, so only the literal template was reported.
    error_msg_full = f"{error_type}: {error_summary}\n{error_trace}"

    # Keep the published task value short: cap at 500 characters and mark
    # the truncation explicitly.
    if len(error_msg_full) > 500:
        error_msg = error_msg_full[:500] + "\n[...]Error Truncado[...]"
    else:
        error_msg = error_msg_full

    # Publish the error under the "error" task value key, then re-raise the
    # original exception (bare raise keeps its traceback intact).
    dbutils.jobs.taskValues.set(key="error", value=error_msg)
    raise