# Imports

In [0]:
import pyspark.sql.functions as F
from pyspark.sql.functions import col, trim
from pyspark.sql.types import StringType

# Reading from Bronze

In [0]:
# Source of this notebook: the raw CRM customer table in the bronze schema.
BRONZE_CUSTOMER_TABLE = "workspace.bronze.crm_cust_info"
df = spark.table(BRONZE_CUSTOMER_TABLE)

In [0]:
# Preview the bronze rows before any transformation is applied.
display(df)

# Data Transformation

In [0]:
# Strip leading/trailing whitespace from every string column.
# This is done in a single select() rather than one withColumn() call per
# column: each withColumn() adds its own projection, and calling it in a loop
# can produce very large query plans. Column order and names are preserved;
# non-string columns pass through untouched.
df = df.select(
    [
        trim(col(field.name)).alias(field.name)
        if isinstance(field.dataType, StringType)
        else col(field.name)
        for field in df.schema.fields
    ]
)
 

In [0]:
# Bronze column name -> silver column name.
# Consumed by the rename step further down in the notebook.
RENAME_MAP = dict(
    cst_id="customer_id",
    cst_key="customer_number",
    cst_firstname="firstname",
    cst_lastname="lastname",
    cst_marital_status="marital_status",
    cst_gndr="gender",
    cst_create_date="create_date",
)

In [0]:
# Expand the single-letter codes into readable labels. The comparison is done
# on the upper-cased value so "m" and "M" are treated alike; anything that is
# not a recognised code (including NULL) falls through to "n/a".
marital_code = F.upper(F.col("cst_marital_status"))
gender_code = F.upper(F.col("cst_gndr"))

marital_status_label = (
    F.when(marital_code == "M", "Married")
    .when(marital_code == "S", "Single")
    .otherwise("n/a")
)
gender_label = (
    F.when(gender_code == "M", "Male")
    .when(gender_code == "F", "Female")
    .otherwise("n/a")
)

df = df.withColumn("cst_marital_status", marital_status_label)
df = df.withColumn("cst_gndr", gender_label)


# Renaming the columns

In [0]:
# Apply every bronze -> silver rename declared in RENAME_MAP, one column at a
# time, in the mapping's declaration order.
for bronze_name in RENAME_MAP:
    silver_name = RENAME_MAP[bronze_name]
    df = df.withColumnRenamed(bronze_name, silver_name)


# Remove rows with a missing customer_id

In [0]:
# Keep only rows that carry a customer_id; rows where it is NULL are dropped.
has_customer_id = col("customer_id").isNotNull()
df = df.where(has_customer_id)

In [0]:
# Preview the transformed rows before they are written out.
display(df)

# Write to Silver Table

In [0]:
# Persist the result as a Delta table in the silver schema. "overwrite" mode
# replaces the existing table contents on every run, and overwriteSchema is
# set so that the table's schema is replaced along with the data.
silver_writer = df.write.format("delta").mode("overwrite")
silver_writer = silver_writer.option("overwriteSchema", "true")
silver_writer.saveAsTable("workspace.silver.crm_customers")

In [0]:
%sql select * from workspace.silver.crm_customers
-- customer_id is left unquoted so it is resolved as a column. With Spark SQL's
-- default settings a double-quoted token is a string literal, so the previous
-- predicate compared a constant string to NULL and was true for every row.
WHERE customer_id IS NOT NULL