# Import libraries

In [0]:
import pyspark.sql.functions as F
from pyspark.sql.types import StringType, DateType
from pyspark.sql.functions import col ,trim

# Load data

In [0]:
# Read the bronze ERP location table into a DataFrame.
df = spark.read.table("workspace.bronze.erp_loc_a101")

# Data transformation

## Trimming

In [0]:
# Trim leading/trailing whitespace from every string column.
# This is done in ONE projection instead of calling withColumn() once per
# column: each withColumn() call adds another projection to the query plan,
# which the PySpark docs warn against doing in a loop. Column order and
# names are unchanged; non-string columns pass through untouched.
df = df.select(
    [
        trim(col(field.name)).alias(field.name)
        if isinstance(field.dataType, StringType)
        else col(field.name)
        for field in df.schema.fields
    ]
)

## Clean customer id

In [0]:
# Remove every "-" from the customer id.
cid_without_dashes = F.regexp_replace(F.col("cid"), "-", "")
df = df.withColumn("cid", cid_without_dashes)

In [0]:
# Render the DataFrame in the notebook output to inspect the cleaned ids.
display(df)

## Country name normalization

In [0]:
# Map country codes to full names and replace missing values with 'n/a';
# any other value is kept as-is.
country = F.col("cntry")
country_is_missing = country.isNull() | (country == "")

normalized_country = (
    F.when(country == "DE", "Germany")
    .when(country.isin("US", "USA"), "United States")
    .when(country_is_missing, "n/a")
    .otherwise(country)
)

df = df.withColumn("cntry", normalized_country)

# Rename Columns

In [0]:
# Old column name -> new column name.
RENAME_MAP = {
    "cid": "customer_number",
    "cntry": "country",
}

# Apply each rename in turn; withColumnRenamed is a no-op when the source
# column is absent.
for source_column, target_column in RENAME_MAP.items():
    df = df.withColumnRenamed(source_column, target_column)
    

In [0]:
# Show a 10-row sample of the renamed DataFrame in the notebook output.
display(df.limit(10))

# Write to Silver Table

In [0]:
# Save the DataFrame as a Delta table, overwriting any existing data in it.
silver_writer = df.write.format("delta").mode("overwrite")
silver_writer.saveAsTable("workspace.silver.erp_customer_location")

# Check Silver Table

In [0]:
%sql
-- Preview the first 10 rows of the silver table.
SELECT *
FROM workspace.silver.erp_customer_location
LIMIT 10

In [0]:
%sql
-- WARNING: this removes the silver table that the write step of this
-- notebook has just created. Run it only as deliberate cleanup; skip it in
-- a normal pipeline run, otherwise the notebook leaves no output behind.
-- IF EXISTS keeps the cell re-runnable when the table is already gone.
DROP TABLE IF EXISTS workspace.silver.erp_customer_location