In [None]:
# (Re)mount the Azure Data Lake Storage Gen2 containers used by this notebook
# under /mnt/<container>.
#
# NOTE(review): every value below that describes an App registration or the
# storage account is a placeholder and must be filled in before running.
# Do not paste a real client secret into the notebook; read it from a secret
# scope instead, e.g. dbutils.secrets.get(scope="<scope>", key="<key>").
STORAGE_ACCOUNT = "{Tên storage account}"
CONTAINERS = ["raw-data", "transformed-data"]

configs = {"fs.azure.account.auth.type": "OAuth",
           "fs.azure.account.oauth2.client.id": "Application (client) ID của App registration",
           "fs.azure.account.oauth.provider.type": "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider",
           "fs.azure.account.oauth2.client.endpoint": "https://login.microsoftonline.com/Directory (tenant) ID của App registration/oauth2/token",
           "fs.azure.account.oauth2.client.secret": "Value của Certificates & secrets trong App registration"}


def remount_containers(containers, storage_account, extra_configs):
    """Unmount, clean up and mount again each container at /mnt/<container>.

    Args:
        containers: Container names; each one is mounted at /mnt/<name>.
        storage_account: Storage account name used to build the abfss:// URI.
        extra_configs: Hadoop/ABFS options passed through to dbutils.fs.mount.

    The three phases (unmount all, remove all, mount all) run in that order
    for the whole list, so nothing is mounted before every stale mount point
    has been removed.
    """
    mount_points = [f"/mnt/{container}" for container in containers]

    # Read the mount table once; any() over an empty list is already False,
    # so no separate emptiness check is needed.
    already_mounted = {mount.mountPoint for mount in dbutils.fs.mounts()}
    for mount_point in mount_points:
        if mount_point in already_mounted:
            dbutils.fs.unmount(mount_point)

    # Delete whatever is left at the (now unmounted) paths, recursively.
    for mount_point in mount_points:
        dbutils.fs.rm(mount_point, recurse=True)

    # Mount each container again with the OAuth configuration.
    for container, mount_point in zip(containers, mount_points):
        dbutils.fs.mount(
            source=f"abfss://{container}@{storage_account}.dfs.core.windows.net",
            mount_point=mount_point,
            extra_configs=extra_configs
        )


remount_containers(CONTAINERS, STORAGE_ACCOUNT, configs)

In [None]:
# List the contents of the raw-data mount point.
display(dbutils.fs.ls("/mnt/raw-data"))

In [None]:
# List the contents of the transformed-data mount point.
display(dbutils.fs.ls("/mnt/transformed-data"))

In [None]:
from pyspark.sql.types import *
from pyspark.sql.functions import *

# Explicit schema for the raw customer CSV (all fields nullable).
# NOTE(review): "FistName" looks like a typo for "FirstName". The transform
# query also refers to this column name, so rename both places together.
customerSchema = (
    StructType()
    .add("CustomerID", IntegerType())
    .add("FistName", StringType())
    .add("LastName", StringType())
    .add("Email", StringType())
    .add("Phone", StringType())
)

# Read the raw customer CSV files (first row is a header) with that schema.
customer_raw = (
    spark.read
    .schema(customerSchema)
    .option("header", "true")
    .csv("/mnt/raw-data/customer_raw")
)
display(customer_raw)

In [None]:
# Transform data: expose the raw frame to Spark SQL, then project it into the
# customer dimension (id, name, email, phone).
customer_raw.createOrReplaceTempView("customer_raw")

# NOTE(review): Spark SQL CONCAT yields NULL when any argument is NULL, so a
# missing first or last name makes the whole `name` NULL. Confirm that is
# acceptable, or consider CONCAT_WS.
dim_customer_query = """SELECT CustomerID AS id,
                            CONCAT(FistName, ' ', LastName) AS name,
                            Email as email,
                            Phone as phone
                            FROM customer_raw"""
dim_customer = spark.sql(dim_customer_query)
display(dim_customer)

In [None]:
# Persist the customer dimension in Delta format, replacing any existing data
# and schema at the target path.
(
    dim_customer.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .save("/mnt/transformed-data/dim_customer")
)