In [None]:
# (Re)mount the "raw-data" ADLS Gen2 container at MOUNT_POINT so that this cell
# can be run repeatedly: an existing mount is removed, anything left at the
# path is deleted, and the container is mounted again using OAuth
# client-credential settings.
MOUNT_POINT = "/mnt/dataEngineering"

# Query the mount table once; any() is already False for an empty list, so no
# separate emptiness check is needed.
if any(mount.mountPoint == MOUNT_POINT for mount in dbutils.fs.mounts()):
    # Unmount the directory if it is already mounted
    dbutils.fs.unmount(MOUNT_POINT)

# Delete the existing directory recursively.
# NOTE(review): this is a recursive delete — confirm it can never run while the
# path still resolves to the storage container.
dbutils.fs.rm(MOUNT_POINT, recurse=True)

# OAuth (service principal) settings for the ABFS driver.
# The client secret is read from a Databricks secret scope instead of being
# pasted into the notebook, so it is never stored in (or exported with) the
# notebook source. Replace the <...> placeholders with your scope and key name.
configs = {
    "fs.azure.account.auth.type": "OAuth",
    "fs.azure.account.oauth2.client.id": "Application (client) ID của App registration",
    "fs.azure.account.oauth.provider.type": "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider",
    "fs.azure.account.oauth2.client.endpoint": "https://login.microsoftonline.com/Directory (tenant) ID của App registration/oauth2/token",
    "fs.azure.account.oauth2.client.secret": dbutils.secrets.get(
        scope="<secret-scope-name>",
        key="<client-secret-key-name>",
    ),
}

# Mount the directory again
dbutils.fs.mount(
    source="abfss://raw-data@{Tên storage account}.dfs.core.windows.net",
    mount_point=MOUNT_POINT,
    extra_configs=configs,
)

In [None]:
from pyspark.sql.types import *
from pyspark.sql.functions import *

# Explicit schema applied to customer.csv when it is read below.
# The second field was previously misspelled "FistName"; it is now "FirstName"
# so the DataFrame (and anything saved from it) carries the correct column name.
# NOTE(review): a Delta table that was already written with the misspelled
# column may reject the renamed column on overwrite until its schema is
# replaced (e.g. overwriteSchema) or the table is dropped — confirm before
# re-running against an existing table.
customerSchema = StructType([
    StructField("CustomerID", IntegerType()),
    StructField("FirstName", StringType()),
    StructField("LastName", StringType()),
    StructField("Email", StringType()),
    StructField("Phone", StringType()),
])

# Read the raw customer file from the mounted container; the file has a header
# row, and the schema above is used instead of inferring column types.
customer_raw = (
    spark.read.format("csv")
    .option("header", "true")
    .load("/mnt/dataEngineering/customer.csv", schema=customerSchema)
)
display(customer_raw)

In [None]:
# Transformation step: produce a copy of the raw customers sorted by
# CustomerID (ascending, which is also the default), then render it.
transformed_customer = customer_raw.orderBy("CustomerID", ascending=True)

display(transformed_customer)

In [None]:
# Save the transformed customers as a Delta table, replacing any previous
# contents, then print the table's extended metadata.
database_name = "TransformedData"
table_name = "Customer"
full_table_name = f"{database_name}.{table_name}"

# saveAsTable does not create the target database; make sure it exists so the
# notebook also runs on a workspace where it has not been created yet.
# IF NOT EXISTS makes this a no-op when the database is already there.
spark.sql(f"CREATE DATABASE IF NOT EXISTS {database_name}")

transformed_customer.write.format("delta").mode("overwrite").saveAsTable(full_table_name)
spark.sql(f"DESCRIBE EXTENDED {full_table_name}").show(truncate=False)

In [None]:
%sql
-- List the tables registered in the TransformedData database.
SHOW TABLES FROM TransformedData;

In [None]:
%sql
-- Preview up to 20 rows of the saved customer table.
SELECT * FROM TransformedData.Customer LIMIT 20;