In [0]:
import pyspark.sql.functions as F
import pyspark.sql.types as T

### Data Reading

In [0]:
# Load the bronze-layer customers data (stored as Parquet) into a DataFrame.
df = spark.read.parquet(
    'abfss://bronze@databricksstorageete.dfs.core.windows.net/customers'
)

In [0]:
# Render the raw bronze customers DataFrame for a quick visual check.
df.display()

In [0]:
# Derive two helper columns from the raw customer attributes:
#   email_domain - the part of `email` after the last '@'
#   full_name    - first_name and last_name joined by a single space
#
# concat_ws is used rather than concat: concat returns NULL as soon as any
# argument is NULL, which would wipe out full_name for a customer missing
# either name part. concat_ws skips NULL arguments, so the remaining name part
# is kept (and the result is an empty string if both parts are NULL).
transformed_df = (
    df
    .withColumn('email_domain', F.element_at(F.split('email', '@'), -1))
    .withColumn('full_name', F.concat_ws(' ', 'first_name', 'last_name'))
)
transformed_df.display()

In [0]:
# Number of customers (non-null customer_id values) per email domain,
# with the most common domains listed first.
display(
    transformed_df
    .groupBy('email_domain')
    .agg(F.count('customer_id').alias('count_of_customers'))
    .orderBy(F.col('count_of_customers').desc())
)

In [0]:
# Per-provider subsets of the transformed customers, each selected by an exact
# match on the derived email_domain column.
transformed_df_gmail = transformed_df.where(F.col('email_domain') == 'gmail.com')
transformed_df_hotmail = transformed_df.where(F.col('email_domain') == 'hotmail.com')
transformed_df_yahoo = transformed_df.where(F.col('email_domain') == 'yahoo.com')

In [0]:
# Persist the transformed customers to the silver layer as a Delta table,
# replacing whatever data is currently stored at that location.
transformed_df.write.save(
    'abfss://silver@databricksstorageete.dfs.core.windows.net/customers',
    format='delta',
    mode='overwrite',
)

In [0]:
%sql
-- Register the Delta data at the silver customers location as the table
-- databricks_cata.silver.customers, unless that table is already defined.
CREATE TABLE IF NOT EXISTS databricks_cata.silver.customers
  USING DELTA
  LOCATION 'abfss://silver@databricksstorageete.dfs.core.windows.net/customers';

In [0]:
%sql
-- Show every column and row of the registered silver customers table.
SELECT *
FROM databricks_cata.silver.customers;