In [0]:
from pyspark.sql.functions import col, monotonically_increasing_id
from delta.tables import DeltaTable

# CREATE FLAG PARAMETER

In [0]:
dbutils.widgets.text('incremental_flag','0')

In [0]:
incremental_flag = dbutils.widgets.get('incremental_flag')

# CREATING DIMENSION MODEL

### Fetch Relative Column

In [0]:
df_scr = spark.sql('''
SELECT DISTINCT(Dealer_ID) as Dealer_ID , DealerName
FROM PARQUET.`abfss://silver@storageact04.dfs.core.windows.net/carsales`
'''
)

In [0]:
df_scr.display()

Dealer_ID,DealerName
DLR0058,Fiat do Brasil Motors
DLR0107,Land Rover Motors
DLR0129,Mia Motors
DLR0111,Lotus Motors
DLR0085,Humber Motors
DLR0001,AC Cars Motors
DLR0218,Lagonda Motors
DLR0082,Honda Motors
DLR0063,Ford do Brasil Motors
DLR0193,Tazzari Motors


### dim_model Sink - Initial and Incremental(just bring the schema if table NOT EXISTS)

In [0]:
if spark.catalog.tableExists('car_catalog.gold.dim_dealer'):
  df_sink = spark.sql('''
    SELECT Dim_dealer_key, Dealer_ID , DealerName
    FROM car_catalog.gold.dim_dealer
    '''
    )  
else:
    df_sink = spark.sql('''
    SELECT 1 AS Dim_dealer_key, Dealer_ID , DealerName
    FROM PARQUET.`abfss://silver@storageact04.dfs.core.windows.net/carsales`
    WHERE 1=0
    '''
    )

### Filtering new records and Updating old records

In [0]:
df_filter = df_scr.join(df_sink, df_scr['Dealer_ID'] == df_sink['Dealer_ID'], 'left').select(df_scr['Dealer_ID'],df_scr['DealerName'],df_sink['Dim_dealer_key'])

In [0]:
df_filter.display()

Dealer_ID,DealerName,Dim_dealer_key
DLR0058,Fiat do Brasil Motors,
DLR0107,Land Rover Motors,
DLR0129,Mia Motors,
DLR0111,Lotus Motors,
DLR0085,Humber Motors,
DLR0001,AC Cars Motors,
DLR0218,Lagonda Motors,
DLR0082,Honda Motors,
DLR0063,Ford do Brasil Motors,
DLR0193,Tazzari Motors,


**DF Filter Old**

In [0]:
df_filter_old = df_filter.filter(col('dim_dealer_key').isNotNull())

In [0]:
df_filter_old.display()

Dealer_ID,DealerName,Dim_dealer_key


**DF_Filter_New**

In [0]:
df_filter_new = df_filter.filter(col('dim_dealer_key').isNull()).select(df_filter['Dealer_ID'],df_filter['DealerName'])

In [0]:
df_filter_new.display()

Dealer_ID,DealerName
DLR0058,Fiat do Brasil Motors
DLR0107,Land Rover Motors
DLR0129,Mia Motors
DLR0111,Lotus Motors
DLR0085,Humber Motors
DLR0001,AC Cars Motors
DLR0218,Lagonda Motors
DLR0082,Honda Motors
DLR0063,Ford do Brasil Motors
DLR0193,Tazzari Motors


### Create Surrogate Key 

**Fetch the max Surrogate Key**

In [0]:
if incremental_flag ==  '0' :
    max_value = 1
else:
    max_value_df = spark.sql("SELECT max(Dim_dealer_key) FROM cars_catalog.gold.dim_dealer")
    max_value = max_value_df.collect()[0][0]+1

**Create Surrogate Key column and Add the max Surrogate key**

In [0]:
max_value

1

In [0]:
df_filter_new = df_filter_new.withColumn('Dim_dealer_key', max_value + monotonically_increasing_id())

### Create Final Filter - df_filter_old + df_filter_new

In [0]:
df_final = df_filter_new.union(df_filter_old)

In [0]:
df_final.display()

Dealer_ID,DealerName,Dim_dealer_key
DLR0058,Fiat do Brasil Motors,1
DLR0107,Land Rover Motors,2
DLR0129,Mia Motors,3
DLR0111,Lotus Motors,4
DLR0085,Humber Motors,5
DLR0001,AC Cars Motors,6
DLR0218,Lagonda Motors,7
DLR0082,Honda Motors,8
DLR0063,Ford do Brasil Motors,9
DLR0193,Tazzari Motors,10


# SCD TYPE - 1 (UPSERT)

In [0]:
#Incremental Run 
if spark.catalog.tableExists('car_catalog.gold.dim_dealer'):
    delta_tbl = DeltaTable.forPath(spark, 'abfss://gold@storageact0.dfs.core.windows.net/dim_dealer')
    delta_tbl.alias('trg').merge(df_final.alias('src'), 'trg.dim_dealer_key = src.Dim_dealer_key')\
                            .whenMatchedUpdateAll()\
                            .whenNotMatchedInsertAll()\
                            .execute()
    
#Initial run
else:
    df_final.write.format('delta')\
    .mode('overwrite')\
    .option('path','abfss://gold@storageact0.dfs.core.windows.net/dim_dealer')\
    .saveAsTable('cars_catalog.gold.dim_dealer')

In [0]:
%sql
SELECT * FROM cars_catalog.gold.dim_dealer

Dealer_ID,DealerName,Dim_dealer_key
DLR0058,Fiat do Brasil Motors,1
DLR0107,Land Rover Motors,2
DLR0129,Mia Motors,3
DLR0111,Lotus Motors,4
DLR0085,Humber Motors,5
DLR0001,AC Cars Motors,6
DLR0218,Lagonda Motors,7
DLR0082,Honda Motors,8
DLR0063,Ford do Brasil Motors,9
DLR0193,Tazzari Motors,10
