In [1]:
from pyspark.sql.functions import *
from pyspark.sql.window import Window

# Build the DimFX dimension from the silver ECB FX rates table and persist it
# as the Delta table "DimFX" (fully overwritten on every run).
# `spark` is not defined in this cell; it is expected to already exist in the
# notebook session.
print("POPULATING DimFX")

print("\nReading silver_ecb_fx_rates...")
df_fx = spark.table("silver_ecb_fx_rates")

print(f"Silver FX records: {df_fx.count():,}")

# One row per distinct (date, rate) pair, tagged with a constant currency pair.
# NOTE(review): the sample rates (~1.14 for Jan 2019) look like USD per 1 EUR;
# confirm that labelling the pair From=USD / To=EUR matches how consumers
# apply ExchangeRate (multiply vs. divide).
df_dim_fx = df_fx.select(
    col("date").alias("Date"),
    lit("USD").alias("CurrencyFrom"),
    lit("EUR").alias("CurrencyTo"),
    col("usd_eur_rate").alias("ExchangeRate"),
).distinct()

# Surrogate key: sequential number in date order.
# ExchangeRate is added as a tie-breaker so that, if a date ever carries more
# than one distinct rate, the key assignment is still deterministic between
# runs; when dates are unique the resulting keys are the same as ordering by
# Date alone.
# A window without partitionBy is evaluated on a single partition, which is
# acceptable only because this dimension is small (one row per rate date).
fx_key_window = Window.orderBy("Date", "ExchangeRate")
df_dim_fx = df_dim_fx.withColumn("FXKey", row_number().over(fx_key_window))

# Put the surrogate key first in the final column order.
df_dim_fx = df_dim_fx.select(
    "FXKey", "Date", "CurrencyFrom", "CurrencyTo", "ExchangeRate"
)

print("\nSample data:")
df_dim_fx.show(10)

print("\nWriting to Lakehouse table: DimFX")
(
    df_dim_fx.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("DimFX")
)

# Report the row count of the table that was actually written instead of
# calling df_dim_fx.count(), which would re-run the whole read/distinct/window
# pipeline a second time just to produce a number.
dim_fx_row_count = spark.table("DimFX").count()
print(f"DimFX created with {dim_fx_row_count:,} records!")

StatementMeta(, dd3d86f4-7c00-4cc0-933c-a0d59c573438, 3, Finished, Available, Finished)

POPULATING DimFX

Reading silver_ecb_fx_rates...
Silver FX records: 1,538

Sample data:
+-----+----------+------------+----------+------------+
|FXKey|      Date|CurrencyFrom|CurrencyTo|ExchangeRate|
+-----+----------+------------+----------+------------+
|    1|2019-01-02|         USD|       EUR|      1.1397|
|    2|2019-01-03|         USD|       EUR|      1.1348|
|    3|2019-01-04|         USD|       EUR|      1.1403|
|    4|2019-01-07|         USD|       EUR|      1.1445|
|    5|2019-01-08|         USD|       EUR|       1.144|
|    6|2019-01-09|         USD|       EUR|      1.1455|
|    7|2019-01-10|         USD|       EUR|      1.1535|
|    8|2019-01-11|         USD|       EUR|      1.1533|
|    9|2019-01-14|         USD|       EUR|      1.1467|
|   10|2019-01-15|         USD|       EUR|      1.1424|
+-----+----------+------------+----------+------------+
only showing top 10 rows


Writing to Lakehouse table: DimFX
DimFX created with 1,538 records!
