Define the `api_key` input widget

In [0]:
# Declare a notebook text widget named "api_key" with an empty default value.
# NOTE(review): no cell in this notebook reads the widget back; presumably the
# notebook pulled in via %run consumes it -- confirm. For real credentials a
# secret scope is usually preferable to a plain-text widget.
dbutils.widgets.text("api_key", "")

Running the prerequisite notebook (`fixer_api_injection`)

In [0]:
%run ./fixer_api_injection

Creating Bronze Table

In [0]:
# Start from a clean slate: delete whatever files sit at the default warehouse
# location for fixer_bronze, then drop the table definition if it exists.
stale_bronze_location = "dbfs:/user/hive/warehouse/fixer_bronze"
dbutils.fs.rm(stale_bronze_location, True)  # True => delete recursively
spark.sql("DROP TABLE IF EXISTS fixer_bronze")


DataFrame[]

In [0]:
from pyspark.sql import SparkSession
from pyspark.sql import SparkSession

class Bronze():
    """Batch-loads the raw Fixer API JSON files into a bronze Delta table.

    Uses the ``spark`` session that is expected to exist as a notebook global
    (it is not defined in this cell).
    """

    def __init__(self, base_data_dir="/mnt/raw_data", table_name="fixer_bronze"):
        """Configure where raw files are read from and which table is written.

        Args:
            base_data_dir: Directory containing the raw JSON files.
            table_name: Name of the Delta table that ``process_batch`` overwrites.
        """
        self.base_data_dir = base_data_dir
        self.table_name = table_name

    def getSchema(self):
        """Return the DDL schema string for the Fixer API JSON data."""
        columns = [
            "base string",
            "date string",
            "historical boolean",
            "rates map<string, double>",
            "success boolean",
            "timestamp long",
        ]
        return ", ".join(columns)

    def read_fixer_file_batch(self):
        """Read every JSON file under ``base_data_dir`` as one batch DataFrame.

        The explicit schema from ``getSchema`` is applied instead of letting
        Spark infer one.
        """
        return (spark.read
                    .format("json")
                    .schema(self.getSchema())
                    .load(self.base_data_dir)
                )

    def process_batch(self):
        """Read the raw files and overwrite the bronze Delta table with them."""
        print("\nStarting Batch Bronze Ingestion...")

        apiDF = self.read_fixer_file_batch()

        # Overwrite both the data and the schema so a re-run always reflects
        # the current raw files and the current schema definition.
        (apiDF.write
              .format("delta")
              .mode("overwrite")
              .option("overwriteSchema", "true")
              .saveAsTable(self.table_name)
        )

        print("✅ Bronze Batch Ingestion Done")

# Run Bronze batch ingestion: build the loader with its default settings and
# overwrite the fixer_bronze table from the raw JSON directory.
bronze_layer = Bronze()
bronze_layer.process_batch()





Starting Batch Bronze Ingestion...
✅ Bronze Batch Ingestion Done


Fetching data for 2024-01-01...
Wrote 159 bytes.
✅ Data for 2024-01-01 saved at /mnt/raw_data/2024-01-01.json
Fetching data for 2024-01-02...
Wrote 161 bytes.
✅ Data for 2024-01-02 saved at /mnt/raw_data/2024-01-02.json
Fetching data for 2024-01-03...
Wrote 160 bytes.
✅ Data for 2024-01-03 saved at /mnt/raw_data/2024-01-03.json
Fetching data for 2024-01-04...
Wrote 162 bytes.
✅ Data for 2024-01-04 saved at /mnt/raw_data/2024-01-04.json
Fetching data for 2024-01-05...
Wrote 162 bytes.
✅ Data for 2024-01-05 saved at /mnt/raw_data/2024-01-05.json
Fetching data for 2024-01-06...
Wrote 162 bytes.
✅ Data for 2024-01-06 saved at /mnt/raw_data/2024-01-06.json
Fetching data for 2024-01-07...
Wrote 162 bytes.
✅ Data for 2024-01-07 saved at /mnt/raw_data/2024-01-07.json
Fetching data for 2024-01-08...
Wrote 160 bytes.
✅ Data for 2024-01-08 saved at /mnt/raw_data/2024-01-08.json
Fetching data for 2024-01-09...
Wrote 160 bytes.
✅ Data for 2024-01-09 saved at /mnt/raw_data/2024-01-09.json
Fetching d

path,name,size,modificationTime
dbfs:/mnt/raw_data/2024-01-01.json,2024-01-01.json,159,1742428127000
dbfs:/mnt/raw_data/2024-01-02.json,2024-01-02.json,161,1742428128000
dbfs:/mnt/raw_data/2024-01-03.json,2024-01-03.json,160,1742428128000
dbfs:/mnt/raw_data/2024-01-04.json,2024-01-04.json,162,1742428128000
dbfs:/mnt/raw_data/2024-01-05.json,2024-01-05.json,162,1742428129000
dbfs:/mnt/raw_data/2024-01-06.json,2024-01-06.json,162,1742428129000
dbfs:/mnt/raw_data/2024-01-07.json,2024-01-07.json,162,1742428130000
dbfs:/mnt/raw_data/2024-01-08.json,2024-01-08.json,160,1742428130000
dbfs:/mnt/raw_data/2024-01-09.json,2024-01-09.json,160,1742428130000
dbfs:/mnt/raw_data/2024-01-10.json,2024-01-10.json,160,1742428131000


Creating Silver Table

In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import expr

class SilverLayer:
    """Reshapes bronze Fixer records into one row per selected currency."""

    def __init__(self):
        # Source table, target table, and the storage path used for the target.
        self.bronze_table = "fixer_bronze"
        self.silver_table = "fixer_silver"
        self.silver_path = "/mnt/raw_data/delta/fixer_silver"

    def transform(self):
        """Return the bronze data exploded into (currency, rate) rows.

        Only the CNY, JPY and USD entries of the ``rates`` map are emitted, and
        the bronze ``timestamp`` column is exposed as ``modify_time``.
        """
        select_exprs = [
            "base",
            "date",
            "timestamp as modify_time",
            "explode(map_from_arrays(array('CNY', 'JPY', 'USD'), array(rates.CNY, rates.JPY, rates.USD))) AS (currency, rate)",
        ]
        source_rows = spark.table(self.bronze_table)
        return source_rows.selectExpr(*select_exprs)

    def write(self, df):
        """Overwrite the silver Delta table, stored at ``silver_path``, with ``df``."""
        delta_writer = df.write.format("delta").mode("overwrite")
        delta_writer = delta_writer.option("path", self.silver_path)
        delta_writer.saveAsTable(self.silver_table)
        print(f"✅ Data written to Silver table '{self.silver_table}' successfully.")

# Execute Silver Transformation: reshape the bronze table into per-currency
# rows, then persist the result as the silver table.
silver_layer = SilverLayer()
silver_df = silver_layer.transform()
silver_layer.write(silver_df)





✅ Data written to Silver table 'fixer_silver' successfully.


Creating Gold Table

In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import round

class GoldLayer:
    """Pivots the silver table into one row per (date, modify_time) with a column per currency.

    Uses the ``spark`` session that is expected to exist as a notebook global.
    """

    def __init__(self):
        # Source table, target table, and the storage path used for the target.
        self.silver_table = "fixer_silver"
        self.gold_table = "fixer_gold"
        self.gold_path = "/mnt/raw_data/delta/fixer_gold"

    def read_silver(self):
        """Return the configured silver table as a DataFrame."""
        return spark.table(self.silver_table)

    def pivotDF(self, silverDF):
        """Pivot currency rows into separate columns, ordered by date.

        Args:
            silverDF: DataFrame with ``date``, ``modify_time``, ``currency``
                and ``rate`` columns.

        Returns:
            A DataFrame with one column per listed currency holding the first
            ``rate`` seen for that (date, modify_time) group.
        """
        # NOTE(review): "AUD" is in the pivot list, but the silver cell in this
        # notebook only emits CNY/JPY/USD, so the AUD column comes out null.
        # The list is kept as-is so the gold table schema does not change.
        return (silverDF
                .groupBy("date", "modify_time")
                .pivot("currency", ["AUD", "CNY", "JPY", "USD"])
                .agg({"rate": "first"})
                .orderBy("date")
               )

    def transform(self):
        """Read the silver table and return its pivoted form."""
        # Previously this called a non-existent self.read() and returned the
        # undefined name `pivotDF`, so it raised on any call.
        silverDF = self.read_silver()
        return self.pivotDF(silverDF)

    def write(self, df):
        """Overwrite the gold Delta table, stored at ``gold_path``, with ``df``."""
        (df.write
           .format("delta")
           .mode("overwrite")
           .option("path", self.gold_path)
           .saveAsTable(self.gold_table)
        )
        print("✅ Gold table written successfully.")

# Execute Gold Transformation. The read and pivot steps are invoked directly
# here rather than through GoldLayer.transform(); the pivoted result is then
# written out as the gold table.
gold_layer = GoldLayer()
silverDF = gold_layer.read_silver()
pivotedDF = gold_layer.pivotDF(silverDF)
gold_layer.write(pivotedDF)


✅ Gold table written successfully.


In [0]:
%sql
-- Show the gold table in date order. Rows read back from a table have no
-- guaranteed order, so the sort is requested explicitly.
SELECT * FROM fixer_gold ORDER BY date

date,modify_time,AUD,CNY,JPY,USD
2024-01-01,1704153599,,4.820522,96.109802,0.681
2024-01-02,1704239999,,4.803307,96.177403,0.67656
2024-01-03,1704326399,,4.787848,96.310885,0.6735
2024-01-04,1704412799,,4.774984,97.083568,0.670599
2024-01-05,1704499199,,4.762146,96.976096,0.670488
2024-01-06,1704585599,,4.762146,96.976096,0.670488
2024-01-07,1704671999,,4.770238,97.209734,0.671628
2024-01-08,1704758399,,4.766928,96.896729,0.6718
2024-01-09,1704844799,,4.753351,96.531584,0.6683
2024-01-10,1704931199,,4.806274,97.675613,0.6702


Databricks visualization. Run in Databricks to view.