In [1]:
from pyspark.sql.functions import *
from pyspark.sql.window import Window

# Stage banner for the notebook log.
print("WORLD BANK GDP - BRONZE TO SILVER")

# Load the World Bank GDP rows from the bronze table via the session-provided
# `spark` handle.
print("\nReading Bronze...")
df_bronze = spark.table("bronze_worldbank_gdp")

# count() is a Spark action: it runs a job over the bronze table.
bronze_row_count = df_bronze.count()
print(f"Bronze records: {bronze_row_count}")

# Print the schema and a handful of rows so the input can be eyeballed
# before any transformation is applied.
print("\nBronze schema:")
df_bronze.printSchema()

print("\nBronze sample:")
df_bronze.show(n=5)

print("\nTransforming...")

# Year-over-year growth is only meaningful within one country/indicator series.
# Partitioning the window keeps the previous-year lookup from leaking across
# series and avoids forcing every row through a single Spark partition, which
# an un-partitioned Window.orderBy would do.
series_by_year = Window.partitionBy("country", "indicator").orderBy("year")

# Build the lag expression once and reuse it in numerator and denominator.
previous_gdp_usd = lag("gdp_value_usd", 1).over(series_by_year)

# NOTE: `round` below is pyspark.sql.functions.round (brought in by the star
# import), not the Python builtin.
df_silver = (
    df_bronze
    # Cast the bronze `date` / `gdp_value` columns to typed columns.
    .withColumn("year", col("date").cast("int"))
    .withColumn("gdp_value_usd", col("gdp_value").cast("double"))
    # Derive billions from the typed column rather than the raw bronze column,
    # so no implicit cast is involved.
    .withColumn("gdp_value_billions", round(col("gdp_value_usd") / 1e9, 2))
    .withColumn("ingestion_timestamp", current_timestamp())
    .select(
        "year",
        "country",
        "indicator",
        "gdp_value_usd",
        "gdp_value_billions",
        "ingestion_timestamp",
    )
    # Growth uses the unrounded USD values so small economies do not lose
    # precision to the 2-decimal rounding of the billions column. The result
    # is NULL for the first year of a series (no previous row) and when the
    # previous value is 0 (avoids a divide-by-zero error under ANSI mode).
    .withColumn(
        "year_over_year_growth_pct",
        when(
            previous_gdp_usd != 0,
            round(
                (col("gdp_value_usd") - previous_gdp_usd) / previous_gdp_usd * 100,
                2,
            ),
        ),
    )
    # Sort last: the window step re-sorts rows, so sorting before it is wasted.
    .orderBy(col("year").desc())
)

# count() is a Spark action: it evaluates the silver transformation.
silver_row_count = df_silver.count()
print(f"Silver records: {silver_row_count}")

print("\nSilver data summary...")

# First ten rows of the year / GDP-in-billions series.
print("\nGDP Trends:")
gdp_trend = df_silver.select("year", "gdp_value_billions")
gdp_trend.show(n=10)

# One-row aggregate covering the year range and the GDP extremes.
# `min`, `max` and `round` are the pyspark.sql.functions versions (star
# import), operating on columns rather than Python values.
print("\nStatistics:")
summary_columns = [
    min("year").alias("earliest_year"),
    max("year").alias("latest_year"),
    round(min("gdp_value_billions"), 2).alias("min_gdp_billions"),
    round(max("gdp_value_billions"), 2).alias("max_gdp_billions"),
]
df_silver.select(*summary_columns).show()

print("\nSaving to Silver...")

# Persist the silver DataFrame as a Delta table, replacing any existing data.
# NOTE(review): the table is named "silver_gdp" while the temp view below is
# "silver_worldbank_gdp" - confirm downstream readers expect both names.
silver_writer = df_silver.write.format("delta").mode("overwrite")
silver_writer.saveAsTable("silver_gdp")

# Register the same DataFrame as a temp view for SQL access in this session.
df_silver.createOrReplaceTempView("silver_worldbank_gdp")
print("Created view: silver_worldbank_gdp")

print("GDP SILVER COMPLETE!")

StatementMeta(, 49dacf51-26da-4ced-81a5-77ec8cfaedc8, 3, Finished, Available, Finished)

WORLD BANK GDP - BRONZE TO SILVER

Reading Bronze...
Bronze records: 25

Bronze schema:
root
 |-- indicator: string (nullable = true)
 |-- country: string (nullable = true)
 |-- date: string (nullable = true)
 |-- gdp_value: string (nullable = true)


Bronze sample:
+-----------------+-------------+----+----------------+
|        indicator|      country|date|       gdp_value|
+-----------------+-------------+----+----------------+
|GDP (current US$)|United States|2024|28750956130731.2|
|GDP (current US$)|United States|2023|27292170793214.4|
|GDP (current US$)|United States|2022|  25604848907611|
|GDP (current US$)|United States|2021|  23315080560000|
|GDP (current US$)|United States|2020|  21060473613000|
+-----------------+-------------+----+----------------+
only showing top 5 rows


Transforming...
Silver records: 25

Silver data summary...

GDP Trends:
+----+------------------+
|year|gdp_value_billions|
+----+------------------+
|2024|          28750.96|
|2023|          27292.17|
|