In [0]:
# Silver: monthly FX averages (pivoted) + monthly imports + EUR conversion
# Outputs (managed Delta):
#   fx_impact.silver_fx_monthly_long
#   fx_impact.silver_fx_monthly
#   fx_impact.silver_imports_monthly
#   fx_impact.silver_monthly_fact


In [0]:
# Cell 1 — Ensure schema exists
# Every table written further down lives in the fx_impact database, so it is
# created up front; IF NOT EXISTS makes re-running this cell harmless.
ensure_schema_ddl = "CREATE DATABASE IF NOT EXISTS fx_impact"
spark.sql(ensure_schema_ddl)


In [0]:
# Cell 2 — FX monthly (long) with quality fields
# Rebuilds fx_impact.silver_fx_monthly_long with one row per (month, currency)
# aggregated from fx_impact.bronze_ecb_fx_rates:
#   fx_avg     — mean of the non-NULL fx_rate values in the month
#   obs_days   — distinct calendar days that carry a non-NULL fx_rate
#   days_total — calendar days in the month
#   null_share — 1 - obs_days / days_total
# NOTE(review): days_total counts every calendar day, so if the feed only
# publishes on business days null_share can never reach 0 — confirm intent.
# NOTE(review): fx_avg is still a row-level mean; if bronze can hold duplicate
# rows for a day they should be de-duplicated upstream.
spark.sql("""
CREATE OR REPLACE TABLE fx_impact.silver_fx_monthly_long AS
WITH fx AS (
  SELECT
    to_date(date_trunc('month', date)) AS month,
    currency,
    AVG(fx_rate) AS fx_avg,
    -- count distinct observed days, not rows: a duplicated bronze row must not
    -- push obs_days above days_total (which would make null_share negative)
    COUNT(DISTINCT CASE WHEN fx_rate IS NOT NULL THEN to_date(date) END) AS obs_days,
    -- number of days in month
    datediff(last_day(to_date(date_trunc('month', date))),
             to_date(date_trunc('month', date))) + 1 AS days_total
  FROM fx_impact.bronze_ecb_fx_rates
  GROUP BY 1,2
)
SELECT
  month,
  currency,
  fx_avg,
  obs_days,
  days_total,
  -- days_total is NULL only when month is NULL; the guard keeps that row NULL
  CASE WHEN days_total > 0 THEN 1.0 - (obs_days * 1.0 / days_total) ELSE NULL END AS null_share
FROM fx
""")


In [0]:
# Cell 3 — FX monthly (pivoted) + USD quality columns
# Rebuilds fx_impact.silver_fx_monthly with one row per month: the USD/JPY/CNY
# monthly averages pivoted into columns, plus the USD row's quality fields.
# Both column groups come from the same table grouped by the same key, so they
# are produced in a single aggregation instead of two CTEs joined on month
# (one scan, no join; same columns in the same order).
spark.sql("""
CREATE OR REPLACE TABLE fx_impact.silver_fx_monthly AS
SELECT
  month,
  -- MAX over a CASE picks the single per-currency value for the month
  MAX(CASE WHEN currency='USD' THEN fx_avg END) AS USD_per_EUR,
  MAX(CASE WHEN currency='JPY' THEN fx_avg END) AS JPY_per_EUR,
  MAX(CASE WHEN currency='CNY' THEN fx_avg END) AS CNY_per_EUR,
  -- quality fields are carried for USD only
  MAX(CASE WHEN currency='USD' THEN obs_days   END) AS usd_obs_days,
  MAX(CASE WHEN currency='USD' THEN days_total END) AS usd_days_total,
  MAX(CASE WHEN currency='USD' THEN null_share END) AS usd_null_share
FROM fx_impact.silver_fx_monthly_long
GROUP BY month
""")


In [0]:
# Cell 4 — Imports monthly (USD) by HS-2
# Rolls fx_impact.bronze_comtrade_imports up to one row per (month, cmdCode):
# TradeValue is summed into import_usd, and the description falls back to the
# code itself when the group has no cmdDesc.
imports_monthly_ddl = """
CREATE OR REPLACE TABLE fx_impact.silver_imports_monthly AS
SELECT
  to_date(date_trunc('month', period_date)) AS month,
  cmdCode,
  COALESCE(MAX(cmdDesc), cmdCode)          AS cmdDesc,
  SUM(TradeValue)                           AS import_usd
FROM fx_impact.bronze_comtrade_imports
GROUP BY 1,2
"""
spark.sql(imports_monthly_ddl)


In [0]:
# Cell 5 — Join + EUR conversion (+ flags)
# Rebuilds fx_impact.silver_monthly_fact: every monthly import row is kept
# (LEFT JOIN) and enriched with that month's FX columns, a EUR value, a flag
# for an unusable USD rate, and a COVID period label.
spark.sql("""
CREATE OR REPLACE TABLE fx_impact.silver_monthly_fact AS
SELECT
  i.month,
  i.cmdCode,
  i.cmdDesc,
  i.import_usd,
  f.USD_per_EUR, f.JPY_per_EUR, f.CNY_per_EUR,
  f.usd_obs_days, f.usd_days_total, f.usd_null_share,
  -- USD value to EUR using USD_per_EUR (target per 1 EUR).
  -- Only a strictly positive rate is usable: a NULL rate has no value, and a
  -- zero rate would raise a divide-by-zero under ANSI mode or silently yield
  -- NULL otherwise, so both leave import_eur NULL and set the flag.
  CASE WHEN f.USD_per_EUR > 0 THEN i.import_usd / f.USD_per_EUR END AS import_eur,
  CASE WHEN f.USD_per_EUR > 0 THEN 0 ELSE 1 END AS fx_missing_flag,
  -- Period label for later slicing; a NULL month stays unlabelled instead of
  -- falling through to 'Post-COVID'
  CASE
    WHEN i.month IS NULL               THEN NULL
    WHEN i.month <  DATE('2020-01-01') THEN 'Pre-COVID'
    WHEN i.month <= DATE('2021-12-31') THEN 'During COVID'
    ELSE 'Post-COVID'
  END AS covid_period
FROM fx_impact.silver_imports_monthly i
LEFT JOIN fx_impact.silver_fx_monthly f
  ON i.month = f.month
""")


In [0]:
# Cell 6 — DQ checks (Silver)
# Validates fx_impact.silver_monthly_fact. Checks 1 and 2 stop the notebook
# with an AssertionError on a violation; check 3 only reports a figure.
from pyspark.sql import functions as F

fact = spark.table("fx_impact.silver_monthly_fact")

# 1) No duplicates on (month, cmdCode)
dups = fact.groupBy("month","cmdCode").count().filter("count > 1")
assert dups.count() == 0, "Duplicate (month, cmdCode) in silver_monthly_fact."

# 2) import_usd should be > 0
# For a NULL import_usd the comparison `import_usd <= 0` evaluates to NULL and
# filter() drops the row, so NULLs are tested explicitly; otherwise they would
# pass this check unnoticed.
bad_import_usd = fact.filter(
    F.col("import_usd").isNull() | (F.col("import_usd") <= 0)
)
assert bad_import_usd.count() == 0, "NULL or non-positive import_usd found."

# 3) FX availability (informational; guarded against an empty table)
miss = fact.filter("fx_missing_flag = 1").count()
total = fact.count()
print(f"FX-missing rows: {miss}/{total} ({(miss/total*100 if total else 0):.2f}%)")


In [0]:
# Cell 7 — Quick profile / preview
# Two read-only looks at fx_impact.silver_monthly_fact: first the covered month
# range with total and FX-missing row counts, then the first 20 rows ordered by
# (month, cmdCode).
profile_query = """
SELECT
  MIN(month) AS min_month,
  MAX(month) AS max_month,
  COUNT(*)   AS rows,
  SUM(CASE WHEN fx_missing_flag=1 THEN 1 ELSE 0 END) AS rows_fx_missing
FROM fx_impact.silver_monthly_fact
"""

preview_query = """
SELECT month, cmdCode, import_usd, import_eur, covid_period
FROM fx_impact.silver_monthly_fact
ORDER BY month, cmdCode
LIMIT 20
"""

# Print the profile first, then the preview
for query in (profile_query, preview_query):
    spark.sql(query).show()
