In [0]:
-- DimDate: calendar dimension with one row per day from 1900-01-01 through
-- 2050-12-31 (both inclusive). date_key is the yyyyMMdd integer form of the date.
CREATE OR REFRESH LIVE TABLE DimDate AS
WITH calendar AS (
    -- sequence() builds the day-by-day array; explode() turns it into rows.
    SELECT
        explode(sequence(DATE '1900-01-01', DATE '2050-12-31', INTERVAL 1 DAY)) AS d
)
SELECT
    CAST(date_format(d, 'yyyyMMdd') AS INT) AS date_key,
    d                                       AS full_date,
    year(d)                                 AS year,
    month(d)                                AS month,
    day(d)                                  AS day_of_month,
    dayofweek(d)                            AS day_of_week,
    weekofyear(d)                           AS week_of_year,
    quarter(d)                              AS quarter,
    -- dayofweek() returns 1 for Sunday and 7 for Saturday.
    dayofweek(d) IN (1, 7)                  AS is_weekend,
    current_timestamp()                     AS load_datetime
FROM calendar;
-- DimTitle: title dimension built from title_basics_silver_clean.
-- Each row gets a surrogate title_key numbered by tconst order, a runtime
-- bucket label, and a flag telling whether the title also appears in the
-- episode table. Every row is emitted as SCD version 1 and flagged current;
-- this query produces no history rows.
CREATE OR REFRESH LIVE TABLE DimTitle AS
WITH title_attrs AS (
    SELECT
        tb.title_imdb_id      AS tconst,
        tb.primary_title,
        tb.original_title,
        tb.title_type,
        tb.is_adult           AS is_adult_flag,
        tb.start_year,
        tb.end_year,
        tb.runtime_minutes,
        -- Branches are evaluated top to bottom; anything not caught by an
        -- earlier branch falls through to '120+ mins'.
        CASE
            WHEN tb.runtime_minutes IS NULL THEN 'Unknown'
            WHEN tb.runtime_minutes < 30 THEN '< 30 mins'
            WHEN tb.runtime_minutes >= 30 AND tb.runtime_minutes <= 59 THEN '30–59 mins'
            WHEN tb.runtime_minutes >= 60 AND tb.runtime_minutes <= 119 THEN '60–119 mins'
            ELSE '120+ mins'
        END                   AS runtime_bucket,
        tb.is_series          AS is_series_flag,
        -- The LEFT JOIN leaves ep.TCONST NULL when the title has no episode row.
        ep.TCONST IS NOT NULL AS is_episode_flag,
        tb.bronze_source_file AS source_file
    FROM workspace.imdb_final.title_basics_silver_clean AS tb
    LEFT JOIN workspace.imdb_final.silver_title_episode_clean AS ep
        ON ep.TCONST = tb.title_imdb_id
)
SELECT
    ROW_NUMBER() OVER (ORDER BY tconst) AS title_key,
    tconst,
    primary_title,
    original_title,
    title_type,
    is_adult_flag,
    start_year,
    end_year,
    runtime_minutes,
    runtime_bucket,
    is_series_flag,
    is_episode_flag,
    source_file,
    current_timestamp()                 AS load_datetime,
    current_date()                      AS scd_effective_from,
    CAST('9999-12-31' AS DATE)          AS scd_effective_to,  -- open-ended "until" date
    TRUE                                AS scd_is_current,
    1                                   AS scd_version
FROM title_attrs;
-- DimPerson: person dimension built from name_basics_silver_clean.
-- person_key is a surrogate key numbered by nconst order. Every row is
-- emitted as SCD version 1 and flagged current.
CREATE OR REFRESH LIVE TABLE DimPerson AS
WITH person_attrs AS (
    SELECT
        nb.person_imdb_id                     AS nconst,
        nb.primary_name,
        nb.birth_year,
        nb.death_year,
        -- Only an explicit TRUE in is_deceased yields FALSE; a NULL
        -- is_deceased falls to the ELSE branch and is reported as alive.
        CASE WHEN nb.is_deceased THEN FALSE ELSE TRUE END AS is_alive_flag,
        -- Flatten the known_for_titles collection to a comma-separated string.
        concat_ws(',', nb.known_for_titles)   AS known_for_titles,
        nb.bronze_source_file                 AS source_file
    FROM workspace.imdb_final.name_basics_silver_clean AS nb
)
SELECT
    ROW_NUMBER() OVER (ORDER BY nconst) AS person_key,
    nconst,
    primary_name,
    birth_year,
    death_year,
    is_alive_flag,
    known_for_titles,
    source_file,
    current_timestamp()                 AS load_datetime,
    current_date()                      AS scd_effective_from,
    CAST('9999-12-31' AS DATE)          AS scd_effective_to,  -- open-ended "until" date
    TRUE                                AS scd_is_current,
    1                                   AS scd_version
FROM person_attrs;
-- DimGenre: one row per distinct genre found in title_basics_silver_clean.genres_array.
-- Genre names are trimmed and lower-cased before de-duplication.
CREATE OR REFRESH LIVE TABLE DimGenre AS
WITH raw_genres AS (
    -- One row per array element; titles with a NULL or empty array yield no rows.
    SELECT explode(tb.genres_array) AS g
    FROM workspace.imdb_final.title_basics_silver_clean AS tb
),
genres AS (
    SELECT DISTINCT
        lower(trim(g)) AS genre_name
    FROM raw_genres
    WHERE g IS NOT NULL
      AND trim(g) <> ''
)
SELECT
    ROW_NUMBER() OVER (ORDER BY genre_name) AS genre_key,
    genre_name,
    current_timestamp()                     AS load_datetime
FROM genres;
-- DimRegion: one row per distinct (region_code, region_name) pair found in
-- title_akas_silver_clean, skipping rows without a region code.
CREATE OR REFRESH LIVE TABLE DimRegion AS
WITH regions AS (
    SELECT
        akas.region_code,
        akas.region_name
    FROM workspace.imdb_final.title_akas_silver_clean AS akas
    WHERE akas.region_code IS NOT NULL
    GROUP BY
        akas.region_code,
        akas.region_name
)
SELECT
    ROW_NUMBER() OVER (ORDER BY region_code) AS region_key,
    region_code,
    region_name,
    current_timestamp()                      AS load_datetime
FROM regions;
-- DimLanguage: one row per distinct (language_code, language_name) pair found
-- in title_akas_silver_clean, skipping rows without a language code.
CREATE OR REFRESH LIVE TABLE DimLanguage AS
WITH langs AS (
    SELECT
        akas.language_code,
        akas.language_name
    FROM workspace.imdb_final.title_akas_silver_clean AS akas
    WHERE akas.language_code IS NOT NULL
    GROUP BY
        akas.language_code,
        akas.language_name
)
SELECT
    ROW_NUMBER() OVER (ORDER BY language_code) AS language_key,
    language_code,
    language_name,
    current_timestamp()                        AS load_datetime
FROM langs;
-- DimJob: one row per distinct (job_name, job_category_group) pair found in
-- imdb_title_principals_silver.
--   job_name           = Job, falling back to Category when Job is NULL or ''.
--   job_category_group = Category.
-- Rows where neither Job nor Category yields a name are dropped.
CREATE OR REFRESH LIVE TABLE DimJob AS
WITH resolved AS (
    -- Compute the fallback once so the projection and the filter cannot drift apart.
    SELECT
        COALESCE(NULLIF(Job, ''), Category) AS job_name,
        Category                            AS job_category_group
    FROM workspace.imdb_final.imdb_title_principals_silver
),
jobs AS (
    SELECT DISTINCT
        job_name,
        job_category_group
    FROM resolved
    WHERE job_name IS NOT NULL
)
SELECT
    -- The table grain is (job_name, job_category_group). Ordering by job_name
    -- alone would leave ties between rows that share a job_name, so their keys
    -- could change from one refresh to the next. The second sort column makes
    -- the numbering deterministic.
    ROW_NUMBER() OVER (ORDER BY job_name, job_category_group) AS job_key,
    job_name,
    job_category_group,
    current_timestamp() AS load_datetime
FROM jobs;
-- DimCharacter: one row per distinct Characters value found in
-- imdb_title_principals_silver. NULL values and the literal '[]' are skipped.
-- NOTE(review): the '[]' filter suggests Characters holds a serialized list;
-- the value is stored as-is, not split into individual names. Confirm intent.
CREATE OR REFRESH LIVE TABLE DimCharacter AS
WITH chars AS (
    SELECT DISTINCT
        Characters AS character_name
    FROM workspace.imdb_final.imdb_title_principals_silver
    WHERE Characters IS NOT NULL AND Characters <> '[]'
)
SELECT
    ROW_NUMBER() OVER (ORDER BY character_name) AS character_key,
    character_name,
    -- Load audit column, appended to match the other tables in this file.
    current_timestamp() AS load_datetime
FROM chars;
-- DimProfession: one row per distinct profession found in
-- name_basics_silver_clean.primary_professions.
-- Profession names are trimmed and lower-cased before de-duplication.
CREATE OR REFRESH LIVE TABLE DimProfession AS
WITH raw_professions AS (
    -- One row per array element; people with a NULL or empty array yield no rows.
    SELECT explode(nb.primary_professions) AS p
    FROM workspace.imdb_final.name_basics_silver_clean AS nb
),
profs AS (
    SELECT DISTINCT
        lower(trim(p)) AS profession_name
    FROM raw_professions
    WHERE p IS NOT NULL
      AND trim(p) <> ''
)
SELECT
    ROW_NUMBER() OVER (ORDER BY profession_name) AS profession_key,
    profession_name,
    current_timestamp()                          AS load_datetime
FROM profs;

-- BridgePersonProfession: links each current DimPerson row to every
-- DimProfession row named in that person's primary_professions array.
-- Matching is done on the trimmed, lower-cased profession text.
-- is_primary_flag is always TRUE because the only source is primary_professions.
CREATE OR REFRESH LIVE TABLE BridgePersonProfession AS
WITH person_professions AS (
    -- Explode first so the dimension lookup below is a plain equi-join.
    -- The earlier form joined on array_contains(...) and then exploded and
    -- filtered on the same condition, doing the match twice via a non-equi join.
    SELECT
        n.person_imdb_id,
        lower(trim(prof)) AS profession_name
    FROM workspace.imdb_final.name_basics_silver_clean n
    LATERAL VIEW explode(n.primary_professions) exploded AS prof
    WHERE prof IS NOT NULL
)
-- DISTINCT collapses repeated professions within one person's array.
SELECT DISTINCT
    dp.person_key,
    dprof.profession_key,
    TRUE AS is_primary_flag,
    current_timestamp() AS load_datetime
FROM person_professions pp
JOIN workspace.imdb_final.DimPerson dp
    ON dp.nconst = pp.person_imdb_id AND dp.scd_is_current = TRUE
JOIN workspace.imdb_final.DimProfession dprof
    ON lower(trim(dprof.profession_name)) = pp.profession_name;
-- FactTitleGenre: links each current DimTitle row to every DimGenre row named
-- in that title's genres_array. Matching is done on the trimmed, lower-cased
-- genre text.
CREATE OR REFRESH LIVE TABLE FactTitleGenre AS
WITH title_genres AS (
    -- Explode first so the dimension lookup below is a plain equi-join.
    -- The earlier form joined on array_contains(...) and then exploded and
    -- filtered on the same condition, doing the match twice via a non-equi join.
    SELECT
        b.title_imdb_id,
        lower(trim(g)) AS genre_name
    FROM workspace.imdb_final.title_basics_silver_clean b
    LATERAL VIEW explode(b.genres_array) exploded AS g
    WHERE g IS NOT NULL
)
-- DISTINCT collapses repeated genres within one title's array.
SELECT DISTINCT
    dt.title_key,
    dg.genre_key,
    current_timestamp() AS load_datetime
FROM title_genres tg
JOIN workspace.imdb_final.DimTitle dt
    ON dt.tconst = tg.title_imdb_id AND dt.scd_is_current = TRUE
JOIN workspace.imdb_final.DimGenre dg
    ON lower(trim(dg.genre_name)) = tg.genre_name;
-- FactTitleAvailability: one row per title_akas_silver_clean row whose title
-- resolves to a current DimTitle row. Region and language are optional
-- lookups, so region_key / language_key are NULL when no dimension row matches.
CREATE OR REFRESH LIVE TABLE FactTitleAvailability AS
SELECT
    t.title_key,
    rgn.region_key,
    lng.language_key,
    akas.aka_title,
    akas.is_original_title            AS is_original_title_flag,
    -- Despite the *_array names, both columns are comma-joined strings.
    concat_ws(',', akas.types)        AS types_array,
    concat_ws(',', akas.attributes)   AS attribute_array,
    current_timestamp()               AS load_datetime
FROM workspace.imdb_final.title_akas_silver_clean AS akas
INNER JOIN workspace.imdb_final.DimTitle AS t
    ON t.tconst = akas.title_imdb_id
   AND t.scd_is_current
LEFT JOIN workspace.imdb_final.DimRegion AS rgn
    ON rgn.region_code = akas.region_code
LEFT JOIN workspace.imdb_final.DimLanguage AS lng
    ON lng.language_code = akas.language_code;
-- FactTitleRating: rating snapshot per current DimTitle row, stamped with the
-- DimDate key of the day the query runs.
CREATE OR REFRESH LIVE TABLE FactTitleRating AS
SELECT
    t.title_key,
    snap.date_key,
    CAST(rt.Average_Rating AS DECIMAL(4,2)) AS average_rating,
    rt.Num_Votes                            AS num_votes,
    current_timestamp()                     AS load_datetime
FROM workspace.imdb_final.imdb_title_ratings_silver AS rt
INNER JOIN workspace.imdb_final.DimTitle AS t
    ON t.tconst = rt.TCONST
   AND t.scd_is_current
-- Attach today's date_key to every row. If DimDate has no row for today,
-- the cross join yields no rows at all.
CROSS JOIN (
    SELECT date_key
    FROM workspace.imdb_final.DimDate
    WHERE full_date = current_date()
) AS snap;
-- 14. FactEpisode (from silver_title_episode_clean)
-- One row per episode whose own title AND parent title both resolve to a
-- current DimTitle row; episodes missing either side are dropped by the
-- inner joins.
CREATE OR REFRESH LIVE TABLE FactEpisode AS
SELECT
    episode_dim.title_key AS episode_title_key,
    series_dim.title_key  AS parent_title_key,
    ep.Season_Number      AS season_number,
    ep.Episode_Number     AS episode_number,
    current_timestamp()   AS load_datetime
FROM workspace.imdb_final.silver_title_episode_clean AS ep
INNER JOIN workspace.imdb_final.DimTitle AS episode_dim
    ON episode_dim.tconst = ep.TCONST
   AND episode_dim.scd_is_current
INNER JOIN workspace.imdb_final.DimTitle AS series_dim
    ON series_dim.tconst = ep.Parent_TCONST
   AND series_dim.scd_is_current;

-- FactTitleCrewRole_Principals: one row per imdb_title_principals_silver row
-- whose title and person both resolve to current dimension rows. Job and
-- character are optional lookups (NULL key when no dimension row matches).
-- Every row is stamped with the DimDate key of the day the query runs.
CREATE OR REFRESH LIVE TABLE FactTitleCrewRole_Principals AS
WITH today_key AS (
    SELECT date_key FROM workspace.imdb_final.DimDate WHERE full_date = current_date()
)
SELECT
    dt.title_key,
    dp.person_key,
    dj.job_key,
    dc.character_key,
    p.Ordering AS ordering,
    TRUE AS is_principal_flag,
    'principals' AS role_source,
    tk.date_key AS date_key,
    current_timestamp() AS load_datetime
FROM workspace.imdb_final.imdb_title_principals_silver p
JOIN workspace.imdb_final.DimTitle dt
    ON dt.tconst = p.TCONST AND dt.scd_is_current = TRUE
JOIN workspace.imdb_final.DimPerson dp
    ON dp.nconst = p.NCONST AND dp.scd_is_current = TRUE
-- DimJob is unique on (job_name, job_category_group), not on job_name alone.
-- Matching both columns keeps a job text that occurs under several categories
-- from duplicating fact rows. <=> is null-safe equality, so a NULL Category
-- still matches the DimJob row whose job_category_group is NULL.
LEFT JOIN workspace.imdb_final.DimJob dj
    ON dj.job_name = COALESCE(NULLIF(p.Job, ''), p.Category)
   AND dj.job_category_group <=> p.Category
LEFT JOIN workspace.imdb_final.DimCharacter dc
    ON dc.character_name = p.Characters
-- If DimDate has no row for today, this cross join yields no rows at all.
CROSS JOIN today_key tk;
-- FactTitleCrewRole_TitleCrew: director and writer credits taken from
-- silver_title_crew_clean. Each element of Directors_Array / Writers_Array
-- becomes one row, tagged 'director' or 'writer', and is kept only when the
-- title and person both resolve to current dimension rows.
-- Every row is stamped with the DimDate key of the day the query runs.
CREATE OR REFRESH LIVE TABLE FactTitleCrewRole_TitleCrew AS
WITH today_key AS (
  SELECT date_key FROM workspace.imdb_final.DimDate WHERE full_date = current_date()
),
director_roles AS (
  SELECT
    c.TCONST,
    director_nconst AS NCONST,
    'director' AS job_name
  FROM workspace.imdb_final.silver_title_crew_clean c
  LATERAL VIEW explode(c.Directors_Array) exploded AS director_nconst
  WHERE c.Has_Directors = TRUE
),
writer_roles AS (
  SELECT
    c.TCONST,
    writer_nconst AS NCONST,
    'writer' AS job_name
  FROM workspace.imdb_final.silver_title_crew_clean c
  LATERAL VIEW explode(c.Writers_Array) exploded AS writer_nconst
  WHERE c.Has_Writers = TRUE
),
crew_union AS (
  -- UNION ALL: a person credited as both director and writer keeps both rows.
  SELECT TCONST, NCONST, job_name FROM director_roles
  UNION ALL
  SELECT TCONST, NCONST, job_name FROM writer_roles
)
SELECT
    dt.title_key,
    dp.person_key,
    dj.job_key,
    FALSE AS is_principal_flag,
    'title_crew' AS role_source,
    tk.date_key AS date_key,
    current_timestamp() AS load_datetime
FROM crew_union c
JOIN workspace.imdb_final.DimTitle dt
  ON dt.tconst = c.TCONST AND dt.scd_is_current = TRUE
JOIN workspace.imdb_final.DimPerson dp
  ON dp.nconst = c.NCONST AND dp.scd_is_current = TRUE
-- DimJob is unique on (job_name, job_category_group), not on job_name alone.
-- Pinning the category to the role name selects the single
-- ('director','director') / ('writer','writer') row, so a job text that also
-- occurs under another category cannot duplicate fact rows.
-- NOTE(review): assumes DimJob contains those two rows — confirm against data.
LEFT JOIN workspace.imdb_final.DimJob dj
  ON dj.job_name = c.job_name
 AND dj.job_category_group = c.job_name
-- If DimDate has no row for today, this cross join yields no rows at all.
CROSS JOIN today_key tk;