# Project: Delta Lake for Movie JSON Files

In [None]:
# path
import re
# Storage locations used throughout the notebook. Every value ends with '/'
# because later cells build file paths by plain string concatenation
# (e.g. `source_path + file`, `src_path+'movie*.json'`).
source_path = '/mnt/antrastrg01adls/adlscontainer01/dataset/'  # mounted location listed for movie*.json files
raw_path = '/FileStore/movie/raw/'  # landing folder the source files are copied into
bronze_path = '/FileStore/movie/bronze/'  # Delta folder written by raw_to_bronze
silver_path = '/FileStore/movie/silver/'  # parent folder of the silver Delta tables
gold_path = '/FileStore/movie/gold/'

## 0. Copy from mounted storage to the Raw folder

In [None]:
# Make Raw idempotent: recursively delete the raw folder so that every run of
# the notebook starts from an empty landing area.
dbutils.fs.rm(raw_path, recurse=True)

def retrieve_data(file: str, dest_path: str) -> bool:
    """Copy one file from the mounted source location into ``dest_path``.

    The copy is a single ``dbutils.fs.cp`` call from ``source_path + file`` to
    ``dest_path + file``; both prefixes are joined by plain concatenation, so
    they are expected to end with '/'.

    Args:
        file: Name of the file inside ``source_path``.
        dest_path: Destination folder prefix.

    Returns:
        The status reported by ``dbutils.fs.cp`` coerced to ``bool``, so a
        failed copy is no longer reported as a success.
    """
    src = source_path + file
    dest = dest_path + file
    return bool(dbutils.fs.cp(src, dest))

def prepare_raw(dest_path=raw_path) -> bool:
    """Copy every movie*.json file found in ``source_path`` into ``dest_path``.

    Files are selected by matching their name against ``^movie.*\\.json$`` and
    copied one at a time through ``retrieve_data``.

    Args:
        dest_path: Destination folder prefix; defaults to the raw folder.

    Returns:
        True when every matching file was copied (also True when nothing
        matched), False when at least one copy reported a failure.
    """
    # Raw string: '\.' inside a normal string literal is an invalid escape
    # sequence and triggers a warning on recent Python versions.
    json_pattern = re.compile(r'^movie.*\.json$')
    all_copied = True
    for f in dbutils.fs.ls(source_path):
        if json_pattern.match(f.name) is not None:
            # Call first, then combine, so a failure never skips later copies.
            copied = retrieve_data(f.name, dest_path)
            all_copied = bool(copied) and all_copied
    return all_copied
            
# Populate the raw folder (default destination) with the movie*.json files.
prepare_raw()

## 1. Raw to Bronze

In [None]:
from pyspark.sql.types import ArrayType, StructType,StructField, StringType, IntegerType, DateType, DoubleType, FloatType, LongType, TimestampType, DateType, BooleanType, MapType
from pyspark.sql.functions import col, explode, current_date, current_timestamp, lit, from_json, upper, lower, row_number, lag, last
from pyspark.sql.dataframe import DataFrame
from pyspark.sql.window import Window

In [None]:
# Prepare Bronze path: recursively delete the bronze folder so the ingestion
# below starts from an empty Delta location.
dbutils.fs.rm(bronze_path, recurse=True)

In [None]:
# Minimal ingestion schema: the top-level 'movie' field is an array whose
# elements are read as plain strings, alongside the metadata added at ingest.
movie_schema = StructType().add('movie', ArrayType(StringType()), True)
    
def raw_to_bronze(src_path=raw_path, dest_path=bronze_path, schema=movie_schema, persist=True) -> DataFrame:
    """
    One by one ingestion to show json file names as the datasource.

    Every entry listed under ``src_path`` is read as multi-line JSON with
    ``schema``, its 'movie' array is exploded to one row per element, and the
    ingestion metadata columns (SourceFile, IngestTime, p_IngestDate, Status)
    are attached. All files are processed; returning from inside the loop
    would stop after the first one.

    Args:
        src_path: Folder whose entries are ingested.
        dest_path: Delta location the rows are appended to when ``persist``.
        schema: Read schema applied to every file.
        persist: When True, append each file's rows to ``dest_path``,
            partitioned by p_IngestDate.

    Returns:
        The union of the per-file DataFrames, or None when ``src_path``
        contains no entries.
    """
    bronze_df = None
    for f in dbutils.fs.ls(src_path):
        movie_raw_df = (
            spark.read
            .option("inferSchema", 'false')
            .option('multiline', 'true')
            .schema(schema)  # honour the caller's schema, not the global one
            .json(f.path)
        )
        movie_raw_df = movie_raw_df.withColumn('movie', explode('movie'))
        movie_meta_df = movie_raw_df.select(
            'Movie',
            lit(f.name).alias('SourceFile'),
            current_timestamp().alias('IngestTime'),
            current_timestamp().cast('date').alias('p_IngestDate'),
            lit('new').alias('Status'),
        )
        if persist:
            movie_meta_df.write.format('delta').partitionBy('p_IngestDate').mode('append').save(dest_path)
        # Accumulate instead of returning, so later files are not skipped.
        bronze_df = movie_meta_df if bronze_df is None else bronze_df.union(movie_meta_df)
    return bronze_df

# Run the per-file ingestion and append the result to the bronze Delta folder.
movie_bronze = raw_to_bronze(persist=True)

In [None]:
# Alternative ingestion schema for the batch reader: every element of the
# top-level 'movie' array is parsed into a typed struct instead of a string.
movie_schema = StructType([
    StructField('movie', ArrayType(
        StructType()
        .add('BackdropUrl', StringType(), True)
        .add('Budget', FloatType(), True)
        .add('CreatedBy', StringType(), True)
        .add('CreatedDate', TimestampType(), True)
        .add('Id', LongType(), True)
        .add('ImdbUrl', StringType(), True)
        .add('OriginalLanguage', StringType(), True)
        .add('Overview', StringType(), True)
        .add('PosterUrl', StringType(), True)
        .add('Price', FloatType(), True)
        .add('ReleaseDate', TimestampType(), True)
        .add('Revenue', FloatType(), True)
        .add('RunTime', IntegerType(), True)
        .add('Tagline', StringType(), True)
        .add('Title', StringType(), True)
        .add('TmdbUrl', StringType(), True)
        .add('UpdatedBy', StringType(), True)
        .add('UpdatedDate', TimestampType(), True)
        # each genre is an (id, name) pair
        .add('genres', ArrayType(
            StructType()
            .add('id', LongType(), True)
            .add('name', StringType(), True)
        ), True)
    ), True),
])

def raw_to_bronze(src_path=raw_path, dest_path=bronze_path, schema=movie_schema, persist=True) -> DataFrame:
    """
    Batch ingestion with wildcard.

    Reads every ``movie*.json`` file under ``src_path`` in a single multi-line
    JSON read using ``schema``, explodes the 'movie' array to one row per
    element and attaches the ingestion metadata columns. Because all files are
    read together, SourceFile is the constant 'movie_json' rather than the
    individual file name.

    This definition reuses the name of the per-file ``raw_to_bronze`` and
    therefore replaces it once this cell has run.

    Args:
        src_path: Folder prefix (expected to end with '/') holding the files.
        dest_path: Delta location the rows are appended to when ``persist``.
        schema: Read schema for the JSON files.
        persist: When True, append the rows to ``dest_path``, partitioned by
            p_IngestDate.

    Returns:
        The exploded DataFrame with metadata columns.
    """
    # NOTE(review): with the detailed default schema 'Movie' is presumably a
    # struct rather than a JSON string, unlike the rows the bronze-to-silver
    # step parses with from_json - confirm before calling with persist=True.
    movie_raw_df = (
        spark.read
        .option("inferSchema", 'false')
        .option('multiline', 'true')
        .schema(schema)  # honour the caller's schema, not the global one
        .json(src_path+'movie*.json')
    )
    movie_raw_df = movie_raw_df.withColumn('movie', explode('movie'))
    movie_meta_df = movie_raw_df.select(
        'Movie',
        lit('movie_json').alias('SourceFile'),
        current_timestamp().alias('IngestTime'),
        current_timestamp().cast('date').alias('p_IngestDate'),
        lit('new').alias('Status'),
    )
    if persist:
        movie_meta_df.write.format('delta').partitionBy('p_IngestDate').mode('append').save(dest_path)
    return movie_meta_df

# Build the batch-ingested DataFrame without writing it (persist=False), so
# the bronze Delta folder is left untouched by this alternative.
movie_bronze = raw_to_bronze(persist=False)

In [None]:
%sql

-- Show the Delta history of the bronze folder: one row per table version with
-- the operation, its parameters and its metrics.
DESCRIBE HISTORY '/FileStore/movie/bronze'

version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
0,2022-08-25T14:50:44.000+0000,1289947930359854,yokurt@yahoo.com,WRITE,"Map(mode -> Append, partitionBy -> [""p_IngestDate""])",,List(936934055869446),0817-160429-43ys5vhs,,WriteSerializable,True,"Map(numFiles -> 1, numOutputRows -> 1249, numOutputBytes -> 522918)",,Databricks-Runtime/10.4.x-scala2.12


In [None]:
# Register the Bronze Table in the Metastore.
# First make sure the delta_lake schema exists and is current, and remove any
# previous registration of movie_bronze.
for bootstrap_sql in (
    """
    CREATE SCHEMA IF not EXISTS delta_lake
    """,
    """
    USE delta_lake
    """,
    """
    DROP TABLE IF EXISTS movie_bronze
    """,
):
    spark.sql(bootstrap_sql)

# Then point a fresh movie_bronze table at the bronze Delta folder.
spark.sql(
    f"""
CREATE TABLE movie_bronze
USING DELTA
LOCATION "{bronze_path}"
"""
)

In [None]:
%sql
-- Preview the first 10 rows of the registered bronze table
SELECT * FROM movie_bronze limit 10;

Movie,SourceFile,IngestTime,p_IngestDate,Status
"{""Id"":1,""Title"":""Inception"",""Overview"":""Cobb, a skilled thief who commits corporate espionage by infiltrating the subconscious of his targets is offered a chance to regain his old life as payment for a task considered to be impossible: \""inception\"", the implantation of another person's idea into a target's subconscious."",""Tagline"":""Your mind is the scene of the crime."",""Budget"":1.6E8,""Revenue"":8.25532764E8,""ImdbUrl"":""https://www.imdb.com/title/tt1375666"",""TmdbUrl"":""https://www.themoviedb.org/movie/27205"",""PosterUrl"":""https://image.tmdb.org/t/p/w342//9gk7adHYeDvHkCSEqAvQNLV5Uge.jpg"",""BackdropUrl"":""https://image.tmdb.org/t/p/original//s3TBrRGB1iav7gFOCNx3H31MoES.jpg"",""OriginalLanguage"":""en"",""ReleaseDate"":""2010-07-15T00:00:00"",""RunTime"":148,""Price"":9.9,""CreatedDate"":""2021-04-03T16:51:30.1633333"",""UpdatedDate"":null,""UpdatedBy"":null,""CreatedBy"":null,""genres"":[{""id"":1,""name"":""Adventure""},{""id"":6,""name"":""Action""},{""id"":13,""name"":""Science Fiction""}]}",movie_0.json,2022-08-25T05:59:46.822+0000,2022-08-25,new
"{""Id"":2,""Title"":""Interstellar"",""Overview"":""The adventures of a group of explorers who make use of a newly discovered wormhole to surpass the limitations on human space travel and conquer the vast distances involved in an interstellar voyage."",""Tagline"":""Mankind was born on Earth. It was never meant to die here."",""Budget"":1.65E8,""Revenue"":6.75120017E8,""ImdbUrl"":""https://www.imdb.com/title/tt0816692"",""TmdbUrl"":""https://www.themoviedb.org/movie/157336"",""PosterUrl"":""https://image.tmdb.org/t/p/w342//gEU2QniE6E77NI6lCU6MxlNBvIx.jpg"",""BackdropUrl"":""https://image.tmdb.org/t/p/original//xJHokMbljvjADYdit5fK5VQsXEG.jpg"",""OriginalLanguage"":""en"",""ReleaseDate"":""2014-11-05T00:00:00"",""RunTime"":169,""Price"":9.9,""CreatedDate"":""2021-04-03T16:51:30.1633333"",""UpdatedDate"":null,""UpdatedBy"":null,""CreatedBy"":null,""genres"":[{""id"":1,""name"":""Adventure""},{""id"":4,""name"":""Drama""},{""id"":13,""name"":""Science Fiction""}]}",movie_0.json,2022-08-25T05:59:46.822+0000,2022-08-25,new
"{""Id"":3,""Title"":""The Dark Knight"",""Overview"":""Batman raises the stakes in his war on crime. With the help of Lt. Jim Gordon and District Attorney Harvey Dent, Batman sets out to dismantle the remaining criminal organizations that plague the streets. The partnership proves to be effective, but they soon find themselves prey to a reign of chaos unleashed by a rising criminal mastermind known to the terrified citizens of Gotham as the Joker."",""Tagline"":""Why So Serious?"",""Budget"":1.85E8,""Revenue"":1.004558444E9,""ImdbUrl"":""https://www.imdb.com/title/tt0468569"",""TmdbUrl"":""https://www.themoviedb.org/movie/155"",""PosterUrl"":""https://image.tmdb.org/t/p/w342//qJ2tW6WMUDux911r6m7haRef0WH.jpg"",""BackdropUrl"":""https://image.tmdb.org/t/p/original//hkBaDkMWbLaf8B1lsWsKX7Ew3Xq.jpg"",""OriginalLanguage"":""en"",""ReleaseDate"":""2008-07-16T00:00:00"",""RunTime"":152,""Price"":9.9,""CreatedDate"":""2021-04-03T16:51:30.1633333"",""UpdatedDate"":null,""UpdatedBy"":null,""CreatedBy"":null,""genres"":[{""id"":4,""name"":""Drama""},{""id"":6,""name"":""Action""},{""id"":10,""name"":""Thriller""},{""id"":11,""name"":""Crime""}]}",movie_0.json,2022-08-25T05:59:46.822+0000,2022-08-25,new
"{""Id"":4,""Title"":""Deadpool"",""Overview"":""Deadpool tells the origin story of former Special Forces operative turned mercenary Wade Wilson, who after being subjected to a rogue experiment that leaves him with accelerated healing powers, adopts the alter ego Deadpool. Armed with his new abilities and a dark, twisted sense of humor, Deadpool hunts down the man who nearly destroyed his life."",""Tagline"":""Witness the beginning of a happy ending"",""Budget"":5.8E7,""Revenue"":7.831E8,""ImdbUrl"":""https://www.imdb.com/title/tt1431045"",""TmdbUrl"":""https://www.themoviedb.org/movie/293660"",""PosterUrl"":""https://image.tmdb.org/t/p/w342//yGSxMiF0cYuAiyuve5DA6bnWEOI.jpg"",""BackdropUrl"":""https://image.tmdb.org/t/p/original//en971MEXui9diirXlogOrPKmsEn.jpg"",""OriginalLanguage"":""en"",""ReleaseDate"":""2016-02-09T00:00:00"",""RunTime"":108,""Price"":9.9,""CreatedDate"":""2021-04-03T16:51:30.1633333"",""UpdatedDate"":null,""UpdatedBy"":null,""CreatedBy"":null,""genres"":[{""id"":1,""name"":""Adventure""},{""id"":6,""name"":""Action""},{""id"":7,""name"":""Comedy""}]}",movie_0.json,2022-08-25T05:59:46.822+0000,2022-08-25,new
"{""Id"":5,""Title"":""The Avengers"",""Overview"":""When an unexpected enemy emerges and threatens global safety and security, Nick Fury, director of the international peacekeeping agency known as S.H.I.E.L.D., finds himself in need of a team to pull the world back from the brink of disaster. Spanning the globe, a daring recruitment effort begins!"",""Tagline"":""Some assembly required."",""Budget"":2.2E8,""Revenue"":1.51955791E9,""ImdbUrl"":""https://www.imdb.com/title/tt0848228"",""TmdbUrl"":""https://www.themoviedb.org/movie/24428"",""PosterUrl"":""https://image.tmdb.org/t/p/w342//RYMX2wcKCBAr24UyPD7xwmjaTn.jpg"",""BackdropUrl"":""https://image.tmdb.org/t/p/original//kwUQFeFXOOpgloMgZaadhzkbTI4.jpg"",""OriginalLanguage"":""en"",""ReleaseDate"":""2012-04-25T00:00:00"",""RunTime"":143,""Price"":9.9,""CreatedDate"":""2021-04-03T16:51:30.1666667"",""UpdatedDate"":null,""UpdatedBy"":null,""CreatedBy"":null,""genres"":[{""id"":1,""name"":""Adventure""},{""id"":6,""name"":""Action""},{""id"":13,""name"":""Science Fiction""}]}",movie_0.json,2022-08-25T05:59:46.822+0000,2022-08-25,new
"{""Id"":6,""Title"":""Avatar"",""Overview"":""In the 22nd century, a paraplegic Marine is dispatched to the moon Pandora on a unique mission, but becomes torn between following orders and protecting an alien civilization."",""Tagline"":""Enter the World of Pandora."",""Budget"":2.37E8,""Revenue"":2.787965087E9,""ImdbUrl"":""https://www.imdb.com/title/tt0499549"",""TmdbUrl"":""https://www.themoviedb.org/movie/19995"",""PosterUrl"":""https://image.tmdb.org/t/p/w342//6EiRUJpuoeQPghrs3YNktfnqOVh.jpg"",""BackdropUrl"":""https://image.tmdb.org/t/p/original//AmHOQ7rpHwiaUMRjKXztnauSJb7.jpg"",""OriginalLanguage"":""en"",""ReleaseDate"":""2009-12-10T00:00:00"",""RunTime"":162,""Price"":9.9,""CreatedDate"":""2021-04-03T16:51:30.1666667"",""UpdatedDate"":null,""UpdatedBy"":null,""CreatedBy"":null,""genres"":[{""id"":1,""name"":""Adventure""},{""id"":2,""name"":""Fantasy""},{""id"":6,""name"":""Action""},{""id"":13,""name"":""Science Fiction""}]}",movie_0.json,2022-08-25T05:59:46.822+0000,2022-08-25,new
"{""Id"":7,""Title"":""Guardians of the Galaxy"",""Overview"":""Light years from Earth, 26 years after being abducted, Peter Quill finds himself the prime target of a manhunt after discovering an orb wanted by Ronan the Accuser."",""Tagline"":""All heroes start somewhere."",""Budget"":1.7E8,""Revenue"":7.727766E8,""ImdbUrl"":""https://www.imdb.com/title/tt2015381"",""TmdbUrl"":""https://www.themoviedb.org/movie/118340"",""PosterUrl"":""https://image.tmdb.org/t/p/w342//r7vmZjiyZw9rpJMQJdXpjgiCOk9.jpg"",""BackdropUrl"":""https://image.tmdb.org/t/p/original//mZSAu5acXueGC4Z3S5iLSWx8AEp.jpg"",""OriginalLanguage"":""en"",""ReleaseDate"":""2014-07-30T00:00:00"",""RunTime"":121,""Price"":9.9,""CreatedDate"":""2021-04-03T16:51:30.1666667"",""UpdatedDate"":null,""UpdatedBy"":null,""CreatedBy"":null,""genres"":[{""id"":1,""name"":""Adventure""},{""id"":6,""name"":""Action""},{""id"":13,""name"":""Science Fiction""}]}",movie_0.json,2022-08-25T05:59:46.822+0000,2022-08-25,new
"{""Id"":8,""Title"":""Fight Club"",""Overview"":""A ticking-time-bomb insomniac and a slippery soap salesman channel primal male aggression into a shocking new form of therapy. Their concept catches on, with underground \""fight clubs\"" forming in every town, until an eccentric gets in the way and ignites an out-of-control spiral toward oblivion."",""Tagline"":""Mischief. Mayhem. Soap."",""Budget"":6.3E7,""Revenue"":1.00853753E8,""ImdbUrl"":""https://www.imdb.com/title/tt0137523"",""TmdbUrl"":""https://www.themoviedb.org/movie/550"",""PosterUrl"":""https://image.tmdb.org/t/p/w342//8kNruSfhk5IoE4eZOc4UpvDn6tq.jpg"",""BackdropUrl"":""https://image.tmdb.org/t/p/original//52AfXWuXCHn3UjD17rBruA9f5qb.jpg"",""OriginalLanguage"":""en"",""ReleaseDate"":""1999-10-15T00:00:00"",""RunTime"":139,""Price"":9.9,""CreatedDate"":""2021-04-03T16:51:30.1666667"",""UpdatedDate"":null,""UpdatedBy"":null,""CreatedBy"":null,""genres"":[{""id"":4,""name"":""Drama""}]}",movie_0.json,2022-08-25T05:59:46.822+0000,2022-08-25,new
"{""Id"":9,""Title"":""Avengers: Infinity War"",""Overview"":""As the Avengers and their allies have continued to protect the world from threats too large for any one hero to handle, a new danger has emerged from the cosmic shadows: Thanos. A despot of intergalactic infamy, his goal is to collect all six Infinity Stones, artifacts of unimaginable power, and use them to inflict his twisted will on all of reality. Everything the Avengers have fought for has led up to this moment - the fate of Earth and existence itself has never been more uncertain."",""Tagline"":""An entire universe. Once and for all."",""Budget"":3.0E8,""Revenue"":2.046239637E9,""ImdbUrl"":""https://www.imdb.com/title/tt4154756"",""TmdbUrl"":""https://www.themoviedb.org/movie/299536"",""PosterUrl"":""https://image.tmdb.org/t/p/w342//7WsyChQLEftFiDOVTGkv3hFpyyt.jpg"",""BackdropUrl"":""https://image.tmdb.org/t/p/original//lmZFxXgJE3vgrciwuDib0N8CfQo.jpg"",""OriginalLanguage"":""en"",""ReleaseDate"":""2018-04-25T00:00:00"",""RunTime"":149,""Price"":9.9,""CreatedDate"":""2021-04-03T16:51:30.1666667"",""UpdatedDate"":null,""UpdatedBy"":null,""CreatedBy"":null,""genres"":[{""id"":1,""name"":""Adventure""},{""id"":6,""name"":""Action""},{""id"":13,""name"":""Science Fiction""}]}",movie_0.json,2022-08-25T05:59:46.822+0000,2022-08-25,new
"{""Id"":10,""Title"":""Pulp Fiction"",""Overview"":""A burger-loving hit man, his philosophical partner, a drug-addled gangster's moll and a washed-up boxer converge in this sprawling, comedic crime caper. Their adventures unfurl in three stories that ingeniously trip back and forth in time."",""Tagline"":""Just because you are a character doesn't mean you have character."",""Budget"":8000000.0,""Revenue"":2.14179088E8,""ImdbUrl"":""https://www.imdb.com/title/tt0110912"",""TmdbUrl"":""https://www.themoviedb.org/movie/680"",""PosterUrl"":""https://image.tmdb.org/t/p/w342//plnlrtBUULT0rh3Xsjmpubiso3L.jpg"",""BackdropUrl"":""https://image.tmdb.org/t/p/original//w7RDIgQM6bLT7JXtH4iUQd3Iwxm.jpg"",""OriginalLanguage"":""en"",""ReleaseDate"":""1994-09-10T00:00:00"",""RunTime"":154,""Price"":9.9,""CreatedDate"":""2021-04-03T16:51:30.1666667"",""UpdatedDate"":null,""UpdatedBy"":null,""CreatedBy"":null,""genres"":[{""id"":10,""name"":""Thriller""},{""id"":11,""name"":""Crime""}]}",movie_0.json,2022-08-25T05:59:46.822+0000,2022-08-25,new


## 2. Bronze to Silver

### Check out bronze data and unpack the schema

In [None]:
# Make delta_lake the current schema so the unqualified table names used in
# the following cells resolve against it.
spark.sql(
    """
    USE delta_lake
    """
)

In [None]:
# Check out bronze data: keep only rows still flagged Status = 'new' and drop
# rows that are identical across all columns.
movie_bronze = spark.read.table("movie_bronze").filter("Status = 'new'").drop_duplicates()
# Print the column layout for reference before unpacking the 'Movie' payload.
movie_bronze.printSchema()

In [None]:
# Typed layout of a single movie JSON document; used to unpack the raw
# 'Movie' string column of the bronze rows.
schema = (
    StructType()
    .add('Id', LongType(), True)
    .add('Title', StringType(), True)
    .add('Overview', StringType(), True)
    .add('Tagline', StringType(), True)
    .add('Budget', FloatType(), True)
    .add('Revenue', FloatType(), True)
    .add('ImdbUrl', StringType(), True)
    .add('TmdbUrl', StringType(), True)
    .add('PosterUrl', StringType(), True)
    .add('BackdropUrl', StringType(), True)
    .add('OriginalLanguage', StringType(), True)
    .add('ReleaseDate', TimestampType(), True)
    .add('RunTime', IntegerType(), True)
    .add('Price', FloatType(), True)
    .add('CreatedDate', TimestampType(), True)
    .add('UpdatedDate', TimestampType(), True)
    .add('CreatedBy', StringType(), True)
    .add('UpdatedBy', StringType(), True)
    # genres are modelled as (id, name) structs; the alternative
    # ArrayType(MapType(StringType(), StringType(), False)) is not used.
    .add('genres', ArrayType(
        StructType()
        .add('id', LongType(), True)
        .add('name', StringType(), True)
    ), True)
)

# Parse 'Movie' into a temporary struct, promote its fields to top-level
# columns next to the existing bronze columns, then discard the struct.
movie_augment_df = (
    movie_bronze
    .withColumn('value', from_json(col('Movie'), schema))
    .select('value.*', '*')
    .drop('value')
)
movie_augment_df.printSchema()

### Generate junction and lookup tables

+ Create the `language_silver` Delta folder and table.
+ The language table is special because **LangId** is newly generated by this process.
+ The other junction and lookup tables have their scope (e.g. the genre ids) predefined by the source.

In [None]:
# Build `language_silver`: the languages of the current batch that still need
# a LangId, numbered after the highest LangId already stored.

# Distinct languages referenced by the current batch of movies.
language_batch = movie_augment_df.select(col('OriginalLanguage').alias('Language')).drop_duplicates()

# Explicit existence check (current schema) instead of a bare `except:`, which
# would also hide genuine errors and silently restart the numbering at 1.
language_table_exists = any(
    t.name.lower() == 'language_silver' for t in spark.catalog.listTables()
)

if language_table_exists:
    language_silver_ex = spark.read.table("language_silver")
    # Highest LangId handed out so far; the aggregate yields None for an
    # empty table, in which case numbering starts from scratch.
    max_lang_id = language_silver_ex.agg({'LangId': 'max'}).collect()[0][0]
    LangId_offset = max_lang_id if max_lang_id is not None else 0
    # Keep only languages that are not stored yet.
    language_silver = language_batch.subtract(language_silver_ex.select('Language'))
else:
    # First run: nothing stored yet, every language of the batch is new.
    LangId_offset = 0
    language_silver = language_batch

# Assign consecutive ids in alphabetical order, continuing after the offset.
language_silver = language_silver.withColumn(
    'LangId', LangId_offset + row_number().over(Window.orderBy('Language'))
)

In [None]:
# Complete language lookup = languages already stored in the language_silver
# table (if it exists in the current schema) + the ones held in the
# `language_silver` DataFrame. Joining against the DataFrame alone would drop
# every movie whose language was registered by an earlier run, because the
# DataFrame may contain only the languages that are new in this batch.
language_lookup = language_silver
if any(t.name.lower() == 'language_silver' for t in spark.catalog.listTables()):
    language_persisted = spark.read.table('language_silver').select('Language', 'LangId').distinct()
    # Stored ids win; only languages unknown to the table are taken from the batch.
    language_new = language_silver.join(language_persisted, 'Language', 'left_anti').select('Language', 'LangId')
    language_lookup = language_persisted.union(language_new)

# Replace the language text by its LangId.
movie_silver = (
    movie_augment_df
    .join(language_lookup, col('OriginalLanguage') == language_lookup['language'], 'inner')
    .drop('OriginalLanguage', 'language')
)
# One row per (movie, genre): explode the genres array into a 'genre' struct.
movie_silver = movie_silver.withColumn('genre', explode('genres')).select('*').drop('genres')

# Junction table: which movie has which genre. No need for clean & quarantine.
movie_genre_junction_silver = movie_silver.select(col('Id').alias('MovieId'), col('genre.id').alias('GenreId')).drop_duplicates()
# Genre lookup: rows with a null id or name are removed, no need for quarantine.
genre_silver = movie_silver.select(col('genre.id').alias('GenreId'), col('genre.name').alias('GenreName')).na.drop().drop_duplicates()

### Cleanse and quarantine the bad

In [None]:
# Quarantine rules, applied while movie_silver still has one row per
# (movie, genre) pair.
movie_qua_genre_missing = movie_silver.where('genre.name is null')  # genre without a name
movie_qua_budget = movie_silver.where('Budget < 1000000')           # low budget
movie_qua_runtime = movie_silver.where('RunTime < 0')               # negative runtime

# Combine all offenders, collapse back to one row per movie and leave the
# actual fixing for later (32 rows in the author's run).
movie_quarantine = (
    movie_qua_genre_missing
    .union(movie_qua_budget)
    .union(movie_qua_runtime)
    .drop('genre')
    .drop_duplicates()
)

In [None]:
# Clean set = movies (without the per-genre column) minus the quarantined rows.
# Both sides drop 'genre', so rows are compared on the same columns.
movie_silver_clean = movie_silver.drop('genre').subtract(movie_quarantine) # 1249 - 32 = 1217 rows

In [None]:
# Label both sets through the Status column: 'quarantine' for rejected rows,
# 'load' for clean rows.
# NOTE(review): the persist cell drops 'Status' before writing, so these
# labels do not reach the silver tables - confirm whether that is intended.
movie_quarantine = movie_quarantine.withColumn('Status', lit('quarantine'))
movie_silver_clean = movie_silver_clean.withColumn('Status', lit('load'))

### Persist

In [None]:
# Append every silver DataFrame to its own Delta folder below silver_path.
# Lookup and junction tables are written unpartitioned.
language_silver.write.save(silver_path+'/language_silver', format='delta', mode='append')
movie_genre_junction_silver.write.save(silver_path+'/movie_genre_junction_silver', format='delta', mode='append')
genre_silver.write.save(silver_path+'/genre_silver', format='delta', mode='append')

# Movie tables are partitioned by ingest date. The clean table drops the raw
# 'Movie' payload and 'Status'; the quarantine table drops only 'Status'.
movie_silver_clean.drop('Movie', 'Status').write.save(
    silver_path+'/movie_silver',
    format='delta',
    mode='append',
    partitionBy='p_IngestDate',
)
movie_quarantine.drop('Status').write.save(
    silver_path+'/movie_silver_quarantine',
    format='delta',
    mode='append',
    partitionBy='p_IngestDate',
)

In [None]:
# Register every silver Delta folder as a table in the current schema.
# IF NOT EXISTS keeps this cell re-runnable: the tables are expected to
# survive between runs (the language step reads language_silver from a
# previous run), so a plain CREATE TABLE would fail on the second execution.
for silver_table in (
    'language_silver',
    'movie_genre_junction_silver',
    'genre_silver',
    'movie_silver',
    'movie_silver_quarantine',
):
    spark.sql(
        f"""
CREATE TABLE IF NOT EXISTS {silver_table}
USING DELTA
LOCATION "{silver_path}/{silver_table}"
"""
    )

In [None]:
%sql
-- Preview the first 10 rows of the movie_silver_quarantine table
SELECT * FROM movie_silver_quarantine limit 10;

Id,Title,Overview,Tagline,Budget,Revenue,ImdbUrl,TmdbUrl,PosterUrl,BackdropUrl,ReleaseDate,RunTime,Price,CreatedDate,UpdatedDate,CreatedBy,UpdatedBy,Movie,SourceFile,IngestTime,p_IngestDate,LangId
824,Paranormal Activity,"After a young, middle-class couple moves into what seems like a typical suburban house, they become increasingly disturbed by a presence that may or may not be demonic but is certainly the most active in the middle of the night. Followed by five terrifying installments in the franchise, this is the original found-footage shocker that started it all.",What Happens When You Sleep?,15000.0,193355808.0,https://www.imdb.com/title/tt1179904,https://www.themoviedb.org/movie/23827,https://image.tmdb.org/t/p/w342//1bjA7de4O0NhMsaOqwvrecophUs.jpg,https://image.tmdb.org/t/p/original//hxFjtYQ1YuAAGeK13yHE2ylu5gM.jpg,2009-09-25T00:00:00.000+0000,86,9.9,2021-04-03T16:51:30.296+0000,,,,"{""Id"":824,""Title"":""Paranormal Activity"",""Overview"":""After a young, middle-class couple moves into what seems like a typical suburban house, they become increasingly disturbed by a presence that may or may not be demonic but is certainly the most active in the middle of the night. Followed by five terrifying installments in the franchise, this is the original found-footage shocker that started it all."",""Tagline"":""What Happens When You Sleep?"",""Budget"":15000.0,""Revenue"":1.933558E8,""ImdbUrl"":""https://www.imdb.com/title/tt1179904"",""TmdbUrl"":""https://www.themoviedb.org/movie/23827"",""PosterUrl"":""https://image.tmdb.org/t/p/w342//1bjA7de4O0NhMsaOqwvrecophUs.jpg"",""BackdropUrl"":""https://image.tmdb.org/t/p/original//hxFjtYQ1YuAAGeK13yHE2ylu5gM.jpg"",""OriginalLanguage"":""en"",""ReleaseDate"":""2009-09-25T00:00:00"",""RunTime"":86,""Price"":9.9,""CreatedDate"":""2021-04-03T16:51:30.2966667"",""UpdatedDate"":null,""UpdatedBy"":null,""CreatedBy"":null,""genres"":[{""id"":5,""name"":""Horror""},{""id"":14,""name"":""Mystery""}]}",movie_0.json,2022-08-25T21:29:49.947+0000,2022-08-25,1
1076,Isn't It Romantic,"For a long time, Natalie, an Australian architect living in New York City, had always believed that what she had seen in rom-coms is all fantasy. But after thwarting a mugger at a subway station only to be knocked out while fleeing, Natalie wakes up and discovers that her life has suddenly become her worst nightmare—a romantic comedy—and she is the leading lady.",None of the Feels,0.0,0.0,https://www.imdb.com/title/tt2452244,https://www.themoviedb.org/movie/449563,https://image.tmdb.org/t/p/w342//5xNBYXuv8wqiLVDhsfqCOr75DL7.jpg,https://image.tmdb.org/t/p/original//lGYlBtu16NswrgrSY1i3gZ1E1eI.jpg,2019-02-13T00:00:00.000+0000,89,9.9,2021-04-03T16:51:30.393+0000,,,,"{""Id"":1076,""Title"":""Isn't It Romantic"",""Overview"":""For a long time, Natalie, an Australian architect living in New York City, had always believed that what she had seen in rom-coms is all fantasy. But after thwarting a mugger at a subway station only to be knocked out while fleeing, Natalie wakes up and discovers that her life has suddenly become her worst nightmare—a romantic comedy—and she is the leading lady."",""Tagline"":""None of the Feels"",""Budget"":0.0,""Revenue"":0.0,""ImdbUrl"":""https://www.imdb.com/title/tt2452244"",""TmdbUrl"":""https://www.themoviedb.org/movie/449563"",""PosterUrl"":""https://image.tmdb.org/t/p/w342//5xNBYXuv8wqiLVDhsfqCOr75DL7.jpg"",""BackdropUrl"":""https://image.tmdb.org/t/p/original//lGYlBtu16NswrgrSY1i3gZ1E1eI.jpg"",""OriginalLanguage"":""en"",""ReleaseDate"":""2019-02-13T00:00:00"",""RunTime"":89,""Price"":9.9,""CreatedDate"":""2021-04-03T16:51:30.3933333"",""UpdatedDate"":null,""UpdatedBy"":null,""CreatedBy"":null,""genres"":[{""id"":2,""name"":""Fantasy""},{""id"":7,""name"":""Comedy""},{""id"":16,""name"":""Romance""}]}",movie_0.json,2022-08-25T21:29:49.947+0000,2022-08-25,1
980,After We Collided,Tessa finds herself struggling with her complicated relationship with Hardin; she faces a dilemma that could change their lives forever.,Can love overcome the past?,0.0,0.0,https://www.imdb.com/title/tt10362466,https://www.themoviedb.org/movie/613504,https://image.tmdb.org/t/p/w342//kiX7UYfOpYrMFSAGbI6j1pFkLzQ.jpg,https://image.tmdb.org/t/p/original//6hgItrYQEG33y0I7yP2SRl2ei4w.jpg,2020-09-02T00:00:00.000+0000,105,9.9,2021-04-03T16:51:30.376+0000,,,,"{""Id"":980,""Title"":""After We Collided"",""Overview"":""Tessa finds herself struggling with her complicated relationship with Hardin; she faces a dilemma that could change their lives forever."",""Tagline"":""Can love overcome the past?"",""Budget"":0.0,""Revenue"":0.0,""ImdbUrl"":""https://www.imdb.com/title/tt10362466"",""TmdbUrl"":""https://www.themoviedb.org/movie/613504"",""PosterUrl"":""https://image.tmdb.org/t/p/w342//kiX7UYfOpYrMFSAGbI6j1pFkLzQ.jpg"",""BackdropUrl"":""https://image.tmdb.org/t/p/original//6hgItrYQEG33y0I7yP2SRl2ei4w.jpg"",""OriginalLanguage"":""en"",""ReleaseDate"":""2020-09-02T00:00:00"",""RunTime"":105,""Price"":9.9,""CreatedDate"":""2021-04-03T16:51:30.3766667"",""UpdatedDate"":null,""UpdatedBy"":null,""CreatedBy"":null,""genres"":[{""id"":4,""name"":""Drama""},{""id"":16,""name"":""Romance""}]}",movie_0.json,2022-08-25T21:29:49.947+0000,2022-08-25,1
734,Monty Python and the Holy Grail,"King Arthur, accompanied by his squire, recruits his Knights of the Round Table, including Sir Bedevere the Wise, Sir Lancelot the Brave, Sir Robin the Not-Quite-So-Brave-As-Sir-Lancelot and Sir Galahad the Pure. On the way, Arthur battles the Black Knight who, despite having had all his limbs chopped off, insists he can still fight. They reach Camelot, but Arthur decides not to enter, as ""it is a silly place"".",And now! At Last! Another film completely different from some of the other films which aren't quite the same as this one is.,400000.0,5028948.0,https://www.imdb.com/title/tt0071853,https://www.themoviedb.org/movie/762,https://image.tmdb.org/t/p/w342//jVztLnCw6F5YNOgEchm3QFydbYZ.jpg,https://image.tmdb.org/t/p/original//nE3wR3UeVaAOmipANbA1fJqIZ29.jpg,1975-05-25T00:00:00.000+0000,91,9.9,2021-04-03T16:51:30.283+0000,,,,"{""Id"":734,""Title"":""Monty Python and the Holy Grail"",""Overview"":""King Arthur, accompanied by his squire, recruits his Knights of the Round Table, including Sir Bedevere the Wise, Sir Lancelot the Brave, Sir Robin the Not-Quite-So-Brave-As-Sir-Lancelot and Sir Galahad the Pure. On the way, Arthur battles the Black Knight who, despite having had all his limbs chopped off, insists he can still fight. They reach Camelot, but Arthur decides not to enter, as \""it is a silly place\""."",""Tagline"":""And now! At Last! 
Another film completely different from some of the other films which aren't quite the same as this one is."",""Budget"":400000.0,""Revenue"":5028948.0,""ImdbUrl"":""https://www.imdb.com/title/tt0071853"",""TmdbUrl"":""https://www.themoviedb.org/movie/762"",""PosterUrl"":""https://image.tmdb.org/t/p/w342//jVztLnCw6F5YNOgEchm3QFydbYZ.jpg"",""BackdropUrl"":""https://image.tmdb.org/t/p/original//nE3wR3UeVaAOmipANbA1fJqIZ29.jpg"",""OriginalLanguage"":""en"",""ReleaseDate"":""1975-05-25T00:00:00"",""RunTime"":91,""Price"":9.9,""CreatedDate"":""2021-04-03T16:51:30.2833333"",""UpdatedDate"":null,""UpdatedBy"":null,""CreatedBy"":null,""genres"":[{""id"":1,""name"":""Adventure""},{""id"":2,""name"":""Fantasy""},{""id"":7,""name"":""Comedy""}]}",movie_0.json,2022-08-25T21:29:49.947+0000,2022-08-25,1
1142,The Ballad of Buster Scruggs,"Vignettes weaving together the stories of six individuals in the old West at the end of the Civil War. Following the tales of a sharp-shooting songster, a wannabe bank robber, two weary traveling performers, a lone gold prospector, a woman traveling the West to an uncertain future, and a motley crew of strangers undertaking a carriage ride.",Stories live forever. People don't.,0.0,0.0,https://www.imdb.com/title/tt6412452,https://www.themoviedb.org/movie/537996,https://image.tmdb.org/t/p/w342//voxl654m7p36y8FLu8oQD7dfwwK.jpg,https://image.tmdb.org/t/p/original//8ZwaQCK7awHyK9Oqt4y16dl6w6Y.jpg,2018-11-09T00:00:00.000+0000,132,9.9,2021-04-03T16:51:30.403+0000,,,,"{""Id"":1142,""Title"":""The Ballad of Buster Scruggs"",""Overview"":""Vignettes weaving together the stories of six individuals in the old West at the end of the Civil War. Following the tales of a sharp-shooting songster, a wannabe bank robber, two weary traveling performers, a lone gold prospector, a woman traveling the West to an uncertain future, and a motley crew of strangers undertaking a carriage ride."",""Tagline"":""Stories live forever. People don't."",""Budget"":0.0,""Revenue"":0.0,""ImdbUrl"":""https://www.imdb.com/title/tt6412452"",""TmdbUrl"":""https://www.themoviedb.org/movie/537996"",""PosterUrl"":""https://image.tmdb.org/t/p/w342//voxl654m7p36y8FLu8oQD7dfwwK.jpg"",""BackdropUrl"":""https://image.tmdb.org/t/p/original//8ZwaQCK7awHyK9Oqt4y16dl6w6Y.jpg"",""OriginalLanguage"":""en"",""ReleaseDate"":""2018-11-09T00:00:00"",""RunTime"":132,""Price"":9.9,""CreatedDate"":""2021-04-03T16:51:30.4033333"",""UpdatedDate"":null,""UpdatedBy"":null,""CreatedBy"":null,""genres"":[{""id"":4,""name"":""Drama""},{""id"":7,""name"":""Comedy""},{""id"":9,""name"":""Western""},{""id"":15,""name"":""Music""}]}",movie_0.json,2022-08-25T21:29:49.947+0000,2022-08-25,1
1146,Carol,"In 1950s New York, a department-store clerk who dreams of a better life falls for an older, married woman.",Some people change your life forever.,0.0,40272136.0,https://www.imdb.com/title/tt2402927,https://www.themoviedb.org/movie/258480,https://image.tmdb.org/t/p/w342//uHwjbztkPEW3lcfsbKGO3XlaPjL.jpg,https://image.tmdb.org/t/p/original//o0ghC4XMIMdbRBXIqSvnPrxwj3W.jpg,2015-11-20T00:00:00.000+0000,118,9.9,2021-04-03T16:51:30.403+0000,,,,"{""Id"":1146,""Title"":""Carol"",""Overview"":""In 1950s New York, a department-store clerk who dreams of a better life falls for an older, married woman."",""Tagline"":""Some people change your life forever."",""Budget"":0.0,""Revenue"":4.0272135E7,""ImdbUrl"":""https://www.imdb.com/title/tt2402927"",""TmdbUrl"":""https://www.themoviedb.org/movie/258480"",""PosterUrl"":""https://image.tmdb.org/t/p/w342//uHwjbztkPEW3lcfsbKGO3XlaPjL.jpg"",""BackdropUrl"":""https://image.tmdb.org/t/p/original//o0ghC4XMIMdbRBXIqSvnPrxwj3W.jpg"",""OriginalLanguage"":""en"",""ReleaseDate"":""2015-11-20T00:00:00"",""RunTime"":118,""Price"":9.9,""CreatedDate"":""2021-04-03T16:51:30.4033333"",""UpdatedDate"":null,""UpdatedBy"":null,""CreatedBy"":null,""genres"":[{""id"":4,""name"":""Drama""},{""id"":16,""name"":""Romance""}]}",movie_0.json,2022-08-25T21:29:49.947+0000,2022-08-25,1
1218,Gerald's Game,"When her husband's sex game goes wrong, Jessie (who is handcuffed to a bed in a remote lake house) faces warped visions, dark secrets and a dire choice.",Some games you play. Some you survive.,0.0,0.0,https://www.imdb.com/title/tt3748172,https://www.themoviedb.org/movie/343674,https://image.tmdb.org/t/p/w342//32dippiypDdaKv7XFEfUlQ7kPup.jpg,https://image.tmdb.org/t/p/original//t9HChjSJi8B1PXSVh5Ec3pcDsAM.jpg,2017-09-29T00:00:00.000+0000,104,9.9,2021-04-03T16:51:30.413+0000,,,,"{""Id"":1218,""Title"":""Gerald's Game"",""Overview"":""When her husband's sex game goes wrong, Jessie (who is handcuffed to a bed in a remote lake house) faces warped visions, dark secrets and a dire choice."",""Tagline"":""Some games you play. Some you survive."",""Budget"":0.0,""Revenue"":0.0,""ImdbUrl"":""https://www.imdb.com/title/tt3748172"",""TmdbUrl"":""https://www.themoviedb.org/movie/343674"",""PosterUrl"":""https://image.tmdb.org/t/p/w342//32dippiypDdaKv7XFEfUlQ7kPup.jpg"",""BackdropUrl"":""https://image.tmdb.org/t/p/original//t9HChjSJi8B1PXSVh5Ec3pcDsAM.jpg"",""OriginalLanguage"":""en"",""ReleaseDate"":""2017-09-29T00:00:00"",""RunTime"":104,""Price"":9.9,""CreatedDate"":""2021-04-03T16:51:30.4133333"",""UpdatedDate"":null,""UpdatedBy"":null,""CreatedBy"":null,""genres"":[{""id"":4,""name"":""Drama""},{""id"":5,""name"":""Horror""},{""id"":10,""name"":""Thriller""}]}",movie_0.json,2022-08-25T21:29:49.947+0000,2022-08-25,1
929,The Lion King II: Simba's Pride,"The circle of life continues for Simba, now fully grown and in his rightful place as the king of Pride Rock. Simba and Nala have given birth to a daughter, Kiara who's as rebellious as her father was. But Kiara drives her parents to distraction when she catches the eye of Kovu, the son of the evil lioness, Zira. Will Kovu steal Kiara's heart?",The Circle of Life Continues...,0.0,0.0,https://www.imdb.com/title/tt0120131,https://www.themoviedb.org/movie/9732,https://image.tmdb.org/t/p/w342//sWR1x6UCMCGN9xEf8RGhPS934X0.jpg,https://image.tmdb.org/t/p/original//9mBQu3cMwMc71HLzjB1xBaPs46R.jpg,1998-10-24T00:00:00.000+0000,81,9.9,2021-04-03T16:51:30.310+0000,,,,"{""Id"":929,""Title"":""The Lion King II: Simba's Pride"",""Overview"":""The circle of life continues for Simba, now fully grown and in his rightful place as the king of Pride Rock. Simba and Nala have given birth to a daughter, Kiara who's as rebellious as her father was. But Kiara drives her parents to distraction when she catches the eye of Kovu, the son of the evil lioness, Zira. Will Kovu steal Kiara's heart?"",""Tagline"":""The Circle of Life Continues..."",""Budget"":0.0,""Revenue"":0.0,""ImdbUrl"":""https://www.imdb.com/title/tt0120131"",""TmdbUrl"":""https://www.themoviedb.org/movie/9732"",""PosterUrl"":""https://image.tmdb.org/t/p/w342//sWR1x6UCMCGN9xEf8RGhPS934X0.jpg"",""BackdropUrl"":""https://image.tmdb.org/t/p/original//9mBQu3cMwMc71HLzjB1xBaPs46R.jpg"",""OriginalLanguage"":""en"",""ReleaseDate"":""1998-10-24T00:00:00"",""RunTime"":81,""Price"":9.9,""CreatedDate"":""2021-04-03T16:51:30.3100000"",""UpdatedDate"":null,""UpdatedBy"":null,""CreatedBy"":null,""genres"":[{""id"":1,""name"":""Adventure""},{""id"":3,""name"":""Animation""},{""id"":17,""name"":""Family""}]}",movie_0.json,2022-08-25T21:29:49.947+0000,2022-08-25,1
429,The Kissing Booth,"When teenager Elle's first kiss leads to a forbidden romance with the hottest boy in high school, she risks her relationship with her best friend.","She can tell her best friend anything, except this one thing",0.0,0.0,https://www.imdb.com/title/tt3799232,https://www.themoviedb.org/movie/454983,https://image.tmdb.org/t/p/w342//7Dktk2ST6aL8h9Oe5rpk903VLhx.jpg,https://image.tmdb.org/t/p/original//itiz2OBK4ns6XT0ufXtusojmMt9.jpg,2018-05-11T00:00:00.000+0000,105,9.9,2021-04-03T16:51:30.236+0000,,,,"{""Id"":429,""Title"":""The Kissing Booth"",""Overview"":""When teenager Elle's first kiss leads to a forbidden romance with the hottest boy in high school, she risks her relationship with her best friend."",""Tagline"":""She can tell her best friend anything, except this one thing"",""Budget"":0.0,""Revenue"":0.0,""ImdbUrl"":""https://www.imdb.com/title/tt3799232"",""TmdbUrl"":""https://www.themoviedb.org/movie/454983"",""PosterUrl"":""https://image.tmdb.org/t/p/w342//7Dktk2ST6aL8h9Oe5rpk903VLhx.jpg"",""BackdropUrl"":""https://image.tmdb.org/t/p/original//itiz2OBK4ns6XT0ufXtusojmMt9.jpg"",""OriginalLanguage"":""en"",""ReleaseDate"":""2018-05-11T00:00:00"",""RunTime"":105,""Price"":9.9,""CreatedDate"":""2021-04-03T16:51:30.2366667"",""UpdatedDate"":null,""UpdatedBy"":null,""CreatedBy"":null,""genres"":[{""id"":7,""name"":""Comedy""},{""id"":16,""name"":""Romance""}]}",movie_0.json,2022-08-25T21:29:49.947+0000,2022-08-25,1
1054,Mad Max,"Taking place in a dystopian Australia in the near future, Mad Max tells the story of a highway patrolman cruising the squalid back roads that have become the breeding ground of criminals foraging for gasoline and scraps. After some grisly events at the hands of a motorcycle gang, Max sets out across the barren wastelands in search of revenge.",The Maximum Force Of The Future.,300000.0,8771757.0,https://www.imdb.com/title/tt0079501,https://www.themoviedb.org/movie/9659,https://image.tmdb.org/t/p/w342//5LrI4GiCSrChgkdskVZiwv643Kg.jpg,https://image.tmdb.org/t/p/original//iojuitFQi3Rb05ps7rBhJTdnYGs.jpg,1979-04-12T00:00:00.000+0000,91,9.9,2021-04-03T16:51:30.390+0000,,,,"{""Id"":1054,""Title"":""Mad Max"",""Overview"":""Taking place in a dystopian Australia in the near future, Mad Max tells the story of a highway patrolman cruising the squalid back roads that have become the breeding ground of criminals foraging for gasoline and scraps. After some grisly events at the hands of a motorcycle gang, Max sets out across the barren wastelands in search of revenge."",""Tagline"":""The Maximum Force Of The Future."",""Budget"":300000.0,""Revenue"":8771757.0,""ImdbUrl"":""https://www.imdb.com/title/tt0079501"",""TmdbUrl"":""https://www.themoviedb.org/movie/9659"",""PosterUrl"":""https://image.tmdb.org/t/p/w342//5LrI4GiCSrChgkdskVZiwv643Kg.jpg"",""BackdropUrl"":""https://image.tmdb.org/t/p/original//iojuitFQi3Rb05ps7rBhJTdnYGs.jpg"",""OriginalLanguage"":""en"",""ReleaseDate"":""1979-04-12T00:00:00"",""RunTime"":91,""Price"":9.9,""CreatedDate"":""2021-04-03T16:51:30.3900000"",""UpdatedDate"":null,""UpdatedBy"":null,""CreatedBy"":null,""genres"":[{""id"":1,""name"":""Adventure""},{""id"":6,""name"":""Action""},{""id"":10,""name"":""Thriller""},{""id"":13,""name"":""Science Fiction""}]}",movie_0.json,2022-08-25T21:29:49.947+0000,2022-08-25,1


### Label the status in bronze

In [None]:
# Expose the clean and quarantined silver DataFrames to SQL as temp views
for tsv_view_name, silver_frame in (
    ('movie_silver_clean_tsv', movie_silver_clean),
    ('movie_quarantine_silver_tsv', movie_quarantine),
):
    silver_frame.createOrReplaceTempView(tsv_view_name)

In [None]:
%sql
-- Peek at the first five rows of the clean view
SELECT *
FROM movie_silver_clean_tsv
LIMIT 5

Id,Title,Overview,Tagline,Budget,Revenue,ImdbUrl,TmdbUrl,PosterUrl,BackdropUrl,ReleaseDate,RunTime,Price,CreatedDate,UpdatedDate,CreatedBy,UpdatedBy,Movie,SourceFile,IngestTime,p_IngestDate,Status,LangId
116,World War Z,"Life for former United Nations investigator Gerry Lane and his family seems content. Suddenly, the world is plagued by a mysterious infection turning whole human populations into rampaging mindless zombies. After barely escaping the chaos, Lane is persuaded to go on a mission to investigate this disease. What follows is a perilous trek around the world where Lane must brave horrific dangers and long odds to find answers before human civilization falls.",Remember Philly!,200000000.0,531864992.0,https://www.imdb.com/title/tt0816711,https://www.themoviedb.org/movie/72190,https://image.tmdb.org/t/p/w342//1SWBSYJsnyhdNRfLI1T6RsCxAQ4.jpg,https://image.tmdb.org/t/p/original//upsk7nfUjf8ZSYuokwa5U5YXERm.jpg,2013-06-20T00:00:00.000+0000,116,9.9,2021-04-03T16:51:30.190+0000,,,,"{""Id"":116,""Title"":""World War Z"",""Overview"":""Life for former United Nations investigator Gerry Lane and his family seems content. Suddenly, the world is plagued by a mysterious infection turning whole human populations into rampaging mindless zombies. After barely escaping the chaos, Lane is persuaded to go on a mission to investigate this disease. 
What follows is a perilous trek around the world where Lane must brave horrific dangers and long odds to find answers before human civilization falls."",""Tagline"":""Remember Philly!"",""Budget"":2.0E8,""Revenue"":5.31865E8,""ImdbUrl"":""https://www.imdb.com/title/tt0816711"",""TmdbUrl"":""https://www.themoviedb.org/movie/72190"",""PosterUrl"":""https://image.tmdb.org/t/p/w342//1SWBSYJsnyhdNRfLI1T6RsCxAQ4.jpg"",""BackdropUrl"":""https://image.tmdb.org/t/p/original//upsk7nfUjf8ZSYuokwa5U5YXERm.jpg"",""OriginalLanguage"":""en"",""ReleaseDate"":""2013-06-20T00:00:00"",""RunTime"":116,""Price"":9.9,""CreatedDate"":""2021-04-03T16:51:30.1900000"",""UpdatedDate"":null,""UpdatedBy"":null,""CreatedBy"":null,""genres"":[{""id"":4,""name"":""Drama""},{""id"":5,""name"":""Horror""},{""id"":6,""name"":""Action""},{""id"":10,""name"":""Thriller""},{""id"":13,""name"":""Science Fiction""}]}",movie_0.json,2022-08-25T21:29:49.947+0000,2022-08-25,load,1
747,The Wizard of Oz,"Young Dorothy finds herself in a magical world where she makes friends with a lion, a scarecrow and a tin man as they make their way along the yellow brick road to talk with the Wizard and ask for the things they miss most in their lives. The Wicked Witch of the West is the only thing that could stop them.","We're off to see the Wizard, the wonderful Wizard of Oz!",2777000.0,33754968.0,https://www.imdb.com/title/tt0032138,https://www.themoviedb.org/movie/630,https://image.tmdb.org/t/p/w342//pfAZFD7I2hxW9HCChTuAzsdE6UX.jpg,https://image.tmdb.org/t/p/original//qAvou7F5P4VcIR72JzzrnKEQSN3.jpg,1939-08-15T00:00:00.000+0000,102,9.9,2021-04-03T16:51:30.283+0000,,,,"{""Id"":747,""Title"":""The Wizard of Oz"",""Overview"":""Young Dorothy finds herself in a magical world where she makes friends with a lion, a scarecrow and a tin man as they make their way along the yellow brick road to talk with the Wizard and ask for the things they miss most in their lives. The Wicked Witch of the West is the only thing that could stop them."",""Tagline"":""We're off to see the Wizard, the wonderful Wizard of Oz!"",""Budget"":2777000.0,""Revenue"":3.3754967E7,""ImdbUrl"":""https://www.imdb.com/title/tt0032138"",""TmdbUrl"":""https://www.themoviedb.org/movie/630"",""PosterUrl"":""https://image.tmdb.org/t/p/w342//pfAZFD7I2hxW9HCChTuAzsdE6UX.jpg"",""BackdropUrl"":""https://image.tmdb.org/t/p/original//qAvou7F5P4VcIR72JzzrnKEQSN3.jpg"",""OriginalLanguage"":""en"",""ReleaseDate"":""1939-08-15T00:00:00"",""RunTime"":102,""Price"":9.9,""CreatedDate"":""2021-04-03T16:51:30.2833333"",""UpdatedDate"":null,""UpdatedBy"":null,""CreatedBy"":null,""genres"":[{""id"":1,""name"":""Adventure""},{""id"":2,""name"":""Fantasy""},{""id"":17,""name"":""Family""}]}",movie_0.json,2022-08-25T21:29:49.947+0000,2022-08-25,load,1
859,The Circle,"A young tech worker takes a job at a powerful Internet corporation, quickly rises up the company's ranks, and soon finds herself in a perilous situation concerning privacy, surveillance and freedom. She comes to learn that her decisions and actions will determine the future of humanity.",Knowing is good. Knowing everything is better.,18000000.0,20497844.0,https://www.imdb.com/title/tt4287320,https://www.themoviedb.org/movie/339988,https://image.tmdb.org/t/p/w342//bQVqd5rWrx5GbXhJNuvKy4Viz6j.jpg,https://image.tmdb.org/t/p/original//hENBbG0J4zf4fFB3OVAC50poojR.jpg,2017-04-27T00:00:00.000+0000,110,9.9,2021-04-03T16:51:30.300+0000,,,,"{""Id"":859,""Title"":""The Circle"",""Overview"":""A young tech worker takes a job at a powerful Internet corporation, quickly rises up the company's ranks, and soon finds herself in a perilous situation concerning privacy, surveillance and freedom. She comes to learn that her decisions and actions will determine the future of humanity."",""Tagline"":""Knowing is good. Knowing everything is better."",""Budget"":1.8E7,""Revenue"":2.0497844E7,""ImdbUrl"":""https://www.imdb.com/title/tt4287320"",""TmdbUrl"":""https://www.themoviedb.org/movie/339988"",""PosterUrl"":""https://image.tmdb.org/t/p/w342//bQVqd5rWrx5GbXhJNuvKy4Viz6j.jpg"",""BackdropUrl"":""https://image.tmdb.org/t/p/original//hENBbG0J4zf4fFB3OVAC50poojR.jpg"",""OriginalLanguage"":""en"",""ReleaseDate"":""2017-04-27T00:00:00"",""RunTime"":110,""Price"":9.9,""CreatedDate"":""2021-04-03T16:51:30.3000000"",""UpdatedDate"":null,""UpdatedBy"":null,""CreatedBy"":null,""genres"":[{""id"":4,""name"":""Drama""},{""id"":10,""name"":""Thriller""},{""id"":13,""name"":""Science Fiction""}]}",movie_0.json,2022-08-25T21:29:49.947+0000,2022-08-25,load,1
1031,High School Musical 2,"The East High Wildcats are gearing up for big fun as they land the coolest summer jobs imaginable. Troy, Gabriella, Chad, and Taylor have scored sweet gigs at the Lava Springs Country Club owned by Sharpay and Ryan's family. Sharpay's first rule of business: Get Troy. As Troy experiences a life of privilege he's never known, will he give up the Wildcats and Gabriella to rise to the top?",,0.0,7000000.0,https://www.imdb.com/title/tt0810900,https://www.themoviedb.org/movie/13649,https://image.tmdb.org/t/p/w342//la2kiVWDm2vuB4APZDgCCmuBh4K.jpg,https://image.tmdb.org/t/p/original//rmCMBdo5Jk8rP3L524Q6MIxxdUF.jpg,2007-08-17T00:00:00.000+0000,111,9.9,2021-04-03T16:51:30.386+0000,,,,"{""Id"":1031,""Title"":""High School Musical 2"",""Overview"":""The East High Wildcats are gearing up for big fun as they land the coolest summer jobs imaginable. Troy, Gabriella, Chad, and Taylor have scored sweet gigs at the Lava Springs Country Club owned by Sharpay and Ryan's family. Sharpay's first rule of business: Get Troy. As Troy experiences a life of privilege he's never known, will he give up the Wildcats and Gabriella to rise to the top?"",""Tagline"":"""",""Budget"":0.0,""Revenue"":7000000.0,""ImdbUrl"":""https://www.imdb.com/title/tt0810900"",""TmdbUrl"":""https://www.themoviedb.org/movie/13649"",""PosterUrl"":""https://image.tmdb.org/t/p/w342//la2kiVWDm2vuB4APZDgCCmuBh4K.jpg"",""BackdropUrl"":""https://image.tmdb.org/t/p/original//rmCMBdo5Jk8rP3L524Q6MIxxdUF.jpg"",""OriginalLanguage"":""en"",""ReleaseDate"":""2007-08-17T00:00:00"",""RunTime"":111,""Price"":9.9,""CreatedDate"":""2021-04-03T16:51:30.3866667"",""UpdatedDate"":null,""UpdatedBy"":null,""CreatedBy"":null,""genres"":[{""id"":4,""name"":""Drama""},{""id"":7,""name"":""Comedy""},{""id"":15,""name"":""Music""},{""id"":16,""name"":""Romance""},{""id"":17,""name"":""Family""},{""id"":20,""name"":""TV Movie""}]}",movie_0.json,2022-08-25T21:29:49.947+0000,2022-08-25,load,1
1235,Hostel,"Three backpackers head to a Slovakian city that promises to meet their hedonistic expectations, with no idea of the hell that awaits them.",Welcome To Your Worst Nightmare,4800000.0,81979824.0,https://www.imdb.com/title/tt0450278,https://www.themoviedb.org/movie/1690,https://image.tmdb.org/t/p/w342//dDrtuWUKhgUGp12kgUWuP0NpTdF.jpg,https://image.tmdb.org/t/p/original//tqwF1zFceqHzlDNAMfhvjbmzoXG.jpg,2006-01-06T00:00:00.000+0000,94,9.9,2021-04-03T16:51:30.416+0000,,,,"{""Id"":1235,""Title"":""Hostel"",""Overview"":""Three backpackers head to a Slovakian city that promises to meet their hedonistic expectations, with no idea of the hell that awaits them."",""Tagline"":""Welcome To Your Worst Nightmare"",""Budget"":4800000.0,""Revenue"":8.1979826E7,""ImdbUrl"":""https://www.imdb.com/title/tt0450278"",""TmdbUrl"":""https://www.themoviedb.org/movie/1690"",""PosterUrl"":""https://image.tmdb.org/t/p/w342//dDrtuWUKhgUGp12kgUWuP0NpTdF.jpg"",""BackdropUrl"":""https://image.tmdb.org/t/p/original//tqwF1zFceqHzlDNAMfhvjbmzoXG.jpg"",""OriginalLanguage"":""en"",""ReleaseDate"":""2006-01-06T00:00:00"",""RunTime"":94,""Price"":9.9,""CreatedDate"":""2021-04-03T16:51:30.4166667"",""UpdatedDate"":null,""UpdatedBy"":null,""CreatedBy"":null,""genres"":[{""id"":5,""name"":""Horror""}]}",movie_0.json,2022-08-25T21:29:49.947+0000,2022-08-25,load,1


In [None]:
# Mark clean data as load.
# Copies Status from the clean view onto the matching movie_bronze rows; a row
# matches when both its Movie payload and its IngestTime equal the source's.
# The source is reduced to the distinct columns the merge actually uses, so
# repeated view rows cannot match the same bronze row more than once (MERGE
# rejects ambiguous multi-row matches). Nothing is interpolated into the SQL,
# so a plain string is used instead of an f-string.
spark.sql(
    """
MERGE INTO movie_bronze
USING (
select distinct Movie, IngestTime, Status
from movie_silver_clean_tsv
) AS SOURCE
ON movie_bronze.Movie = SOURCE.Movie
AND movie_bronze.IngestTime = SOURCE.IngestTime
WHEN MATCHED
THEN UPDATE
SET movie_bronze.Status = SOURCE.Status
"""
)

In [None]:
# Mark quarantine data.
# Copies Status from the quarantine view onto the matching movie_bronze rows;
# a row matches when both its Movie payload and its IngestTime equal the
# source's. The source is reduced to the distinct columns the merge actually
# uses, so repeated view rows cannot match the same bronze row more than once
# (MERGE rejects ambiguous multi-row matches). Nothing is interpolated into
# the SQL, so a plain string is used instead of an f-string.
spark.sql(
    """
MERGE INTO movie_bronze
USING (
select distinct Movie, IngestTime, Status
from movie_quarantine_silver_tsv
) AS SOURCE
ON movie_bronze.Movie = SOURCE.Movie
AND movie_bronze.IngestTime = SOURCE.IngestTime
WHEN MATCHED
THEN UPDATE
SET movie_bronze.Status = SOURCE.Status
"""
)

In [None]:
%sql
-- Row count per Status value in the bronze table
SELECT count(Movie) AS Qty, Status
FROM movie_bronze
GROUP BY Status

Qty,Status
1217,load
32,quarantine


In [None]:
%sql
-- Both names were registered with createOrReplaceTempView, so they are temp
-- views and DROP VIEW is the matching statement; IF EXISTS keeps this cell
-- re-runnable once the views are already gone.
DROP VIEW IF EXISTS movie_silver_clean_tsv;
DROP VIEW IF EXISTS movie_quarantine_silver_tsv;

## 3. Silver update