In [0]:
dbutils.widgets.text("p_file_date", "2021-03-21")
v_filer_date = dbutils.widgets.get("p_file_date")

#### Read all the data as required

In [0]:
%run "../includes/configuration"

In [0]:
%run "../includes/common_functions"

In [0]:
drivers_df = spark.read.format("delta").load(f"{processed_folder_path}/drivers") \
    .withColumnRenamed("number","driver_number") \
    .withColumnRenamed("name","driver_name") \
    .withColumnRenamed("nationality", "driver_nationality")

In [0]:
constructors_df = spark.read.format("delta").load(f"{processed_folder_path}/constructors") \
        .withColumnRenamed("name", "team")

In [0]:
circuits_df = spark.read.format("delta").load(f"{processed_folder_path}/circuits") \
        .withColumnRenamed("location", "circuit_location")

In [0]:
races_df = spark.read.format("delta").load(f"{processed_folder_path}/races") \
        .withColumnRenamed("name", "race_name") \
        .withColumnRenamed("race_timestamp", "race_date")

In [0]:
results_df = spark.read.format("delta").load(f"{processed_folder_path}/results") \
        .filter(f"file_date = '{v_filer_date}'") \
        .withColumnRenamed("time", "race_time") \
        .withColumnRenamed("race_id","result_race_id") \
        .withColumnRenamed("file_date","result_file_date")

#### Join Circuits to Races

In [0]:
circuits_races_df = races_df.join(circuits_df, circuits_df.circuit_id == races_df.circuit_id,"inner") \
    .select(races_df.race_id, races_df.race_year, races_df.race_name, races_df.race_date, circuits_df.circuit_location)


#### Join Results to all other dataframe 

In [0]:
race_result_df = results_df.join(circuits_races_df, circuits_races_df.race_id == results_df.result_race_id) \
    .join(constructors_df, constructors_df.constructor_id == results_df.constructor_id) \
    .join(drivers_df, drivers_df.driver_id == results_df.driver_id)
    

In [0]:
from pyspark.sql.functions import current_timestamp

In [0]:
final_df = race_result_df.select("race_id","race_year","race_name","race_date","circuit_location","driver_name","driver_number","driver_nationality",
                                "team","grid","fastest_lap","race_time","points","position","result_file_date") \
                                .withColumn("created_date",current_timestamp()) \
                                .withColumnRenamed("result_file_date","file_date")

In [0]:
display(final_df.filter("race_year == 2020 and race_name == 'Abu Dhabi Grand Prix'").orderBy(final_df.points.desc()))

race_id,race_year,race_name,race_date,circuit_location,driver_name,driver_number,driver_nationality,team,grid,fastest_lap,race_time,points,position,file_date,created_date


In [0]:
# final_df.write.mode("overwrite").parquet(f"{presentation_folder_path}/race_result")

# final_df.write.mode("overwrite").format("parquet").saveAsTable("f1_presentation.race_results")

# overwrite_function(final_df, 'f1_presentation', 'race_results', 'race_id')

In [0]:
merge_condition = "tgt.driver_name = src.driver_name and tgt.race_id = src.race_id"
merge_delta_data(final_df, "f1_presentation", "race_results", "race_id",processed_folder_path, merge_condition)

[0;31m---------------------------------------------------------------------------[0m
[0;31mAnalysisException[0m                         Traceback (most recent call last)
File [0;32m<command-3393798542034609>, line 2[0m
[1;32m      1[0m merge_condition [38;5;241m=[39m [38;5;124m"[39m[38;5;124mtgt.driver_name = src.driver_name and tgt.race_id = src.race_id[39m[38;5;124m"[39m
[0;32m----> 2[0m merge_delta_data(final_df, [38;5;124m"[39m[38;5;124mf1_presentation[39m[38;5;124m"[39m, [38;5;124m"[39m[38;5;124mrace_results[39m[38;5;124m"[39m, [38;5;124m"[39m[38;5;124mrace_id[39m[38;5;124m"[39m,processed_folder_path, merge_condition)

File [0;32m<command-3651007263442800>, line 6[0m, in [0;36mmerge_delta_data[0;34m(input_df, db_name, table_name, partition_column, folder_path, merge_condition)[0m
[1;32m      4[0m [38;5;28;01mdef[39;00m [38;5;21mmerge_delta_data[39m(input_df, db_name,table_name,partition_column,folder_path,merge_condition):
[1;32m    

In [0]:
%sql

select * from f1_presentation.race_results ;



In [0]:
%sql
select race_id, count(1) from f1_presentation.race_results
group by race_id
order by race_id desc;



In [0]:
# %sql
# drop table f1_presentation.race_results;

