### Ingest constructors.json file

In [0]:
%run "../includes/configuration"

In [0]:
%run "../includes/common_functions"

In [0]:
# Notebook parameter: declare a text widget (empty default) and read its value.
_data_source_widget = "p_data_source"
dbutils.widgets.text(_data_source_widget, "")
v_data_source = dbutils.widgets.get(_data_source_widget)

#### Step 1 - Read the JSON file using the Spark DataFrame reader

In [0]:
# DDL-style schema for the constructors file, one column per line.
# Adjacent literals are concatenated into a single schema string.
constructor_schema = (
    "constructorId INT,"
    "constructorRef string,"
    "name string, "
    "nationality string, "
    "url string"
)

In [0]:
# Load the raw constructors JSON with the explicit schema instead of inferring it.
construct_df = spark.read.json(
    f'{raw_folder_path}/constructors.json/',
    schema=constructor_schema,
)

In [0]:
# Print the dataframe schema to confirm the explicit column types were applied.
construct_df.printSchema()

root
 |-- constructorId: integer (nullable = true)
 |-- constructorRef: string (nullable = true)
 |-- name: string (nullable = true)
 |-- nationality: string (nullable = true)
 |-- url: string (nullable = true)



In [0]:
# Render the loaded dataframe to inspect the raw rows.
display(construct_df)

constructorId,constructorRef,name,nationality,url
1,mclaren,McLaren,British,http://en.wikipedia.org/wiki/McLaren
2,bmw_sauber,BMW Sauber,German,http://en.wikipedia.org/wiki/BMW_Sauber
3,williams,Williams,British,http://en.wikipedia.org/wiki/Williams_Grand_Prix_Engineering
4,renault,Renault,French,http://en.wikipedia.org/wiki/Renault_in_Formula_One
5,toro_rosso,Toro Rosso,Italian,http://en.wikipedia.org/wiki/Scuderia_Toro_Rosso
6,ferrari,Ferrari,Italian,http://en.wikipedia.org/wiki/Scuderia_Ferrari
7,toyota,Toyota,Japanese,http://en.wikipedia.org/wiki/Toyota_Racing
8,super_aguri,Super Aguri,Japanese,http://en.wikipedia.org/wiki/Super_Aguri_F1
9,red_bull,Red Bull,Austrian,http://en.wikipedia.org/wiki/Red_Bull_Racing
10,force_india,Force India,Indian,http://en.wikipedia.org/wiki/Racing_Point_Force_India



#### Step 2 - Drop the unwanted column from the dataframe


In [0]:
from pyspark.sql.functions import col

In [0]:
# The url column is not carried forward; remove it by name.
constructor_dropped_df = construct_df.drop('url')


#### Step 3 - Rename columns and add the data source and ingestion date

In [0]:
from pyspark.sql.functions import lit

In [0]:
# Switch the id/ref columns to snake_case and tag every row with the
# data source passed in through the notebook widget.
constructor_renamed_df = (
    constructor_dropped_df
    .withColumnRenamed("constructorId", "constructor_id")
    .withColumnRenamed("constructorRef", "constructor_ref")
    .withColumn("data_source", lit(v_data_source))
)

In [0]:
# add_ingestion_date is not defined in this notebook (presumably it comes from
# ../includes/common_functions - confirm). The rendered output after this step
# shows an extra ingestion_date timestamp column.
# NOTE(review): this rebinds constructor_renamed_df to the helper's result.
constructor_renamed_df = add_ingestion_date(constructor_renamed_df)

In [0]:
# Render the transformed dataframe to check the renamed and added columns.
display(constructor_renamed_df)

constructor_id,constructor_ref,name,nationality,data_source,ingestion_date
1,mclaren,McLaren,British,testing,2024-01-27T06:11:26.888Z
2,bmw_sauber,BMW Sauber,German,testing,2024-01-27T06:11:26.888Z
3,williams,Williams,British,testing,2024-01-27T06:11:26.888Z
4,renault,Renault,French,testing,2024-01-27T06:11:26.888Z
5,toro_rosso,Toro Rosso,Italian,testing,2024-01-27T06:11:26.888Z
6,ferrari,Ferrari,Italian,testing,2024-01-27T06:11:26.888Z
7,toyota,Toyota,Japanese,testing,2024-01-27T06:11:26.888Z
8,super_aguri,Super Aguri,Japanese,testing,2024-01-27T06:11:26.888Z
9,red_bull,Red Bull,Austrian,testing,2024-01-27T06:11:26.888Z
10,force_india,Force India,Indian,testing,2024-01-27T06:11:26.888Z


In [0]:
# Step 4 - Write the result as parquet to the processed folder,
# replacing any output left by a previous run.
constructor_renamed_df.write.parquet(
    f'{processed_folder_path}/constructors',
    mode='overwrite',
)

In [0]:
# List the files just written. The path is built from processed_folder_path
# (the same variable the write step uses) rather than a hard-coded mount path,
# so the listing always inspects the location that was actually written.
display(dbutils.fs.ls(f'{processed_folder_path}/constructors'))

path,name,size,modificationTime
dbfs:/mnt/formula1dlajay/processed/constructors/_SUCCESS,_SUCCESS,0,1706335888000
dbfs:/mnt/formula1dlajay/processed/constructors/_committed_2170537692634117497,_committed_2170537692634117497,122,1706278743000
dbfs:/mnt/formula1dlajay/processed/constructors/_committed_4426805983916794136,_committed_4426805983916794136,232,1706335708000
dbfs:/mnt/formula1dlajay/processed/constructors/_committed_6278370906564638702,_committed_6278370906564638702,223,1706335887000
dbfs:/mnt/formula1dlajay/processed/constructors/_committed_vacuum3601799114994695477,_committed_vacuum3601799114994695477,96,1706335709000
dbfs:/mnt/formula1dlajay/processed/constructors/_started_4426805983916794136,_started_4426805983916794136,0,1706335708000
dbfs:/mnt/formula1dlajay/processed/constructors/_started_6278370906564638702,_started_6278370906564638702,0,1706335887000
dbfs:/mnt/formula1dlajay/processed/constructors/part-00000-tid-6278370906564638702-4af70d6c-2e34-4d79-bd60-fd4f40e97f44-341-1-c000.snappy.parquet,part-00000-tid-6278370906564638702-4af70d6c-2e34-4d79-bd60-fd4f40e97f44-341-1-c000.snappy.parquet,6921,1706335887000


In [0]:
# Optional sanity check, left disabled: read the written parquet output back
# and display it. Note the path here is hard-coded.
# display(spark.read.parquet('/mnt/formula1dlajay/processed/constructors'))

In [0]:
# End the notebook run and hand the exit value "Success" back to the caller.
dbutils.notebook.exit("Success")