In [0]:
# Show the DBFS mount points visible to this workspace so the paths used
# below can be checked by eye.
mount_points = dbutils.fs.mounts()
display(mount_points)

In [0]:
# List the entries under the raw mount folder (same listing as `%fs ls`).
display(dbutils.fs.ls("/mnt/f1dlnagsa/raw"))

## Races Table

In [0]:
from pyspark.sql.types import StructType, StructField, IntegerType, StringType, DateType

# Explicit schema for races.csv: (column name, Spark type, nullable).
# Only "time" and "url" are declared nullable.
# NOTE(review): Spark file readers may treat the declared nullability as
# advisory rather than enforcing it — confirm before relying on it.
races_schema = StructType(
    [
        StructField(field_name, field_type, is_nullable)
        for field_name, field_type, is_nullable in (
            ("raceId", IntegerType(), False),
            ("year", IntegerType(), False),
            ("round", IntegerType(), False),
            ("circuitId", IntegerType(), False),
            ("name", StringType(), False),
            ("date", DateType(), False),
            ("time", StringType(), True),
            ("url", StringType(), True),
        )
    ]
)

In [0]:
# Read the raw races file with the explicit schema defined above; the first
# line of the file is treated as a header row.
races_df = (
    spark.read
    .option("header", True)
    .schema(races_schema)
    .csv("dbfs:/mnt/f1dlnagsa/raw/races.csv")
)

In [0]:
%python
from pyspark.sql.functions import coalesce, col, concat, current_timestamp, lit, to_timestamp

# Build "race_timestamp" from the separate "date" and "time" columns.
#
# `concat` yields null as soon as any argument is null, and "time" is declared
# nullable in the schema. The final projection keeps only "race_timestamp"
# (not "date"), so a null here would discard the race date for every race
# without a recorded start time. To avoid that, fall back to midnight of the
# race date whenever the combined value cannot be produced.
# NOTE(review): if missing times arrive as a placeholder string instead of a
# real null, the parse returns null only when ANSI mode is off — confirm the
# cluster setting and the file's null encoding.
race_date_and_time = concat(col("date"), lit(" "), col("time"))
parsed_race_timestamp = to_timestamp(race_date_and_time, "yyyy-MM-dd HH:mm:ss")
race_date_at_midnight = col("date").cast("timestamp")

races_selected_df = (
    races_df
    .withColumn("race_timestamp", coalesce(parsed_race_timestamp, race_date_at_midnight))
    # Audit column: when this row was ingested.
    .withColumn("ingestion_date", current_timestamp())
)
display(races_selected_df)

In [0]:
# Keep only the columns written to the processed table, renaming the
# camelCase identifiers to snake_case. "date", "time" and "url" are dropped.
races_final_df = races_selected_df.select(
    col("raceId").alias("race_id"),
    col("year").alias("race_year"),
    "round",
    col("circuitId").alias("circuit_id"),
    "name",
    "race_timestamp",
    "ingestion_date",
)

In [0]:
# Replace the contents of the f1_processed.races Delta table with this load,
# partitioned by race_year.
(
    races_final_df.write
    .format("delta")
    .mode("overwrite")
    .partitionBy("race_year")
    .saveAsTable("f1_processed.races")
)

In [0]:
# End the notebook run, handing this status string back to whatever invoked it.
exit_status = "Success"
dbutils.notebook.exit(exit_status)