# Імпортування даних у таблиці фактів сховища даних

Імпортуємо бібліотеки

In [2]:
import pandas as pd
from dotenv import dotenv_values
from sqlalchemy import create_engine, text
import os

Завантажуємо змінні оточення з .env файлу для з'єднання зі сховищем

In [11]:
# Read the warehouse connection settings from the .env file.
config = dotenv_values()

# Fail fast with a readable message: a missing key would otherwise come back
# as None and be baked into the connection URL as the literal text "None".
missing_settings = [
    key
    for key in ('DB_NAME', 'DB_USER', 'DB_HOST', 'DB_PASSWORD', 'DB_PORT')
    if config.get(key) is None
]
if missing_settings:
    raise RuntimeError(
        'Missing database settings in .env: ' + ', '.join(missing_settings)
    )

DB_NAME = config.get('DB_NAME')
DB_USER = config.get('DB_USER')
DB_HOST = config.get('DB_HOST')
DB_PASSWORD = config.get('DB_PASSWORD')
DB_PORT = config.get('DB_PORT')

З'єднуємося з базою даних

In [12]:
from urllib.parse import quote_plus

# Percent-encode the credentials so characters such as '@', ':' or '/' in the
# user name or password cannot break the connection URL.
engine = create_engine(
    f'postgresql+psycopg2://{quote_plus(DB_USER)}:{quote_plus(DB_PASSWORD)}'
    f'@{DB_HOST}:{DB_PORT}/{DB_NAME}'
)

Функція для внесення даних в таблицю сховища

In [13]:
def insert_data(df, table):
    """Append ``df`` to warehouse table ``table`` only if the table is empty.

    The emptiness check and the insert share one connection and one
    transaction (committed on success, rolled back on error) instead of
    holding a connection open for the count while ``to_sql`` opens a second
    one through the engine.

    Args:
        df: DataFrame to append; its index is not written.
        table: Name of the target table. It is interpolated into the
            ``COUNT`` query as-is, so pass trusted identifiers only.

    Returns:
        Number of rows written: ``len(df)``, or ``0`` when the table already
        held rows and the load was skipped.
    """
    with engine.begin() as connection:
        existing_rows = connection.execute(
            text(f"SELECT COUNT(*) FROM {table}")
        ).scalar()

        # Guard against loading the same file twice on a notebook re-run.
        if existing_rows != 0:
            return 0

        df.to_sql(table, connection, if_exists='append', index=False)

    return len(df)

Завантажимо датафрейми з ідентифікаторами сховища та датасетів

In [14]:
# Load the slug -> warehouse-key lookup tables, in the same order as before.
(
    circuits_id_df,
    constructors_id_df,
    countries_id_df,
    drivers_id_df,
    grand_prix_id_df,
    seasons_id_df,
) = (
    pd.read_csv(lookup_path)
    for lookup_path in (
        '../adapted_data/circuitsId.csv',
        '../adapted_data/constructorsId.csv',
        '../adapted_data/countriesId.csv',
        '../adapted_data/driversId.csv',
        '../adapted_data/grand_prixId.csv',
        '../adapted_data/seasonsId.csv',
    )
)

# Preview one lookup table to show its layout.
circuits_id_df.head(n=5)

Unnamed: 0,id,dbId
0,adelaide,1
1,aida,2
2,ain-diab,3
3,aintree,4
4,anderstorp,5


## Races FP1 Results Table

In [16]:
# Read the raw free-practice-1 results export and preview its first rows.
races_fp1_results_df = pd.read_csv(
    filepath_or_buffer='../data/f1db-races-free-practice-1-results.csv'
)
races_fp1_results_df.head(n=5)

Unnamed: 0,raceId,year,round,positionDisplayOrder,positionNumber,positionText,driverNumber,driverId,constructorId,engineManufacturerId,tyreManufacturerId,time,timeMillis,gap,gapMillis,interval,intervalMillis,laps
0,435,1986,15,1,1,1,6,nelson-piquet,williams,honda,goodyear,1:18.601,78601.0,,,,,
1,435,1986,15,2,2,2,12,ayrton-senna,lotus,renault,goodyear,1:18.779,78779.0,0.178,178.0,0.178,178.0,
2,435,1986,15,3,3,3,20,gerhard-berger,benetton,bmw,pirelli,1:19.004,79004.0,0.403,403.0,0.225,225.0,
3,435,1986,15,4,4,4,2,keke-rosberg,mclaren,tag,goodyear,1:19.099,79099.0,0.498,498.0,0.095,95.0,
4,435,1986,15,5,5,5,5,nigel-mansell,williams,honda,goodyear,1:19.588,79588.0,0.987,987.0,0.489,489.0,


Замінимо ідентифікатори датасету на ті, що згенерувало сховище (driver_id, constructor_id)

In [17]:
# Replace dataset slugs with warehouse keys. Series.map looks values up by
# label and keeps this frame's index, so rows cannot be misaligned the way a
# merged column can when the index is not 0..n-1; unmatched slugs become NaN.
races_fp1_results_df['driver_id'] = races_fp1_results_df['driverId'].map(
    drivers_id_df.set_index('id')['dbId']
)
races_fp1_results_df['constructor_id'] = races_fp1_results_df['constructorId'].map(
    constructors_id_df.set_index('id')['dbId']
)

Приводимо дані у потрібний нам формат для сховища

In [18]:
# Keep only the columns that are loaded into the warehouse table.
races_fp1_results_df = races_fp1_results_df.loc[:, [
    'raceId', 'driver_id', 'constructor_id',
    'positionDisplayOrder', 'positionNumber',
    'time', 'timeMillis', 'gap', 'gapMillis',
    'interval', 'intervalMillis', 'laps',
]]
races_fp1_results_df.head(n=5)

Unnamed: 0,raceId,driver_id,constructor_id,positionDisplayOrder,positionNumber,time,timeMillis,gap,gapMillis,interval,intervalMillis,laps
0,435,644,180,1,1,1:18.601,78601.0,,,,,
1,435,68,101,2,2,1:18.779,78779.0,0.178,178.0,0.178,178.0,
2,435,316,22,3,3,1:19.004,79004.0,0.403,403.0,0.225,225.0,
3,435,521,112,4,4,1:19.099,79099.0,0.498,498.0,0.095,95.0,
4,435,652,180,5,5,1:19.588,79588.0,0.987,987.0,0.489,489.0,


Перейменуємо стовпці

In [19]:
# Positional rename to the snake_case names used for the warehouse load.
races_fp1_results_df = races_fp1_results_df.set_axis(
    [
        'race_id', 'driver_id', 'constructor_id',
        'position_display_order', 'position_number',
        'time', 'time_millis', 'gap', 'gap_millis',
        'interval', 'interval_millis', 'laps',
    ],
    axis='columns',
)

Вносимо дані з датафрейму у таблицю сховища

In [20]:
# Load the prepared FP1 rows into the warehouse table.
insert_data(df=races_fp1_results_df, table='races_fp1_results')

## Races FP2 Results Table

In [21]:
# Read the raw free-practice-2 results export and preview its first rows.
races_fp2_results_df = pd.read_csv(
    filepath_or_buffer='../data/f1db-races-free-practice-2-results.csv'
)
races_fp2_results_df.head(n=5)

Unnamed: 0,raceId,year,round,positionDisplayOrder,positionNumber,positionText,driverNumber,driverId,constructorId,engineManufacturerId,tyreManufacturerId,time,timeMillis,gap,gapMillis,interval,intervalMillis,laps
0,435,1986,15,1,1,1,12,ayrton-senna,lotus,renault,goodyear,1:17.977,77977.0,,,,,
1,435,1986,15,2,2,2,20,gerhard-berger,benetton,bmw,pirelli,1:18.088,78088.0,0.111,111.0,0.111,111.0,
2,435,1986,15,3,3,3,19,teo-fabi,benetton,bmw,pirelli,1:18.154,78154.0,0.177,177.0,0.066,66.0,
3,435,1986,15,4,4,4,6,nelson-piquet,williams,honda,goodyear,1:18.353,78353.0,0.376,376.0,0.199,199.0,
4,435,1986,15,5,5,5,5,nigel-mansell,williams,honda,goodyear,1:18.785,78785.0,0.808,808.0,0.432,432.0,


Замінимо ідентифікатори датасету на ті, що згенерувало сховище (driver_id, constructor_id)

In [22]:
# Replace dataset slugs with warehouse keys. Series.map looks values up by
# label and keeps this frame's index, so rows cannot be misaligned the way a
# merged column can when the index is not 0..n-1; unmatched slugs become NaN.
races_fp2_results_df['driver_id'] = races_fp2_results_df['driverId'].map(
    drivers_id_df.set_index('id')['dbId']
)
races_fp2_results_df['constructor_id'] = races_fp2_results_df['constructorId'].map(
    constructors_id_df.set_index('id')['dbId']
)

Приводимо дані у потрібний нам формат для сховища

In [23]:
# Keep only the columns that are loaded into the warehouse table.
races_fp2_results_df = races_fp2_results_df.loc[:, [
    'raceId', 'driver_id', 'constructor_id',
    'positionDisplayOrder', 'positionNumber',
    'time', 'timeMillis', 'gap', 'gapMillis',
    'interval', 'intervalMillis', 'laps',
]]
races_fp2_results_df.head(n=5)

Unnamed: 0,raceId,driver_id,constructor_id,positionDisplayOrder,positionNumber,time,timeMillis,gap,gapMillis,interval,intervalMillis,laps
0,435,68,101,1,1,1:17.977,77977.0,,,,,
1,435,316,22,2,2,1:18.088,78088.0,0.111,111.0,0.111,111.0,
2,435,834,22,3,3,1:18.154,78154.0,0.177,177.0,0.066,66.0,
3,435,644,180,4,4,1:18.353,78353.0,0.376,376.0,0.199,199.0,
4,435,652,180,5,5,1:18.785,78785.0,0.808,808.0,0.432,432.0,


Перейменуємо стовпці

In [24]:
# Positional rename to the snake_case names used for the warehouse load.
races_fp2_results_df = races_fp2_results_df.set_axis(
    [
        'race_id', 'driver_id', 'constructor_id',
        'position_display_order', 'position_number',
        'time', 'time_millis', 'gap', 'gap_millis',
        'interval', 'interval_millis', 'laps',
    ],
    axis='columns',
)

Вносимо дані з датафрейму у таблицю сховища

In [25]:
# Load the prepared FP2 rows into the warehouse table.
insert_data(df=races_fp2_results_df, table='races_fp2_results')

## Races FP3 Results Table

In [26]:
# Read the raw free-practice-3 results export and preview its first rows.
races_fp3_results_df = pd.read_csv(
    filepath_or_buffer='../data/f1db-races-free-practice-3-results.csv'
)
races_fp3_results_df.head(n=5)

Unnamed: 0,raceId,year,round,positionDisplayOrder,positionNumber,positionText,driverNumber,driverId,constructorId,engineManufacturerId,tyreManufacturerId,time,timeMillis,gap,gapMillis,interval,intervalMillis,laps
0,706,2003,9,1,1,1,4,ralf-schumacher,williams,bmw,michelin,1:31.305,91305.0,,,,,11
1,706,2003,9,2,2,2,3,juan-pablo-montoya,williams,bmw,michelin,1:31.366,91366.0,0.061,61.0,0.061,61.0,15
2,706,2003,9,3,3,3,20,olivier-panis,toyota,toyota,michelin,1:31.490,91490.0,0.185,185.0,0.124,124.0,18
3,706,2003,9,4,4,4,5,david-coulthard,mclaren,mercedes,michelin,1:31.608,91608.0,0.303,303.0,0.118,118.0,16
4,706,2003,9,5,5,5,6,kimi-raikkonen,mclaren,mercedes,michelin,1:32.021,92021.0,0.716,716.0,0.413,413.0,13


Замінимо ідентифікатори датасету на ті, що згенерувало сховище (driver_id, constructor_id)

In [27]:
# Replace dataset slugs with warehouse keys. Series.map looks values up by
# label and keeps this frame's index, so rows cannot be misaligned the way a
# merged column can when the index is not 0..n-1; unmatched slugs become NaN.
races_fp3_results_df['driver_id'] = races_fp3_results_df['driverId'].map(
    drivers_id_df.set_index('id')['dbId']
)
races_fp3_results_df['constructor_id'] = races_fp3_results_df['constructorId'].map(
    constructors_id_df.set_index('id')['dbId']
)

Приводимо дані у потрібний нам формат для сховища

In [28]:
# Keep only the columns that are loaded into the warehouse table.
races_fp3_results_df = races_fp3_results_df.loc[:, [
    'raceId', 'driver_id', 'constructor_id',
    'positionDisplayOrder', 'positionNumber',
    'time', 'timeMillis', 'gap', 'gapMillis',
    'interval', 'intervalMillis', 'laps',
]]
races_fp3_results_df.head(n=5)

Unnamed: 0,raceId,driver_id,constructor_id,positionDisplayOrder,positionNumber,time,timeMillis,gap,gapMillis,interval,intervalMillis,laps
0,706,736,180,1,1,1:31.305,91305.0,,,,,11
1,706,506,180,2,2,1:31.366,91366.0,0.061,61.0,0.061,61.0,15
2,706,664,169,3,3,1:31.490,91490.0,0.185,185.0,0.124,124.0,18
3,706,191,112,4,4,1:31.608,91608.0,0.303,303.0,0.118,118.0,16
4,706,530,112,5,5,1:32.021,92021.0,0.716,716.0,0.413,413.0,13


Перейменуємо стовпці

In [29]:
# Positional rename to the snake_case names used for the warehouse load.
races_fp3_results_df = races_fp3_results_df.set_axis(
    [
        'race_id', 'driver_id', 'constructor_id',
        'position_display_order', 'position_number',
        'time', 'time_millis', 'gap', 'gap_millis',
        'interval', 'interval_millis', 'laps',
    ],
    axis='columns',
)

Вносимо дані з датафрейму у таблицю сховища

In [30]:
# Load the prepared FP3 rows into the warehouse table.
insert_data(df=races_fp3_results_df, table='races_fp3_results')

## Races Qualifying Results Table

In [31]:
# Read the raw qualifying results export and preview its first rows.
races_qualifying_results_df = pd.read_csv(
    filepath_or_buffer='../data/f1db-races-qualifying-results.csv'
)
races_qualifying_results_df.head(n=5)

Unnamed: 0,raceId,year,round,positionDisplayOrder,positionNumber,positionText,driverNumber,driverId,constructorId,engineManufacturerId,...,q1Millis,q2,q2Millis,q3,q3Millis,gap,gapMillis,interval,intervalMillis,laps
0,1,1950,1,1,1.0,1,2,nino-farina,alfa-romeo,alfa-romeo,...,,,,,,,,,,
1,1,1950,1,2,2.0,2,3,luigi-fagioli,alfa-romeo,alfa-romeo,...,,,,,,0.2,200.0,0.2,200.0,
2,1,1950,1,3,3.0,3,1,juan-manuel-fangio,alfa-romeo,alfa-romeo,...,,,,,,0.2,200.0,0.0,0.0,
3,1,1950,1,4,4.0,4,4,reg-parnell,alfa-romeo,alfa-romeo,...,,,,,,1.4,1400.0,1.2,1200.0,
4,1,1950,1,5,5.0,5,21,birabongse-bhanudej,maserati,maserati,...,,,,,,1.8,1800.0,0.4,400.0,


Замінимо ідентифікатори датасету на ті, що згенерувало сховище (driver_id, constructor_id)

In [32]:
# Replace dataset slugs with warehouse keys. Series.map looks values up by
# label and keeps this frame's index, so rows cannot be misaligned the way a
# merged column can when the index is not 0..n-1; unmatched slugs become NaN.
races_qualifying_results_df['driver_id'] = races_qualifying_results_df['driverId'].map(
    drivers_id_df.set_index('id')['dbId']
)
races_qualifying_results_df['constructor_id'] = races_qualifying_results_df['constructorId'].map(
    constructors_id_df.set_index('id')['dbId']
)

Приводимо дані у потрібний нам формат для сховища

In [33]:
# Keep only the columns that are loaded into the warehouse table.
races_qualifying_results_df = races_qualifying_results_df.loc[:, [
    'raceId', 'driver_id', 'constructor_id',
    'positionDisplayOrder', 'positionNumber',
    'q1', 'q1Millis', 'q2', 'q2Millis', 'q3', 'q3Millis',
    'gap', 'gapMillis', 'interval', 'intervalMillis', 'laps',
]]
races_qualifying_results_df.head(n=5)

Unnamed: 0,raceId,driver_id,constructor_id,positionDisplayOrder,positionNumber,q1,q1Millis,q2,q2Millis,q3,q3Millis,gap,gapMillis,interval,intervalMillis,laps
0,1,655,4,1,1.0,,,,,,,,,,,
1,1,565,4,2,2.0,,,,,,,0.2,200.0,0.2,200.0,
2,1,505,4,3,3.0,,,,,,,0.2,200.0,0.0,0.0,
3,1,742,4,4,4.0,,,,,,,1.4,1400.0,1.2,1200.0,
4,1,93,109,5,5.0,,,,,,,1.8,1800.0,0.4,400.0,


Перейменуємо стовпці

In [34]:
# Positional rename to the snake_case names used for the warehouse load.
races_qualifying_results_df = races_qualifying_results_df.set_axis(
    [
        'race_id', 'driver_id', 'constructor_id',
        'position_display_order', 'position_number',
        'q1', 'q1_millis', 'q2', 'q2_millis', 'q3', 'q3_millis',
        'gap', 'gap_millis', 'interval', 'interval_millis', 'laps',
    ],
    axis='columns',
)

Вносимо дані з датафрейму у таблицю сховища

In [46]:
# Load the prepared qualifying rows into the warehouse table.
insert_data(df=races_qualifying_results_df, table='races_qualifying_results')

## Races Race Results Table

In [3]:
# Read the raw race results export.
races_race_results_df = pd.read_csv('../data/f1db-races-race-results.csv')
# info() prints its summary itself, while head() is only rendered when it is
# the last expression of the cell, so the preview has to come last.
races_race_results_df.info()
races_race_results_df.head()

Unnamed: 0,raceId,year,round,positionDisplayOrder,positionNumber,positionText,driverNumber,driverId,constructorId,engineManufacturerId,...,intervalMillis,reasonRetired,points,gridPositionNumber,gridPositionText,positionsGained,fastestLap,pitStops,driverOfTheDay,grandSlam
0,1,1950,1,1,1.0,1,2,nino-farina,alfa-romeo,alfa-romeo,...,,,9.0,1.0,1,0.0,True,,,False
1,1,1950,1,2,2.0,2,3,luigi-fagioli,alfa-romeo,alfa-romeo,...,2600.0,,6.0,2.0,2,0.0,False,,,False
2,1,1950,1,3,3.0,3,4,reg-parnell,alfa-romeo,alfa-romeo,...,49400.0,,4.0,4.0,4,1.0,False,,,False
3,1,1950,1,4,4.0,4,14,yves-giraud-cabantous,talbot-lago,talbot-lago,...,,,3.0,6.0,6,2.0,False,,,False
4,1,1950,1,5,5.0,5,15,louis-rosier,talbot-lago,talbot-lago,...,,,2.0,9.0,9,4.0,False,,,False


Замінимо ідентифікатори датасету на ті, що згенерувало сховище (driver_id, constructor_id)

In [40]:
# Replace dataset slugs with warehouse keys. Series.map looks values up by
# label and keeps this frame's index, so rows cannot be misaligned the way a
# merged column can when the index is not 0..n-1; unmatched slugs become NaN.
races_race_results_df['driver_id'] = races_race_results_df['driverId'].map(
    drivers_id_df.set_index('id')['dbId']
)
races_race_results_df['constructor_id'] = races_race_results_df['constructorId'].map(
    constructors_id_df.set_index('id')['dbId']
)

Приводимо дані у потрібний нам формат для сховища

In [42]:
# Keep only the columns that are loaded into the warehouse table.
races_race_results_df = races_race_results_df.loc[:, [
    'raceId', 'driver_id', 'constructor_id',
    'positionDisplayOrder', 'positionNumber', 'laps',
    'time', 'timeMillis', 'timePenalty', 'timePenaltyMillis',
    'gap', 'gapMillis', 'gapLaps', 'interval', 'intervalMillis',
    'reasonRetired', 'points', 'gridPositionNumber', 'positionsGained',
    'fastestLap', 'pitStops', 'driverOfTheDay', 'grandSlam',
]]
races_race_results_df.head(n=5)

Unnamed: 0,raceId,driver_id,constructor_id,positionDisplayOrder,positionNumber,laps,time,timeMillis,timePenalty,timePenaltyMillis,...,interval,intervalMillis,reasonRetired,points,gridPositionNumber,positionsGained,fastestLap,pitStops,driverOfTheDay,grandSlam
0,1,655,4,1,1.0,70.0,2:13:23.600,8003600.0,,,...,,,,9.0,1.0,0.0,True,,,False
1,1,565,4,2,2.0,70.0,2:13:26.200,8006200.0,,,...,2.6,2600.0,,6.0,2.0,0.0,False,,,False
2,1,742,4,3,3.0,70.0,2:14:15.600,8055600.0,,,...,49.4,49400.0,,4.0,4.0,1.0,False,,,False
3,1,899,162,4,4.0,68.0,,,,,...,,,,3.0,6.0,2.0,False,,,False
4,1,558,162,5,5.0,68.0,,,,,...,,,,2.0,9.0,4.0,False,,,False


Перейменуємо стовпці

In [44]:
# Positional rename to the snake_case names used for the warehouse load.
races_race_results_df = races_race_results_df.set_axis(
    [
        'race_id', 'driver_id', 'constructor_id',
        'position_display_order', 'position_number', 'laps',
        'time', 'time_millis', 'time_penalty', 'time_penalty_millis',
        'gap', 'gap_millis', 'gap_laps', 'interval', 'interval_millis',
        'reason_retired', 'points', 'grid_position_number', 'positions_gained',
        'fastest_lap', 'pit_stops', 'driver_of_the_day', 'grand_slam',
    ],
    axis='columns',
)


Вносимо дані з датафрейму у таблицю сховища

In [47]:
# Load the prepared race result rows into the warehouse table.
insert_data(df=races_race_results_df, table='races_race_results')

## Races Pit Stops Table

In [4]:
# Read the raw pit stops export.
races_pit_stops_df = pd.read_csv('../data/f1db-races-pit-stops.csv')
# info() prints its summary itself, while head() is only rendered when it is
# the last expression of the cell, so the preview has to come last.
races_pit_stops_df.info()
races_pit_stops_df.head()

Unnamed: 0,raceId,year,round,positionDisplayOrder,positionNumber,positionText,driverNumber,driverId,constructorId,engineManufacturerId,tyreManufacturerId,stop,lap,time,timeMillis
0,550,1994,2,1,1,1,20,erik-comas,larrousse,ford,goodyear,1,1,49.111,49111.0
1,550,1994,2,2,2,2,3,ukyo-katayama,tyrrell,yamaha,goodyear,1,17,28.482,28482.0
2,550,1994,2,3,3,3,7,mika-hakkinen,mclaren,peugeot,goodyear,1,18,43.745,43745.0
3,550,1994,2,4,4,4,0,damon-hill,williams,renault,goodyear,1,18,21.992,21992.0
4,550,1994,2,5,5,5,24,michele-alboreto,minardi,ford,goodyear,1,19,27.693,27693.0


Замінимо ідентифікатори датасету на ті, що згенерувало сховище (driver_id, constructor_id)

In [49]:
# Replace dataset slugs with warehouse keys. Series.map looks values up by
# label and keeps this frame's index, so rows cannot be misaligned the way a
# merged column can when the index is not 0..n-1; unmatched slugs become NaN.
races_pit_stops_df['driver_id'] = races_pit_stops_df['driverId'].map(
    drivers_id_df.set_index('id')['dbId']
)
races_pit_stops_df['constructor_id'] = races_pit_stops_df['constructorId'].map(
    constructors_id_df.set_index('id')['dbId']
)

Приводимо дані у потрібний нам формат для сховища

In [51]:
# Keep only the columns that are loaded into the warehouse table.
races_pit_stops_df = races_pit_stops_df.loc[:, [
    'raceId', 'driver_id', 'constructor_id',
    'positionDisplayOrder', 'positionNumber',
    'stop', 'lap', 'time', 'timeMillis',
]]
races_pit_stops_df.head(n=5)

Unnamed: 0,raceId,driver_id,constructor_id,positionDisplayOrder,positionNumber,stop,lap,time,timeMillis
0,550,254,91,1,1,1,1,49.111,49111.0
1,550,870,173,2,2,1,17,28.482,28482.0
2,550,617,112,3,3,1,18,43.745,43745.0
3,550,179,180,4,4,1,18,21.992,21992.0
4,550,614,117,5,5,1,19,27.693,27693.0


Перейменуємо стовпці

In [52]:
# Positional rename to the snake_case names used for the warehouse load.
races_pit_stops_df = races_pit_stops_df.set_axis(
    [
        'race_id', 'driver_id', 'constructor_id',
        'position_display_order', 'position_number',
        'stop', 'lap', 'time', 'time_millis',
    ],
    axis='columns',
)


Вносимо дані з датафрейму у таблицю сховища

In [53]:
# Load the prepared pit stop rows into the warehouse table.
insert_data(df=races_pit_stops_df, table='races_pit_stops')

## Races Sprint Qualifying Results Table

In [5]:
# Read the raw sprint qualifying results export and preview its first rows.
races_sprint_qualifying_results_df = pd.read_csv(
    filepath_or_buffer='../data/f1db-races-sprint-qualifying-results.csv'
)
races_sprint_qualifying_results_df.head(n=5)

Unnamed: 0,raceId,year,round,positionDisplayOrder,positionNumber,positionText,driverNumber,driverId,constructorId,engineManufacturerId,...,q1Millis,q2,q2Millis,q3,q3Millis,gap,gapMillis,interval,intervalMillis,laps
0,1083,2023,4,1,1.0,1,16,charles-leclerc,ferrari,ferrari,...,102820.0,1:42.500,102500.0,1:41.697,101697.0,,,,,14
1,1083,2023,4,2,2.0,2,11,sergio-perez,red-bull,honda-rbpt,...,103858.0,1:42.925,102925.0,1:41.844,101844.0,0.147,147.0,0.147,147.0,15
2,1083,2023,4,3,3.0,3,1,max-verstappen,red-bull,honda-rbpt,...,103288.0,1:42.417,102417.0,1:41.987,101987.0,0.29,290.0,0.143,143.0,13
3,1083,2023,4,4,4.0,4,63,george-russell,mercedes,mercedes,...,103763.0,1:43.112,103112.0,1:42.252,102252.0,0.555,555.0,0.265,265.0,18
4,1083,2023,4,5,5.0,5,55,carlos-sainz-jr,ferrari,ferrari,...,103622.0,1:42.909,102909.0,1:42.287,102287.0,0.59,590.0,0.035,35.0,15


Замінимо ідентифікатори датасету на ті, що згенерувало сховище (driver_id, constructor_id)

In [61]:
# Replace dataset slugs with warehouse keys. Series.map looks values up by
# label and keeps this frame's index, so rows cannot be misaligned the way a
# merged column can when the index is not 0..n-1; unmatched slugs become NaN.
races_sprint_qualifying_results_df['driver_id'] = races_sprint_qualifying_results_df['driverId'].map(
    drivers_id_df.set_index('id')['dbId']
)
races_sprint_qualifying_results_df['constructor_id'] = races_sprint_qualifying_results_df['constructorId'].map(
    constructors_id_df.set_index('id')['dbId']
)

Приводимо дані у потрібний нам формат для сховища

In [64]:
# Keep only the columns that are loaded into the warehouse table.
races_sprint_qualifying_results_df = races_sprint_qualifying_results_df[[
    'raceId', 'driver_id', 'constructor_id',
    'positionDisplayOrder', 'positionNumber',
    'q1', 'q1Millis', 'q2', 'q2Millis', 'q3', 'q3Millis',
    'gap', 'gapMillis', 'interval', 'intervalMillis', 'laps',
]]
# A single preview is enough: only the cell's last expression is rendered,
# so the earlier duplicate head() call had no effect.
races_sprint_qualifying_results_df.head()

Unnamed: 0,raceId,driver_id,constructor_id,positionDisplayOrder,positionNumber,q1,q1Millis,q2,q2Millis,q3,q3Millis,gap,gapMillis,interval,intervalMillis,laps
0,1083,143,57,1,1.0,1:42.820,102820.0,1:42.500,102500.0,1:41.697,101697.0,,,,,14
1,1083,812,141,2,2.0,1:43.858,103858.0,1:42.925,102925.0,1:41.844,101844.0,0.147,147.0,0.147,147.0,15
2,1083,606,141,3,3.0,1:43.288,103288.0,1:42.417,102417.0,1:41.987,101987.0,0.29,290.0,0.143,143.0,13
3,1083,312,113,4,4.0,1:43.763,103763.0,1:43.112,103112.0,1:42.252,102252.0,0.555,555.0,0.265,265.0,18
4,1083,137,57,5,5.0,1:43.622,103622.0,1:42.909,102909.0,1:42.287,102287.0,0.59,590.0,0.035,35.0,15


Перейменуємо стовпці

In [66]:
# Positional rename to the snake_case names used for the warehouse load.
races_sprint_qualifying_results_df = races_sprint_qualifying_results_df.set_axis(
    [
        'race_id', 'driver_id', 'constructor_id',
        'position_display_order', 'position_number',
        'q1', 'q1_millis', 'q2', 'q2_millis', 'q3', 'q3_millis',
        'gap', 'gap_millis', 'interval', 'interval_millis', 'laps',
    ],
    axis='columns',
)



Вносимо дані з датафрейму у таблицю сховища

In [67]:
# Load the prepared sprint qualifying rows into the warehouse table.
insert_data(
    df=races_sprint_qualifying_results_df,
    table='races_sprint_qualifying_results',
)

## Races Sprint Race Results Table

In [6]:
# Read the raw sprint race results export and preview its first rows.
races_sprint_race_results_df = pd.read_csv(
    filepath_or_buffer='../data/f1db-races-sprint-race-results.csv'
)
races_sprint_race_results_df.head(n=5)

Unnamed: 0,raceId,year,round,positionDisplayOrder,positionNumber,positionText,driverNumber,driverId,constructorId,engineManufacturerId,...,intervalMillis,reasonRetired,points,gridPositionNumber,gridPositionText,positionsGained,fastestLap,pitStops,driverOfTheDay,grandSlam
0,1045,2021,10,1,1.0,1,33,max-verstappen,red-bull,honda,...,,,3.0,2.0,2,1.0,False,,False,False
1,1045,2021,10,2,2.0,2,44,lewis-hamilton,mercedes,mercedes,...,1430.0,,2.0,1.0,1,-1.0,False,,False,False
2,1045,2021,10,3,3.0,3,77,valtteri-bottas,mercedes,mercedes,...,6072.0,,1.0,3.0,3,0.0,False,,False,False
3,1045,2021,10,4,4.0,4,16,charles-leclerc,ferrari,ferrari,...,3776.0,,,4.0,4,0.0,False,,False,False
4,1045,2021,10,5,5.0,5,4,lando-norris,mclaren,mercedes,...,12833.0,,,6.0,6,1.0,False,,False,False


Замінимо ідентифікатори датасету на ті, що згенерувало сховище (driver_id, constructor_id)

In [69]:
# Replace dataset slugs with warehouse keys. Series.map looks values up by
# label and keeps this frame's index, so rows cannot be misaligned the way a
# merged column can when the index is not 0..n-1; unmatched slugs become NaN.
races_sprint_race_results_df['driver_id'] = races_sprint_race_results_df['driverId'].map(
    drivers_id_df.set_index('id')['dbId']
)
races_sprint_race_results_df['constructor_id'] = races_sprint_race_results_df['constructorId'].map(
    constructors_id_df.set_index('id')['dbId']
)

Приводимо дані у потрібний нам формат для сховища

In [71]:
# Keep only the columns that are loaded into the warehouse table.
races_sprint_race_results_df = races_sprint_race_results_df[[
    'raceId', 'driver_id', 'constructor_id',
    'positionDisplayOrder', 'positionNumber', 'laps',
    'time', 'timeMillis', 'timePenalty', 'timePenaltyMillis',
    'gap', 'gapMillis', 'gapLaps', 'interval', 'intervalMillis',
    'reasonRetired', 'points', 'gridPositionNumber', 'positionsGained',
    'fastestLap', 'pitStops',
]]
# A single preview is enough: only the cell's last expression is rendered,
# so the earlier duplicate head() call had no effect.
races_sprint_race_results_df.head()

Unnamed: 0,raceId,driver_id,constructor_id,positionDisplayOrder,positionNumber,laps,time,timeMillis,timePenalty,timePenaltyMillis,...,gapMillis,gapLaps,interval,intervalMillis,reasonRetired,points,gridPositionNumber,positionsGained,fastestLap,pitStops
0,1045,606,141,1,1.0,17.0,25:38.426,1538426.0,,,...,,,,,,3.0,2.0,1.0,False,
1,1045,551,113,2,2.0,17.0,25:39.856,1539856.0,,,...,1430.0,,1.43,1430.0,,2.0,1.0,-1.0,False,
2,1045,872,113,3,3.0,17.0,25:45.928,1545928.0,,,...,7502.0,,6.072,6072.0,,1.0,3.0,0.0,False,
3,1045,143,57,4,4.0,17.0,25:49.704,1549704.0,,,...,11278.0,,3.776,3776.0,,,4.0,0.0,False,
4,1045,539,112,5,5.0,17.0,26:02.537,1562537.0,,,...,24111.0,,12.833,12833.0,,,6.0,1.0,False,


Перейменуємо стовпці

In [72]:
# Positional rename to the snake_case names used for the warehouse load.
races_sprint_race_results_df = races_sprint_race_results_df.set_axis(
    [
        'race_id', 'driver_id', 'constructor_id',
        'position_display_order', 'position_number', 'laps',
        'time', 'time_millis', 'time_penalty', 'time_penalty_millis',
        'gap', 'gap_millis', 'gap_laps', 'interval', 'interval_millis',
        'reason_retired', 'points', 'grid_position_number', 'positions_gained',
        'fastest_lap', 'pit_stops',
    ],
    axis='columns',
)



Вносимо дані з датафрейму у таблицю сховища

In [73]:
# Load the prepared sprint race rows into the warehouse table.
insert_data(df=races_sprint_race_results_df, table='races_sprint_race_results')

## Races Constructor Standings Table

In [7]:
# Read the raw per-race constructor standings export and preview its rows.
races_constructor_standings_df = pd.read_csv(
    filepath_or_buffer='../data/f1db-races-constructor-standings.csv'
)
races_constructor_standings_df.head(n=5)

Unnamed: 0,raceId,year,round,positionDisplayOrder,positionNumber,positionText,constructorId,engineManufacturerId,points,positionsGained
0,65,1958,1,1,1.0,1,cooper,climax,8.0,
1,65,1958,1,2,2.0,2,ferrari,ferrari,6.0,
2,65,1958,1,3,3.0,3,maserati,maserati,3.0,
3,66,1958,2,1,1.0,1,cooper,climax,16.0,0.0
4,66,1958,2,2,2.0,2,ferrari,ferrari,12.0,0.0


Замінимо ідентифікатор конструктора з датасету на той, що згенерувало сховище (constructor_id)

In [None]:
# Replace constructor slugs with warehouse keys. Series.map looks values up by
# label and keeps this frame's index, so rows cannot be misaligned the way a
# merged column can when the index is not 0..n-1; unmatched slugs become NaN.
races_constructor_standings_df['constructor_id'] = races_constructor_standings_df['constructorId'].map(
    constructors_id_df.set_index('id')['dbId']
)

Приводимо дані у потрібний нам формат для сховища

In [None]:
# Keep only the columns that are loaded into the warehouse table. The explicit
# copy makes the result an independent frame, so the column assignment below
# does not raise pandas' SettingWithCopyWarning.
races_constructor_standings_df = races_constructor_standings_df[[
    'raceId', 'constructor_id', 'positionDisplayOrder',
    'positionNumber', 'points', 'positionsGained',
]].copy()
# Nullable Int64 stores positions as integers while still allowing missing
# values, which a plain int64 column cannot hold.
races_constructor_standings_df['positionNumber'] = (
    races_constructor_standings_df['positionNumber'].astype('Int64')
)
races_constructor_standings_df.head()

Unnamed: 0,raceId,constructor_id,positionDisplayOrder,positionNumber,points,positionsGained
0,65,38,1,1,8.0,
1,65,57,2,2,6.0,
2,65,109,3,3,3.0,
3,66,38,1,1,16.0,0.0
4,66,57,2,2,12.0,0.0


Перейменуємо стовпці

In [None]:
# Positional rename to the snake_case names used for the warehouse load.
races_constructor_standings_df = races_constructor_standings_df.set_axis(
    [
        'race_id', 'constructor_id', 'position_display_order',
        'position_number', 'points', 'positions_gained',
    ],
    axis='columns',
)

Вносимо дані з датафрейму у таблицю сховища

In [None]:
# Load the prepared constructor standings rows into the warehouse table.
insert_data(
    df=races_constructor_standings_df,
    table='races_constructor_standings',
)

## Races Driver of the Day Results Table

In [8]:
# Read the raw driver-of-the-day results export and preview its first rows.
races_dod_results_df = pd.read_csv(
    filepath_or_buffer='../data/f1db-races-driver-of-the-day-results.csv'
)
races_dod_results_df.head(n=5)

Unnamed: 0,raceId,year,round,positionDisplayOrder,positionNumber,positionText,driverNumber,driverId,constructorId,engineManufacturerId,tyreManufacturerId,percentage
0,936,2016,1,1,1,1,8,romain-grosjean,haas,ferrari,pirelli,
1,937,2016,2,1,1,1,8,romain-grosjean,haas,ferrari,pirelli,
2,938,2016,3,1,1,1,26,daniil-kvyat,red-bull,tag-heuer,pirelli,
3,939,2016,4,1,1,1,20,kevin-magnussen,renault,renault,pirelli,
4,940,2016,5,1,1,1,33,max-verstappen,red-bull,tag-heuer,pirelli,


Замінимо ідентифікатори датасету на ті, що згенерувало сховище (driver_id, constructor_id)

In [86]:
# Replace dataset slugs with warehouse keys. Series.map looks values up by
# label and keeps this frame's index, so rows cannot be misaligned the way a
# merged column can when the index is not 0..n-1; unmatched slugs become NaN.
races_dod_results_df['driver_id'] = races_dod_results_df['driverId'].map(
    drivers_id_df.set_index('id')['dbId']
)
races_dod_results_df['constructor_id'] = races_dod_results_df['constructorId'].map(
    constructors_id_df.set_index('id')['dbId']
)

Приводимо дані у потрібний нам формат для сховища

In [87]:
# Keep only the columns that are loaded into the warehouse table.
races_dod_results_df = races_dod_results_df.loc[:, [
    'raceId', 'driver_id', 'constructor_id',
    'positionDisplayOrder', 'positionNumber', 'percentage',
]]
races_dod_results_df.head(n=5)

Unnamed: 0,raceId,driver_id,constructor_id,positionDisplayOrder,positionNumber,percentage
0,936,787,70,1,1,
1,937,787,70,1,1,
2,938,184,141,1,1,
3,939,529,142,1,1,
4,940,606,141,1,1,


Перейменуємо стовпці

In [88]:
# Positional rename to the snake_case names used for the warehouse load.
races_dod_results_df = races_dod_results_df.set_axis(
    [
        'race_id', 'driver_id', 'constructor_id',
        'position_display_order', 'position_number', 'percentage',
    ],
    axis='columns',
)

Вносимо дані з датафрейму у таблицю сховища

In [89]:
# Load the prepared driver-of-the-day rows into the warehouse table.
insert_data(df=races_dod_results_df, table='races_dod_results')

## Races Driver Standings Table

In [9]:
# Read the raw per-race driver standings export and preview its first rows.
races_driver_standings_df = pd.read_csv(
    filepath_or_buffer='../data/f1db-races-driver-standings.csv'
)
races_driver_standings_df.head(n=5)

Unnamed: 0,raceId,year,round,positionDisplayOrder,positionNumber,positionText,driverId,points,positionsGained
0,1,1950,1,1,1.0,1,nino-farina,9.0,
1,1,1950,1,2,2.0,2,luigi-fagioli,6.0,
2,1,1950,1,3,3.0,3,reg-parnell,4.0,
3,1,1950,1,4,4.0,4,yves-giraud-cabantous,3.0,
4,1,1950,1,5,5.0,5,louis-rosier,2.0,


Замінимо ідентифікатор пілота з датасету на той, що згенерувало сховище (driver_id)

In [91]:
# Replace driver slugs with warehouse keys. Series.map looks values up by
# label and keeps this frame's index, so rows cannot be misaligned the way a
# merged column can when the index is not 0..n-1; unmatched slugs become NaN.
races_driver_standings_df['driver_id'] = races_driver_standings_df['driverId'].map(
    drivers_id_df.set_index('id')['dbId']
)

Приводимо дані у потрібний нам формат для сховища

In [92]:
# Keep only the columns that are loaded into the warehouse table.
races_driver_standings_df = races_driver_standings_df.loc[:, [
    'raceId', 'driver_id', 'positionDisplayOrder',
    'positionNumber', 'points', 'positionsGained',
]]
races_driver_standings_df.head(n=5)

Unnamed: 0,raceId,driver_id,positionDisplayOrder,positionNumber,points,positionsGained
0,1,655,1,1.0,9.0,
1,1,565,2,2.0,6.0,
2,1,742,3,3.0,4.0,
3,1,899,4,4.0,3.0,
4,1,558,5,5.0,2.0,


Перейменуємо стовпці

In [94]:
# Positional rename to the snake_case names used for the warehouse load.
races_driver_standings_df = races_driver_standings_df.set_axis(
    [
        'race_id', 'driver_id', 'position_display_order',
        'position_number', 'points', 'positions_gained',
    ],
    axis='columns',
)

Вносимо дані з датафрейму у таблицю сховища

In [95]:
# Load the prepared driver standings rows into the warehouse table.
insert_data(df=races_driver_standings_df, table='races_driver_standings')

## Seasons Constructor Standings Table

In [17]:
# Read the raw per-season constructor standings export and preview its rows.
seasons_constructor_standings_df = pd.read_csv(
    filepath_or_buffer='../data/f1db-seasons-constructor-standings.csv'
)
seasons_constructor_standings_df.head(n=5)


Unnamed: 0,year,positionDisplayOrder,positionNumber,positionText,constructorId,engineManufacturerId,points
0,1958,1,1.0,1,vanwall,vanwall,48.0
1,1958,2,2.0,2,ferrari,ferrari,40.0
2,1958,3,3.0,3,cooper,climax,31.0
3,1958,4,4.0,4,brm,brm,18.0
4,1958,5,5.0,5,maserati,maserati,6.0


Замінимо ідентифікатори датасету на ті, що згенерувало сховище (constructor_id, а також year — на ідентифікатор сезону)

In [18]:
# Replace dataset identifiers with warehouse keys. Series.map looks values up
# by label and keeps this frame's index, so rows cannot be misaligned the way
# a merged column can when the index is not 0..n-1; unmatched ids become NaN.
seasons_constructor_standings_df['constructor_id'] = seasons_constructor_standings_df['constructorId'].map(
    constructors_id_df.set_index('id')['dbId']
)
# 'year' is overwritten in place with the season key, so running this cell a
# second time would look up the keys themselves rather than the years.
seasons_constructor_standings_df['year'] = seasons_constructor_standings_df['year'].map(
    seasons_id_df.set_index('id')['dbId']
)

Приводимо дані у потрібний нам формат для сховища

In [19]:
# Keep only the columns that are loaded into the warehouse table.
seasons_constructor_standings_df = seasons_constructor_standings_df.loc[:, [
    'year', 'constructor_id', 'positionDisplayOrder',
    'positionNumber', 'points',
]]

Перейменуємо стовпці

In [20]:
# Positional rename to the snake_case names used for the warehouse load.
seasons_constructor_standings_df = seasons_constructor_standings_df.set_axis(
    [
        'season_id', 'constructor_id', 'position_display_order',
        'position_number', 'points',
    ],
    axis='columns',
)
seasons_constructor_standings_df.head(n=5)

Unnamed: 0,season_id,constructor_id,position_display_order,position_number,points
0,9,174,1,1.0,48.0
1,9,57,2,2.0,40.0
2,9,38,3,3.0,31.0
3,9,28,4,4.0,18.0
4,9,109,5,5.0,6.0


Вносимо дані з датафрейму у таблицю сховища

In [105]:
# Load the prepared season constructor standings into the warehouse table.
insert_data(
    df=seasons_constructor_standings_df,
    table='seasons_constructor_standings',
)

## Seasons Driver Standings Table

In [21]:
# Read the raw per-season driver standings export and preview its first rows.
seasons_driver_standings_df = pd.read_csv(
    filepath_or_buffer='../data/f1db-seasons-driver-standings.csv'
)
seasons_driver_standings_df.head(n=5)

Unnamed: 0,year,positionDisplayOrder,positionNumber,positionText,driverId,points
0,1950,1,1.0,1,nino-farina,30.0
1,1950,2,2.0,2,juan-manuel-fangio,27.0
2,1950,3,3.0,3,luigi-fagioli,24.0
3,1950,4,4.0,4,louis-rosier,13.0
4,1950,5,5.0,5,alberto-ascari,11.0


Замінимо ідентифікатори датасету на ті, що згенерувало сховище (driver_id, а також year — на ідентифікатор сезону)

In [22]:
# Replace dataset identifiers with warehouse keys. Series.map looks values up
# by label and keeps this frame's index, so rows cannot be misaligned the way
# a merged column can when the index is not 0..n-1; unmatched ids become NaN.
seasons_driver_standings_df['driver_id'] = seasons_driver_standings_df['driverId'].map(
    drivers_id_df.set_index('id')['dbId']
)
# 'year' is overwritten in place with the season key, so running this cell a
# second time would look up the keys themselves rather than the years.
seasons_driver_standings_df['year'] = seasons_driver_standings_df['year'].map(
    seasons_id_df.set_index('id')['dbId']
)

Приводимо дані у потрібний нам формат для сховища

In [23]:
# Keep only the columns that are loaded into the warehouse table.
seasons_driver_standings_df = seasons_driver_standings_df.loc[:, [
    'year', 'driver_id', 'positionDisplayOrder',
    'positionNumber', 'points',
]]

seasons_driver_standings_df.head(n=5)

Unnamed: 0,year,driver_id,positionDisplayOrder,positionNumber,points
0,1,655,1,1.0,30.0
1,1,505,2,2.0,27.0
2,1,565,3,3.0,24.0
3,1,558,4,4.0,13.0
4,1,18,5,5.0,11.0


Перейменуємо стовпці

In [24]:
# Positional rename to the snake_case names used for the warehouse load.
seasons_driver_standings_df = seasons_driver_standings_df.set_axis(
    [
        'season_id', 'driver_id', 'position_display_order',
        'position_number', 'points',
    ],
    axis='columns',
)
seasons_driver_standings_df.head(n=5)


Unnamed: 0,season_id,driver_id,position_display_order,position_number,points
0,1,655,1,1.0,30.0
1,1,505,2,2.0,27.0
2,1,565,3,3.0,24.0
3,1,558,4,4.0,13.0
4,1,18,5,5.0,11.0


Вносимо дані з датафрейму у таблицю сховища

In [119]:
# Load the prepared season driver standings into the warehouse table.
insert_data(df=seasons_driver_standings_df, table='seasons_driver_standings')