# Імпортування даних у таблиці вимірів сховища даних

Імпортуємо бібліотеки

In [1]:
import os

import pandas as pd
from dotenv import dotenv_values
from sqlalchemy import create_engine, text
from sqlalchemy.engine import URL

Завантажуємо змінні оточення з .env файлу для з'єднання зі сховищем

In [2]:
# Read the connection settings; a key that is absent from the loaded values yields None.
config = dotenv_values()

DB_NAME, DB_USER, DB_HOST, DB_PASSWORD, DB_PORT = (
    config.get(key)
    for key in ('DB_NAME', 'DB_USER', 'DB_HOST', 'DB_PASSWORD', 'DB_PORT')
)

З'єднуємося з базою даних

In [3]:
# Build the URL from its components so that special characters in the
# credentials (e.g. '@', ':' or '/' in the password) are escaped instead of
# corrupting the connection string.
db_url = URL.create(
    drivername='postgresql+psycopg2',
    username=DB_USER,
    password=DB_PASSWORD,
    host=DB_HOST,
    port=int(DB_PORT) if DB_PORT else None,  # .env values are strings; URL expects an int
    database=DB_NAME,
)
engine = create_engine(db_url)

## Countries Table

Імпортуємо дані з датасету

In [4]:
# Load the countries dataset from CSV and preview the first rows.
countries_df = pd.read_csv('../data/f1db-countries.csv')
countries_df.head()

Unnamed: 0,id,alpha2Code,alpha3Code,name,demonym,continentId
0,afghanistan,AF,AFG,Afghanistan,Afghan,asia
1,aland-islands,AX,ALA,Åland Islands,Ålandic,europe
2,albania,AL,ALB,Albania,Albanian,europe
3,algeria,DZ,DZA,Algeria,Algerian,africa
4,american-samoa,AS,ASM,American Samoa,Samoan,australia


Приводимо дані у потрібний нам формат для сховища

In [5]:
# Keep the columns stored in the warehouse plus the dataset's own 'id', which is
# used later to look up the generated ids.
# .copy() makes this an independent frame, so adding the 'dbId' column later
# does not raise SettingWithCopyWarning.
countries_df = countries_df[['alpha3Code', 'name', 'demonym', 'id']].copy()
countries_df.head()

Unnamed: 0,alpha3Code,name,demonym,id
0,AFG,Afghanistan,Afghan,afghanistan
1,ALA,Åland Islands,Ålandic,aland-islands
2,ALB,Albania,Albanian,albania
3,DZA,Algeria,Algerian,algeria
4,ASM,American Samoa,Samoan,american-samoa


Перейменуємо стовпці

In [6]:
# Rename by explicit mapping rather than by position, so the result does not
# depend on the column order chosen when the columns were selected.
countries_df = countries_df.rename(columns={'alpha3Code': 'alpha3_code'})
countries_df.head()

Unnamed: 0,alpha3_code,name,demonym,id
0,AFG,Afghanistan,Afghan,afghanistan
1,ALA,Åland Islands,Ålandic,aland-islands
2,ALB,Albania,Albanian,albania
3,DZA,Algeria,Algerian,algeria
4,ASM,American Samoa,Samoan,american-samoa


Створимо функцію, яка внесе потрібні нам дані у сховище та поверне згенеровані ідентифікатори для кожного рядка

In [7]:
def insert_data(df, table):
    """Load ``df`` into ``table`` if the table is empty and return the table's ids.

    The insert is skipped when the table already holds rows, so re-running the
    notebook does not duplicate data; in that case the ids already stored in
    the table are returned.

    Args:
        df: DataFrame to append to the table (written without its index).
        table: Name of the target table. It is interpolated into the SQL text
            as-is, so it must be a trusted identifier, never user input.

    Returns:
        List with the values of the table's ``id`` column in ascending order.
    """
    with engine.connect() as connection:
        result = connection.execute(text(f"SELECT COUNT(*) FROM {table}"))
        count = result.scalar()

        if count == 0:
            df.to_sql(table, engine, if_exists='append', index=False)

        # Without ORDER BY the database may return rows in any order, while the
        # callers assign the ids to DataFrame rows by position. Sorting by id
        # restores insertion order, assuming ids are generated in increasing
        # order (e.g. a serial/identity column) -- TODO confirm against the schema.
        result = connection.execute(text(f"SELECT id FROM {table} ORDER BY id"))
        generated_ids = [row[0] for row in result]

    return generated_ids

Вносимо дані з датафрейму у таблицю країн. Функція повертає список ідентифікаторів, які сховище присвоїло рядкам. Додаємо ці ідентифікатори до датафрейму, аби в наступних датафреймах замінити ними початкові ідентифікатори країн.

In [8]:
# Only the warehouse columns are sent to the table; the dataset's own 'id'
# stays in the frame next to the returned ids.
country_rows = countries_df[['alpha3_code', 'name', 'demonym']]
countries_df['dbId'] = insert_data(country_rows, 'countries')
countries_df.head()

Unnamed: 0,alpha3_code,name,demonym,id,dbId
0,AFG,Afghanistan,Afghan,afghanistan,1
1,ALA,Åland Islands,Ålandic,aland-islands,2
2,ALB,Albania,Albanian,albania,3
3,DZA,Algeria,Algerian,algeria,4
4,ASM,American Samoa,Samoan,american-samoa,5


## Circuits Table

Імпортуємо дані з датасету

In [9]:
# Load the circuits dataset from CSV and preview the first rows.
circuits_df = pd.read_csv('../data/f1db-circuits.csv')
circuits_df.head()

Unnamed: 0,id,name,fullName,previousNames,type,placeName,countryId,latitude,longitude,totalRacesHeld
0,adelaide,Adelaide,Adelaide Street Circuit,,STREET,Adelaide,australia,-34.927222,138.617222,11
1,aida,Aida,Okayama International Circuit,TI Circuit Aida,RACE,Aida,japan,34.915,134.221111,2
2,ain-diab,Ain-Diab,Ain-Diab Circuit,,ROAD,Casablanca,morocco,33.578611,-7.6875,1
3,aintree,Aintree,Aintree Motor Racing Circuit,,ROAD,Aintree,united-kingdom,53.476944,-2.940556,5
4,anderstorp,Anderstorp Raceway,Anderstorp Raceway,Scandinavian Raceway,RACE,Anderstorp,sweden,57.264167,13.601389,6


Замінимо ідентифікатори країн у таблиці на ті, що згенерувало сховище.

In [10]:
# Translate each dataset country id into the id generated by the warehouse.
# A lookup via map() keeps exactly one value per row and does not rely on the
# merge result lining up with circuits_df's index.
country_db_ids = countries_df.set_index('id')['dbId']
circuits_df['countryId'] = circuits_df['countryId'].map(country_db_ids)
circuits_df.head()

Unnamed: 0,id,name,fullName,previousNames,type,placeName,countryId,latitude,longitude,totalRacesHeld
0,adelaide,Adelaide,Adelaide Street Circuit,,STREET,Adelaide,14,-34.927222,138.617222,11
1,aida,Aida,Okayama International Circuit,TI Circuit Aida,RACE,Aida,113,34.915,134.221111,2
2,ain-diab,Ain-Diab,Ain-Diab Circuit,,ROAD,Casablanca,149,33.578611,-7.6875,1
3,aintree,Aintree,Aintree Motor Racing Circuit,,ROAD,Aintree,235,53.476944,-2.940556,5
4,anderstorp,Anderstorp Raceway,Anderstorp Raceway,Scandinavian Raceway,RACE,Anderstorp,216,57.264167,13.601389,6


Приводимо дані у потрібний нам формат для сховища

In [11]:
# Keep the columns stored in the warehouse plus the dataset's own 'id', which is
# used later to look up the generated ids.
# .copy() makes this an independent frame, so adding the 'dbId' column later
# does not raise SettingWithCopyWarning.
circuits_df = circuits_df[['name', 'fullName', 'type', 'countryId', 'latitude', 'longitude', 'id']].copy()
circuits_df.head()

Unnamed: 0,name,fullName,type,countryId,latitude,longitude,id
0,Adelaide,Adelaide Street Circuit,STREET,14,-34.927222,138.617222,adelaide
1,Aida,Okayama International Circuit,RACE,113,34.915,134.221111,aida
2,Ain-Diab,Ain-Diab Circuit,ROAD,149,33.578611,-7.6875,ain-diab
3,Aintree,Aintree Motor Racing Circuit,ROAD,235,53.476944,-2.940556,aintree
4,Anderstorp Raceway,Anderstorp Raceway,RACE,216,57.264167,13.601389,anderstorp


Перейменуємо стовпці

In [12]:
# Rename by explicit mapping rather than by position, so the result does not
# depend on the column order chosen when the columns were selected.
circuits_df = circuits_df.rename(columns={
    'fullName': 'full_name',
    'countryId': 'country_id',
})
circuits_df.head()

Unnamed: 0,name,full_name,type,country_id,latitude,longitude,id
0,Adelaide,Adelaide Street Circuit,STREET,14,-34.927222,138.617222,adelaide
1,Aida,Okayama International Circuit,RACE,113,34.915,134.221111,aida
2,Ain-Diab,Ain-Diab Circuit,ROAD,149,33.578611,-7.6875,ain-diab
3,Aintree,Aintree Motor Racing Circuit,ROAD,235,53.476944,-2.940556,aintree
4,Anderstorp Raceway,Anderstorp Raceway,RACE,216,57.264167,13.601389,anderstorp


Вносимо дані з датафрейму у таблицю гоночних трас

In [13]:
# Only the warehouse columns are sent to the table; the dataset's own 'id'
# stays in the frame next to the returned ids.
circuit_rows = circuits_df[['name', 'full_name', 'type', 'country_id', 'latitude', 'longitude']]
circuits_df['dbId'] = insert_data(circuit_rows, 'circuits')
circuits_df.head()

Unnamed: 0,name,full_name,type,country_id,latitude,longitude,id,dbId
0,Adelaide,Adelaide Street Circuit,STREET,14,-34.927222,138.617222,adelaide,1
1,Aida,Okayama International Circuit,RACE,113,34.915,134.221111,aida,2
2,Ain-Diab,Ain-Diab Circuit,ROAD,149,33.578611,-7.6875,ain-diab,3
3,Aintree,Aintree Motor Racing Circuit,ROAD,235,53.476944,-2.940556,aintree,4
4,Anderstorp Raceway,Anderstorp Raceway,RACE,216,57.264167,13.601389,anderstorp,5


## Constructors Table

Імпортуємо дані з датасету

In [14]:
# Load the constructors dataset from CSV and preview the first rows.
constructors_df = pd.read_csv('../data/f1db-constructors.csv')
constructors_df.head()

Unnamed: 0,id,name,fullName,countryId,bestChampionshipPosition,bestStartingGridPosition,bestRaceResult,totalChampionshipWins,totalRaceEntries,totalRaceStarts,totalRaceWins,total1And2Finishes,totalRaceLaps,totalPodiums,totalPodiumRaces,totalChampionshipPoints,totalPolePositions,totalFastestLaps
0,adams,Adams,Adams,united-states-of-america,,17.0,27.0,0,1,1,0,0,108,0,0,0.0,0,0
1,afm,AFM,Alex von Falkenhausen Motorenbau,germany,,9.0,9.0,0,4,4,0,0,98,0,0,0.0,0,0
2,ags,AGS,Automobiles Gonfaronnaises Sportives,france,11.0,10.0,6.0,0,80,48,0,0,2031,0,0,2.0,0,0
3,alfa-romeo,Alfa Romeo,Alfa Romeo Racing,italy,6.0,1.0,1.0,0,214,214,10,4,20608,26,18,199.0,12,16
4,alfa-special,Alfa Special,Alfa Special,south-africa,,16.0,10.0,0,2,2,0,0,132,0,0,0.0,0,0


Замінимо ідентифікатори країн у таблиці на ті, що згенерувало сховище.

In [15]:
# Translate each dataset country id into the id generated by the warehouse.
# A lookup via map() keeps exactly one value per row and does not rely on the
# merge result lining up with constructors_df's index.
country_db_ids = countries_df.set_index('id')['dbId']
constructors_df['countryId'] = constructors_df['countryId'].map(country_db_ids)
constructors_df.head()

Unnamed: 0,id,name,fullName,countryId,bestChampionshipPosition,bestStartingGridPosition,bestRaceResult,totalChampionshipWins,totalRaceEntries,totalRaceStarts,totalRaceWins,total1And2Finishes,totalRaceLaps,totalPodiums,totalPodiumRaces,totalChampionshipPoints,totalPolePositions,totalFastestLaps
0,adams,Adams,Adams,237,,17.0,27.0,0,1,1,0,0,108,0,0,0.0,0,0
1,afm,AFM,Alex von Falkenhausen Motorenbau,84,,9.0,9.0,0,4,4,0,0,98,0,0,0.0,0,0
2,ags,AGS,Automobiles Gonfaronnaises Sportives,77,11.0,10.0,6.0,0,80,48,0,0,2031,0,0,2.0,0,0
3,alfa-romeo,Alfa Romeo,Alfa Romeo Racing,110,6.0,1.0,1.0,0,214,214,10,4,20608,26,18,199.0,12,16
4,alfa-special,Alfa Special,Alfa Special,206,,16.0,10.0,0,2,2,0,0,132,0,0,0.0,0,0


Приводимо дані у потрібний нам формат для сховища

In [16]:
# Keep the columns stored in the warehouse plus the dataset's own 'id', which is
# used later to look up the generated ids.
# .copy() makes this an independent frame, so adding the 'dbId' column later
# does not raise SettingWithCopyWarning.
constructors_df = constructors_df[['name', 'fullName', 'countryId', 'id']].copy()
constructors_df.head()

Unnamed: 0,name,fullName,countryId,id
0,Adams,Adams,237,adams
1,AFM,Alex von Falkenhausen Motorenbau,84,afm
2,AGS,Automobiles Gonfaronnaises Sportives,77,ags
3,Alfa Romeo,Alfa Romeo Racing,110,alfa-romeo
4,Alfa Special,Alfa Special,206,alfa-special


Перейменуємо стовпці

In [17]:
# Rename by explicit mapping rather than by position, so the result does not
# depend on the column order chosen when the columns were selected.
constructors_df = constructors_df.rename(columns={
    'fullName': 'full_name',
    'countryId': 'country_id',
})
constructors_df.head()

Unnamed: 0,name,full_name,country_id,id
0,Adams,Adams,237,adams
1,AFM,Alex von Falkenhausen Motorenbau,84,afm
2,AGS,Automobiles Gonfaronnaises Sportives,77,ags
3,Alfa Romeo,Alfa Romeo Racing,110,alfa-romeo
4,Alfa Special,Alfa Special,206,alfa-special


Вносимо дані з датафрейму у таблицю команд

In [18]:
# Only the warehouse columns are sent to the table; the dataset's own 'id'
# stays in the frame next to the returned ids.
constructor_rows = constructors_df[['name', 'full_name', 'country_id']]
constructors_df['dbId'] = insert_data(constructor_rows, 'constructors')
constructors_df.head()

Unnamed: 0,name,full_name,country_id,id,dbId
0,Adams,Adams,237,adams,1
1,AFM,Alex von Falkenhausen Motorenbau,84,afm,2
2,AGS,Automobiles Gonfaronnaises Sportives,77,ags,3
3,Alfa Romeo,Alfa Romeo Racing,110,alfa-romeo,4
4,Alfa Special,Alfa Special,206,alfa-special,5


## Drivers Table

Імпортуємо дані з датасету

In [19]:
# Load the drivers dataset from CSV and preview the first rows.
drivers_df = pd.read_csv('../data/f1db-drivers.csv')
drivers_df.head()

Unnamed: 0,id,name,firstName,lastName,fullName,abbreviation,permanentNumber,gender,dateOfBirth,dateOfDeath,...,totalRaceStarts,totalRaceWins,totalRaceLaps,totalPodiums,totalPoints,totalChampionshipPoints,totalPolePositions,totalFastestLaps,totalDriverOfTheDay,totalGrandSlams
0,adderly-fong,Adderly Fong,Adderly,Fong,Adderly Fong Cheun-yue,FON,,MALE,1990-03-02,,...,0,0,0,0,0.0,0.0,0,0,0,0
1,adolf-brudes,Adolf Brudes,Adolf,Brudes,Adolf Brudes von Breslau,BRU,,MALE,1899-10-15,1986-11-05,...,1,0,5,0,0.0,0.0,0,0,0,0
2,adolfo-schwelm-cruz,Adolfo Schwelm Cruz,Adolfo,Schwelm Cruz,Adolfo Julio Carlos Schwelm Cruz,SCH,,MALE,1923-06-28,2012-02-10,...,1,0,20,0,0.0,0.0,0,0,0,0
3,adrian-campos,Adrián Campos,Adrián,Campos,Adrián Campos Suñer,CAM,,MALE,1960-06-17,2021-01-27,...,17,0,433,0,0.0,0.0,0,0,0,0
4,adrian-sutil,Adrian Sutil,Adrian,Sutil,Adrian Sutil,SUT,,MALE,1983-01-11,,...,128,0,6022,0,124.0,124.0,0,1,0,0


Замінимо ідентифікатори країн у таблиці на ті, що згенерувало сховище.

In [20]:
# Translate each dataset country id into the id generated by the warehouse.
# A lookup via map() keeps exactly one value per row and does not rely on the
# merge result lining up with drivers_df's index.
country_db_ids = countries_df.set_index('id')['dbId']
drivers_df['nationalityCountryId'] = drivers_df['nationalityCountryId'].map(country_db_ids)
drivers_df[['nationalityCountryId']].head()

Unnamed: 0,nationalityCountryId
0,100
1,84
2,11
3,210
4,84


Приводимо дані у потрібний нам формат для сховища

In [21]:
# Keep the columns stored in the warehouse plus the dataset's own 'id', which is
# used later to look up the generated ids.
# .copy() makes this an independent frame, so adding the 'dbId' column later
# does not raise SettingWithCopyWarning.
drivers_df = drivers_df[['name', 'firstName', 'lastName', 'fullName', 'abbreviation', 'permanentNumber', 'gender', 'dateOfBirth', 'nationalityCountryId', 'id']].copy()
drivers_df.head()

Unnamed: 0,name,firstName,lastName,fullName,abbreviation,permanentNumber,gender,dateOfBirth,nationalityCountryId,id
0,Adderly Fong,Adderly,Fong,Adderly Fong Cheun-yue,FON,,MALE,1990-03-02,100,adderly-fong
1,Adolf Brudes,Adolf,Brudes,Adolf Brudes von Breslau,BRU,,MALE,1899-10-15,84,adolf-brudes
2,Adolfo Schwelm Cruz,Adolfo,Schwelm Cruz,Adolfo Julio Carlos Schwelm Cruz,SCH,,MALE,1923-06-28,11,adolfo-schwelm-cruz
3,Adrián Campos,Adrián,Campos,Adrián Campos Suñer,CAM,,MALE,1960-06-17,210,adrian-campos
4,Adrian Sutil,Adrian,Sutil,Adrian Sutil,SUT,,MALE,1983-01-11,84,adrian-sutil


Перейменуємо стовпці

In [24]:
# Rename by explicit mapping rather than by position, so the result does not
# depend on the column order chosen when the columns were selected.
drivers_df = drivers_df.rename(columns={
    'firstName': 'first_name',
    'lastName': 'last_name',
    'fullName': 'full_name',
    'permanentNumber': 'permanent_number',
    'dateOfBirth': 'date_of_birth',
    'nationalityCountryId': 'nationality_country_id',
})
drivers_df.head()

Unnamed: 0,name,first_name,last_name,full_name,abbreviation,permanent_number,gender,date_of_birth,nationality_country_id,id
0,Adderly Fong,Adderly,Fong,Adderly Fong Cheun-yue,FON,,MALE,1990-03-02,100,adderly-fong
1,Adolf Brudes,Adolf,Brudes,Adolf Brudes von Breslau,BRU,,MALE,1899-10-15,84,adolf-brudes
2,Adolfo Schwelm Cruz,Adolfo,Schwelm Cruz,Adolfo Julio Carlos Schwelm Cruz,SCH,,MALE,1923-06-28,11,adolfo-schwelm-cruz
3,Adrián Campos,Adrián,Campos,Adrián Campos Suñer,CAM,,MALE,1960-06-17,210,adrian-campos
4,Adrian Sutil,Adrian,Sutil,Adrian Sutil,SUT,,MALE,1983-01-11,84,adrian-sutil


Вносимо дані з датафрейму у таблицю гонщиків

In [25]:
# Only the warehouse columns are sent to the table; the dataset's own 'id'
# stays in the frame next to the returned ids.
driver_rows = drivers_df[['name', 'first_name', 'last_name', 'full_name', 'abbreviation', 'permanent_number', 'gender', 'date_of_birth', 'nationality_country_id']]
drivers_df['dbId'] = insert_data(driver_rows, 'drivers')
drivers_df.head()

Unnamed: 0,name,first_name,last_name,full_name,abbreviation,permanent_number,gender,date_of_birth,nationality_country_id,id,dbId
0,Adderly Fong,Adderly,Fong,Adderly Fong Cheun-yue,FON,,MALE,1990-03-02,100,adderly-fong,1
1,Adolf Brudes,Adolf,Brudes,Adolf Brudes von Breslau,BRU,,MALE,1899-10-15,84,adolf-brudes,2
2,Adolfo Schwelm Cruz,Adolfo,Schwelm Cruz,Adolfo Julio Carlos Schwelm Cruz,SCH,,MALE,1923-06-28,11,adolfo-schwelm-cruz,3
3,Adrián Campos,Adrián,Campos,Adrián Campos Suñer,CAM,,MALE,1960-06-17,210,adrian-campos,4
4,Adrian Sutil,Adrian,Sutil,Adrian Sutil,SUT,,MALE,1983-01-11,84,adrian-sutil,5


## Grand Prix Table

Імпортуємо дані з датасету

In [26]:
# Load the grands prix dataset from CSV and preview the first rows.
grand_prix_df = pd.read_csv('../data/f1db-grands-prix.csv')
grand_prix_df.head()

Unnamed: 0,id,name,fullName,shortName,abbreviation,countryId,totalRacesHeld
0,70th-anniversary,70th Anniversary,70th Anniversary Grand Prix,70th Anniversary GP,70A,united-kingdom,1
1,abu-dhabi,Abu Dhabi,Abu Dhabi Grand Prix,Abu Dhabi GP,ABD,united-arab-emirates,15
2,argentina,Argentina,Argentine Grand Prix,Argentine GP,ARG,argentina,20
3,australia,Australia,Australian Grand Prix,Australian GP,AUS,australia,38
4,austria,Austria,Austrian Grand Prix,Austrian GP,AUT,austria,36


Замінимо ідентифікатори країн у таблиці на ті, що згенерувало сховище.

In [27]:
# Translate each dataset country id into the id generated by the warehouse.
# A lookup via map() keeps exactly one value per row and does not rely on the
# merge result lining up with grand_prix_df's index.
# The nullable 'Int64' dtype keeps the ids integral even if some are missing.
country_db_ids = countries_df.set_index('id')['dbId']
grand_prix_df['countryId'] = grand_prix_df['countryId'].map(country_db_ids).astype('Int64')
grand_prix_df.head()

Unnamed: 0,id,name,fullName,shortName,abbreviation,countryId,totalRacesHeld
0,70th-anniversary,70th Anniversary,70th Anniversary Grand Prix,70th Anniversary GP,70A,235,1
1,abu-dhabi,Abu Dhabi,Abu Dhabi Grand Prix,Abu Dhabi GP,ABD,234,15
2,argentina,Argentina,Argentine Grand Prix,Argentine GP,ARG,11,20
3,australia,Australia,Australian Grand Prix,Australian GP,AUS,14,38
4,austria,Austria,Austrian Grand Prix,Austrian GP,AUT,15,36


Приводимо дані у потрібний нам формат для сховища

In [28]:
# Keep the columns stored in the warehouse plus the dataset's own 'id', which is
# used later to look up the generated ids.
# .copy() makes this an independent frame, so adding the 'dbId' column later
# does not raise SettingWithCopyWarning.
grand_prix_df = grand_prix_df[['name', 'fullName', 'shortName', 'abbreviation', 'countryId', 'id']].copy()
grand_prix_df.head()

Unnamed: 0,name,fullName,shortName,abbreviation,countryId,id
0,70th Anniversary,70th Anniversary Grand Prix,70th Anniversary GP,70A,235,70th-anniversary
1,Abu Dhabi,Abu Dhabi Grand Prix,Abu Dhabi GP,ABD,234,abu-dhabi
2,Argentina,Argentine Grand Prix,Argentine GP,ARG,11,argentina
3,Australia,Australian Grand Prix,Australian GP,AUS,14,australia
4,Austria,Austrian Grand Prix,Austrian GP,AUT,15,austria


Перейменуємо стовпці

In [29]:
# Rename by explicit mapping rather than by position, so the result does not
# depend on the column order chosen when the columns were selected.
grand_prix_df = grand_prix_df.rename(columns={
    'fullName': 'full_name',
    'shortName': 'short_name',
    'countryId': 'country_id',
})
grand_prix_df.head()

Unnamed: 0,name,full_name,short_name,abbreviation,country_id,id
0,70th Anniversary,70th Anniversary Grand Prix,70th Anniversary GP,70A,235,70th-anniversary
1,Abu Dhabi,Abu Dhabi Grand Prix,Abu Dhabi GP,ABD,234,abu-dhabi
2,Argentina,Argentine Grand Prix,Argentine GP,ARG,11,argentina
3,Australia,Australian Grand Prix,Australian GP,AUS,14,australia
4,Austria,Austrian Grand Prix,Austrian GP,AUT,15,austria


Вносимо дані з датафрейму у таблицю гран-прі

In [30]:
# Only the warehouse columns are sent to the table; the dataset's own 'id'
# stays in the frame next to the returned ids.
grand_prix_rows = grand_prix_df[['name', 'full_name', 'short_name', 'abbreviation', 'country_id']]
grand_prix_df['dbId'] = insert_data(grand_prix_rows, 'grand_prix')
grand_prix_df.head()

Unnamed: 0,name,full_name,short_name,abbreviation,country_id,id,dbId
0,70th Anniversary,70th Anniversary Grand Prix,70th Anniversary GP,70A,235,70th-anniversary,1
1,Abu Dhabi,Abu Dhabi Grand Prix,Abu Dhabi GP,ABD,234,abu-dhabi,2
2,Argentina,Argentine Grand Prix,Argentine GP,ARG,11,argentina,3
3,Australia,Australian Grand Prix,Australian GP,AUS,14,australia,4
4,Austria,Austrian Grand Prix,Austrian GP,AUT,15,austria,5


## Seasons Table

Імпортуємо дані з датасету

In [31]:
# Load the seasons dataset from CSV and preview the first rows.
seasons_df = pd.read_csv('../data/f1db-seasons.csv')
seasons_df.head()

Unnamed: 0,year
0,1950
1,1951
2,1952
3,1953
4,1954


Вносимо дані з датафрейму у таблицю сезонів

In [32]:
# The whole frame is sent to the 'seasons' table; the returned ids are then
# stored next to the years.
season_ids = insert_data(seasons_df, 'seasons')
seasons_df['dbId'] = season_ids
seasons_df.head()

Unnamed: 0,year,dbId
0,1950,1
1,1951,2
2,1952,3
3,1953,4
4,1954,5


## Races Table

Імпортуємо дані з датасету

In [33]:
# Load the races dataset from CSV and preview the first rows.
races_df = pd.read_csv('../data/f1db-races.csv')
races_df.head()

Unnamed: 0,id,year,round,date,time,grandPrixId,officialName,qualifyingFormat,sprintQualifyingFormat,circuitId,...,qualifying2Date,qualifying2Time,qualifyingDate,qualifyingTime,sprintQualifyingDate,sprintQualifyingTime,sprintRaceDate,sprintRaceTime,warmingUpDate,warmingUpTime
0,1,1950,1,1950-05-13,,great-britain,1950 RAC British Grand Prix,TWO_SESSION,,silverstone,...,,,,,,,,,,
1,2,1950,2,1950-05-21,,monaco,Grand Prix de Monaco 1950,TWO_SESSION,,monaco,...,,,,,,,,,,
2,3,1950,3,1950-05-30,,indianapolis,1950 Indianapolis 500,FOUR_LAPS,,indianapolis,...,,,,,,,,,,
3,4,1950,4,1950-06-04,,switzerland,Grosser Preis der Schweiz 1950,TWO_SESSION,,bremgarten,...,,,,,,,,,,
4,5,1950,5,1950-06-18,,belgium,1950 Belgian Grand Prix,TWO_SESSION,,spa-francorchamps,...,,,,,,,,,,


Замінимо ідентифікатори сезонів у таблиці на ті, що згенерувало сховище.

In [34]:
# Replace each race's year with the id the warehouse generated for that season.
# A lookup via map() keeps exactly one value per row and does not rely on the
# merge result lining up with races_df's index.
season_db_ids = seasons_df.set_index('year')['dbId']
races_df['year'] = races_df['year'].map(season_db_ids).astype('Int64')
races_df.head()

Unnamed: 0,id,year,round,date,time,grandPrixId,officialName,qualifyingFormat,sprintQualifyingFormat,circuitId,...,qualifying2Date,qualifying2Time,qualifyingDate,qualifyingTime,sprintQualifyingDate,sprintQualifyingTime,sprintRaceDate,sprintRaceTime,warmingUpDate,warmingUpTime
0,1,1,1,1950-05-13,,great-britain,1950 RAC British Grand Prix,TWO_SESSION,,silverstone,...,,,,,,,,,,
1,2,1,2,1950-05-21,,monaco,Grand Prix de Monaco 1950,TWO_SESSION,,monaco,...,,,,,,,,,,
2,3,1,3,1950-05-30,,indianapolis,1950 Indianapolis 500,FOUR_LAPS,,indianapolis,...,,,,,,,,,,
3,4,1,4,1950-06-04,,switzerland,Grosser Preis der Schweiz 1950,TWO_SESSION,,bremgarten,...,,,,,,,,,,
4,5,1,5,1950-06-18,,belgium,1950 Belgian Grand Prix,TWO_SESSION,,spa-francorchamps,...,,,,,,,,,,


Замінимо ідентифікатори гран-прі у таблиці на ті, що згенерувало сховище.

In [35]:
# Translate each dataset grand prix id into the id generated by the warehouse.
# A lookup via map() keeps exactly one value per row and does not rely on the
# merge result lining up with races_df's index.
grand_prix_db_ids = grand_prix_df.set_index('id')['dbId']
races_df['grandPrixId'] = races_df['grandPrixId'].map(grand_prix_db_ids).astype('Int64')
races_df.head()

Unnamed: 0,id,year,round,date,time,grandPrixId,officialName,qualifyingFormat,sprintQualifyingFormat,circuitId,...,qualifying2Date,qualifying2Time,qualifyingDate,qualifyingTime,sprintQualifyingDate,sprintQualifyingTime,sprintRaceDate,sprintRaceTime,warmingUpDate,warmingUpTime
0,1,1,1,1950-05-13,,20,1950 RAC British Grand Prix,TWO_SESSION,,silverstone,...,,,,,,,,,,
1,2,1,2,1950-05-21,,32,Grand Prix de Monaco 1950,TWO_SESSION,,monaco,...,,,,,,,,,,
2,3,1,3,1950-05-30,,23,1950 Indianapolis 500,FOUR_LAPS,,indianapolis,...,,,,,,,,,,
3,4,1,4,1950-06-04,,48,Grosser Preis der Schweiz 1950,TWO_SESSION,,bremgarten,...,,,,,,,,,,
4,5,1,5,1950-06-18,,8,1950 Belgian Grand Prix,TWO_SESSION,,spa-francorchamps,...,,,,,,,,,,


Замінимо ідентифікатори гоночних трас у таблиці на ті, що згенерувало сховище.

In [36]:
# Translate each dataset circuit id into the id generated by the warehouse.
# A lookup via map() keeps exactly one value per row and does not rely on the
# merge result lining up with races_df's index.
circuit_db_ids = circuits_df.set_index('id')['dbId']
races_df['circuitId'] = races_df['circuitId'].map(circuit_db_ids).astype('Int64')
races_df.head()

Unnamed: 0,id,year,round,date,time,grandPrixId,officialName,qualifyingFormat,sprintQualifyingFormat,circuitId,...,qualifying2Date,qualifying2Time,qualifyingDate,qualifyingTime,sprintQualifyingDate,sprintQualifyingTime,sprintRaceDate,sprintRaceTime,warmingUpDate,warmingUpTime
0,1,1,1,1950-05-13,,20,1950 RAC British Grand Prix,TWO_SESSION,,66,...,,,,,,,,,,
1,2,1,2,1950-05-21,,32,Grand Prix de Monaco 1950,TWO_SESSION,,44,...,,,,,,,,,,
2,3,1,3,1950-05-30,,23,1950 Indianapolis 500,FOUR_LAPS,,28,...,,,,,,,,,,
3,4,1,4,1950-06-04,,48,Grosser Preis der Schweiz 1950,TWO_SESSION,,11,...,,,,,,,,,,
4,5,1,5,1950-06-18,,8,1950 Belgian Grand Prix,TWO_SESSION,,68,...,,,,,,,,,,


Приводимо дані у потрібний нам формат для сховища

In [37]:
# Narrow the frame to the columns that are loaded into the races table.
races_df = races_df.loc[
    :,
    [
        'id', 'year', 'round', 'date', 'grandPrixId',
        'officialName', 'circuitId', 'courseLength', 'laps', 'distance',
    ],
]
races_df.head()

Unnamed: 0,id,year,round,date,grandPrixId,officialName,circuitId,courseLength,laps,distance
0,1,1,1,1950-05-13,20,1950 RAC British Grand Prix,66,4.649,70,325.43
1,2,1,2,1950-05-21,32,Grand Prix de Monaco 1950,44,3.181,100,318.1
2,3,1,3,1950-05-30,23,1950 Indianapolis 500,28,4.023,138,555.224
3,4,1,4,1950-06-04,48,Grosser Preis der Schweiz 1950,11,7.28,42,305.76
4,5,1,5,1950-06-18,8,1950 Belgian Grand Prix,68,14.12,35,494.2


Перейменуємо стовпці

In [38]:
# Rename by explicit mapping rather than by position, so the result does not
# depend on the column order chosen when the columns were selected.
races_df = races_df.rename(columns={
    'year': 'season_id',
    'grandPrixId': 'grand_prix_id',
    'officialName': 'official_name',
    'circuitId': 'circuit_id',
    'courseLength': 'course_length',
})
races_df.head()

Unnamed: 0,id,season_id,round,date,grand_prix_id,official_name,circuit_id,course_length,laps,distance
0,1,1,1,1950-05-13,20,1950 RAC British Grand Prix,66,4.649,70,325.43
1,2,1,2,1950-05-21,32,Grand Prix de Monaco 1950,44,3.181,100,318.1
2,3,1,3,1950-05-30,23,1950 Indianapolis 500,28,4.023,138,555.224
3,4,1,4,1950-06-04,48,Grosser Preis der Schweiz 1950,11,7.28,42,305.76
4,5,1,5,1950-06-18,8,1950 Belgian Grand Prix,68,14.12,35,494.2


Вносимо дані з датафрейму у таблицю гонок

In [39]:
# Load the races into the 'races' table; the list of ids returned by
# insert_data is not used here and is discarded.
insert_data(races_df, 'races')
races_df.head()

Unnamed: 0,id,season_id,round,date,grand_prix_id,official_name,circuit_id,course_length,laps,distance
0,1,1,1,1950-05-13,20,1950 RAC British Grand Prix,66,4.649,70,325.43
1,2,1,2,1950-05-21,32,Grand Prix de Monaco 1950,44,3.181,100,318.1
2,3,1,3,1950-05-30,23,1950 Indianapolis 500,28,4.023,138,555.224
3,4,1,4,1950-06-04,48,Grosser Preis der Schweiz 1950,11,7.28,42,305.76
4,5,1,5,1950-06-18,8,1950 Belgian Grand Prix,68,14.12,35,494.2


## Зберігаємо дані про ідентифікатори в окремі файли для подальшого імпорту в сховище даних в таблиці фактів

In [40]:
# Columns kept in every exported mapping: the dataset identifier and the id
# generated by the warehouse.
columns = ['id', 'dbId']

# seasons_df holds ('year', 'dbId'); relabel it so it matches the other frames.
seasons_df.columns = columns

folder = '../adapted_data/'
# to_csv does not create missing directories, so make sure the target exists.
os.makedirs(folder, exist_ok=True)

dfs_to_save = {
    'countries': countries_df,
    'circuits': circuits_df,
    'constructors': constructors_df,
    'drivers': drivers_df,
    'grand_prix': grand_prix_df,
    'seasons': seasons_df
}

# Write one '<name>Id.csv' file per dimension table.
for file_name, df in dfs_to_save.items():
    file_path = os.path.join(folder, f"{file_name}Id.csv")
    df[columns].to_csv(file_path, index=False)