In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import PreprocessingUtils as utils
from GeneralUtils import get_historical_weather

In [2]:
# Station coordinates table (merged onto the delay records on 'Stacja' later).
gps = pd.read_csv('../data/station_gps.csv')
# Raw delay records, one row per stop of a train run.
data = pd.read_parquet('../data/raw_data_delays.parquet')

In [3]:
# Run the project's raw-data preparation, then join the station GPS table.
# The left join on 'Stacja' keeps every delay row even when a station has no
# matching entry in `gps`.
data = utils.prepare_raw_data(data).merge(gps, on='Stacja', how='left')

### station count
<ul>
    <li>full route station count</li>
    <li>number of stations already passed at the current station (0-based position on the route)</li>
</ul>

In [4]:
# Rows that share the same (pk, Relacja) pair form one group.
route_groups = data.groupby(['pk', 'Relacja'])

# 0-based running position of each row inside its group.
position_in_route = route_groups.cumcount()
# Number of non-null 'Relacja' entries in the group, broadcast to every row.
stations_in_route = route_groups['Relacja'].transform('count')

data['station_count_on_curr_station'] = position_in_route
data['full_route_station_count'] = stations_in_route

### distances
<ul>
    <li>full route distance</li>
    <li>distance to the current station from the previous station</li>
    <li>distance from start station</li>
    <li>distance to final station</li>
    <li>distance to the nearest big city station</li>
</ul>

In [None]:
# Rebind `data` to the result of the project helper, which receives both the
# delay frame and the station GPS table.
# NOTE(review): presumably this adds the distance columns seen in the saved
# outputs (distance_to_near_city_station, near_city_station_name,
# distance_to_prev_station, distance_from_start, distance_to_final,
# full_route_distance) - confirm against PreprocessingUtils.
data = utils.count_distances(data, gps)

### date features and holidays

In [None]:
# Timestamp columns that are passed through utils.fix_dates one at a time.
cols = ['arrival_on_time','departure_on_time']
for col in cols:
    # Each call rebinds `data`, so the second column is processed on the
    # result of the first call.
    data = utils.fix_dates(data, col)
    
# NOTE(review): in the saved output of the display cell that follows, the first
# rows have NaN in the weekday_* columns while the last rows have NaN/NaT in
# every original column and values only in weekday_*. That pattern suggests an
# index misalignment when the one-hot columns are joined inside
# apply_date_features - confirm in PreprocessingUtils.
data = utils.apply_date_features(data)

In [7]:
# Display the frame to inspect the columns produced by the date-feature step
# (a bare `data` renders the truncated head/tail view of the whole frame).
data

Unnamed: 0,pk,Relacja,Stacja,Opóźnienie przyjazdu,Opóźnienie odjazdu,arrival_on_time,departure_on_time,lat,lon,station_count_on_curr_station,...,weekday_Sunday,weekday_Thursday,weekday_Tuesday,weekday_Wednesday,hour_angle_sin,weekday_angle_sin,monthday_angle_sin,yearday_angle_sin,weekyear_angle_sin,month_angle_sin
0,1.0,Przeworsk - Rzeszów Główny,Przeworsk,0.0,0.0,2022-12-01 06:55:00,2022-12-01 06:55:00,50.067161,22.503232,0.0,...,,,,,1.000000,0.433884,0.207912,-0.493776,-0.464723,-2.449294e-16
1,1.0,Przeworsk - Rzeszów Główny,Grzęska,0.0,0.0,2022-12-01 06:58:30,2022-12-01 06:59:00,50.080037,22.452564,1.0,...,,,,,1.000000,0.433884,0.207912,-0.493776,-0.464723,-2.449294e-16
2,1.0,Przeworsk - Rzeszów Główny,Rogóżno koło Łańcuta,0.0,0.0,2022-12-01 07:03:00,2022-12-01 07:03:30,50.083573,22.362513,2.0,...,,,,,0.965926,0.433884,0.207912,-0.493776,-0.464723,-2.449294e-16
3,1.0,Przeworsk - Rzeszów Główny,Kosina,0.0,0.0,2022-12-01 07:06:30,2022-12-01 07:07:00,50.083570,22.322846,3.0,...,,,,,0.965926,0.433884,0.207912,-0.493776,-0.464723,-2.449294e-16
4,1.0,Przeworsk - Rzeszów Główny,Głuchów,0.0,0.0,2022-12-01 07:09:30,2022-12-01 07:10:00,50.083469,22.279077,4.0,...,,,,,0.965926,0.433884,0.207912,-0.493776,-0.464723,-2.449294e-16
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1138205,,,,,,NaT,NaT,,,,...,1.0,0.0,0.0,0.0,,,,,,
1138206,,,,,,NaT,NaT,,,,...,1.0,0.0,0.0,0.0,,,,,,
1138207,,,,,,NaT,NaT,,,,...,1.0,0.0,0.0,0.0,,,,,,
1138208,,,,,,NaT,NaT,,,,...,1.0,0.0,0.0,0.0,,,,,,


### stop duration (minutes)

In [7]:
# Time between arrival and departure at each stop, converted from seconds
# to minutes.
data['stop_duration'] = (
    data['departure_on_time']
    .sub(data['arrival_on_time'])
    .dt.total_seconds()
    .div(60)
)

In [10]:
# Display the frame including the new stop_duration column.
# The previous expression, data[''], looks up a column whose label is the
# empty string; no such column exists, so it raises KeyError. The saved output
# of this cell is the whole frame, which is what a bare `data` produces.
data

Unnamed: 0,pk,Relacja,Stacja,Opóźnienie przyjazdu,Opóźnienie odjazdu,arrival_on_time,departure_on_time,lat,lon,station_count_on_curr_station,full_route_station_count,distance_to_near_city_station,near_city_station_name,distance_to_prev_station,distance_from_start,distance_to_final,full_route_distance,stop_duration
0,1,Przeworsk - Rzeszów Główny,Przeworsk,0,0,2022-12-01 06:55:00,2022-12-01 06:55:00,50.067161,22.503232,0,10,35.612095,Rzeszów Główny,0.000000,0.000000,36.484418,36.484418,0.0
1,1,Przeworsk - Rzeszów Główny,Grzęska,0,0,2022-12-01 06:58:30,2022-12-01 06:59:00,50.080037,22.452564,1,10,32.152599,Rzeszów Główny,3.889077,3.889077,31.639766,36.484418,0.5
2,1,Przeworsk - Rzeszów Główny,Rogóżno koło Łańcuta,0,0,2022-12-01 07:03:00,2022-12-01 07:03:30,50.083573,22.362513,2,10,25.854248,Rzeszów Główny,6.437439,10.326516,28.265814,36.484418,0.5
3,1,Przeworsk - Rzeszów Główny,Kosina,0,0,2022-12-01 07:06:30,2022-12-01 07:07:00,50.083570,22.322846,3,10,23.070959,Rzeszów Główny,2.830260,13.156776,22.842960,36.484418,0.5
4,1,Przeworsk - Rzeszów Główny,Głuchów,0,0,2022-12-01 07:09:30,2022-12-01 07:10:00,50.083469,22.279077,4,10,20.013113,Rzeszów Główny,3.122964,16.279739,20.043487,36.484418,0.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1138205,52128,Kraków Lotnisko - Wieliczka Rynek-Kopalnia,Kraków Bieżanów,0,0,2023-04-23 08:52:12,2023-04-23 08:52:42,50.021694,20.029257,10,15,76.751356,Katowice,2.235773,19.677470,6.634256,24.366635,0.5
1138206,52128,Kraków Lotnisko - Wieliczka Rynek-Kopalnia,Kraków Bieżanów Drożdżownia,0,0,2023-04-23 08:55:00,2023-04-23 08:55:30,50.010696,20.035373,11,15,77.593166,Katowice,1.298655,20.976125,4.328076,24.366635,0.5
1138207,52128,Kraków Lotnisko - Wieliczka Rynek-Kopalnia,Wieliczka Bogucice,0,0,2023-04-23 08:57:24,2023-04-23 08:57:54,49.998474,20.036696,12,15,78.180632,Katowice,1.362311,22.338436,3.617016,24.366635,0.5
1138208,52128,Kraków Lotnisko - Wieliczka Rynek-Kopalnia,Wieliczka Park,0,0,2023-04-23 09:00:06,2023-04-23 09:00:36,49.989105,20.049334,13,15,79.411243,Katowice,1.378941,23.717377,1.902019,24.366635,0.5


### date features and holidays

### weather features

In [11]:
# Display the current frame before any weather columns are added; the saved
# output is the same table as the previous display cell.
data

Unnamed: 0,pk,Relacja,Stacja,Opóźnienie przyjazdu,Opóźnienie odjazdu,arrival_on_time,departure_on_time,lat,lon,station_count_on_curr_station,full_route_station_count,distance_to_near_city_station,near_city_station_name,distance_to_prev_station,distance_from_start,distance_to_final,full_route_distance,stop_duration
0,1,Przeworsk - Rzeszów Główny,Przeworsk,0,0,2022-12-01 06:55:00,2022-12-01 06:55:00,50.067161,22.503232,0,10,35.612095,Rzeszów Główny,0.000000,0.000000,36.484418,36.484418,0.0
1,1,Przeworsk - Rzeszów Główny,Grzęska,0,0,2022-12-01 06:58:30,2022-12-01 06:59:00,50.080037,22.452564,1,10,32.152599,Rzeszów Główny,3.889077,3.889077,31.639766,36.484418,0.5
2,1,Przeworsk - Rzeszów Główny,Rogóżno koło Łańcuta,0,0,2022-12-01 07:03:00,2022-12-01 07:03:30,50.083573,22.362513,2,10,25.854248,Rzeszów Główny,6.437439,10.326516,28.265814,36.484418,0.5
3,1,Przeworsk - Rzeszów Główny,Kosina,0,0,2022-12-01 07:06:30,2022-12-01 07:07:00,50.083570,22.322846,3,10,23.070959,Rzeszów Główny,2.830260,13.156776,22.842960,36.484418,0.5
4,1,Przeworsk - Rzeszów Główny,Głuchów,0,0,2022-12-01 07:09:30,2022-12-01 07:10:00,50.083469,22.279077,4,10,20.013113,Rzeszów Główny,3.122964,16.279739,20.043487,36.484418,0.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1138205,52128,Kraków Lotnisko - Wieliczka Rynek-Kopalnia,Kraków Bieżanów,0,0,2023-04-23 08:52:12,2023-04-23 08:52:42,50.021694,20.029257,10,15,76.751356,Katowice,2.235773,19.677470,6.634256,24.366635,0.5
1138206,52128,Kraków Lotnisko - Wieliczka Rynek-Kopalnia,Kraków Bieżanów Drożdżownia,0,0,2023-04-23 08:55:00,2023-04-23 08:55:30,50.010696,20.035373,11,15,77.593166,Katowice,1.298655,20.976125,4.328076,24.366635,0.5
1138207,52128,Kraków Lotnisko - Wieliczka Rynek-Kopalnia,Wieliczka Bogucice,0,0,2023-04-23 08:57:24,2023-04-23 08:57:54,49.998474,20.036696,12,15,78.180632,Katowice,1.362311,22.338436,3.617016,24.366635,0.5
1138208,52128,Kraków Lotnisko - Wieliczka Rynek-Kopalnia,Wieliczka Park,0,0,2023-04-23 09:00:06,2023-04-23 09:00:36,49.989105,20.049334,13,15,79.411243,Katowice,1.378941,23.717377,1.902019,24.366635,0.5


In [2]:
# Single trial call to the weather helper. The two numbers equal the lat/lon
# of the Przeworsk row and the date equals the first day shown in the frame
# outputs above.
# NOTE(review): argument order (lat, lon, date) is assumed from those values -
# confirm against GeneralUtils.get_historical_weather.
# NOTE(review): this cell's execution count restarts at 2, so it was run in a
# fresh kernel after the cells above; re-run the notebook top to bottom.
x = get_historical_weather(50.067161, 22.503232, '2022-12-01')

In [3]:
# Display the raw response of the trial call. In the saved run this is an
# error payload (code 2008, 'API key has been disabled.'), so no weather data
# was retrieved.
x

{'error': {'code': 2008, 'message': 'API key has been disabled.'}}