In [231]:
import pandas as pd

## Preprocess EV and RD Data and Merge Them
Read in the EV and RD data cleaned by Sherlock_IFSurface_DataClean.ipynb and Filter

In [232]:
ev = pd.read_csv('Sherlock_data/EV.csv')
rd = pd.read_csv('Sherlock_data/RD.csv')
print(ev.shape, rd.shape)

(608047, 30) (24381, 31)


### EV data: Filter
1. Limit events to only Initialization, Takeoff, Landing, and Stop events
2. Remove UNKN callsigns (AcId)
3. Remove records without characters in aircraft callsigns (AcId) as they do not belong to any particular airlines

In [233]:
ev['EvType'].value_counts(dropna=False)

EV_XING    181126
EV_MOF     140849
EV_USER    109273
EV_INIT     68487
EV_STOP     68487
EV_TOF      12400
EV_LND      12379
EV_ON        7536
EV_OFF       7510
Name: EvType, dtype: int64

In [234]:
# Limit EV data to the following events only
events = ['EV_INIT', 'EV_TOF', 'EV_LND', 'EV_STOP']
fil_ev = ev.loc[ev['EvType'].isin(events)]
print(ev.shape, fil_ev.shape)

(608047, 30) (161753, 30)


In [235]:
# Remove records with UNKN callsigns
fil2_ev = fil_ev.loc[fil_ev['AcId'] != 'UNKN']
print(fil2_ev.shape)

(147916, 30)


In [236]:
# Remove records without airline characters in callsigns
fil3_ev = fil2_ev[fil2_ev['AcId'].str.contains('[A-Za-z]')]
print(fil3_ev.shape)

(131812, 30)


### EV Data: Find/Create identifying feature for each flight in the dataset

In [237]:
fil3_ev['Msn'].value_counts()

37214    24
37240    23
38568    23
37344    23
37192    23
         ..
60324     6
86158     6
36404     6
44637     6
45791     6
Name: Msn, Length: 16134, dtype: int64

In [238]:
# set the max columns to none
pd.set_option('display.max_columns', None)
fil3_ev.loc[fil3_ev['Msn'] == 38422]

Unnamed: 0,lKey,cKey,StartDate,StartTime,tMidnightSecs,tStartSecs,tStopSecs,tStart,tStop,Duration,Msn,AcId,AcType,tEv,EvType,ObjClass,OldName,NewName,Lat,Lon,aEv,cEv,vEv,rEv,DTD,FlD,DDT,FlT,EvNumInfo,EvCharInfo
22845,33648,LAX+ASDEX_20220903_193311_33648,09/03/2022,19:29:34,1662163000.0,1662233000.0,1662234000.0,70174.0,70391.0,217.0,38422,N345CS,C172,70174.0,EV_INIT,CTR,OUTSIDE,OUTSIDE,33.90246,-118.39629,61.3,320,144.4,360,0.0,0.0,0.0,0.0,0.0,
22846,33648,LAX+ASDEX_20220903_193311_33648,09/03/2022,19:29:34,1662163000.0,1662233000.0,1662234000.0,70174.0,70391.0,217.0,38422,N345CS,C172,70174.0,EV_INIT,SUA,OUTSIDE,OUTSIDE,33.90246,-118.39629,61.3,320,144.4,360,0.0,0.0,0.0,0.0,0.0,
22847,33648,LAX+ASDEX_20220903_193311_33648,09/03/2022,19:29:34,1662163000.0,1662233000.0,1662234000.0,70174.0,70391.0,217.0,38422,N345CS,C172,70174.0,EV_INIT,V02,OUTSIDE,OUTSIDE,33.90246,-118.39629,61.3,320,144.4,360,0.0,0.0,0.0,0.0,0.0,
22850,33648,LAX+ASDEX_20220903_193311_33648,09/03/2022,19:29:34,1662163000.0,1662233000.0,1662234000.0,70174.0,70391.0,217.0,38422,N345CS,C172,70391.0,EV_STOP,CTR,OUTSIDE,OUTSIDE,33.98757,-118.50199,61.3,296,136.6,0,7.536,7.734,0.199,217.0,0.0,
22851,33648,LAX+ASDEX_20220903_193311_33648,09/03/2022,19:29:34,1662163000.0,1662233000.0,1662234000.0,70174.0,70391.0,217.0,38422,N345CS,C172,70391.0,EV_STOP,SUA,OUTSIDE,OUTSIDE,33.98757,-118.50199,61.3,296,136.6,0,7.536,7.734,0.199,217.0,0.0,
22852,33648,LAX+ASDEX_20220903_193311_33648,09/03/2022,19:29:34,1662163000.0,1662233000.0,1662234000.0,70174.0,70391.0,217.0,38422,N345CS,C172,70391.0,EV_STOP,V02,OUTSIDE,OUTSIDE,33.98757,-118.50199,61.3,296,136.6,0,7.536,7.734,0.199,217.0,0.0,
94283,33534,LAX+ASDEX_20221002_044331_33534,10/02/2022,04:38:14,1664669000.0,1664685000.0,1664686000.0,16694.0,17011.0,317.0,38422,SWA220,B38M,16694.0,EV_TOF,APT,,LAX,33.95667,-118.35739,9.8,262,149.8,-816,0.0,0.0,0.0,0.0,1.544,
94285,33534,LAX+ASDEX_20221002_044331_33534,10/02/2022,04:38:14,1664669000.0,1664685000.0,1664686000.0,16694.0,17011.0,317.0,38422,SWA220,B38M,16694.0,EV_INIT,CTR,OUTSIDE,OUTSIDE,33.95667,-118.35739,9.8,262,149.8,-816,0.0,0.0,0.0,0.0,0.0,
94286,33534,LAX+ASDEX_20221002_044331_33534,10/02/2022,04:38:14,1664669000.0,1664685000.0,1664686000.0,16694.0,17011.0,317.0,38422,SWA220,B38M,16694.0,EV_INIT,SUA,OUTSIDE,OUTSIDE,33.95667,-118.35739,9.8,262,149.8,-816,0.0,0.0,0.0,0.0,0.0,
94287,33534,LAX+ASDEX_20221002_044331_33534,10/02/2022,04:38:14,1664669000.0,1664685000.0,1664686000.0,16694.0,17011.0,317.0,38422,SWA220,B38M,16694.0,EV_INIT,V02,OUTSIDE,OUTSIDE,33.95667,-118.35739,9.8,262,149.8,-816,0.0,0.0,0.0,0.0,0.0,


Cannot use 'Msn' column to identify flights, as it is linked to different aircraft callsigns on different days. From the above result, 'lKey' column is also not a great identifying column. Two columns may be needed to identify a single flight => 'lKey' and 'AcId' combined may form a good identifier.

In [239]:
fil3_ev.columns

Index(['lKey', 'cKey', 'StartDate', 'StartTime', 'tMidnightSecs', 'tStartSecs',
       'tStopSecs', 'tStart', 'tStop', 'Duration', 'Msn', 'AcId', 'AcType',
       'tEv', 'EvType', 'ObjClass', 'OldName', 'NewName', 'Lat', 'Lon', 'aEv',
       'cEv', 'vEv', 'rEv', 'DTD', 'FlD', 'DDT', 'FlT', 'EvNumInfo',
       'EvCharInfo'],
      dtype='object')

In [240]:
fil3_ev['pKey'] = fil3_ev['lKey'].map(str) + '_' + fil3_ev['AcId']
fil3_ev['mergeKey'] = fil3_ev['Msn'].map(str) + '_' + fil3_ev['StartDate']
fil3_ev.head(1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fil3_ev['pKey'] = fil3_ev['lKey'].map(str) + '_' + fil3_ev['AcId']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fil3_ev['mergeKey'] = fil3_ev['Msn'].map(str) + '_' + fil3_ev['StartDate']


Unnamed: 0,lKey,cKey,StartDate,StartTime,tMidnightSecs,tStartSecs,tStopSecs,tStart,tStop,Duration,Msn,AcId,AcType,tEv,EvType,ObjClass,OldName,NewName,Lat,Lon,aEv,cEv,vEv,rEv,DTD,FlD,DDT,FlT,EvNumInfo,EvCharInfo,pKey,mergeKey
1,32475,LAX+ASDEX_20220903_080418_32475,09/03/2022,07:38:58,1662163000.0,1662191000.0,1662192000.0,27538.0,29058.0,1520.0,37185,GTI7108,B744,27538.0,EV_INIT,CTR,OUTSIDE,OUTSIDE,33.93553,-118.54432,20.9,83,161.0,-2280,0.0,0.0,0.0,0.0,0.0,,32475_GTI7108,37185_09/03/2022


In [241]:
fil3_ev['pKey'].value_counts(dropna=False)

38285_O01       8
32705_O04       8
31574_ASA68B    8
31573_O04       8
32558_O91       8
               ..
36979_BT92P     6
32016_N2732T    6
43765_N518AG    6
33662_N84347    6
84841_N721BB    6
Name: pKey, Length: 18743, dtype: int64

In [242]:
fil3_ev.loc[fil3_ev['pKey'] == '36959_O05']

Unnamed: 0,lKey,cKey,StartDate,StartTime,tMidnightSecs,tStartSecs,tStopSecs,tStart,tStop,Duration,Msn,AcId,AcType,tEv,EvType,ObjClass,OldName,NewName,Lat,Lon,aEv,cEv,vEv,rEv,DTD,FlD,DDT,FlT,EvNumInfo,EvCharInfo,pKey,mergeKey


In [243]:
unique_ev = fil3_ev['pKey'].unique().tolist()
print(len(unique_ev))

18743


In [244]:
fil3_ev.loc[fil3_ev['pKey'] == unique_ev[0]]

Unnamed: 0,lKey,cKey,StartDate,StartTime,tMidnightSecs,tStartSecs,tStopSecs,tStart,tStop,Duration,Msn,AcId,AcType,tEv,EvType,ObjClass,OldName,NewName,Lat,Lon,aEv,cEv,vEv,rEv,DTD,FlD,DDT,FlT,EvNumInfo,EvCharInfo,pKey,mergeKey
1,32475,LAX+ASDEX_20220903_080418_32475,09/03/2022,07:38:58,1662163000.0,1662191000.0,1662192000.0,27538.0,29058.0,1520.0,37185,GTI7108,B744,27538.0,EV_INIT,CTR,OUTSIDE,OUTSIDE,33.93553,-118.54432,20.9,83,161.0,-2280,0.0,0.0,0.0,0.0,0.0,,32475_GTI7108,37185_09/03/2022
2,32475,LAX+ASDEX_20220903_080418_32475,09/03/2022,07:38:58,1662163000.0,1662191000.0,1662192000.0,27538.0,29058.0,1520.0,37185,GTI7108,B744,27538.0,EV_INIT,SUA,OUTSIDE,OUTSIDE,33.93553,-118.54432,20.9,83,161.0,-2280,0.0,0.0,0.0,0.0,0.0,,32475_GTI7108,37185_09/03/2022
3,32475,LAX+ASDEX_20220903_080418_32475,09/03/2022,07:38:58,1662163000.0,1662191000.0,1662192000.0,27538.0,29058.0,1520.0,37185,GTI7108,B744,27538.0,EV_INIT,V02,OUTSIDE,OUTSIDE,33.93553,-118.54432,20.9,83,161.0,-2280,0.0,0.0,0.0,0.0,0.0,,32475_GTI7108,37185_09/03/2022
4,32475,LAX+ASDEX_20220903_080418_32475,09/03/2022,07:38:58,1662163000.0,1662191000.0,1662192000.0,27538.0,29058.0,1520.0,37185,GTI7108,B744,27664.0,EV_LND,APT,,LAX,33.94681,-118.43468,3.3,82,172.1,-780,0.0,0.0,0.0,0.0,0.001,06R,32475_GTI7108,37185_09/03/2022
42,32475,LAX+ASDEX_20220903_080418_32475,09/03/2022,07:38:58,1662163000.0,1662191000.0,1662192000.0,27538.0,29058.0,1520.0,37185,GTI7108,B744,29058.0,EV_STOP,CTR,OUTSIDE,OUTSIDE,33.93324,-118.40485,1.1,157,1.8,0,7.138,9.954,2.817,1520.0,0.0,,32475_GTI7108,37185_09/03/2022
43,32475,LAX+ASDEX_20220903_080418_32475,09/03/2022,07:38:58,1662163000.0,1662191000.0,1662192000.0,27538.0,29058.0,1520.0,37185,GTI7108,B744,29058.0,EV_STOP,SUA,OUTSIDE,OUTSIDE,33.93324,-118.40485,1.1,157,1.8,0,7.138,9.954,2.817,1520.0,0.0,,32475_GTI7108,37185_09/03/2022
44,32475,LAX+ASDEX_20220903_080418_32475,09/03/2022,07:38:58,1662163000.0,1662191000.0,1662192000.0,27538.0,29058.0,1520.0,37185,GTI7108,B744,29058.0,EV_STOP,V02,RAMP-SouthPads@LAX,RAMP-SouthPads@LAX,33.93324,-118.40485,1.1,157,1.8,0,0.018,0.02,0.002,63.0,0.0,,32475_GTI7108,37185_09/03/2022


### EV Data: Drop duplicating records for each flight based on record type
For example, records with the duplicating 'pKey' and 'EvType' should be dropped.

In [245]:
# Drop duplicates of events
fil4_ev = fil3_ev.drop_duplicates(subset = ['pKey', 'EvType'])
print(fil4_ev.shape)

(56840, 32)


In [246]:
fil4_ev.loc[fil4_ev['pKey'] == unique_ev[0]]

Unnamed: 0,lKey,cKey,StartDate,StartTime,tMidnightSecs,tStartSecs,tStopSecs,tStart,tStop,Duration,Msn,AcId,AcType,tEv,EvType,ObjClass,OldName,NewName,Lat,Lon,aEv,cEv,vEv,rEv,DTD,FlD,DDT,FlT,EvNumInfo,EvCharInfo,pKey,mergeKey
1,32475,LAX+ASDEX_20220903_080418_32475,09/03/2022,07:38:58,1662163000.0,1662191000.0,1662192000.0,27538.0,29058.0,1520.0,37185,GTI7108,B744,27538.0,EV_INIT,CTR,OUTSIDE,OUTSIDE,33.93553,-118.54432,20.9,83,161.0,-2280,0.0,0.0,0.0,0.0,0.0,,32475_GTI7108,37185_09/03/2022
4,32475,LAX+ASDEX_20220903_080418_32475,09/03/2022,07:38:58,1662163000.0,1662191000.0,1662192000.0,27538.0,29058.0,1520.0,37185,GTI7108,B744,27664.0,EV_LND,APT,,LAX,33.94681,-118.43468,3.3,82,172.1,-780,0.0,0.0,0.0,0.0,0.001,06R,32475_GTI7108,37185_09/03/2022
42,32475,LAX+ASDEX_20220903_080418_32475,09/03/2022,07:38:58,1662163000.0,1662191000.0,1662192000.0,27538.0,29058.0,1520.0,37185,GTI7108,B744,29058.0,EV_STOP,CTR,OUTSIDE,OUTSIDE,33.93324,-118.40485,1.1,157,1.8,0,7.138,9.954,2.817,1520.0,0.0,,32475_GTI7108,37185_09/03/2022


### EV Data: Propagate the correct time based on tEv.
Cannot use 'StartTime' to get the time of the event recorded, need to combine with StartDate to get the full picture.

In [247]:
# Convert seconds to HH:MM:SS format
fil4_ev['RecordTime'] = pd.to_datetime(fil4_ev['tEv'], unit='s').dt.strftime("%H:%M:%S")
fil4_ev['time_Duration'] = pd.to_datetime(fil4_ev['Duration'], unit='s').dt.strftime("%H:%M:%S")

# Reset index of the dataframe
fil4_ev.reset_index(drop=True, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fil4_ev['RecordTime'] = pd.to_datetime(fil4_ev['tEv'], unit='s').dt.strftime("%H:%M:%S")
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fil4_ev['time_Duration'] = pd.to_datetime(fil4_ev['Duration'], unit='s').dt.strftime("%H:%M:%S")


In [248]:
fil4_ev.head(10)

Unnamed: 0,lKey,cKey,StartDate,StartTime,tMidnightSecs,tStartSecs,tStopSecs,tStart,tStop,Duration,Msn,AcId,AcType,tEv,EvType,ObjClass,OldName,NewName,Lat,Lon,aEv,cEv,vEv,rEv,DTD,FlD,DDT,FlT,EvNumInfo,EvCharInfo,pKey,mergeKey,RecordTime,time_Duration
0,32475,LAX+ASDEX_20220903_080418_32475,09/03/2022,07:38:58,1662163000.0,1662191000.0,1662192000.0,27538.0,29058.0,1520.0,37185,GTI7108,B744,27538.0,EV_INIT,CTR,OUTSIDE,OUTSIDE,33.93553,-118.54432,20.9,83,161.0,-2280,0.0,0.0,0.0,0.0,0.0,,32475_GTI7108,37185_09/03/2022,07:38:58,00:25:20
1,32475,LAX+ASDEX_20220903_080418_32475,09/03/2022,07:38:58,1662163000.0,1662191000.0,1662192000.0,27538.0,29058.0,1520.0,37185,GTI7108,B744,27664.0,EV_LND,APT,,LAX,33.94681,-118.43468,3.3,82,172.1,-780,0.0,0.0,0.0,0.0,0.001,06R,32475_GTI7108,37185_09/03/2022,07:41:04,00:25:20
2,32475,LAX+ASDEX_20220903_080418_32475,09/03/2022,07:38:58,1662163000.0,1662191000.0,1662192000.0,27538.0,29058.0,1520.0,37185,GTI7108,B744,29058.0,EV_STOP,CTR,OUTSIDE,OUTSIDE,33.93324,-118.40485,1.1,157,1.8,0,7.138,9.954,2.817,1520.0,0.0,,32475_GTI7108,37185_09/03/2022,08:04:18,00:25:20
3,32476,LAX+ASDEX_20220903_080324_32476,09/03/2022,07:52:21,1662163000.0,1662192000.0,1662192000.0,28341.0,29004.0,663.0,37186,E59,,28341.0,EV_TOF,APT,,LAX,33.93237,-118.41806,1.1,75,4.2,0,0.0,0.0,0.0,0.0,0.093,,32476_E59,37186_09/03/2022,07:52:21,00:11:03
4,32476,LAX+ASDEX_20220903_080324_32476,09/03/2022,07:52:21,1662163000.0,1662192000.0,1662192000.0,28341.0,29004.0,663.0,37186,E59,,28341.0,EV_INIT,CTR,OUTSIDE,OUTSIDE,33.93237,-118.41806,1.1,75,4.2,0,0.0,0.0,0.0,0.0,0.0,,32476_E59,37186_09/03/2022,07:52:21,00:11:03
5,32476,LAX+ASDEX_20220903_080324_32476,09/03/2022,07:52:21,1662163000.0,1662192000.0,1662192000.0,28341.0,29004.0,663.0,37186,E59,,29004.0,EV_LND,APT,,LAX,33.93707,-118.39239,1.1,239,2.7,0,1.345,1.561,0.216,663.0,0.495,,32476_E59,37186_09/03/2022,08:03:24,00:11:03
6,32476,LAX+ASDEX_20220903_080324_32476,09/03/2022,07:52:21,1662163000.0,1662192000.0,1662192000.0,28341.0,29004.0,663.0,37186,E59,,29004.0,EV_STOP,CTR,OUTSIDE,OUTSIDE,33.93707,-118.39239,1.1,239,2.7,0,1.345,1.561,0.216,663.0,0.0,,32476_E59,37186_09/03/2022,08:03:24,00:11:03
7,32477,LAX+ASDEX_20220903_080440_32477,09/03/2022,07:55:19,1662163000.0,1662192000.0,1662192000.0,28519.0,29080.0,561.0,37187,SW13,ACFT,28519.0,EV_TOF,APT,,LAX,33.93717,-118.43025,1.1,203,9.7,0,0.0,0.0,0.0,0.0,0.43,,32477_SW13,37187_09/03/2022,07:55:19,00:09:21
8,32477,LAX+ASDEX_20220903_080440_32477,09/03/2022,07:55:19,1662163000.0,1662192000.0,1662192000.0,28519.0,29080.0,561.0,37187,SW13,ACFT,28519.0,EV_INIT,CTR,OUTSIDE,OUTSIDE,33.93717,-118.43025,1.1,203,9.7,0,0.0,0.0,0.0,0.0,0.0,,32477_SW13,37187_09/03/2022,07:55:19,00:09:21
9,32477,LAX+ASDEX_20220903_080440_32477,09/03/2022,07:55:19,1662163000.0,1662192000.0,1662192000.0,28519.0,29080.0,561.0,37187,SW13,ACFT,29080.0,EV_LND,APT,,LAX,33.94746,-118.40014,1.1,0,5.4,0,1.666,2.478,0.812,561.0,0.193,,32477_SW13,37187_09/03/2022,08:04:40,00:09:21


In [249]:
fil4_ev['EvType'].value_counts()

EV_INIT    18743
EV_STOP    18743
EV_LND      9739
EV_TOF      9615
Name: EvType, dtype: int64

In [250]:
# Save the filtered data
fil4_ev.to_csv('Sherlock_data/Filtered_EV.csv')

### EV Data: Partial analysis
Noticed that are many flights with short duration (<30mins)

In [251]:
hf_hr = 30.0 * 60
print(len(unique_ev) - len(fil4_ev[['pKey']].loc[fil4_ev['Duration'] < hf_hr].value_counts()))

626


In [252]:
len(fil4_ev.loc[fil4_ev['Duration'] >= hf_hr]), len(fil4_ev.loc[fil4_ev['Duration'] < hf_hr])

(1962, 54878)

In [253]:
59806/(2241+59806)

0.9638822183183715

In [254]:
fil4_ev.loc[fil4_ev['Duration'] < hf_hr]

Unnamed: 0,lKey,cKey,StartDate,StartTime,tMidnightSecs,tStartSecs,tStopSecs,tStart,tStop,Duration,Msn,AcId,AcType,tEv,EvType,ObjClass,OldName,NewName,Lat,Lon,aEv,cEv,vEv,rEv,DTD,FlD,DDT,FlT,EvNumInfo,EvCharInfo,pKey,mergeKey,RecordTime,time_Duration
0,32475,LAX+ASDEX_20220903_080418_32475,09/03/2022,07:38:58,1.662163e+09,1.662191e+09,1.662192e+09,27538.0,29058.0,1520.0,37185,GTI7108,B744,27538.0,EV_INIT,CTR,OUTSIDE,OUTSIDE,33.93553,-118.54432,20.9,83,161.0,-2280,0.000,0.000,0.000,0.0,0.000,,32475_GTI7108,37185_09/03/2022,07:38:58,00:25:20
1,32475,LAX+ASDEX_20220903_080418_32475,09/03/2022,07:38:58,1.662163e+09,1.662191e+09,1.662192e+09,27538.0,29058.0,1520.0,37185,GTI7108,B744,27664.0,EV_LND,APT,,LAX,33.94681,-118.43468,3.3,82,172.1,-780,0.000,0.000,0.000,0.0,0.001,06R,32475_GTI7108,37185_09/03/2022,07:41:04,00:25:20
2,32475,LAX+ASDEX_20220903_080418_32475,09/03/2022,07:38:58,1.662163e+09,1.662191e+09,1.662192e+09,27538.0,29058.0,1520.0,37185,GTI7108,B744,29058.0,EV_STOP,CTR,OUTSIDE,OUTSIDE,33.93324,-118.40485,1.1,157,1.8,0,7.138,9.954,2.817,1520.0,0.000,,32475_GTI7108,37185_09/03/2022,08:04:18,00:25:20
3,32476,LAX+ASDEX_20220903_080324_32476,09/03/2022,07:52:21,1.662163e+09,1.662192e+09,1.662192e+09,28341.0,29004.0,663.0,37186,E59,,28341.0,EV_TOF,APT,,LAX,33.93237,-118.41806,1.1,75,4.2,0,0.000,0.000,0.000,0.0,0.093,,32476_E59,37186_09/03/2022,07:52:21,00:11:03
4,32476,LAX+ASDEX_20220903_080324_32476,09/03/2022,07:52:21,1.662163e+09,1.662192e+09,1.662192e+09,28341.0,29004.0,663.0,37186,E59,,28341.0,EV_INIT,CTR,OUTSIDE,OUTSIDE,33.93237,-118.41806,1.1,75,4.2,0,0.000,0.000,0.000,0.0,0.000,,32476_E59,37186_09/03/2022,07:52:21,00:11:03
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56835,38284,LAX+ASDEX_20220703_075802_38284,07/03/2022,07:39:42,1.656806e+09,1.656834e+09,1.656835e+09,27582.0,28682.0,1100.0,44327,VOI901,A320,28682.0,EV_STOP,CTR,OUTSIDE,OUTSIDE,33.86147,-118.53201,53.3,223,271.8,3720,7.843,11.734,3.891,1100.0,0.000,,38284_VOI901,44327_07/03/2022,07:58:02,00:18:20
56836,38285,LAX+ASDEX_20220703_075546_38285,07/03/2022,07:47:38,1.656806e+09,1.656834e+09,1.656835e+09,28058.0,28546.0,488.0,44328,O01,,28058.0,EV_TOF,APT,,LAX,33.94105,-118.39002,1.1,271,13.6,0,0.000,0.000,0.000,0.0,0.437,,38285_O01,44328_07/03/2022,07:47:38,00:08:08
56837,38285,LAX+ASDEX_20220703_075546_38285,07/03/2022,07:47:38,1.656806e+09,1.656834e+09,1.656835e+09,28058.0,28546.0,488.0,44328,O01,,28058.0,EV_INIT,CTR,OUTSIDE,OUTSIDE,33.94105,-118.39002,1.1,271,13.6,0,0.000,0.000,0.000,0.0,0.000,,38285_O01,44328_07/03/2022,07:47:38,00:08:08
56838,38285,LAX+ASDEX_20220703_075546_38285,07/03/2022,07:47:38,1.656806e+09,1.656834e+09,1.656835e+09,28058.0,28546.0,488.0,44328,O01,,28546.0,EV_LND,APT,,LAX,33.93695,-118.42203,1.1,270,19.3,0,1.657,1.681,0.024,488.0,0.086,,38285_O01,44328_07/03/2022,07:55:46,00:08:08


In [255]:
# Check to see if corresponding entry exist in RD
rd.loc[(rd['AcId'] == 'DAL1967') & (rd['Msn'] == 37214)]

Unnamed: 0,lKey,cKey,StartDate,StartTime,tMidnightSecs,tStartSecs,tStopSecs,tStart,tStop,Duration,Msn,AcId,AcType,PC,WC,Bcn,OT,Airline,CT,Orig,EstOrig,TOFRwy,Dest,EstDest,LNDRwy,ATAThdDep,ATAThdArr,tOFF,tON,FacName,equipList


### RD Data: Filter
1. Keep out Operation type A
2. Keep only records with Airline information
3. Keep only records with Aircraft information

In [256]:
rd.head()

Unnamed: 0,lKey,cKey,StartDate,StartTime,tMidnightSecs,tStartSecs,tStopSecs,tStart,tStop,Duration,Msn,AcId,AcType,PC,WC,Bcn,OT,Airline,CT,Orig,EstOrig,TOFRwy,Dest,EstDest,LNDRwy,ATAThdDep,ATAThdArr,tOFF,tON,FacName,equipList
0,4,LAX+ASDEX_20220806_080024_4,08/06/2022,07:35:42,1659744000.0,1659771000.0,1659773000.0,27342.0,28824.0,1482.0,3094,VIV147,A20N,J,L,6771,D,VIV,AC,LAX,LAX,25R,,,,28715.0,-9999999.0,28673.0,-9999999.0,LAX,F/
1,5,LAX+ASDEX_20220806_080141_5,08/06/2022,07:38:14,1659744000.0,1659771000.0,1659773000.0,27494.0,28901.0,1407.0,3095,DAL554,B763,J,H,6772,D,DAL,AC,LAX,LAX,25R,,,,28796.0,-9999999.0,28758.0,-9999999.0,LAX,C/
2,6,LAX+ASDEX_20220806_080342_6,08/06/2022,07:53:42,1659744000.0,1659772000.0,1659773000.0,28422.0,29022.0,600.0,3096,AAL2188,B738,J,L,2067,D,AAL,AC,LAX,LAX,25R,,,,28896.0,-9999999.0,28856.0,-9999999.0,LAX,F/
3,8,LAX+ASDEX_20220806_080523_8,08/06/2022,07:55:37,1659744000.0,1659773000.0,1659773000.0,28537.0,29123.0,586.0,3098,UNKN,,U,U,1200,I,,UU,LAX,LAX,,LAX,LAX,,-9999999.0,-9999999.0,-9999999.0,-9999999.0,SMR,
4,9,LAX+ASDEX_20220806_081108_9,08/06/2022,07:49:25,1659744000.0,1659772000.0,1659773000.0,28165.0,29468.0,1303.0,3099,UAL2674,B772,J,H,2622,A,UAL,AC,,,,LAX,LAX,06R,-9999999.0,28757.0,-9999999.0,28781.0,SMR,B/


In [257]:
# Filter RD data such that only shows Operation type of Arrival
fil_rd = rd.loc[rd['OT'] == 'A']
print(rd.shape, fil_rd.shape)

(24381, 31) (8120, 31)


In [258]:
fil_rd['Airline'].isnull().sum()

9

In [259]:
# Keep only records with Airline information
fil2_rd = fil_rd[fil_rd['Airline'].notna()]
print(fil2_rd.shape)

(8111, 31)


In [260]:
fil2_rd['Airline'].value_counts()

SKW    1306
DAL    1008
AAL    1000
UAL     886
SWA     610
       ... 
ELX       1
SDE       1
LET       1
JCM       1
GCK       1
Name: Airline, Length: 164, dtype: int64

In [261]:
fil2_rd['AcType'].isnull().sum()

4

In [262]:
fil2_rd.loc[fil2_rd['AcType'].isnull()][['PC', 'WC', 'Airline', 'AcId', 'CT']]

Unnamed: 0,PC,WC,Airline,AcId,CT
4894,U,U,GA.SH,SH7,GA
6132,U,U,GA.SH,SH24,GA
10799,U,U,GA.V,VPBHC,GA
18385,U,U,GA.SH,SH21,GA


In [263]:
# Filter out records with no aircraft type
fil3_rd = fil2_rd[fil2_rd['AcType'].notna()]
print(fil3_rd.shape)

(8107, 31)


In [264]:
fil3_rd['AcType'].value_counts()

E75L    1037
A321     810
B738     755
A320     654
B739     618
        ... 
E190       1
BE9L       1
GA5C       1
C25M       1
GL7T       1
Name: AcType, Length: 95, dtype: int64

In [265]:
# Carrier Type
fil3_rd['CT'].value_counts(dropna=False)

AC    7656
GA     177
AT     150
UU     122
MI       2
Name: CT, dtype: int64

In [266]:
# Filter out unknown carrier type
fil4_rd = fil3_rd.loc[fil3_rd['CT'] != 'UU']
print(fil4_rd.shape)

(7985, 31)


In [267]:
# Weight Class
fil4_rd['WC'].value_counts(dropna=False)

L    5838
H    1590
F     324
S     177
U      56
Name: WC, dtype: int64

In [268]:
# Filter out unknwon weight class
fil5_rd = fil4_rd.loc[fil4_rd['WC'] != 'U']
print(fil5_rd.shape)

(7929, 31)


In [269]:
fil5_rd['WC'].value_counts()

L    5838
H    1590
F     324
S     177
Name: WC, dtype: int64

In [270]:
# Performance category
fil5_rd['PC'].value_counts(dropna=False)

J    7916
T      10
P       3
Name: PC, dtype: int64

In [271]:
def sameval_cols(df):
    """
    Display columns if all values in the column are the same
    """
    nunique = df.nunique()
    cols_to_drop = nunique[nunique == 1].index
    print(cols_to_drop)
    return

In [272]:
sameval_cols(fil5_rd)

Index(['OT', 'TOFRwy', 'Dest', 'EstDest', 'tOFF'], dtype='object')


In [273]:
fil5_rd[['OT', 'Orig', 'EstOrig', 'Dest', 'EstDest', 'ATAThdDep', 'tOFF']]

Unnamed: 0,OT,Orig,EstOrig,Dest,EstDest,ATAThdDep,tOFF
4,A,,,LAX,LAX,-9999999.0,-9999999.0
11,A,,,LAX,LAX,-9999999.0,-9999999.0
12,A,,,LAX,LAX,-9999999.0,-9999999.0
25,A,,,LAX,LAX,-9999999.0,-9999999.0
26,A,,,LAX,LAX,-9999999.0,-9999999.0
...,...,...,...,...,...,...,...
24349,A,,,LAX,LAX,-9999999.0,-9999999.0
24351,A,,,LAX,LAX,-9999999.0,-9999999.0
24361,A,,,LAX,LAX,-9999999.0,-9999999.0
24362,A,,,LAX,LAX,-9999999.0,-9999999.0


In [274]:
# Drop only the following columns: 'ATAThdDep', 'tOFF' as their values indicate it's null
fil5_rd.drop(['ATAThdDep', 'tOFF'], axis=1, inplace=True)
fil5_rd.shape

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fil5_rd.drop(['ATAThdDep', 'tOFF'], axis=1, inplace=True)


(7929, 29)

In [275]:
fil5_rd.head()

Unnamed: 0,lKey,cKey,StartDate,StartTime,tMidnightSecs,tStartSecs,tStopSecs,tStart,tStop,Duration,Msn,AcId,AcType,PC,WC,Bcn,OT,Airline,CT,Orig,EstOrig,TOFRwy,Dest,EstDest,LNDRwy,ATAThdArr,tON,FacName,equipList
4,9,LAX+ASDEX_20220806_081108_9,08/06/2022,07:49:25,1659744000.0,1659772000.0,1659773000.0,28165.0,29468.0,1303.0,3099,UAL2674,B772,J,H,2622,A,UAL,AC,,,,LAX,LAX,06R,28757.0,28781.0,SMR,B/
11,16,LAX+ASDEX_20220806_081943_16,08/06/2022,08:13:52,1659744000.0,1659774000.0,1659774000.0,29632.0,29983.0,351.0,3106,SWA1813,B738,J,L,1437,A,SWA,AC,,,,LAX,LAX,06R,29758.0,29784.0,LAX,F/
12,17,LAX+ASDEX_20220806_082243_17,08/06/2022,08:11:15,1659744000.0,1659773000.0,1659774000.0,29475.0,30163.0,688.0,3107,UAL333,B39M,J,L,3406,A,UAL,AC,,,,LAX,LAX,06R,29587.0,29613.0,LAX,F/
25,32,LAX+ASDEX_20220806_085107_32,08/06/2022,08:41:10,1659744000.0,1659775000.0,1659776000.0,31270.0,31867.0,597.0,3122,DAL657,B763,J,H,1556,A,DAL,AC,,,,LAX,LAX,06R,31401.0,31425.0,LAX,C/
26,33,LAX+ASDEX_20220806_085203_33,08/06/2022,08:46:42,1659744000.0,1659776000.0,1659776000.0,31602.0,31923.0,321.0,3123,SWA493,B737,J,L,2614,A,SWA,AC,,,,LAX,LAX,06R,31730.0,31747.0,SMR,F/


### RD Data: Create identifying key for each flight
Same strategy as in EV data

In [276]:
fil5_rd['Msn'].value_counts()

38597    3
37461    3
37185    3
38242    3
38193    3
        ..
17641    1
17640    1
17631    1
17630    1
44313    1
Name: Msn, Length: 7498, dtype: int64

In [277]:
fil5_rd.loc[fil5_rd['Msn'] == 32507]

Unnamed: 0,lKey,cKey,StartDate,StartTime,tMidnightSecs,tStartSecs,tStopSecs,tStart,tStop,Duration,Msn,AcId,AcType,PC,WC,Bcn,OT,Airline,CT,Orig,EstOrig,TOFRwy,Dest,EstDest,LNDRwy,ATAThdArr,tON,FacName,equipList


Column 'Msn' is also not a good identifying column for RD data. Try 'lKey' column.

In [278]:
fil5_rd['lKey'].value_counts()

32648    4
32771    3
33344    3
32880    3
33276    3
        ..
12767    1
12765    1
12764    1
12761    1
38270    1
Name: lKey, Length: 7334, dtype: int64

In [279]:
fil5_rd.loc[fil5_rd['lKey'] == 34412]

Unnamed: 0,lKey,cKey,StartDate,StartTime,tMidnightSecs,tStartSecs,tStopSecs,tStart,tStop,Duration,Msn,AcId,AcType,PC,WC,Bcn,OT,Airline,CT,Orig,EstOrig,TOFRwy,Dest,EstDest,LNDRwy,ATAThdArr,tON,FacName,equipList
11641,34412,LAX+ASDEX_20220904_000840_34412,09/03/2022,23:55:50,1662163000.0,1662249000.0,1662250000.0,86150.0,86920.0,770.0,39244,UAL1221,B738,J,L,2764,A,UAL,AC,,,,LAX,LAX,24R,86301.0,86326.0,LAX,F/
13615,34412,LAX+ASDEX_20221204_012117_34412,12/04/2022,01:08:55,1670112000.0,1670116000.0,1670117000.0,4135.0,4877.0,742.0,39078,SKW5341,CRJ2,J,L,3503,A,SKW,AC,,,,LAX,LAX,24R,4306.0,4325.0,LAX,G/


Column 'lKey' alone is also not a good identifier, need to combine 'lKey' with 'AcId' to make 'pKey' as in EV data.

In [280]:
fil5_rd['pKey'] = fil5_rd['lKey'].map(str) + '_' + fil5_rd['AcId']
fil5_rd['mergeKey'] = fil5_rd['Msn'].map(str) + '_' + fil5_rd['StartDate']
fil5_rd.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fil5_rd['pKey'] = fil5_rd['lKey'].map(str) + '_' + fil5_rd['AcId']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fil5_rd['mergeKey'] = fil5_rd['Msn'].map(str) + '_' + fil5_rd['StartDate']


Unnamed: 0,lKey,cKey,StartDate,StartTime,tMidnightSecs,tStartSecs,tStopSecs,tStart,tStop,Duration,Msn,AcId,AcType,PC,WC,Bcn,OT,Airline,CT,Orig,EstOrig,TOFRwy,Dest,EstDest,LNDRwy,ATAThdArr,tON,FacName,equipList,pKey,mergeKey
4,9,LAX+ASDEX_20220806_081108_9,08/06/2022,07:49:25,1659744000.0,1659772000.0,1659773000.0,28165.0,29468.0,1303.0,3099,UAL2674,B772,J,H,2622,A,UAL,AC,,,,LAX,LAX,06R,28757.0,28781.0,SMR,B/,9_UAL2674,3099_08/06/2022
11,16,LAX+ASDEX_20220806_081943_16,08/06/2022,08:13:52,1659744000.0,1659774000.0,1659774000.0,29632.0,29983.0,351.0,3106,SWA1813,B738,J,L,1437,A,SWA,AC,,,,LAX,LAX,06R,29758.0,29784.0,LAX,F/,16_SWA1813,3106_08/06/2022
12,17,LAX+ASDEX_20220806_082243_17,08/06/2022,08:11:15,1659744000.0,1659773000.0,1659774000.0,29475.0,30163.0,688.0,3107,UAL333,B39M,J,L,3406,A,UAL,AC,,,,LAX,LAX,06R,29587.0,29613.0,LAX,F/,17_UAL333,3107_08/06/2022
25,32,LAX+ASDEX_20220806_085107_32,08/06/2022,08:41:10,1659744000.0,1659775000.0,1659776000.0,31270.0,31867.0,597.0,3122,DAL657,B763,J,H,1556,A,DAL,AC,,,,LAX,LAX,06R,31401.0,31425.0,LAX,C/,32_DAL657,3122_08/06/2022
26,33,LAX+ASDEX_20220806_085203_33,08/06/2022,08:46:42,1659744000.0,1659776000.0,1659776000.0,31602.0,31923.0,321.0,3123,SWA493,B737,J,L,2614,A,SWA,AC,,,,LAX,LAX,06R,31730.0,31747.0,SMR,F/,33_SWA493,3123_08/06/2022


In [281]:
fil5_rd.reset_index(drop=True, inplace=True)

In [282]:
fil5_rd.shape

(7929, 31)

In [283]:
# pKey is unqiue for each record
fil5_rd['pKey'].value_counts(dropna=False)

44715_SKW3692    2
9_UAL2674        1
44760_SKW4861    1
44789_VOI920     1
44786_ACA793     1
                ..
12780_TAI520     1
12779_JBU1069    1
12778_ASA544     1
12776_UAL466     1
38270_SWA1497    1
Name: pKey, Length: 7928, dtype: int64

In [284]:
fil5_rd.to_csv('Sherlock_data/Filtered_RD.csv')

In [285]:
unique_rd = fil5_rd['pKey'].unique().tolist()
print(len(unique_rd))

7928


### Create a dataframe with only the intersected records of EV and RD
Filtered EV is called fil4_ev and filtered RD is called fil5_rd

In [286]:
# Compare unique_ev and unique_rd to see if there are any intersections
len(list(set(unique_ev) & set(unique_rd)))

7455

In [287]:
intersected = list(set(unique_ev) & set(unique_rd))

In [288]:
common_ev = fil4_ev.loc[fil4_ev['pKey'].isin(intersected)]
common_ev.shape

(22375, 34)

In [289]:
common_rd = fil5_rd.loc[fil5_rd['pKey'].isin(intersected)]
common_rd.shape

(7456, 31)

In [290]:
common_ev.sort_values(by=['pKey'], inplace=True)
common_ev.reset_index(drop=True, inplace=True)
common_ev.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  common_ev.sort_values(by=['pKey'], inplace=True)


Unnamed: 0,lKey,cKey,StartDate,StartTime,tMidnightSecs,tStartSecs,tStopSecs,tStart,tStop,Duration,Msn,AcId,AcType,tEv,EvType,ObjClass,OldName,NewName,Lat,Lon,aEv,cEv,vEv,rEv,DTD,FlD,DDT,FlT,EvNumInfo,EvCharInfo,pKey,mergeKey,RecordTime,time_Duration
0,1001,LAX+ASDEX_20220806_184436_1001,08/06/2022,18:30:44,1659744000.0,1659811000.0,1659811000.0,66644.0,67476.0,832.0,4151,AFR070,B772,67476.0,EV_STOP,CTR,OUTSIDE,OUTSIDE,33.94372,-118.41454,1.1,203,6.8,0,7.307,9.9,2.593,832.0,0.0,,1001_AFR070,4151_08/06/2022,18:44:36,00:13:52
1,1001,LAX+ASDEX_20220806_184436_1001,08/06/2022,18:30:44,1659744000.0,1659811000.0,1659811000.0,66644.0,67476.0,832.0,4151,AFR070,B772,66793.0,EV_LND,APT,,LAX,33.95217,-118.40158,3.1,265,134.4,1140,0.0,0.0,0.0,0.0,0.019,24R,1001_AFR070,4151_08/06/2022,18:33:13,00:13:52
2,1001,LAX+ASDEX_20220806_184436_1001,08/06/2022,18:30:44,1659744000.0,1659811000.0,1659811000.0,66644.0,67476.0,832.0,4151,AFR070,B772,66644.0,EV_INIT,CTR,OUTSIDE,OUTSIDE,33.9651,-118.27399,23.8,264,199.4,0,0.0,0.0,0.0,0.0,0.0,,1001_AFR070,4151_08/06/2022,18:30:44,00:13:52
3,1024,LAX+ASDEX_20220806_185140_1024,08/06/2022,18:40:16,1659744000.0,1659811000.0,1659812000.0,67216.0,67900.0,684.0,4176,DAL1966,B739,67900.0,EV_STOP,CTR,OUTSIDE,OUTSIDE,33.94681,-118.40998,1.1,0,1.6,0,7.109,8.323,1.213,684.0,0.0,,1024_DAL1966,4176_08/06/2022,18:51:40,00:11:24
4,1024,LAX+ASDEX_20220806_185140_1024,08/06/2022,18:40:16,1659744000.0,1659811000.0,1659812000.0,67216.0,67900.0,684.0,4176,DAL1966,B739,67216.0,EV_INIT,CTR,OUTSIDE,OUTSIDE,33.94874,-118.27099,20.9,265,189.4,-2280,0.0,0.0,0.0,0.0,0.0,,1024_DAL1966,4176_08/06/2022,18:40:16,00:11:24


In [291]:
common_rd.sort_values(by=['pKey'], inplace=True)
common_rd.reset_index(drop=True, inplace=True)
common_rd.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  common_rd.sort_values(by=['pKey'], inplace=True)


Unnamed: 0,lKey,cKey,StartDate,StartTime,tMidnightSecs,tStartSecs,tStopSecs,tStart,tStop,Duration,Msn,AcId,AcType,PC,WC,Bcn,OT,Airline,CT,Orig,EstOrig,TOFRwy,Dest,EstDest,LNDRwy,ATAThdArr,tON,FacName,equipList,pKey,mergeKey
0,1001,LAX+ASDEX_20220806_184436_1001,08/06/2022,18:30:44,1659744000.0,1659811000.0,1659811000.0,66644.0,67476.0,832.0,4151,AFR070,B772,J,H,2270,A,AFR,AC,,,,LAX,LAX,24R,66793.0,66818.0,MODE_S,B/,1001_AFR070,4151_08/06/2022
1,1024,LAX+ASDEX_20220806_185140_1024,08/06/2022,18:40:16,1659744000.0,1659811000.0,1659812000.0,67216.0,67900.0,684.0,4176,DAL1966,B739,J,L,7211,A,DAL,AC,,,,LAX,LAX,25L,67344.0,67358.0,LAX,F/,1024_DAL1966,4176_08/06/2022
2,1025,LAX+ASDEX_20220806_185428_1025,08/06/2022,18:42:12,1659744000.0,1659811000.0,1659812000.0,67332.0,68068.0,736.0,4177,CKK223,B77L,J,H,1114,A,CKK,AC,,,,LAX,LAX,25L,67465.0,67473.0,SMRADSB,B/,1025_CKK223,4177_08/06/2022
3,1026,LAX+ASDEX_20220806_185336_1026,08/06/2022,18:44:01,1659744000.0,1659811000.0,1659812000.0,67441.0,68016.0,575.0,4178,WJA1510,B738,J,L,3372,A,WJA,AC,,,,LAX,LAX,24R,67594.0,67614.0,LAX,F/,1026_WJA1510,4178_08/06/2022
4,1035,LAX+ASDEX_20220806_185545_1035,08/06/2022,18:26:03,1659744000.0,1659810000.0,1659812000.0,66363.0,68145.0,1782.0,4188,ACA552,B38M,J,L,7043,A,ACA,AC,,,,LAX,LAX,24R,66515.0,66532.0,MODE_SADSB,F/,1035_ACA552,4188_08/06/2022


In [292]:
common_rd.columns

Index(['lKey', 'cKey', 'StartDate', 'StartTime', 'tMidnightSecs', 'tStartSecs',
       'tStopSecs', 'tStart', 'tStop', 'Duration', 'Msn', 'AcId', 'AcType',
       'PC', 'WC', 'Bcn', 'OT', 'Airline', 'CT', 'Orig', 'EstOrig', 'TOFRwy',
       'Dest', 'EstDest', 'LNDRwy', 'ATAThdArr', 'tON', 'FacName', 'equipList',
       'pKey', 'mergeKey'],
      dtype='object')

In [293]:
ev_rd = common_ev.merge(common_rd[['PC', 'WC', 'OT', 'Airline', 'CT', 'Orig', 'EstOrig', 'TOFRwy', 'Dest', 'EstDest', 'LNDRwy', 'ATAThdArr', 'tON', 'pKey']], on='pKey')
ev_rd.shape

(22378, 47)

In [294]:
ev_rd.head()

Unnamed: 0,lKey,cKey,StartDate,StartTime,tMidnightSecs,tStartSecs,tStopSecs,tStart,tStop,Duration,Msn,AcId,AcType,tEv,EvType,ObjClass,OldName,NewName,Lat,Lon,aEv,cEv,vEv,rEv,DTD,FlD,DDT,FlT,EvNumInfo,EvCharInfo,pKey,mergeKey,RecordTime,time_Duration,PC,WC,OT,Airline,CT,Orig,EstOrig,TOFRwy,Dest,EstDest,LNDRwy,ATAThdArr,tON
0,1001,LAX+ASDEX_20220806_184436_1001,08/06/2022,18:30:44,1659744000.0,1659811000.0,1659811000.0,66644.0,67476.0,832.0,4151,AFR070,B772,67476.0,EV_STOP,CTR,OUTSIDE,OUTSIDE,33.94372,-118.41454,1.1,203,6.8,0,7.307,9.9,2.593,832.0,0.0,,1001_AFR070,4151_08/06/2022,18:44:36,00:13:52,J,H,A,AFR,AC,,,,LAX,LAX,24R,66793.0,66818.0
1,1001,LAX+ASDEX_20220806_184436_1001,08/06/2022,18:30:44,1659744000.0,1659811000.0,1659811000.0,66644.0,67476.0,832.0,4151,AFR070,B772,66793.0,EV_LND,APT,,LAX,33.95217,-118.40158,3.1,265,134.4,1140,0.0,0.0,0.0,0.0,0.019,24R,1001_AFR070,4151_08/06/2022,18:33:13,00:13:52,J,H,A,AFR,AC,,,,LAX,LAX,24R,66793.0,66818.0
2,1001,LAX+ASDEX_20220806_184436_1001,08/06/2022,18:30:44,1659744000.0,1659811000.0,1659811000.0,66644.0,67476.0,832.0,4151,AFR070,B772,66644.0,EV_INIT,CTR,OUTSIDE,OUTSIDE,33.9651,-118.27399,23.8,264,199.4,0,0.0,0.0,0.0,0.0,0.0,,1001_AFR070,4151_08/06/2022,18:30:44,00:13:52,J,H,A,AFR,AC,,,,LAX,LAX,24R,66793.0,66818.0
3,1024,LAX+ASDEX_20220806_185140_1024,08/06/2022,18:40:16,1659744000.0,1659811000.0,1659812000.0,67216.0,67900.0,684.0,4176,DAL1966,B739,67900.0,EV_STOP,CTR,OUTSIDE,OUTSIDE,33.94681,-118.40998,1.1,0,1.6,0,7.109,8.323,1.213,684.0,0.0,,1024_DAL1966,4176_08/06/2022,18:51:40,00:11:24,J,L,A,DAL,AC,,,,LAX,LAX,25L,67344.0,67358.0
4,1024,LAX+ASDEX_20220806_185140_1024,08/06/2022,18:40:16,1659744000.0,1659811000.0,1659812000.0,67216.0,67900.0,684.0,4176,DAL1966,B739,67216.0,EV_INIT,CTR,OUTSIDE,OUTSIDE,33.94874,-118.27099,20.9,265,189.4,-2280,0.0,0.0,0.0,0.0,0.0,,1024_DAL1966,4176_08/06/2022,18:40:16,00:11:24,J,L,A,DAL,AC,,,,LAX,LAX,25L,67344.0,67358.0


In [295]:
ev_rd.columns

Index(['lKey', 'cKey', 'StartDate', 'StartTime', 'tMidnightSecs', 'tStartSecs',
       'tStopSecs', 'tStart', 'tStop', 'Duration', 'Msn', 'AcId', 'AcType',
       'tEv', 'EvType', 'ObjClass', 'OldName', 'NewName', 'Lat', 'Lon', 'aEv',
       'cEv', 'vEv', 'rEv', 'DTD', 'FlD', 'DDT', 'FlT', 'EvNumInfo',
       'EvCharInfo', 'pKey', 'mergeKey', 'RecordTime', 'time_Duration', 'PC',
       'WC', 'OT', 'Airline', 'CT', 'Orig', 'EstOrig', 'TOFRwy', 'Dest',
       'EstDest', 'LNDRwy', 'ATAThdArr', 'tON'],
      dtype='object')

In [296]:
ev_rd.to_csv('Sherlock_data/Merged_Filtered_EV_RD.csv')