In [1]:
import pandas as pd
import numpy as np

# These files use \N as a missing value indicator.  When reading the CSVs, we will tell
# it to use that value as missing or NA.  The double backslash is required because
# otherwise it will interpret \N as a carriage return. 

# Read in the airports data.
airports = pd.read_csv("data/airports.dat", header=None, na_values='\\N')
airports.columns = ["ID", "NAME", "CITY", "COUNTRY", "IATA", "ICAO", "LATITUDE", "LONGITUDE", "ALTITUDE", "TIMEZONE", "DST", "TZ", "TYPE", "SOURCE"]

# Read in the airlines data.
airlines = pd.read_csv("data/airlines.dat", header=None, na_values='\\N')
airlines.columns = ["ID", "NAME", "ALIAS", "IATA", "ICAO", "CALLSIGN", "COUNTRY", "ACTIVE"]

# Read in the routes data.
routes = pd.read_csv("data/routes.dat", header=None, na_values='\\N')
routes.columns = ["AIRLINE", "AIRLINE_ID", "SOURCE", "SOURCE_ID", "DEST", "DEST_ID", "CODESHARE", "STOPS", "EQUIPMENT"]

# Read in the planes data.
planes = pd.read_csv("data/planes.dat", header=None, na_values='\\N')
planes.columns = ["NAME", "IATA", "ICAO"]

# Read in the countries data.
countries = pd.read_csv("data/countries.dat", header=None, na_values='\\N')
countries.columns = ["NAME", "ISO", "DAFIF"]

In [None]:
# find all those destinations served from LEX
firstdestination = routes[routes['SOURCE'].str.contains("LEX")].drop_duplicates("DEST")["DEST"].unique()
# find all those destinations served from firstdestination
seconddestination = [(routes[routes["SOURCE"].str.contains(x) & (~(routes['DEST'].str.contains(x)))].drop_duplicates("DEST")["DEST"].unique()) for x in firstdestination]

In [None]:
[ x for x in firstdestination if x not in sUnique ]

In [27]:
# routes[routes['SOURCE'].str.contains("LEX") & routes['DEST'].str.contains("ATL")]

In [28]:
# source = ["LEX",]

In [29]:
# find all those destinations served from LEX
firstdestination = routes[routes['SOURCE'].str.contains("LEX")].drop_duplicates("DEST")["DEST"].unique()
firstdestination

In [40]:
# find all those destinations served from firstdestination
seconddestination = [(routes[routes["SOURCE"].str.contains(x) & (~(routes['DEST'].str.contains(x)))].drop_duplicates("DEST")["DEST"].unique()) for x in firstdestination]
seconddestination

[array(['LWB', 'MCN', 'MEI', 'MSL', 'PIB', 'TUP', 'AZO', 'CHA', 'CID',
        'CRW', 'CVG', 'EVV', 'FWA', 'GSO', 'GSP', 'LAN', 'LEX', 'MBS',
        'MSP', 'ROA', 'SYR', 'TYS', 'XNA', 'CLT', 'DFW', 'LHR', 'MIA',
        'ORD', 'PHL', 'PHX', 'DEN', 'YYZ', 'ABE', 'ABQ', 'ABY', 'AEX',
        'AGS', 'ALB', 'ATW', 'AUS', 'AVL', 'AVP', 'BDL', 'BHM', 'BMI',
        'BNA', 'BOS', 'BQK', 'BRU', 'BTR', 'BTV', 'BUF', 'BWI', 'CAE',
        'CAK', 'CDG', 'CHO', 'CHS', 'CLE', 'CMH', 'COS', 'CSG', 'CUN',
        'CZM', 'DAB', 'DAL', 'DAY', 'DCA', 'DHN', 'DSM', 'DTW', 'DUB',
        'DUS', 'ECP', 'ELP', 'EWN', 'EWR', 'EYW', 'FAY', 'FCO', 'FLL',
        'FNT', 'FRA', 'FSM', 'GDL', 'GNV', 'GPT', 'GRB', 'GRK', 'GRR',
        'GTR', 'HPN', 'HSV', 'IAD', 'IAH', 'ICT', 'ILM', 'IND', 'JAN',
        'JAX', 'JFK', 'LAS', 'LAX', 'LFT', 'LGA', 'LIT', 'MAD', 'MAN',
        'MCI', 'MCO', 'MDT', 'MDW', 'MEM', 'MEX', 'MGM', 'MHT', 'MKE',
        'MLB', 'MLI', 'MLU', 'MOB', 'MSN', 'MSY', 'MTY', 'MUC', 'MYR',
      

In [30]:
seconddestination = [(routes[routes["SOURCE"].str.contains(x)].drop_duplicates("DEST")["DEST"].unique()) for x in firstdestination]

In [32]:
two_transfers_4m_lex = [(x,y,z) for x in source for y in firstdestination for z in seconddestination]

In [33]:
two_transfers_4m_lex

[('LEX',
  'ATL',
  array(['LWB', 'MCN', 'MEI', 'MSL', 'PIB', 'TUP', 'AZO', 'CHA', 'CID',
         'CRW', 'CVG', 'EVV', 'FWA', 'GSO', 'GSP', 'LAN', 'LEX', 'MBS',
         'MSP', 'ROA', 'SYR', 'TYS', 'XNA', 'CLT', 'DFW', 'LHR', 'MIA',
         'ORD', 'PHL', 'PHX', 'DEN', 'YYZ', 'ABE', 'ABQ', 'ABY', 'AEX',
         'AGS', 'ALB', 'ATW', 'AUS', 'AVL', 'AVP', 'BDL', 'BHM', 'BMI',
         'BNA', 'BOS', 'BQK', 'BRU', 'BTR', 'BTV', 'BUF', 'BWI', 'CAE',
         'CAK', 'CDG', 'CHO', 'CHS', 'CLE', 'CMH', 'COS', 'CSG', 'CUN',
         'CZM', 'DAB', 'DAL', 'DAY', 'DCA', 'DHN', 'DSM', 'DTW', 'DUB',
         'DUS', 'ECP', 'ELP', 'EWN', 'EWR', 'EYW', 'FAY', 'FCO', 'FLL',
         'FNT', 'FRA', 'FSM', 'GDL', 'GNV', 'GPT', 'GRB', 'GRK', 'GRR',
         'GTR', 'HPN', 'HSV', 'IAD', 'IAH', 'ICT', 'ILM', 'IND', 'JAN',
         'JAX', 'JFK', 'LAS', 'LAX', 'LFT', 'LGA', 'LIT', 'MAD', 'MAN',
         'MCI', 'MCO', 'MDT', 'MDW', 'MEM', 'MEX', 'MGM', 'MHT', 'MKE',
         'MLB', 'MLI', 'MLU', 'MOB', 'MSN', 'M

In [35]:
two_transfers_4m_lex = [(x,y,z) for x in source for y in firstdestination for z in seconddestination if (x!=y or y!=z or z!=x)]
two_transfers_4m_lex

[('LEX',
  'ATL',
  array(['LWB', 'MCN', 'MEI', 'MSL', 'PIB', 'TUP', 'AZO', 'CHA', 'CID',
         'CRW', 'CVG', 'EVV', 'FWA', 'GSO', 'GSP', 'LAN', 'LEX', 'MBS',
         'MSP', 'ROA', 'SYR', 'TYS', 'XNA', 'CLT', 'DFW', 'LHR', 'MIA',
         'ORD', 'PHL', 'PHX', 'DEN', 'YYZ', 'ABE', 'ABQ', 'ABY', 'AEX',
         'AGS', 'ALB', 'ATW', 'AUS', 'AVL', 'AVP', 'BDL', 'BHM', 'BMI',
         'BNA', 'BOS', 'BQK', 'BRU', 'BTR', 'BTV', 'BUF', 'BWI', 'CAE',
         'CAK', 'CDG', 'CHO', 'CHS', 'CLE', 'CMH', 'COS', 'CSG', 'CUN',
         'CZM', 'DAB', 'DAL', 'DAY', 'DCA', 'DHN', 'DSM', 'DTW', 'DUB',
         'DUS', 'ECP', 'ELP', 'EWN', 'EWR', 'EYW', 'FAY', 'FCO', 'FLL',
         'FNT', 'FRA', 'FSM', 'GDL', 'GNV', 'GPT', 'GRB', 'GRK', 'GRR',
         'GTR', 'HPN', 'HSV', 'IAD', 'IAH', 'ICT', 'ILM', 'IND', 'JAN',
         'JAX', 'JFK', 'LAS', 'LAX', 'LFT', 'LGA', 'LIT', 'MAD', 'MAN',
         'MCI', 'MCO', 'MDT', 'MDW', 'MEM', 'MEX', 'MGM', 'MHT', 'MKE',
         'MLB', 'MLI', 'MLU', 'MOB', 'MSN', 'M