Libraries

In [1]:
import pandas as pd
import requests
from datetime import datetime


Bring in data

In [2]:
capital_cities = pd.read_csv('../Data/capital_cities_and_airports.csv')

display(capital_cities.shape)
display(capital_cities.head())

(46, 6)

Unnamed: 0,country,capital_city,capital_latitude,capital_longitude,airport_iata,airport_name
0,Afghanistan,Kabul,34.5167,69.1833,KBL,Hamid Karzai International Airport
1,Armenia,Yerevan,40.1812,44.5136,EVN,Zvartnots International Airport
2,Azerbaijan,Baku,40.3953,49.8622,GYD,Heydar Aliyev International Airport
3,Bahrain,Manama,26.2361,50.5831,BAH,Bahrain International Airport
4,Bangladesh,Dhaka,23.7231,90.4086,DAC,Hazrat Shahjalal International Airport


Helper functions

In [3]:
# format departure datetime
def combine_to_datetime(dataframe, direction):
    # combine the departure columns into a single datetime column
    dataframe[f'{direction}_datetime'] = pd.to_datetime(
        dataframe[f'{direction}_year'].astype(str) + '-' +
        dataframe[f'{direction}_month'].astype(str).str.zfill(2) + '-' +
        dataframe[f'{direction}_day'].astype(str).str.zfill(2) + ' ' +
        dataframe[f'{direction}_hour'].astype(str).str.zfill(2) + ':' +
        dataframe[f'{direction}_minute'].astype(str).str.zfill(2) + ':' +
        dataframe[f'{direction}_second'].astype(str).str.zfill(2)
    )
    
    # drop the original departure columns
    dataframe.drop([f'{direction}_day', f'{direction}_month', f'{direction}_year', f'{direction}_hour', f'{direction}_minute', f'{direction}_second'], axis=1, inplace=True)
    

Get data

In [4]:
# bring in key
with open('../Data/skyscanner_auth.txt', 'r') as file:
    skyscanner_auth = file.read()


In [5]:
# set up api
def get_flight_tables(departure_airport, arrival_airport, skyscanner_auth=skyscanner_auth):
    url = 'https://partners.api.skyscanner.net/apiservices/v3/flights/live/search/create'
    headers = {'x-api-key': skyscanner_auth}

    data = {
        'query': {
            'market': 'US',
            'locale': 'en-US',
            'currency': 'USD',
            'query_legs': [{
                'origin_place_id': {'iata': departure_airport},
                'destination_place_id': {'iata': arrival_airport},
                'date': {'year': 2023, 'month': 5, 'day': 22}
            }],
            'adults': 1,
            'cabin_class': 'CABIN_CLASS_ECONOMY'
        }
    }
    
    response = requests.post(url, headers=headers, json=data)
    
    if response.status_code == 200:
        # store data
        response = response.json()
        return response['content']['results']
    else:
        raise Exception("Status code not 200")

# test
flight_tables = get_flight_tables('NRT', 'TPE')


### Itineraries

In [6]:
def clean_itineraries_table(flight_tables):
    # subset
    itineraries_dict = flight_tables['itineraries']

    # format
    rows = []
    for key, value in itineraries_dict.items():
        for pricing_option in value['pricingOptions']:
            price = pricing_option['price']
            for item in pricing_option['items']:
                item_price = item['price']
                for fare in item['fares']:
                    rows.append({
                        'id': key,
                        'price_amount': price['amount'],
                        'price_unit': price['unit'],
                        'price_update_status': price['updateStatus'],
                        'agent_id': item['agentId'],
                        'deep_link': item['deepLink'],
                        'segment_id': fare['segmentId'],
                        'booking_code': fare['bookingCode'],
                        'fare_basis_code': fare['fareBasisCode'],
                        'transfer_type': pricing_option['transferType'],
                        'option_id': pricing_option['id']
                    })

    # create dataframe
    return pd.DataFrame(rows)


# test
itineraries = clean_itineraries_table(flight_tables)
display(itineraries.head())


Unnamed: 0,id,price_amount,price_unit,price_update_status,agent_id,deep_link,segment_id,booking_code,fare_basis_code,transfer_type,option_id
0,14788-2305220925--32444-0-17075-2305221210,240900,PRICE_UNIT_MILLI,PRICE_UPDATE_STATUS_UNSPECIFIED,cair,https://skyscanner.pxf.io/c/2850210/1103265/13...,14788-17075-2305220925-2305221210--32444,L,LLOA2JN,TRANSFER_TYPE_MANAGED,BWKVIHQfvIVQ
1,14788-2305221220--31757-0-17075-2305221455,187600,PRICE_UNIT_MILLI,PRICE_UPDATE_STATUS_UNSPECIFIED,cust,https://skyscanner.pxf.io/c/2850210/1103265/13...,14788-17075-2305221220-2305221455--31757,,,TRANSFER_TYPE_MANAGED,0tRdY5DIN5dt
2,14788-2305221325--32331-0-17075-2305221605,319000,PRICE_UNIT_MILLI,PRICE_UPDATE_STATUS_UNSPECIFIED,vaya,https://skyscanner.pxf.io/c/2850210/1103265/13...,14788-17075-2305221325-2305221605--32331,,,TRANSFER_TYPE_MANAGED,ncC5AloD9zuN
3,14788-2305221330--31444-0-17075-2305221625,251900,PRICE_UNIT_MILLI,PRICE_UPDATE_STATUS_UNSPECIFIED,cust,https://skyscanner.pxf.io/c/2850210/1103265/13...,14788-17075-2305221330-2305221625--31444,,,TRANSFER_TYPE_MANAGED,GYdwaExVOM_T
4,14788-2305221415--32331-0-17075-2305221655,319000,PRICE_UNIT_MILLI,PRICE_UPDATE_STATUS_UNSPECIFIED,vaya,https://skyscanner.pxf.io/c/2850210/1103265/13...,14788-17075-2305221415-2305221655--32331,,,TRANSFER_TYPE_MANAGED,thqn5VFAi3G7


### Legs

In [7]:
def clean_legs_table(flight_tables):
    # subset
    legs_dict = flight_tables['legs']

    # format
    rows = []
    for key, value in legs_dict.items():
        departure_dt = value['departureDateTime']
        arrival_dt = value['arrivalDateTime']
        for segment_id, marketing_carrier_id, operating_carrier_id in zip(value['segmentIds'], value['marketingCarrierIds'], value['operatingCarrierIds']):
            rows.append({
                'id': key,
                'origin_place_id': value['originPlaceId'],
                'destination_place_id': value['destinationPlaceId'],
                'departure_year': departure_dt['year'],
                'departure_month': departure_dt['month'],
                'departure_day': departure_dt['day'],
                'departure_hour': departure_dt['hour'],
                'departure_minute': departure_dt['minute'],
                'departure_second': departure_dt['second'],
                'arrival_year': arrival_dt['year'],
                'arrival_month': arrival_dt['month'],
                'arrival_day': arrival_dt['day'],
                'arrival_hour': arrival_dt['hour'],
                'arrival_minute': arrival_dt['minute'],
                'arrival_second': arrival_dt['second'],
                'duration_minutes': value['durationInMinutes'],
                'stop_count': value['stopCount'],
                'marketing_carrier_id': marketing_carrier_id,
                'operating_carrier_id': operating_carrier_id,
                'segment_id': segment_id
            })

    # create dataframe
    legs = pd.DataFrame(rows)

    # combine time  columns
    #combine_to_datetime(legs, 'departure')
    #combine_to_datetime(legs, 'arrival')

    # rename id column
    return legs.rename(columns={'id': 'leg_id'})


# test
legs = clean_legs_table(flight_tables)
display(legs.head())


Unnamed: 0,leg_id,origin_place_id,destination_place_id,departure_year,departure_month,departure_day,departure_hour,departure_minute,departure_second,arrival_year,arrival_month,arrival_day,arrival_hour,arrival_minute,arrival_second,duration_minutes,stop_count,marketing_carrier_id,operating_carrier_id,segment_id
0,14788-2305220925--32444-0-17075-2305221210,128668889,128667054,2023,5,22,9,25,0,2023,5,22,12,10,0,225,0,-32444,-32444,14788-17075-2305220925-2305221210--32444
1,14788-2305221220--31757-0-17075-2305221455,128668889,128667054,2023,5,22,12,20,0,2023,5,22,14,55,0,215,0,-31757,-31757,14788-17075-2305221220-2305221455--31757
2,14788-2305221325--32331-0-17075-2305221605,128668889,128667054,2023,5,22,13,25,0,2023,5,22,16,5,0,220,0,-32331,-32331,14788-17075-2305221325-2305221605--32331
3,14788-2305221330--31444-0-17075-2305221625,128668889,128667054,2023,5,22,13,30,0,2023,5,22,16,25,0,235,0,-31444,-31444,14788-17075-2305221330-2305221625--31444
4,14788-2305221415--32331-0-17075-2305221655,128668889,128667054,2023,5,22,14,15,0,2023,5,22,16,55,0,220,0,-32331,-32331,14788-17075-2305221415-2305221655--32331


### Segments

In [8]:
def clean_segments_table(flight_tables):
    # subset
    segments_dict = flight_tables['segments']

    # format
    rows = []
    for key, value in segments_dict.items():
        departure_dt = value['departureDateTime']
        arrival_dt = value['arrivalDateTime']

        rows.append({
            'id': key,
            'origin_place_id': value['originPlaceId'],
            'destination_place_id': value['destinationPlaceId'],
            'departure_year': departure_dt['year'],
            'departure_month': departure_dt['month'],
            'departure_day': departure_dt['day'],
            'departure_hour': departure_dt['hour'],
            'departure_minute': departure_dt['minute'],
            'departure_second': departure_dt['second'],
            'arrival_year': arrival_dt['year'],
            'arrival_month': arrival_dt['month'],
            'arrival_day': arrival_dt['day'],
            'arrival_hour': arrival_dt['hour'],
            'arrival_minute': arrival_dt['minute'],
            'arrival_second': arrival_dt['second'],
            'duration_minutes': value['durationInMinutes'],
            'marketing_flight_number': value['marketingFlightNumber'],
            'marketing_carrier_id': value['marketingCarrierId'],
            'operating_carrier_id': value['operatingCarrierId']
        })


    # create dataframe
    segments = pd.DataFrame(rows)

    # combine time columns
    #combine_to_datetime(segments, 'departure')
    #combine_to_datetime(segments, 'arrival')

    # rename id column
    return segments.rename(columns={'id': 'segment_id'})


# test
segments = clean_segments_table(flight_tables)
display(segments.head())


Unnamed: 0,segment_id,origin_place_id,destination_place_id,departure_year,departure_month,departure_day,departure_hour,departure_minute,departure_second,arrival_year,arrival_month,arrival_day,arrival_hour,arrival_minute,arrival_second,duration_minutes,marketing_flight_number,marketing_carrier_id,operating_carrier_id
0,14788-17075-2305220925-2305221210--32444,128668889,128667054,2023,5,22,9,25,0,2023,5,22,12,10,0,225,107,-32444,-32444
1,14788-17075-2305221220-2305221455--31757,128668889,128667054,2023,5,22,12,20,0,2023,5,22,14,55,0,215,899,-31757,-31757
2,14788-17075-2305221325-2305221605--32331,128668889,128667054,2023,5,22,13,25,0,2023,5,22,16,5,0,220,183,-32331,-32331
3,14788-17075-2305221330-2305221625--31444,128668889,128667054,2023,5,22,13,30,0,2023,5,22,16,25,0,235,201,-31444,-31444
4,14788-17075-2305221415-2305221655--32331,128668889,128667054,2023,5,22,14,15,0,2023,5,22,16,55,0,220,197,-32331,-32331


### Places

In [9]:
def clean_places_table(flight_tables):
    # subset
    places_dict = flight_tables['places']

    # format
    rows = []
    for key, value in places_dict.items():
        rows.append({
            'entity_id': value['entityId'],
            'parent_id': value['parentId'],
            'name': value['name'],
            'place_type': value['type'],
            'iata': value['iata'],
            'coordinates': value['coordinates']
        })


    # create dataframe
    return pd.DataFrame(rows)


# test
places = clean_places_table(flight_tables)
display(places.head())


Unnamed: 0,entity_id,parent_id,name,place_type,iata,coordinates
0,128667054,27547236.0,Taipei Taiwan Taoyuan,PLACE_TYPE_AIRPORT,TPE,
1,128668889,27542089.0,Tokyo Narita,PLACE_TYPE_AIRPORT,NRT,
2,27542089,29475330.0,Tokyo,PLACE_TYPE_CITY,TYO,
3,27547236,29475323.0,Taipei,PLACE_TYPE_CITY,TPE,
4,29475323,,Taiwan,PLACE_TYPE_COUNTRY,,


### Carriers

In [10]:
def clean_carriers_table(flight_tables):
    # subset
    carriers_dict = flight_tables['carriers']

    # format
    rows = []
    for key, value in carriers_dict.items():
        rows.append({
            'carrier_id': key,
            'name': value['name'],
            'alliance_id': value['allianceId'],
            'image_url': value['imageUrl'],
            'iata': value['iata']
        })



    # create dataframe
    return pd.DataFrame(rows)


# test
carriers = clean_carriers_table(flight_tables)
display(carriers.head())

Unnamed: 0,carrier_id,name,alliance_id,image_url,iata
0,-31444,Tigerair Taiwan,,https://logos.skyscnr.com/images/airlines/IT.png,IT
1,-31757,Scoot,-31987.0,https://logos.skyscnr.com/images/airlines/TR.png,TR
2,-31974,Peach,,https://logos.skyscnr.com/images/airlines/03.png,MM
3,-32164,Jetstar Japan,,https://logos.skyscnr.com/images/airlines/GK.png,GK
4,-32166,Jetstar,,https://logos.skyscnr.com/images/airlines/JQ.png,JQ


### Agents

In [11]:
def clean_agents_table(flight_tables):
    # subset
    agents_dict = flight_tables['agents']

    # format
    rows = []
    for key, value in agents_dict.items():
        rating_breakdown = value.get('ratingBreakdown') or {}
        rows.append({
            'agent_id': key,
            'name': value['name'],
            'agent_type': value['type'],
            'image_url': value['imageUrl'],
            'feedback_count': value['feedbackCount'],
            'rating': value['rating'],
            'customer_service': rating_breakdown.get('customerService', None),
            'reliable_prices': rating_breakdown.get('reliablePrices', None),
            'clear_extra_fees': rating_breakdown.get('clearExtraFees', None),
            'ease_of_booking': rating_breakdown.get('easeOfBooking', None),
            'other': rating_breakdown.get('other', None),
            'is_optimised_for_mobile': value['isOptimisedForMobile']
        })

    # create dataframe
    return pd.DataFrame(rows)


# test
agents = clean_agents_table(flight_tables)
display(agents.head())


Unnamed: 0,agent_id,name,agent_type,image_url,feedback_count,rating,customer_service,reliable_prices,clear_extra_fees,ease_of_booking,other,is_optimised_for_mobile
0,anai,ANA (All Nippon Airways),AGENT_TYPE_AIRLINE,https://logos.skyscnr.com/images/websites/anai...,220,3.68,5.0,3.772136,4.716648,3.583236,2.638724,True
1,arus,Mytrip,AGENT_TYPE_TRAVEL_AGENT,https://logos.skyscnr.com/images/websites/arus...,8424,3.25,4.856892,3.538052,4.064076,3.325836,2.217688,True
2,asia,Asiana Airlines,AGENT_TYPE_AIRLINE,https://logos.skyscnr.com/images/websites/asia...,180,4.2,5.0,4.574412,4.361616,4.361616,3.51044,True
3,bcom,Booking.com,AGENT_TYPE_TRAVEL_AGENT,https://logos.skyscnr.com/images/websites/bcom...,4381,3.62,4.983912,3.188784,4.415532,3.8495,3.050716,True
4,bfus,Bravofly,AGENT_TYPE_TRAVEL_AGENT,https://logos.skyscnr.com/images/websites/bfus...,2773,2.62,4.711384,2.962752,3.976468,1.48406,2.339328,True


### Alliances

In [12]:
def clean_alliances_table(flight_tables):
    # subset
    alliances_dict = flight_tables['alliances']

    # format
    rows = []
    for key, value in alliances_dict.items():
        rows.append({
            'alliance_id': key,
            'name': value['name']
        })

    # create dataframe
    return pd.DataFrame(rows)


# test
alliances = clean_alliances_table(flight_tables)
display(alliances.head())



Unnamed: 0,alliance_id,name
0,-31987,Value Alliance
1,-31998,SkyTeam
2,-31999,Star Alliance
3,-32000,OneWorld


## Combine

In [13]:
def create_full_journey_table(departure_airport, arrival_airport, itineraries_table, agents_table, segments_table, carriers_table, places_table):
    # sort lowest to higest price
    trips_df = itineraries_table.sort_values(by="price_amount", ascending=True)
    trips_df['price_amount'] = trips_df['price_amount'].astype(float)

    # drop unecesary columns
    trips_df = trips_df.drop(['deep_link', 'option_id', 'booking_code', 'fare_basis_code', 'price_update_status'], axis=1)


    # bring in the agent name
    trips_df = pd.merge(trips_df, agents_table[['agent_id', 'name']], on='agent_id')
    trips_df = trips_df.rename(columns={'name': 'agent_name'})
    trips_df = trips_df.drop('agent_id', axis=1)

    # bring in segment info
    trips_df = pd.merge(trips_df, segments_table, on='segment_id')

    # get carrier names
    trips_df = pd.merge(trips_df, carriers_table[['carrier_id', 'name']], left_on='marketing_carrier_id', right_on='carrier_id')
    trips_df = trips_df.rename(columns={'name': 'marketing_carrier_name'})
    trips_df = pd.merge(trips_df, carriers_table[['carrier_id', 'name']], left_on='operating_carrier_id', right_on='carrier_id')
    trips_df = trips_df.rename(columns={'name': 'operating_carrier_name'})
    trips_df = trips_df.drop(['marketing_carrier_id', 'operating_carrier_id', 'carrier_id_x', 'carrier_id_y'], axis=1)

    # bring in places
    trips_df = pd.merge(trips_df, places_table[['entity_id', 'name', 'iata']], left_on='origin_place_id', right_on='entity_id')
    trips_df = trips_df.rename(columns={'name': 'origin_place_name', 'iata': 'origin_place_iata' })
    trips_df = pd.merge(trips_df, places_table[['entity_id', 'name', 'iata']], left_on='destination_place_id', right_on='entity_id')
    trips_df = trips_df.rename(columns={'name': 'destination_place_name', 'iata': 'destination_place_iata'})
    trips_df = trips_df.drop(['origin_place_id', 'destination_place_id', 'entity_id_x', 'entity_id_y'], axis=1)

    # results don't only show airports requested, remove unrequested airports
    trips_df = trips_df[trips_df['origin_place_iata'] == departure_airport]
    return trips_df[trips_df['destination_place_iata'] == arrival_airport]


# test
trips_df = create_full_journey_table('NRT', 'TPE', itineraries, agents, segments, carriers, places)
display(trips_df.head(100))


Unnamed: 0,id,price_amount,price_unit,segment_id,transfer_type,agent_name,departure_year,departure_month,departure_day,departure_hour,...,arrival_minute,arrival_second,duration_minutes,marketing_flight_number,marketing_carrier_name,operating_carrier_name,origin_place_name,origin_place_iata,destination_place_name,destination_place_iata
0,14788-2305222215--31974-0-17075-2305230110,173260.0,PRICE_UNIT_MILLI,14788-17075-2305222215-2305230110--31974,TRANSFER_TYPE_MANAGED,eDreams,2023,5,22,22,...,10,0,235,627,Peach,Peach,Tokyo Narita,NRT,Taipei Taiwan Taoyuan,TPE
1,14788-2305221220--31757-0-17075-2305221455,187600.0,PRICE_UNIT_MILLI,14788-17075-2305221220-2305221455--31757,TRANSFER_TYPE_MANAGED,Trip.com,2023,5,22,12,...,55,0,215,899,Scoot,Scoot,Tokyo Narita,NRT,Taipei Taiwan Taoyuan,TPE
2,14788-2305221330--31444-0-17075-2305221625,251900.0,PRICE_UNIT_MILLI,14788-17075-2305221330-2305221625--31444,TRANSFER_TYPE_MANAGED,Trip.com,2023,5,22,13,...,25,0,235,201,Tigerair Taiwan,Tigerair Taiwan,Tokyo Narita,NRT,Taipei Taiwan Taoyuan,TPE
3,14788-2305221940--31444-0-17075-2305222235,297900.0,PRICE_UNIT_MILLI,14788-17075-2305221940-2305222235--31444,TRANSFER_TYPE_MANAGED,Trip.com,2023,5,22,19,...,35,0,235,203,Tigerair Taiwan,Tigerair Taiwan,Tokyo Narita,NRT,Taipei Taiwan Taoyuan,TPE
4,14788-2305222250--32166-0-17075-2305230140,187860.0,PRICE_UNIT_MILLI,14788-17075-2305222250-2305230140--32166,TRANSFER_TYPE_MANAGED,Jetstar,2023,5,22,22,...,40,0,230,11,Jetstar,Jetstar Japan,Tokyo Narita,NRT,Taipei Taiwan Taoyuan,TPE
5,14788-2305220925--32444-0-17075-2305221210,240900.0,PRICE_UNIT_MILLI,14788-17075-2305220925-2305221210--32444,TRANSFER_TYPE_MANAGED,China Airlines,2023,5,22,9,...,10,0,225,107,China Airlines,China Airlines,Tokyo Narita,NRT,Taipei Taiwan Taoyuan,TPE
6,14788-2305221430--32444-0-17075-2305221720,296990.0,PRICE_UNIT_MILLI,14788-17075-2305221430-2305221720--32444,TRANSFER_TYPE_MANAGED,Booking.com,2023,5,22,14,...,20,0,230,101,China Airlines,China Airlines,Tokyo Narita,NRT,Taipei Taiwan Taoyuan,TPE
7,14788-2305221540--32456-0-17075-2305221835,311200.0,PRICE_UNIT_MILLI,14788-17075-2305221540-2305221835--32456,TRANSFER_TYPE_MANAGED,SmartFares,2023,5,22,15,...,35,0,235,451,Cathay Pacific,Cathay Pacific,Tokyo Narita,NRT,Taipei Taiwan Taoyuan,TPE
8,14788-2305221325--32331-0-17075-2305221605,319000.0,PRICE_UNIT_MILLI,14788-17075-2305221325-2305221605--32331,TRANSFER_TYPE_MANAGED,BudgetAir,2023,5,22,13,...,5,0,220,183,EVA Air,EVA Air,Tokyo Narita,NRT,Taipei Taiwan Taoyuan,TPE
9,14788-2305221415--32331-0-17075-2305221655,319000.0,PRICE_UNIT_MILLI,14788-17075-2305221415-2305221655--32331,TRANSFER_TYPE_MANAGED,BudgetAir,2023,5,22,14,...,55,0,220,197,EVA Air,EVA Air,Tokyo Narita,NRT,Taipei Taiwan Taoyuan,TPE


## Single function

In [14]:
# set departure and arrival airports
departure_airport = 'NRT'
arrival_airport = 'TPE'

def get_flight_data(departure_airport, arrival_airport):

    flight_tables = get_flight_tables(departure_airport, arrival_airport)

    itineraries_table = clean_itineraries_table(flight_tables)
    agents_table = clean_agents_table(flight_tables)
    segments_table = clean_segments_table(flight_tables)
    carriers_table = clean_carriers_table(flight_tables)
    places_table =clean_places_table(flight_tables)

    return create_full_journey_table(departure_airport, arrival_airport, itineraries_table, agents_table, segments_table, carriers_table, places_table)

get_flight_data(departure_airport, arrival_airport)

Unnamed: 0,id,price_amount,price_unit,segment_id,transfer_type,agent_name,departure_year,departure_month,departure_day,departure_hour,...,arrival_minute,arrival_second,duration_minutes,marketing_flight_number,marketing_carrier_name,operating_carrier_name,origin_place_name,origin_place_iata,destination_place_name,destination_place_iata
0,14788-2305222215--31974-0-17075-2305230110,173260.0,PRICE_UNIT_MILLI,14788-17075-2305222215-2305230110--31974,TRANSFER_TYPE_MANAGED,eDreams,2023,5,22,22,...,10,0,235,627,Peach,Peach,Tokyo Narita,NRT,Taipei Taiwan Taoyuan,TPE
1,14788-2305221220--31757-0-17075-2305221455,187600.0,PRICE_UNIT_MILLI,14788-17075-2305221220-2305221455--31757,TRANSFER_TYPE_MANAGED,Trip.com,2023,5,22,12,...,55,0,215,899,Scoot,Scoot,Tokyo Narita,NRT,Taipei Taiwan Taoyuan,TPE
2,14788-2305221330--31444-0-17075-2305221625,251900.0,PRICE_UNIT_MILLI,14788-17075-2305221330-2305221625--31444,TRANSFER_TYPE_MANAGED,Trip.com,2023,5,22,13,...,25,0,235,201,Tigerair Taiwan,Tigerair Taiwan,Tokyo Narita,NRT,Taipei Taiwan Taoyuan,TPE
3,14788-2305221940--31444-0-17075-2305222235,297900.0,PRICE_UNIT_MILLI,14788-17075-2305221940-2305222235--31444,TRANSFER_TYPE_MANAGED,Trip.com,2023,5,22,19,...,35,0,235,203,Tigerair Taiwan,Tigerair Taiwan,Tokyo Narita,NRT,Taipei Taiwan Taoyuan,TPE
4,14788-2305222250--32166-0-17075-2305230140,187860.0,PRICE_UNIT_MILLI,14788-17075-2305222250-2305230140--32166,TRANSFER_TYPE_MANAGED,Jetstar,2023,5,22,22,...,40,0,230,11,Jetstar,Jetstar Japan,Tokyo Narita,NRT,Taipei Taiwan Taoyuan,TPE
5,14788-2305220925--32444-0-17075-2305221210,241800.0,PRICE_UNIT_MILLI,14788-17075-2305220925-2305221210--32444,TRANSFER_TYPE_MANAGED,China Airlines,2023,5,22,9,...,10,0,225,107,China Airlines,China Airlines,Tokyo Narita,NRT,Taipei Taiwan Taoyuan,TPE
6,14788-2305221430--32444-0-17075-2305221720,296990.0,PRICE_UNIT_MILLI,14788-17075-2305221430-2305221720--32444,TRANSFER_TYPE_MANAGED,Booking.com,2023,5,22,14,...,20,0,230,101,China Airlines,China Airlines,Tokyo Narita,NRT,Taipei Taiwan Taoyuan,TPE
7,14788-2305221540--32456-0-17075-2305221835,311200.0,PRICE_UNIT_MILLI,14788-17075-2305221540-2305221835--32456,TRANSFER_TYPE_MANAGED,SmartFares,2023,5,22,15,...,35,0,235,451,Cathay Pacific,Cathay Pacific,Tokyo Narita,NRT,Taipei Taiwan Taoyuan,TPE
8,14788-2305221325--32331-0-17075-2305221605,319000.0,PRICE_UNIT_MILLI,14788-17075-2305221325-2305221605--32331,TRANSFER_TYPE_MANAGED,BudgetAir,2023,5,22,13,...,5,0,220,183,EVA Air,EVA Air,Tokyo Narita,NRT,Taipei Taiwan Taoyuan,TPE
9,14788-2305221415--32331-0-17075-2305221655,319000.0,PRICE_UNIT_MILLI,14788-17075-2305221415-2305221655--32331,TRANSFER_TYPE_MANAGED,BudgetAir,2023,5,22,14,...,55,0,220,197,EVA Air,EVA Air,Tokyo Narita,NRT,Taipei Taiwan Taoyuan,TPE


## Loop

In [22]:
iata_codes_list = capital_cities['airport_iata'].tolist()
iata_codes_list[:5]

['KBL', 'EVN', 'GYD', 'BAH', 'DAC']

In [23]:
# get airport pairs without departing and arriving at same place
airport_pairs = [(iata_codes_list[i], iata_codes_list[j]) for i in range(len(iata_codes_list)) for j in range(len(iata_codes_list)) if i != j]
display(len(iata_codes_list))
display(len(airport_pairs))


46

2070

In [27]:
# use shorter if needed
if True == False:
    airport_pairs_for_run = airport_pairs[-5:]
    len(airport_pairs_for_run)
    display(airport_pairs_for_run)
else:
    airport_pairs_for_run = airport_pairs
    len(airport_pairs_for_run)
    display(airport_pairs_for_run[:5])



[('KBL', 'EVN'),
 ('KBL', 'GYD'),
 ('KBL', 'BAH'),
 ('KBL', 'DAC'),
 ('KBL', 'PBH')]

In [29]:
# init empty list
dataframes = []

# Loop through the airport pairs and fetch flight data for each pair
for departure, arrival in airport_pairs_for_run:
    print(f'departure: {departure}, arrival: {arrival}')
    df = get_flight_data(departure, arrival)
    dataframes.append(df)

# Concatenate all dataframes into a single dataframe
combined_dataframe = pd.concat(dataframes, ignore_index=True)
combine_to_datetime(combined_dataframe, 'arrival')
combine_to_datetime(combined_dataframe, 'departure')

# Now, combined_dataframe contains aggregated flight data for all airport pairs
display(combined_dataframe)

# save
timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
combined_dataframe.to_csv(f'../Data/FlightAPIData/{timestamp}_flight_api_data.csv', index=False)


departure: KBL, arrival: EVN


KeyError: 'price_amount'