Libraries

In [1]:
import pandas as pd
import requests


Helper functions

In [2]:
# combine date/time component columns into a single datetime column
def combine_to_datetime(dataframe, direction):
    """Collapse six ``{direction}_*`` component columns into one datetime column.

    Reads ``{direction}_year``, ``{direction}_month``, ``{direction}_day``,
    ``{direction}_hour``, ``{direction}_minute`` and ``{direction}_second``
    from ``dataframe``, stores the assembled timestamps in
    ``{direction}_datetime`` and drops the six component columns.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame holding the component columns. It is modified in place.
    direction : str
        Column prefix, e.g. ``'departure'`` or ``'arrival'``.

    Returns
    -------
    None
        The result is written to ``dataframe`` itself.

    Raises
    ------
    KeyError
        If any of the six component columns is missing.
    """
    parts = ['year', 'month', 'day', 'hour', 'minute', 'second']
    component_columns = [f'{direction}_{part}' for part in parts]

    # pd.to_datetime assembles timestamps from a frame whose columns are named
    # year/month/day/hour/minute/second, so strip the prefix on a copy instead
    # of round-tripping every value through a formatted string
    components = dataframe[component_columns].rename(
        columns=dict(zip(component_columns, parts))
    )
    dataframe[f'{direction}_datetime'] = pd.to_datetime(components)

    # drop the original component columns
    dataframe.drop(columns=component_columns, inplace=True)
    

Get data

In [3]:
# bring in key
with open('../Data/skyscanner_auth.txt', 'r') as file:
    # strip surrounding whitespace: a trailing newline in the key file would
    # otherwise become part of the x-api-key header value
    skyscanner_auth = file.read().strip()


In [4]:
# set up api
url = 'https://partners.api.skyscanner.net/apiservices/v3/flights/live/search/create'
headers = {'x-api-key': skyscanner_auth}

# set params: one adult in economy, single leg HND -> TPE on 2023-12-22
data = {
    'query': {
        'market': 'UK',
        'locale': 'en-GB',
        'currency': 'GBP',
        'query_legs': [{
            'origin_place_id': {'iata': 'HND'},
            'destination_place_id': {'iata': 'TPE'},
            'date': {'year': 2023, 'month': 12, 'day': 22}
        }],
        'adults': 1,
        'cabin_class': 'CABIN_CLASS_ECONOMY'
    }
}

# query; requests applies no timeout by default, so set one to keep a stalled
# connection from hanging the cell indefinitely
response = requests.post(url, headers=headers, json=data, timeout=30)

# check status: show the code, then stop here on a 4xx/5xx response instead of
# failing further down with a KeyError on the missing payload
print(response.status_code)
response.raise_for_status()

# store data: keep only the results section of the payload
response_store = response.json()['content']['results']


200


### Itineraries

In [6]:
# subset
itineraries_dict = response_store['itineraries']

# format: one row per fare (itinerary -> pricing option -> item -> fare)
rows = []
for key, value in itineraries_dict.items():
    for pricing_option in value['pricingOptions']:
        # the price_* columns hold the pricing option's price
        price = pricing_option['price']
        for item in pricing_option['items']:
            for fare in item['fares']:
                rows.append({
                    'id': key,
                    'price_amount': price['amount'],
                    'price_unit': price['unit'],
                    'price_update_status': price['updateStatus'],
                    'agent_id': item['agentId'],
                    'deep_link': item['deepLink'],
                    'segment_id': fare['segmentId'],
                    'booking_code': fare['bookingCode'],
                    'fare_basis_code': fare['fareBasisCode'],
                    'transfer_type': pricing_option['transferType'],
                    'option_id': pricing_option['id']
                })

# create dataframe
itineraries = pd.DataFrame(rows)

# print
display(itineraries.head(5))

Unnamed: 0,id,price_amount,price_unit,price_update_status,agent_id,deep_link,segment_id,booking_code,fare_basis_code,transfer_type,option_id
0,"12234-2312220635--32571,-32444-1-17075-2312221235",302000,PRICE_UNIT_MILLI,PRICE_UPDATE_STATUS_UNSPECIFIED,ctuk,https://skyscanner.pxf.io/c/2850210/1103265/13...,12234-14964-2312220635-2312220920--32571,,,TRANSFER_TYPE_PROTECTED_SELF_TRANSFER,WsfWxW9kecm0
1,"12234-2312220635--32571,-32444-1-17075-2312221235",302000,PRICE_UNIT_MILLI,PRICE_UPDATE_STATUS_UNSPECIFIED,ctuk,https://skyscanner.pxf.io/c/2850210/1103265/13...,14964-17075-2312221155-2312221235--32444,,,TRANSFER_TYPE_PROTECTED_SELF_TRANSFER,WsfWxW9kecm0
2,12234-2312220755--32184-0-17122-2312221055,443900,PRICE_UNIT_MILLI,PRICE_UPDATE_STATUS_UNSPECIFIED,ctuk,https://skyscanner.pxf.io/c/2850210/1103265/13...,12234-17122-2312220755-2312221055--32184,,,TRANSFER_TYPE_MANAGED,8Wkg-sM1Off5
3,12234-2312220755--32444-0-17122-2312221055,223000,PRICE_UNIT_MILLI,PRICE_UPDATE_STATUS_UNSPECIFIED,s1uk,https://skyscanner.pxf.io/c/2850210/1103265/13...,12234-17122-2312220755-2312221055--32444,,,TRANSFER_TYPE_MANAGED,7xL1BtQ_W_Bb
4,"12234-2312220830--32571,-32331-1-17075-2312221350",355000,PRICE_UNIT_MILLI,PRICE_UPDATE_STATUS_UNSPECIFIED,s1uk,https://skyscanner.pxf.io/c/2850210/1103265/13...,12234-11666-2312220830-2312221020--32571,,,TRANSFER_TYPE_MANAGED,NeMHOGINC7Ts


### Legs

In [7]:
# subset
legs_dict = response_store['legs']

# format: one row per (segment, marketing carrier, operating carrier) triple of
# each leg; the leg-level fields are repeated on every row of that leg
# NOTE(review): zip stops at the shortest of the three id lists, so segments are
# dropped silently if the lists differ in length -- confirm they are always parallel
rows = []
for leg_id, leg in legs_dict.items():
    departs = leg['departureDateTime']
    arrives = leg['arrivalDateTime']
    triples = zip(leg['segmentIds'], leg['marketingCarrierIds'], leg['operatingCarrierIds'])
    rows.extend(
        {
            'id': leg_id,
            'origin_place_id': leg['originPlaceId'],
            'destination_place_id': leg['destinationPlaceId'],
            'departure_year': departs['year'],
            'departure_month': departs['month'],
            'departure_day': departs['day'],
            'departure_hour': departs['hour'],
            'departure_minute': departs['minute'],
            'departure_second': departs['second'],
            'arrival_year': arrives['year'],
            'arrival_month': arrives['month'],
            'arrival_day': arrives['day'],
            'arrival_hour': arrives['hour'],
            'arrival_minute': arrives['minute'],
            'arrival_second': arrives['second'],
            'duration_minutes': leg['durationInMinutes'],
            'stop_count': leg['stopCount'],
            'marketing_carrier_id': marketing_id,
            'operating_carrier_id': operating_id,
            'segment_id': seg_id
        }
        for seg_id, marketing_id, operating_id in triples
    )

# create dataframe
legs = pd.DataFrame(rows)

# replace the year/month/... component columns with one datetime column each
for direction in ('departure', 'arrival'):
    combine_to_datetime(legs, direction)

# print
display(legs.head(5))

Unnamed: 0,id,origin_place_id,destination_place_id,duration_minutes,stop_count,marketing_carrier_id,operating_carrier_id,segment_id,departure_datetime,arrival_datetime
0,"12234-2312220635--32571,-32444-1-17075-2312221235",128667143,128667054,420,1,-32571,-32571,12234-14964-2312220635-2312220920--32571,2023-12-22 06:35:00,2023-12-22 12:35:00
1,"12234-2312220635--32571,-32444-1-17075-2312221235",128667143,128667054,420,1,-32444,-32444,14964-17075-2312221155-2312221235--32444,2023-12-22 06:35:00,2023-12-22 12:35:00
2,12234-2312220755--32184-0-17122-2312221055,128667143,104120388,240,0,-32184,-32444,12234-17122-2312220755-2312221055--32184,2023-12-22 07:55:00,2023-12-22 10:55:00
3,12234-2312220755--32444-0-17122-2312221055,128667143,104120388,240,0,-32444,-32444,12234-17122-2312220755-2312221055--32444,2023-12-22 07:55:00,2023-12-22 10:55:00
4,"12234-2312220830--32571,-32331-1-17075-2312221350",128667143,128667054,380,1,-32571,-32571,12234-11666-2312220830-2312221020--32571,2023-12-22 08:30:00,2023-12-22 13:50:00


### Segments

In [8]:
# subset
segments_dict = response_store['segments']


def _segment_row(segment_id, segment):
    """Flatten one entry of the segments mapping into a single flat row dict."""
    departs = segment['departureDateTime']
    arrives = segment['arrivalDateTime']
    return {
        'id': segment_id,
        'origin_place_id': segment['originPlaceId'],
        'destination_place_id': segment['destinationPlaceId'],
        'departure_year': departs['year'],
        'departure_month': departs['month'],
        'departure_day': departs['day'],
        'departure_hour': departs['hour'],
        'departure_minute': departs['minute'],
        'departure_second': departs['second'],
        'arrival_year': arrives['year'],
        'arrival_month': arrives['month'],
        'arrival_day': arrives['day'],
        'arrival_hour': arrives['hour'],
        'arrival_minute': arrives['minute'],
        'arrival_second': arrives['second'],
        'duration_minutes': segment['durationInMinutes'],
        'marketing_flight_number': segment['marketingFlightNumber'],
        'marketing_carrier_id': segment['marketingCarrierId'],
        'operating_carrier_id': segment['operatingCarrierId']
    }


# format: one row per segment
rows = [_segment_row(segment_id, segment) for segment_id, segment in segments_dict.items()]

# create dataframe
segments = pd.DataFrame(rows)

# replace the year/month/... component columns with one datetime column each
for direction in ('departure', 'arrival'):
    combine_to_datetime(segments, direction)

# print
display(segments.head(5))

Unnamed: 0,id,origin_place_id,destination_place_id,duration_minutes,marketing_flight_number,marketing_carrier_id,operating_carrier_id,departure_datetime,arrival_datetime
0,11666-17075-2312221215-2312221350--32331,128667957,128667054,155,105,-32331,-32331,2023-12-22 12:15:00,2023-12-22 13:50:00
1,12234-11666-2312220830-2312221020--32571,128667143,128667957,110,243,-32571,-32571,2023-12-22 08:30:00,2023-12-22 10:20:00
2,12234-14964-2312220635-2312220920--32571,128667143,128668904,165,993,-32571,-32571,2023-12-22 06:35:00,2023-12-22 09:20:00
3,12234-17122-2312220755-2312221055--32184,128667143,104120388,240,5041,-32184,-32444,2023-12-22 07:55:00,2023-12-22 10:55:00
4,12234-17122-2312220755-2312221055--32444,128667143,104120388,240,223,-32444,-32444,2023-12-22 07:55:00,2023-12-22 10:55:00


### Places

In [9]:
# subset
places_dict = response_store['places']

# format: one row per place; the mapping's keys are not needed because each
# entry carries its own entityId
rows = [
    {
        'entity_id': place['entityId'],
        'parent_id': place['parentId'],
        'name': place['name'],
        'place_type': place['type'],
        'iata': place['iata'],
        'coordinates': place['coordinates']
    }
    for place in places_dict.values()
]

# create dataframe
places = pd.DataFrame(rows)

# print
display(places.head(5))

Unnamed: 0,entity_id,parent_id,name,place_type,iata,coordinates
0,104120388,27547236,Taipei Sung Shan,PLACE_TYPE_AIRPORT,TSA,
1,128667054,27547236,Taipei Taiwan Taoyuan,PLACE_TYPE_AIRPORT,TPE,
2,128667143,27542089,Tokyo Haneda,PLACE_TYPE_AIRPORT,HND,
3,128667957,27541740,Fukuoka,PLACE_TYPE_AIRPORT,FUK,
4,128668904,27540768,Okinawa Naha,PLACE_TYPE_AIRPORT,OKA,


### Carriers

In [10]:
# subset
carriers_dict = response_store['carriers']

# format: one row per carrier, with the mapping key used as the carrier id
rows = [
    {
        'carrier_id': carrier_id,
        'name': carrier['name'],
        'alliance_id': carrier['allianceId'],
        'image_url': carrier['imageUrl'],
        'iata': carrier['iata']
    }
    for carrier_id, carrier in carriers_dict.items()
]

# create dataframe
carriers = pd.DataFrame(rows)

# print
display(carriers.head(5))

Unnamed: 0,carrier_id,name,alliance_id,image_url,iata
0,-32184,Japan Airlines,-32000,https://logos.skyscnr.com/images/airlines/JL.png,JL
1,-32331,EVA Air,-31999,https://logos.skyscnr.com/images/airlines/BR.png,BR
2,-32444,China Airlines,-31998,https://logos.skyscnr.com/images/airlines/CI.png,CI
3,-32571,ANA (All Nippon Airways),-31999,https://logos.skyscnr.com/images/airlines/NH.png,NH


### Agents

In [11]:
# subset
agents_dict = response_store['agents']

# format: one row per agent, with the mapping key used as the agent id
rows = []
for agent_id, agent in agents_dict.items():
    # ratingBreakdown may be absent or empty; fall back to an empty mapping so
    # the five breakdown columns come out as None for that agent
    breakdown = agent.get('ratingBreakdown') or {}
    agent_row = {
        'agent_id': agent_id,
        'name': agent['name'],
        'agent_type': agent['type'],
        'image_url': agent['imageUrl'],
        'feedback_count': agent['feedbackCount'],
        'rating': agent['rating'],
        'customer_service': breakdown.get('customerService'),
        'reliable_prices': breakdown.get('reliablePrices'),
        'clear_extra_fees': breakdown.get('clearExtraFees'),
        'ease_of_booking': breakdown.get('easeOfBooking'),
        'other': breakdown.get('other'),
        'is_optimised_for_mobile': agent['isOptimisedForMobile']
    }
    rows.append(agent_row)

# create dataframe
agents = pd.DataFrame(rows)

# print
display(agents.head(5))

Unnamed: 0,agent_id,name,agent_type,image_url,feedback_count,rating,customer_service,reliable_prices,clear_extra_fees,ease_of_booking,other,is_optimised_for_mobile
0,a341,telme,AGENT_TYPE_TRAVEL_AGENT,https://logos.skyscnr.com/images/websites/a341...,0,0.0,,,,,,False
1,anai,ANA (All Nippon Airways),AGENT_TYPE_AIRLINE,https://logos.skyscnr.com/images/websites/anai...,1949,4.71,5.0,4.642408,4.903352,4.758384,4.5361,True
2,asia,Asiana Airlines,AGENT_TYPE_AIRLINE,https://logos.skyscnr.com/images/websites/asia...,109,3.92,5.0,4.359468,4.679732,4.359468,2.277732,True
3,bcuk,Booking.com,AGENT_TYPE_TRAVEL_AGENT,https://logos.skyscnr.com/images/websites/bcuk...,28087,3.84,4.985368,3.82584,4.339,4.253968,2.974952,True
4,cair,China Airlines,AGENT_TYPE_AIRLINE,https://logos.skyscnr.com/images/websites/cair...,164,4.69,5.0,4.624796,4.8928,4.785596,4.463992,True


### Alliances

In [12]:
# subset
alliances_dict = response_store['alliances']

# format: one row per alliance, with the mapping key used as the alliance id
rows = [
    {'alliance_id': alliance_id, 'name': alliance['name']}
    for alliance_id, alliance in alliances_dict.items()
]

# create dataframe
alliances = pd.DataFrame(rows)

# print
display(alliances.head(5))



Unnamed: 0,alliance_id,name
0,-31998,SkyTeam
1,-31999,Star Alliance
2,-32000,OneWorld
