# FLIGHT TRACKER

What should this program do?
- scrape data from multiple sources about airline ticket prices for a specific date, origin, destination (with/without luggage), travel class, etc.
- save the data in a database
- return the (historical and current) data/graph when asked
- predict future prices
- be available on a webpage and as a mobile app
- work as a bot in WhatsApp
- send email alerts

## General - Imports

In [1]:
import datetime as dt
import json
import os
import urllib.parse
import urllib.request
from datetime import timedelta
from urllib.parse import urlencode

# import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
# import beakerx
# from beakerx import *
# from beakerx.object import beakerx

# KIWI - API

In [2]:
# The Kiwi API key is a credential, so it is read from the environment instead
# of being stored in the notebook. Set KIWI_API_KEY before starting the kernel.
apikey = os.environ.get('KIWI_API_KEY', '')
if not apikey:
    print("KIWI_API_KEY is not set - requests to the Kiwi API will be rejected")

In [3]:
# Convert dictionary to dataframe

def dict_to_df(d):
    """
    Flatten search results into a DataFrame with one row per flight segment.

    Parameters
    ----------
    d : iterable of dict
        The options of a search response. Each option must provide 'id',
        'deep_link', 'price', a 'duration' dict with 'total', 'departure' and
        'return', and a 'route' list of segment dicts.

    Returns
    -------
    pandas.DataFrame
        Option-level fields are repeated on every segment row of that option.
        The column set is fixed, so an empty input yields an empty frame with
        the same columns.
    """
    columns = ['option_id', 'deep_link', 'price', 'duration_total',
               'duration_departure', 'duration_return', 'route_id_unique', 'route_id_comb', 'is_return',
               'from', 'to', 'airline', 'departure_time', 'arrival_time']

    # Collect plain records and build the frame once: appending to a DataFrame
    # row by row copies the whole frame on every iteration, and
    # DataFrame.append no longer exists in pandas >= 2.0.
    records = [
        {'option_id': i['id'],
         'deep_link': i['deep_link'],
         'price': i['price'],
         # 'bags_price': i['bags_price'],
         'duration_total': i['duration']['total'],
         'duration_departure': i['duration']['departure'],
         'duration_return': i['duration']['return'],
         'route_id_unique': j['id'],
         'route_id_comb': j['combination_id'],
         'is_return': j['return'],
         'from': j['flyFrom'],
         'to': j['flyTo'],
         'airline': j['airline'],
         'departure_time': j['local_departure'],
         'arrival_time': j['local_arrival'],
         }
        for i in d
        for j in i['route']
    ]

    return pd.DataFrame(records, columns=columns)

In [4]:
# Query parameters for a one-way search (BCN -> BOG).
# Cabin codes: M (economy), W (economy premium), C (business), F (first class).
inputs_oneway = dict(
    flight_type='oneway',
    fly_from='BCN',
    fly_to='BOG',
    date_from='06/04/2020',
    date_to='05/04/2021',
    max_fly_duration=20,
    selected_cabins='M',
    # only_weekends=0,
    partner_market='es',
    locale='us',
    curr='EUR',
    price_from=0,
    price_to=2000,
    max_stopovers=1,   # do not change
    # vehicle_type='aircraft',   # aircraft (default), bus, train
    # sort='price',
    # asc=1,
)

In [3]:
# Saved round-trip searches. Each one has its own name so that choosing the
# active search is an explicit assignment rather than "whichever dict comes last".
# Cabin codes: M (economy), W (economy premium), C (business), F (first class).

# BCN <-> BOG, Iberia only, 18-21 nights at the destination.
inputs_round_bcn_bog = {
    'flight_type': 'round',
    'fly_from': 'BCN',
    'fly_to': 'BOG',
    'date_from': '01/10/2020',
    'date_to': '15/12/2020',
    'return_from': '01/10/2020',
    'return_to': '15/12/2020',
    'nights_in_dst_from': 18,
    'nights_in_dst_to': 21,
    'max_fly_duration': 15,
    'selected_cabins': 'M',
#     'only_weekends': 0,
    'partner_market': 'es',
    'locale': 'us',
    'curr': 'EUR',
    'price_from': 0,
    'price_to': 800,
    'max_stopovers': 1,   # do not change
    'select_airlines': 'IB',
#     'select_airlines_exclude': False,
    'vehicle_type': 'aircraft', # aircraft (default), bus, train
    'sort': 'price',
    'asc': 1
}

# ATH <-> MEX, any airline, up to 3 stopovers.
inputs_round_ath_mex = {
    'flight_type': 'round',
    'fly_from': 'ATH',
    'fly_to': 'MEX',
    'date_from': '01/10/2020',
    'date_to': '15/12/2020',
#     'return_from': '01/10/2020',
#     'return_to': '15/12/2020',
#     'nights_in_dst_from': 5,
#     'nights_in_dst_to': 15,
    'max_fly_duration': 25,
    'selected_cabins': 'M',
    'partner_market': 'es',
    'locale': 'us',
    'curr': 'EUR',
    'price_from': 0,
    'price_to': 1200,
    'max_stopovers': 3,
    'sort': 'price',
    'asc': 1
}

# The search that is actually sent to the API; switch it here.
inputs_round = inputs_round_ath_mex

In [6]:
# TODO: check whether a "multiple" search is possible at all - so far it seems it is not.
# Placeholder with no parameters until that is settled.
inputs_multiple = dict()

In [7]:
# Query string of the selected search; it is also reused as the cache file name.
inputs_str = urlencode(inputs_round)

# Full request URL: API key first, then the search parameters.
url = "https://kiwicom-prod.apigee.net/v2/search?" + "&".join(['apikey=' + apikey, inputs_str])

In [8]:
# Use the cached response for this exact query if there is one; otherwise
# download it and store it so that the next run does not hit the API again.
cache_path = 'data/' + inputs_str + '.json'

try:
    with open(cache_path) as json_file:
        response_json = json.load(json_file)
    print("File found, no need to download!")
except IOError:
    print("File not found!")
    response = requests.get(url, timeout=30)
    print(response)
    # Stop here on an HTTP error instead of parsing an error page as results.
    response.raise_for_status()
    print("File was downloaded from kiwi")
    response_json = response.json()

    # Caching is best-effort: a failure to write must not lose the download.
    try:
        os.makedirs('data', exist_ok=True)
        with open(cache_path, 'w') as json_file:
            json.dump(response_json, json_file)
    except OSError as err:
        print("Could not cache the response:", err)

File not found!
<Response [200]>
File was downloaded from kiwi


In [9]:
# response_json['data'][0]

## Pandas approach

In [327]:
df_flights = dict_to_df(response_json['data'])

# Express the three durations in hours, rounded to two decimals.
duration_columns = ['duration_total', 'duration_departure', 'duration_return']
for column in duration_columns:
    as_float = df_flights[column].astype('float')
    df_flights[column] = (as_float / 60 / 60).round(2)

# Transform datetimes into the desired format (currently disabled)
# for dtime in ['departure_time', 'arrival_time']:
#     df_flights[dtime] = pd.to_datetime(df_flights[dtime]).apply(lambda x: dt.datetime.strftime(x, '%a %d-%b-%Y %H:%M'))
#     df_flights[dtime] = pd.to_datetime(df_flights[dtime])

# Human-readable direction label derived from the 0/1 'is_return' flag.
direction_labels = {0: 'outbound', 1: 'inbound'}
for flag, label in direction_labels.items():
    df_flights.loc[df_flights['is_return'] == flag, 'outbound/inbound'] = label

In [328]:
# 'duration' holds the duration of the leg a segment belongs to:
# the departure duration for outbound rows, the return duration for inbound rows.
outbound_rows = df_flights['is_return'] == 0
inbound_rows = df_flights['is_return'] == 1
df_flights.loc[outbound_rows, 'duration'] = df_flights['duration_departure']
df_flights.loc[inbound_rows, 'duration'] = df_flights['duration_return']

In [329]:
# df_flights['Option'] = range(len(df_flights))
# df_flights['Option'] = df_flights.groupby('id').count()
# df_flights_g = df_flights.groupby(df_flights.columns.tolist()).count()

In [330]:
sortby_parameter = 'price'

In [331]:
# Add ID counts
df_flights.sort_values([sortby_parameter, 'option_id'], inplace=True)
df_flights['option#'] = df_flights.groupby([sortby_parameter, 'option_id']).ngroup() +1   # add price in groupby for counting the groups from cheapest to most expensive
# df_flights['flight#'] = df_flights.groupby(['price', 'option_id', 'route_id_comb']).cumsum() +1
# df_flights['flight#'] = df_flights['id_route'].str[-1].astype('int') +1

In [332]:
df_flights

Unnamed: 0,option_id,deep_link,price,duration_total,duration_departure,duration_return,route_id_unique,route_id_comb,is_return,from,to,airline,departure_time,arrival_time,outbound/inbound,duration,option#
0,01af049b486f48818c1eef8b_0|01af049b486f48818c1...,https://www.kiwi.com/deep?from=BCN&to=BOG&depa...,636,25.33,12.75,12.58,01af049b486f48818c1eef8b_0,01af049b486f48818c1eef8b,0,BCN,MAD,IB,2020-10-08T10:05:00.000Z,2020-10-08T11:30:00.000Z,outbound,12.75,1
1,01af049b486f48818c1eef8b_0|01af049b486f48818c1...,https://www.kiwi.com/deep?from=BCN&to=BOG&depa...,636,25.33,12.75,12.58,01af049b486f48818c1eef8b_1,01af049b486f48818c1eef8b,0,MAD,BOG,IB,2020-10-08T12:35:00.000Z,2020-10-08T15:50:00.000Z,outbound,12.75,1
2,01af049b486f48818c1eef8b_0|01af049b486f48818c1...,https://www.kiwi.com/deep?from=BCN&to=BOG&depa...,636,25.33,12.75,12.58,01af049b486f48818c1eef8b_2,01af049b486f48818c1eef8b,1,BOG,MAD,IB,2020-10-26T18:15:00.000Z,2020-10-27T10:15:00.000Z,inbound,12.58,1
3,01af049b486f48818c1eef8b_0|01af049b486f48818c1...,https://www.kiwi.com/deep?from=BCN&to=BOG&depa...,636,25.33,12.75,12.58,01af049b486f48818c1eef8b_3,01af049b486f48818c1eef8b,1,MAD,BCN,IB,2020-10-27T11:30:00.000Z,2020-10-27T12:50:00.000Z,inbound,12.58,1
4,01af049b48764888053849e8_0|01af049b48764888053...,https://www.kiwi.com/deep?from=BCN&to=BOG&depa...,636,25.33,12.75,12.58,01af049b48764888053849e8_0,01af049b48764888053849e8,0,BCN,MAD,IB,2020-10-15T10:05:00.000Z,2020-10-15T11:30:00.000Z,outbound,12.75,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
111,01af0a22488900003c6b17e9_0|0a22049b4889489e4c0...,https://www.kiwi.com/deep?from=BCN&to=BOG&depa...,760,28.83,14.58,14.25,0a2201af489f000049f7d6c2_0,0a2201af489f000049f7d6c2,1,MAD,BCN,IB,2020-11-25T13:10:00.000Z,2020-11-25T14:30:00.000Z,inbound,14.25,28
112,01af0a22488a00008cde0a07_0|0a22049b488a489ef08...,https://www.kiwi.com/deep?from=BCN&to=BOG&depa...,775,28.83,14.58,14.25,01af0a22488a00008cde0a07_0,01af0a22488a00008cde0a07,0,BCN,MAD,IB,2020-11-04T08:00:00.000Z,2020-11-04T09:25:00.000Z,outbound,14.58,29
113,01af0a22488a00008cde0a07_0|0a22049b488a489ef08...,https://www.kiwi.com/deep?from=BCN&to=BOG&depa...,775,28.83,14.58,14.25,0a22049b488a489ef083cc30_0,0a22049b488a489ef083cc30,0,MAD,BOG,IB,2020-11-04T12:10:00.000Z,2020-11-04T16:35:00.000Z,outbound,14.58,29
114,01af0a22488a00008cde0a07_0|0a22049b488a489ef08...,https://www.kiwi.com/deep?from=BCN&to=BOG&depa...,775,28.83,14.58,14.25,0a22049b488a489ef083cc30_1,0a22049b488a489ef083cc30,1,BOG,MAD,IB,2020-11-24T18:15:00.000Z,2020-11-25T10:15:00.000Z,inbound,14.25,29


In [354]:
# Reduced view for reading: identifiers and deep links are dropped.
display_columns = ['option#', 'price', 'duration_total',
                   'duration', 'outbound/inbound',
                   'from', 'to', 'airline', 'departure_time',
                   'arrival_time']
df_flights_clean = df_flights[display_columns].sort_values(by=['price', 'option#'])
df_flights_clean

Unnamed: 0,option#,price,duration_total,duration,outbound/inbound,from,to,airline,departure_time,arrival_time
0,1,636,25.33,12.75,outbound,BCN,MAD,IB,2020-10-08T10:05:00.000Z,2020-10-08T11:30:00.000Z
1,1,636,25.33,12.75,outbound,MAD,BOG,IB,2020-10-08T12:35:00.000Z,2020-10-08T15:50:00.000Z
2,1,636,25.33,12.58,inbound,BOG,MAD,IB,2020-10-26T18:15:00.000Z,2020-10-27T10:15:00.000Z
3,1,636,25.33,12.58,inbound,MAD,BCN,IB,2020-10-27T11:30:00.000Z,2020-10-27T12:50:00.000Z
4,2,636,25.33,12.75,outbound,BCN,MAD,IB,2020-10-15T10:05:00.000Z,2020-10-15T11:30:00.000Z
...,...,...,...,...,...,...,...,...,...,...
111,28,760,28.83,14.25,inbound,MAD,BCN,IB,2020-11-25T13:10:00.000Z,2020-11-25T14:30:00.000Z
112,29,775,28.83,14.58,outbound,BCN,MAD,IB,2020-11-04T08:00:00.000Z,2020-11-04T09:25:00.000Z
113,29,775,28.83,14.58,outbound,MAD,BOG,IB,2020-11-04T12:10:00.000Z,2020-11-04T16:35:00.000Z
114,29,775,28.83,14.25,inbound,BOG,MAD,IB,2020-11-24T18:15:00.000Z,2020-11-25T10:15:00.000Z


In [359]:
# TableDisplay(df_flights)

In [334]:
pd.set_option('display.max_rows', 20)

# Grouping by every column turns the whole table into a nested row index.
grouping_keys = ['option#', 'price', 'duration_total', 'outbound/inbound', 'duration',
                 'from', 'to', 'airline', 'departure_time', 'arrival_time']
df_flights_clean.groupby(grouping_keys).count()

option#,price,duration_total,outbound/inbound,duration,from,to,airline,departure_time,arrival_time
1,636,25.33,inbound,12.58,BOG,MAD,IB,2020-10-26T18:15:00.000Z,2020-10-27T10:15:00.000Z
1,636,25.33,inbound,12.58,MAD,BCN,IB,2020-10-27T11:30:00.000Z,2020-10-27T12:50:00.000Z
1,636,25.33,outbound,12.75,BCN,MAD,IB,2020-10-08T10:05:00.000Z,2020-10-08T11:30:00.000Z
1,636,25.33,outbound,12.75,MAD,BOG,IB,2020-10-08T12:35:00.000Z,2020-10-08T15:50:00.000Z
2,636,25.33,inbound,12.58,BOG,MAD,IB,2020-11-02T18:15:00.000Z,2020-11-03T10:15:00.000Z
...,...,...,...,...,...,...,...,...,...
28,760,28.83,outbound,14.58,MAD,BOG,IB,2020-11-03T12:10:00.000Z,2020-11-03T16:35:00.000Z
29,775,28.83,inbound,14.25,BOG,MAD,IB,2020-11-24T18:15:00.000Z,2020-11-25T10:15:00.000Z
29,775,28.83,inbound,14.25,MAD,BCN,IB,2020-11-25T13:10:00.000Z,2020-11-25T14:30:00.000Z
29,775,28.83,outbound,14.58,BCN,MAD,IB,2020-11-04T08:00:00.000Z,2020-11-04T09:25:00.000Z


In [336]:
df_flights

Unnamed: 0,option_id,deep_link,price,duration_total,duration_departure,duration_return,route_id_unique,route_id_comb,is_return,from,to,airline,departure_time,arrival_time,outbound/inbound,duration,option#
0,01af049b486f48818c1eef8b_0|01af049b486f48818c1...,https://www.kiwi.com/deep?from=BCN&to=BOG&depa...,636,25.33,12.75,12.58,01af049b486f48818c1eef8b_0,01af049b486f48818c1eef8b,0,BCN,MAD,IB,2020-10-08T10:05:00.000Z,2020-10-08T11:30:00.000Z,outbound,12.75,1
1,01af049b486f48818c1eef8b_0|01af049b486f48818c1...,https://www.kiwi.com/deep?from=BCN&to=BOG&depa...,636,25.33,12.75,12.58,01af049b486f48818c1eef8b_1,01af049b486f48818c1eef8b,0,MAD,BOG,IB,2020-10-08T12:35:00.000Z,2020-10-08T15:50:00.000Z,outbound,12.75,1
2,01af049b486f48818c1eef8b_0|01af049b486f48818c1...,https://www.kiwi.com/deep?from=BCN&to=BOG&depa...,636,25.33,12.75,12.58,01af049b486f48818c1eef8b_2,01af049b486f48818c1eef8b,1,BOG,MAD,IB,2020-10-26T18:15:00.000Z,2020-10-27T10:15:00.000Z,inbound,12.58,1
3,01af049b486f48818c1eef8b_0|01af049b486f48818c1...,https://www.kiwi.com/deep?from=BCN&to=BOG&depa...,636,25.33,12.75,12.58,01af049b486f48818c1eef8b_3,01af049b486f48818c1eef8b,1,MAD,BCN,IB,2020-10-27T11:30:00.000Z,2020-10-27T12:50:00.000Z,inbound,12.58,1
4,01af049b48764888053849e8_0|01af049b48764888053...,https://www.kiwi.com/deep?from=BCN&to=BOG&depa...,636,25.33,12.75,12.58,01af049b48764888053849e8_0,01af049b48764888053849e8,0,BCN,MAD,IB,2020-10-15T10:05:00.000Z,2020-10-15T11:30:00.000Z,outbound,12.75,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
111,01af0a22488900003c6b17e9_0|0a22049b4889489e4c0...,https://www.kiwi.com/deep?from=BCN&to=BOG&depa...,760,28.83,14.58,14.25,0a2201af489f000049f7d6c2_0,0a2201af489f000049f7d6c2,1,MAD,BCN,IB,2020-11-25T13:10:00.000Z,2020-11-25T14:30:00.000Z,inbound,14.25,28
112,01af0a22488a00008cde0a07_0|0a22049b488a489ef08...,https://www.kiwi.com/deep?from=BCN&to=BOG&depa...,775,28.83,14.58,14.25,01af0a22488a00008cde0a07_0,01af0a22488a00008cde0a07,0,BCN,MAD,IB,2020-11-04T08:00:00.000Z,2020-11-04T09:25:00.000Z,outbound,14.58,29
113,01af0a22488a00008cde0a07_0|0a22049b488a489ef08...,https://www.kiwi.com/deep?from=BCN&to=BOG&depa...,775,28.83,14.58,14.25,0a22049b488a489ef083cc30_0,0a22049b488a489ef083cc30,0,MAD,BOG,IB,2020-11-04T12:10:00.000Z,2020-11-04T16:35:00.000Z,outbound,14.58,29
114,01af0a22488a00008cde0a07_0|0a22049b488a489ef08...,https://www.kiwi.com/deep?from=BCN&to=BOG&depa...,775,28.83,14.58,14.25,0a22049b488a489ef083cc30_1,0a22049b488a489ef083cc30,1,BOG,MAD,IB,2020-11-24T18:15:00.000Z,2020-11-25T10:15:00.000Z,inbound,14.25,29


In [9]:
# Sort so that, within every option, the segments are in chronological order:
# the first row of a leg is then its first departure and the last row its
# final arrival, which the positional lookups below rely on.
df_flights.sort_values([sortby_parameter, 'option_id', 'departure_time'], inplace=True)

time_format = '%a %d-%b-%Y %H:%M'
# Print the first four options, or fewer if the search returned fewer.
n_to_print = min(4, df_flights['option#'].nunique())

for i in range(1, n_to_print + 1):
    option_rows = df_flights.loc[df_flights['option#'] == i]

    print('Option: ', i)
    print('price: ', option_rows['price'].iloc[0])
    print('total duration: ', option_rows['duration_total'].iloc[0])

    for j in ['outbound', 'inbound']:
        leg_rows = option_rows.loc[option_rows['outbound/inbound'] == j]
        if leg_rows.empty:   # nothing to report for a direction without segments
            continue

        # Every segment destination except the last one is an intermediate stop.
        stops = leg_rows['to'].iloc[:-1].tolist()

        print('\n')
        print('{} (duration: {})'.format(j, leg_rows['duration'].iloc[0]))
        print('from: ', leg_rows['from'].iloc[0])
        print('via {} stop(s): {}'.format(len(stops), stops))
        print('to: ', leg_rows['to'].iloc[-1])
        print('departure time: ',
              pd.to_datetime(leg_rows['departure_time'].iloc[0]).strftime(time_format))
        print('arrival time: ',
              pd.to_datetime(leg_rows['arrival_time'].iloc[-1]).strftime(time_format))

    print('------------------------------------------------------')

NameError: name 'df_flights' is not defined

In [75]:
# Export ordered by option number. The column is called 'option#'; make sure
# the target directory exists before writing.
os.makedirs('output', exist_ok=True)
df_flights.sort_values('option#').to_csv('output/df_flights.csv', index=False)

## OOP approach

In [9]:
class Flight:
    """
    A single flight segment.

    Every key of the segment dictionary becomes an attribute of the same name
    holding the corresponding value.
    """

    def __init__(self, flight_d):
        for name, value in flight_d.items():
            setattr(self, name, value)


class Outbound:
    """
    The outbound leg of an option.

    `flights` holds one `Flight` per segment whose 'return' flag equals 0, in
    the order in which the segments appear in `flights_list`.
    """

    def __init__(self, flights_list):
        self.flights = [Flight(segment) for segment in flights_list
                        if segment['return'] == 0]
        
        
class Inbound:
    """
    The inbound (return) leg of an option.

    `flights` holds one `Flight` per segment whose 'return' flag equals 1, in
    the order in which the segments appear in `flights_list`.
    """

    def __init__(self, flights_list):
        self.flights = [Flight(segment) for segment in flights_list
                        if segment['return'] == 1]
        
class Option:
    """
    One priced itinerary of a search response.

    Keeps a selection of the option-level fields (id, deep link, price and the
    three durations) and splits the segments found under 'route' into an
    `Outbound` and an `Inbound` leg.
    """

    def __init__(self, option_d):
        # TODO: copying every key with setattr (as Flight does) only suits the
        # first-level dictionary; for now the wanted fields are picked by hand:
        #         for key in option_d:
        #             setattr(self, key, option_d[key])
        self.id = option_d['id']
        self.deep_link = option_d['deep_link']
        self.price = option_d['price']

        durations = option_d['duration']
        self.duration_total = durations['total']
        self.duration_outbound = durations['departure']
        self.duration_inbound = durations['return']

        segments = option_d['route']
        self.outbound = Outbound(segments)
        self.inbound = Inbound(segments)
        
        
class ResponseData:
    """
    All options of a search response.

    `options` holds one `Option` per entry of `data`, in the same order.
    """

    def __init__(self, data):
        self.options = [Option(option_d) for option_d in data]
    

In [10]:
def to_hm(s):
    """Format a duration given in seconds as '<hours>h<minutes>m' (seconds are dropped)."""
    total_minutes = s / 60
    hours = int(total_minutes // 60)
    mins = int(total_minutes - hours * 60)
    return '{}h{}m'.format(hours, mins)

In [11]:
options = ResponseData(response_json['data']).options

In [79]:
_AIRLINE_COLUMNS = ['ind', 'full_name', 'full_name2', 'two_digit_code', 'three_digit_code',
                    'name_in_capital', 'country', 'yesno']
_airline_tables = {}   # mapping_path -> DataFrame, filled on first use


def _load_airline_table(mapping_path):
    """Read the airline table at `mapping_path` once and reuse it afterwards."""
    if mapping_path not in _airline_tables:
        _airline_tables[mapping_path] = pd.read_csv(
            mapping_path,
            header=None,             # the file has no header row: the first line is data
            names=_AIRLINE_COLUMNS,
            dtype=str,
            keep_default_na=False,   # otherwise the airline code 'NA' is read as a missing value
        )
    return _airline_tables[mapping_path]


def airline_name(airline_code, mapping_path='data/airlines.dat'):
    """
    Translate an airline code into the airline's full name.

    The code is looked up first among the two-character codes and then among
    the three-character codes of the OpenFlights airline table
    (https://github.com/jpatokal/openflights).

    Parameters
    ----------
    airline_code : str
        Code to translate.
    mapping_path : str, optional
        Location of the airline table.

    Returns
    -------
    str
        The full name of the first matching airline. `airline_code` itself is
        returned when the table is missing, the code is empty or unknown, or
        the matching entry has no name.
    """
    if not airline_code:
        return airline_code

    try:
        carriers = _load_airline_table(mapping_path)
    except FileNotFoundError:
        print("Airline mapping table was not found")
        return airline_code

    for code_column in ('two_digit_code', 'three_digit_code'):
        names = carriers.loc[carriers[code_column] == airline_code, 'full_name']
        if not names.empty:
            full_name = names.iloc[0]
            # '\N' is the table's marker for a missing value.
            return airline_code if full_name in ('', '\\N') else full_name

    return airline_code

In [35]:
airline_name('RNAAX')

'RNAAX'

In [19]:
response_json['data'][0]

{'id': '01af049b4889489d0f50336f_0|01af049b4889489d0f50336f_1|01af049b4889489d0f50336f_2|01af049b4889489d0f50336f_3',
 'route': [{'fare_basis': 'AON0NQB6',
   'fare_category': 'M',
   'fare_classes': 'A',
   'fare_family': '',
   'last_seen': '2020-04-21T23:19:24.000Z',
   'refresh_timestamp': '2020-04-21T23:19:24.000Z',
   'return': 0,
   'bags_recheck_required': False,
   'guarantee': False,
   'id': '01af049b4889489d0f50336f_0',
   'combination_id': '01af049b4889489d0f50336f',
   'cityTo': 'Madrid',
   'cityFrom': 'Barcelona',
   'cityCodeFrom': 'BCN',
   'cityCodeTo': 'MAD',
   'flyTo': 'MAD',
   'flyFrom': 'BCN',
   'airline': 'IB',
   'operating_carrier': 'IB',
   'equipment': '321',
   'flight_no': 935,
   'vehicle_type': 'aircraft',
   'operating_flight_no': '935',
   'local_arrival': '2020-11-03T11:00:00.000Z',
   'utc_arrival': '2020-11-03T10:00:00.000Z',
   'local_departure': '2020-11-03T09:35:00.000Z',
   'utc_departure': '2020-11-03T08:35:00.000Z'},
  {'fare_basis': 'AON0N

In [55]:
options[0].outbound.flights[1:-1]

[<__main__.Flight at 0x2da2c758a58>,
 <__main__.Flight at 0x2da2c758748>,
 <__main__.Flight at 0x2da2c758e10>,
 <__main__.Flight at 0x2da2c7589b0>]

In [74]:
options[0].outbound.flights[:-1]

[<__main__.Flight at 0x2da2c72f748>]

In [75]:
# Intermediate stops of the first option's outbound leg: the destination of
# every segment except the last one.
outbound_segments = options[0].outbound.flights
lst_stops = [segment.flyTo for segment in outbound_segments[:-1]]

lst_stops

['MAD']

In [13]:
def _short_link(url):
    """Shorten `url` with TinyURL; return `url` unchanged if that is not possible."""
    try:
        import pyshorteners
    except ImportError:
        return url
    try:
        return pyshorteners.Shortener().tinyurl.short(url)
    except Exception:
        # The short link is cosmetic: a network or service failure must not
        # prevent the option from being printed.
        return url


def print_n_options(options, n):
    """
    Print a readable summary of the first `n` options.

    For every option the link, price and total duration are printed, followed
    by one section per direction (outbound, inbound) with the airlines, the
    origin, the intermediate stops, the destination and the first departure /
    last arrival time. A direction without flights is skipped.

    Parameters
    ----------
    options : list of Option
        Options in the order in which they should be listed.
    n : int
        Maximum number of options to print; fewer are printed when `options`
        is shorter.
    """
    time_in = '%Y-%m-%dT%H:%M:%S.000Z'
    time_out = '%a %d-%b-%Y %H:%M'

    # Numbering starts at 1 for the reader while indexing starts at 0, so the
    # first option in the list is included.
    for number, option in enumerate(options[:n], start=1):
        print('--OPTION ', number)
        print('link: ', _short_link(option.deep_link))
        print('price: ', option.price)
        print('total duration: ', to_hm(option.duration_total))

        legs = [('outbound', option.duration_outbound, option.outbound),
                ('inbound', option.duration_inbound, option.inbound)]

        for label, duration, leg in legs:
            flights = leg.flights
            if not flights:
                continue

            print('\n')
            print('{} (duration: {})'.format(label, to_hm(duration)))

            # Airlines in order of first appearance, without repetitions.
            lst_airlines = []
            for flight in flights:
                a_n = airline_name(flight.airline)
                if a_n not in lst_airlines:
                    lst_airlines.append(a_n)

            print('list of airlines: ', lst_airlines)

            print('from: ', flights[0].flyFrom)

            # The destination of every segment except the last is a stop.
            lst_stops = [flight.flyTo for flight in flights[:-1]]
            print('via {} stop(s): {}'.format(len(lst_stops), lst_stops))

            print('to: ', flights[-1].flyTo)

            first_departure = dt.datetime.strptime(flights[0].local_departure, time_in)
            print('departure time: ', first_departure.strftime(time_out))

            last_arrival = dt.datetime.strptime(flights[-1].local_arrival, time_in)
            print('arrival time: ', last_arrival.strftime(time_out))

        print('\n')
        print('\n')

In [14]:
import pyshorteners
print_n_options(options, 3)

ModuleNotFoundError: No module named 'pyshorteners'

# IBERIA - SELENIUM

In [754]:
date_from = dt.datetime(2020, 11, 7)
date_to = dt.datetime(2020, 11, 28)
first_destination = 'BOG'
second_destination = 'MEX'

# Form fields of the Iberia search, in the order in which they are sent.
# Fields that belong to leg N carry the suffix 0N, except the date text and
# year fields, which are numbered N + 1.
inputs_iberia = {'bookingMarket': 'ES', 'TRIP_TYPE': 3}

# Legs 1 and 2: BCN -> first destination, second destination -> BCN.
booked_legs = [('BCN', first_destination, date_from),
               (second_destination, 'BCN', date_to)]
for leg, (begin_city, end_city, day) in enumerate(booked_legs, start=1):
    inputs_iberia.update({
        f'BEGIN_CITY_0{leg}': begin_city,
        f'END_CITY_0{leg}': end_city,
        f'text-date-from{leg + 1}': day.strftime('%m/%d/%Y'),
        f'BEGIN_DAY_0{leg}': day.strftime('%d'),
        f'BEGIN_MONTH_0{leg}': day.strftime('%Y%m'),
        f'BEGIN_YEAR{leg + 1}_01': day.strftime('%Y'),
        f'BEGIN_HOUR_0{leg}': '0000',
    })

# Legs 3 and 4 are not used: their fields are sent empty.
for leg in (3, 4):
    inputs_iberia.update(dict.fromkeys([
        f'text-from{leg + 1}-visible',
        f'BEGIN_CITY_0{leg}',
        f'text-to{leg + 1}-visible',
        f'END_CITY_0{leg}',
        f'text-date-from{leg + 1}',
        f'BEGIN_DAY_0{leg}',
        f'BEGIN_MONTH_0{leg}',
        f'BEGIN_YEAR{leg + 1}_01',
        f'BEGIN_HOUR_0{leg}',
    ], ''))

inputs_iberia.update({'numTrayectos': 2, 'FARE_TYPE': 'R', 'ADT': 1})

inputs_str = urlencode(inputs_iberia)

In [710]:
# from selenium.common.exceptions import WebDriverException

# while True:
#     try:
#         #do somethings
#     except selenium.common.exceptions.WebDriverException as e:
#         if 'chrome not reachable' in str(e):
#             os.system('taskkill /FI "WindowTitle eq YourTitleIfExistsOrDeleteThisLine*" /T /F')

In [711]:
# url = "https://www.iberia.com/flights/?bookingMarket=ES&TRIP_TYPE=3&text-from2-visible=Barcelona%2520(BCN)&BEGIN_CITY_01=BCN&text-to2-visible=Bogota%2520(BOG)&END_CITY_01=BOG&text-date-from2=11/07/2020&BEGIN_DAY_01=07&BEGIN_MONTH_01=202011&BEGIN_YEAR2_01=2020&BEGIN_HOUR_01=0000&text-from3-visible=Mexico%2520City%2520(MEX)&BEGIN_CITY_02=MEX&text-to3-visible=Barcelona%2520(BCN)&END_CITY_02=BCN&text-date-from3=11/28/2020&BEGIN_DAY_02=28&BEGIN_MONTH_02=202011&BEGIN_YEAR3_01=2020&BEGIN_HOUR_02=0000&text-from4-visible=&BEGIN_CITY_03=&text-to4-visible=&END_CITY_03=&text-date-from4=&BEGIN_DAY_03=&BEGIN_MONTH_03=&BEGIN_YEAR4_01=&BEGIN_HOUR_03=&text-from5-visible=&BEGIN_CITY_04=&text-to5-visible=&END_CITY_04=&text-date-from5=&BEGIN_DAY_04=&BEGIN_MONTH_04=&BEGIN_YEAR5_01=&BEGIN_HOUR_04=&numTrayectos=2&FARE_TYPE=R&ADT=1&CHD=0&INF=0&YCD=0&YTH=0#!/availability"

In [796]:
from selenium.common.exceptions import NoSuchElementException

In [777]:
import time 

In [807]:
driver.close()

WebDriverException: Message: chrome not reachable
  (Session info: chrome=80.0.3987.163)


In [1]:
def _retry(action, errors, attempts=30, delay=0.5):
    """
    Run `action` until it succeeds and return its result.

    Before every attempt the function waits `delay` seconds so the page can
    render. An exception of type `errors` triggers the next attempt; after
    `attempts` failures a TimeoutError is raised instead of waiting forever.
    """
    for _ in range(attempts):
        time.sleep(delay)
        try:
            return action()
        except errors:
            continue
    raise TimeoutError('page element did not appear after {} attempts'.format(attempts))


flight_label_css = "label.ib-box-select-radio__radio-label.ib-box-select-radio__radio-label--full-header"

# The chromedriver location is machine-specific: CHROMEDRIVER_PATH overrides the default.
chromedriver_path = os.environ.get('CHROMEDRIVER_PATH', 'C:/Users/yanis/Documents/chromedriver.exe')
driver = webdriver.Chrome(chromedriver_path)
driver.get("https://www.iberia.com/flights/?" + inputs_str)
time.sleep(5)

# Accept Cookies button click
driver.find_element(By.ID, 'onetrust-accept-btn-handler').click()

# Click on economy for first flight (once, as soon as the button exists)
_retry(lambda: driver.find_element(By.ID, 'bbki-slice-info-cabin-0-0-E-btn').click(),
       NoSuchElementException)

html = driver.find_elements(By.CLASS_NAME, "col-md-12")[1].get_attribute('innerHTML')
soup = BeautifulSoup(html, 'html.parser')
soup_prices = soup.find("div", class_="ib-box-select-radio__header-right ib-box-select-radio__price-long")
# Price for economy - optimal
price_first = list(soup_prices.children)[1].contents[0]
print("Price for the first ticket is: ", price_first)

# Click to choose the first flight
time.sleep(1)
driver.find_elements(By.CSS_SELECTOR, flight_label_css)[1].click()

# Click to choose the first economy to see the plans
_retry(lambda: driver.find_elements(By.CSS_SELECTOR, "span.ib-box-mini-fare__box-price")[3].click(),
       IndexError)

# Click to choose the second flight
time.sleep(1)
driver.find_elements(By.CSS_SELECTOR, flight_label_css)[3].click()

# Price for both flights
price_both = _retry(
    lambda: driver.find_elements(
        By.CSS_SELECTOR, "span.ib-text.ib-text--especial.ib-text--price-big"
    )[0].get_attribute('innerHTML'),
    IndexError)

print("Price for both tickets is: ", price_both)

NameError: name 'webdriver' is not defined

In [None]:
# soup3 = soup.find("div", class_="ib-box-select-radio__header-right ib-box-select-radio__price-long")

In [None]:
# wait = WebDriverWait(driver, 5)
# wait.until(EC.element_to_be_clickable((By.ID, 'onetrust-accept-btn-handler'))).click()

In [776]:
# timeout = 10
# try:
#     element_present = EC.presence_of_element_located((By.ID, 'onetrust-accept-btn-handler'))
#     WebDriverWait(driver, timeout).until(element_present)
# except TimeoutException:
#     print("Timed out waiting for page to load")

In [757]:
# driver.refresh()

In [698]:
# for i in range(len(driver.find_elements_by_css_selector("span.ib-text.ib-text--block"))):
#     print(i)
#     print(driver.find_elements_by_css_selector("span.ib-text.ib-text--block")[i].get_attribute('innerHTML'))

In [739]:
# Price for economy - optimal
# driver.find_elements_by_css_selector("span.ib-text.ib-text--block")[27].get_attribute('innerHTML')
# driver.find_elements_by_css_selector("span.ib-text.ib-text--block")[31].get_attribute('innerHTML')

In [768]:
# driver.implicitly_wait(1) # in seconds

In [205]:
# HTML from `<html>`
# html = driver.execute_script("return document.documentElement.outerHTML;")
# HTML from `<body>`
# body = driver.execute_script("return document.body.outerHTML;")

In [203]:
# page = requests.get(url)

In [242]:
# page = urllib.request.urlopen(url)
# soup = BeautifulSoup(page, 'html.parser')
# print(soup)