# description

**purpose**: search Skiplagged for flights across multiple departure dates and itineraries (combinations of source and destination airports)

# setup

In [None]:
# dependencies

# rare
import geopy.distance
import airportsdata
import requests
import concurrent
import json
import pandas as pd
import numpy as np

# common
import os
from time import sleep
import datetime
from datetime import date, timedelta


# functions

In [None]:
def skiplagged_api_get(url, seconds=15, wait_seconds=5):
    """Poll a skiplagged search URL and return the last JSON response.

    The URL is requested repeatedly, ``wait_seconds`` apart, and only the
    decoded body of the final request is returned.
    NOTE(review): presumably the repeated calls give the server time to
    complete the search -- confirm against the API behaviour.

    Args:
        url: fully formed search URL.
        seconds: total polling budget in seconds.
        wait_seconds: pause between two consecutive requests, in seconds.

    Returns:
        The decoded JSON body of the last request.

    Raises:
        Whatever ``requests.get`` / ``response.json()`` raise (network
        errors, non-JSON body).
    """
    # For positive inputs this yields the same number of requests as before
    # (ceil(seconds / wait_seconds)). A non-positive budget or interval now
    # still performs one request instead of failing with UnboundLocalError or
    # ZeroDivisionError.
    call_times = seconds / wait_seconds if wait_seconds > 0 else 0
    attempt = 0
    while True:
        response_dict = requests.get(url).json()
        attempt += 1
        if attempt >= call_times:
            break
        # Only pause when another request follows; sleeping after the final
        # response would delay the caller without changing the result.
        sleep(wait_seconds)
    return response_dict

In [None]:
def process_response(response_dict, n=5):
    """Summarise the first ``n`` outbound itineraries of a search response.

    Args:
        response_dict: decoded search response. It must provide
            ``['itineraries']['outbound']`` (a list of entries carrying
            ``'flight'`` and ``'one_way_price'``) and ``['flights']``, a
            mapping from flight id to an entry with ``'segments'`` and
            ``'duration'``.
        n: number of leading outbound itineraries to keep.
            NOTE(review): presumably the API lists them cheapest first --
            confirm.

    Returns:
        A ``(flights, details)`` tuple. ``flights`` is a list of dicts with
        the keys ``flight_id``, ``cost`` (``one_way_price / 100``),
        ``duration`` (``duration / 3600``), ``departure`` (departure time of
        the first segment) and ``arrival`` (arrival time of the last
        segment). ``details`` maps every selected flight id to its raw entry
        from ``response_dict['flights']``.
    """
    flights = []
    # Accumulate details for every selected flight. Previously the variable
    # was rebound on each iteration, so only the last flight survived (and an
    # empty selection returned a list rather than a mapping).
    details = {}
    for itinerary in response_dict['itineraries']['outbound'][:n]:
        flight_id = itinerary['flight']
        flight_details = response_dict['flights'][flight_id]
        segments = flight_details['segments']
        flights.append({
            'flight_id': flight_id,
            'cost': itinerary['one_way_price']/100,
            'duration': flight_details['duration']/60/60,
            'departure': segments[0]['departure']['time'],
            'arrival': segments[-1]['arrival']['time'],
        })
        details[flight_id] = flight_details
    return flights, details

In [None]:
def get_airports_by_country(country):
    """Return the IATA codes of every airport whose ``country`` field equals ``country``.

    The comparison is an exact match against the ``'country'`` value stored
    in the airportsdata table.
    """
    table = airportsdata.load('IATA')  # mapping keyed by IATA code
    return [code for code, entry in table.items() if entry['country'] == country]

In [None]:
def get_closest_airports(airport_code, distance_km):
    """Return the IATA codes of all airports within ``distance_km`` of ``airport_code``.

    Args:
        airport_code: IATA code of the reference airport; it must be a key of
            the airportsdata table (otherwise a KeyError is raised).
        distance_km: maximum distance in kilometres, inclusive.

    Returns:
        List of IATA codes, in the iteration order of the airport table.
    """
    table = airportsdata.load('IATA')  # mapping keyed by IATA code
    origin = table[airport_code]
    origin_point = (origin['lat'], origin['lon'])

    def km_from_origin(entry):
        # Distance between the reference airport and `entry`, in kilometres.
        target_point = (entry['lat'], entry['lon'])
        return geopy.distance.distance(origin_point, target_point).km

    return [code for code, entry in table.items()
            if km_from_origin(entry) <= distance_km]

In [None]:
def generate_urls(src_airports, dst_airports, dpt_dates):
    """Build one search URL per (source, destination, departure date) combination.

    Args:
        src_airports: source airport codes.
        dst_airports: destination airport codes.
        dpt_dates: departure dates, as strings accepted by ``generate_url``.

    Returns:
        List of URLs ordered with the source airport varying slowest and the
        departure date varying fastest.
    """
    # The former `N`, `i` and `dt` locals were never read; dropping them also
    # removes the requirement that the inputs support len().
    return [
        generate_url(src, dst, dpt)
        for src in src_airports
        for dst in dst_airports
        for dpt in dpt_dates
    ]

In [None]:
def generate_url(src, dst, dpt):
    """Return the skiplagged search URL for a single one-way query.

    Args:
        src: value placed in the ``from`` query parameter.
        dst: value placed in the ``to`` query parameter.
        dpt: value placed in the ``depart`` query parameter.

    The remaining parameters are fixed: empty ``return``, ``format=v3``,
    one adult and zero children. Values are interpolated as-is (no URL
    encoding is applied).
    """
    rtn, fmt, alt, cld = '', 'v3', '1', '0'
    return f"https://skiplagged.com/api/search.php?from={src}&to={dst}&depart={dpt}&return={rtn}&format={fmt}&counts[adults]={alt}&counts[children]={cld}"

In [None]:
def make_parallel_requests(request_strings, max_workers=10):
    """
    Makes parallel requests to the skiplagged API.

    Args:
        request_strings: iterable of search URLs; each one is passed to
            ``skiplagged_api_get`` with its default polling settings.
        max_workers: size of the thread pool (defaults to the previously
            hard-coded 10).

    Returns:
        List of decoded responses, in the same order as ``request_strings``.
        An exception raised by any request propagates from this call.
    """
    # A bare `import concurrent` does not load the `futures` submodule, so
    # `concurrent.futures` is only reachable if some other module happened to
    # import it first. Import it explicitly here.
    from concurrent.futures import ThreadPoolExecutor

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Materialise inside the `with` so callers get a reusable list rather
        # than a one-shot iterator.
        responses = list(executor.map(skiplagged_api_get, request_strings))
    return responses

In [None]:
# sequential calls: old version
def get_all_combinations(src_airports, dst_airports, dpt_dates, save_path):
    """Query every (source, destination, date) combination one after another.

    For each combination the search URL is polled, the first five itineraries
    are summarised, and each summary is appended as a JSON object to
    ``f"{save_path}itinerary_<timestamp>.txt"``. Objects are written back to
    back with no separator.

    Args:
        src_airports: source airport codes (must support ``len``).
        dst_airports: destination airport codes (must support ``len``).
        dpt_dates: departure date strings (must support ``len``).
        save_path: prefix prepended verbatim to the output file name, so a
            directory needs its trailing separator.

    Returns:
        List of result dicts: ``src``, ``dst``, ``depart_date`` plus the keys
        produced by ``process_response``.
    """
    results = []
    N = len(src_airports)*len(dst_airports)*len(dpt_dates)
    dt = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
    fout = f"{save_path}itinerary_{dt}.txt"
    i = 0
    for src in src_airports:
        for dst in dst_airports:
            for dpt in dpt_dates:
                i += 1
                print(f'processing {i}/{N}: {src}, {dst}, {dpt}')
                # Reuse the shared URL builder instead of duplicating the
                # query string here.
                url = generate_url(src, dst, dpt)
                response_dict = skiplagged_api_get(url, seconds=15, wait_seconds=5)
                best_flights, _ = process_response(response_dict, n=5)

                records = [
                    {'src': src, 'dst': dst, 'depart_date': dpt, **bf}
                    for bf in best_flights
                ]
                if records:
                    # One open per response rather than one per record; the
                    # bytes written are unchanged.
                    with open(fout, "a") as f:
                        f.writelines(json.dumps(record) for record in records)
                    results.extend(records)

    return results

In [None]:
def process_response_list(response_list, fout):
    """Summarise a batch of search responses and append the results to ``fout``.

    For every response the source/destination are taken from
    ``response['info']``, the departure date from the first ten characters of
    the first listed flight's first-segment departure time, and the first five
    itineraries are summarised via ``process_response``. Each result is
    appended to ``fout`` as a JSON object, with no separator between objects.

    Args:
        response_list: iterable of decoded search responses.
        fout: path of the output file (opened in append mode).

    Returns:
        List of result dicts: ``src``, ``dst``, ``depart_date`` plus the keys
        produced by ``process_response``.
    """
    results = []

    for response_dict in response_list:
        flights = response_dict.get('flights')
        if not flights:
            # A response without flights has nothing to report. Previously
            # this raised IndexError and aborted the whole batch.
            continue

        src = response_dict['info']['from']['airports'][0]
        dst = response_dict['info']['to']['airports'][0]
        first_flight = next(iter(flights.values()))
        dpt = first_flight['segments'][0]['departure']['time'][:10]

        best_flights, _ = process_response(response_dict, n=5)
        records = [
            {'src': src, 'dst': dst, 'depart_date': dpt, **bf}
            for bf in best_flights
        ]
        if records:
            # One open per response rather than one per record; the bytes
            # written are unchanged.
            with open(fout, "a") as f:
                f.writelines(json.dumps(record) for record in records)
            results.extend(records)
    return results

In [None]:
def run_parallel_process_requests(request_strings, fout):
    """Fetch a batch of URLs concurrently, then summarise and persist them.

    Args:
        request_strings: search URLs handed to ``make_parallel_requests``.
        fout: output file path handed to ``process_response_list``.

    Returns:
        The list of result dicts returned by ``process_response_list``.
    """
    # Drain the responses into a list before processing them.
    fetched = list(make_parallel_requests(request_strings))
    return process_response_list(fetched, fout)

In [None]:
def get_all_combinations_partially_parallel(src_airports, dst_airports, dpt_dates, save_path, batch_size=10):
    """Query every (source, destination, date) combination in parallel batches.

    All URLs are generated up front and then processed ``batch_size`` at a
    time; each batch is fetched concurrently and its results are appended to
    ``f"{save_path}itinerary_<timestamp>.txt"``.

    Args:
        src_airports: source airport codes.
        dst_airports: destination airport codes.
        dpt_dates: departure date strings.
        save_path: prefix prepended verbatim to the output file name, so a
            directory needs its trailing separator.
        batch_size: number of URLs per batch (defaults to the previously
            hard-coded 10).

    Returns:
        Concatenated list of result dicts from all batches.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    urls = generate_urls(src_airports, dst_airports, dpt_dates)
    dt = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
    fout = f"{save_path}itinerary_{dt}.txt"

    final_results = []
    # Stepping through the list covers the final partial batch too. The old
    # code used the loop variable after the loop, which raised NameError for
    # fewer than 10 URLs, and ran an empty extra batch for exact multiples.
    for start in range(0, len(urls), batch_size):
        request_strings = urls[start:start + batch_size]
        final_results += run_parallel_process_requests(request_strings, fout)

    return final_results

In [None]:
def generate_dates(start, end):
    """Return every date from ``start`` to ``end`` inclusive as ``YYYY-MM-DD`` strings.

    Args:
        start: first date, formatted ``%Y-%m-%d``.
        end: last date, formatted ``%Y-%m-%d``.

    Returns:
        List of date strings in ascending order; empty when ``end`` is
        earlier than ``start``.
    """
    fmt = '%Y-%m-%d'
    first_day = datetime.datetime.strptime(start, fmt)
    last_day = datetime.datetime.strptime(end, fmt)
    span_days = (last_day - first_day).days

    dpt_dates = []
    for offset in range(span_days + 1):
        day = first_day + timedelta(days=offset)
        dpt_dates.append(day.strftime(fmt))
    return dpt_dates

In [None]:
def read_output(filename):
    """Load an itinerary file and return its entries sorted by ascending cost.

    The file is expected to hold JSON objects written back to back with no
    separator (e.g. ``{...}{...}{...}``). Whitespace between objects is
    tolerated.

    Args:
        filename: path of the itinerary file.

    Returns:
        List of entry dicts sorted by their ``'cost'`` value; empty for an
        empty file.

    Raises:
        json.JSONDecodeError: if the content is not a sequence of JSON values.
    """
    with open(filename) as f:
        text = f.read()

    # Decode the concatenated objects one at a time. Splitting on '}{' broke
    # for a single-entry file (it produced '{...}}'), and an empty file raised
    # IndexError.
    decoder = json.JSONDecoder()
    list_of_entry_dicts = []
    pos, end = 0, len(text)
    while pos < end:
        if text[pos].isspace():
            # raw_decode does not accept leading whitespace.
            pos += 1
            continue
        entry_dict, pos = decoder.raw_decode(text, pos)
        list_of_entry_dicts.append(entry_dict)

    return sorted(list_of_entry_dicts, key=lambda entry: entry['cost'])

In [None]:
def create_flight_calendar(results):
    """Pick, for every departure date, the itinerary with the lowest adjusted cost.

    Two fixed surcharges are added to ``cost``:
      * by source airport: OTP +20, IST +35, SOF +5, anything else +35
        (stored in ``adjusted_src_cost``);
      * by destination airport: OAK +10, SFO +10, SJC +30, anything else +30
        (stored, on top of the source adjustment, in ``adjusted_cost``).
    NOTE(review): the surcharges presumably model extra travel cost to/from
    each airport -- confirm.

    Args:
        results: list of dicts with at least ``src``, ``dst``, ``cost`` and
            ``depart_date``.

    Returns:
        DataFrame holding, per ``depart_date``, the row with the minimum
        ``adjusted_cost``.
    """
    df = pd.DataFrame(results)
    src, dst = df['src'], df['dst']

    # First matching condition wins, mirroring a chain of nested np.where calls.
    src_fee = np.select([src == 'OTP', src == 'IST', src == 'SOF'],
                        [20, 35, 5],
                        default=35)
    df['adjusted_src_cost'] = df['cost'] + src_fee

    dst_fee = np.select([dst == 'OAK', dst == 'SFO', dst == 'SJC'],
                        [10, 10, 30],
                        default=30)
    df['adjusted_cost'] = df['adjusted_src_cost'] + dst_fee

    cheapest_rows = df.groupby('depart_date')['adjusted_cost'].idxmin()
    return df.loc[cheapest_rows]

# manual

In [None]:
# list of closest source airports
# The table is kept in the notebook-global `airports`; the distance cell
# further down indexes it directly.
airports = airportsdata.load('IATA')  # key is IATA code

airport_code = 'SFO'
distance_km = 100
# IATA codes of every airport at most `distance_km` km away from `airport_code`.
relevant_airports = get_closest_airports(airport_code, distance_km)
relevant_airports  # bare expression: shown as the cell output

In [None]:
# distance between 2 airports
# Relies on the global `airports` table loaded in an earlier cell.
a = 'MUC'
src = (airports[a]['lat'], airports[a]['lon'])  # (lat, lon) of the first airport
a = 'SOF'  # `a` is reused for the second airport
dst = (airports[a]['lat'], airports[a]['lon'])  # (lat, lon) of the second airport
dist = geopy.distance.distance(src, dst).km  # distance in kilometres
dist  # bare expression: shown as the cell output


In [None]:
# list of closest destination airports

dst = 'MUC'  # NOTE: rebinds the global `dst`, which held a (lat, lon) tuple in the previous cell
distance_km = 1200.0
dst_airports = get_closest_airports(dst, distance_km)
# Shown as the cell output: number of airports found and whether SOF is among them.
len(dst_airports), 'SOF' in dst_airports

# execute

In [None]:
# save location
# The output file name is appended to SAVE_PATH by plain string formatting
# (f"{save_path}itinerary_...txt"), so the trailing slash is required.

SAVE_PATH = 'output/itineraries/'


# Create the output directory (including parents) on first use.
if not os.path.exists(SAVE_PATH):
    os.makedirs(SAVE_PATH, exist_ok=True)

## one-way

In [None]:
# search space: final list of source, destination airports and departure times

# First configuration. Every name assigned here is reassigned by the second
# configuration just below, so only the second one reaches generate_dates.
src_airports = ['OAK', 'SFO', 'SJC']
dst_airports = ['SOF', 'IST', 'OTP']
start = '2022-06-02'
end = '2022-06-14'

# Second configuration (the one in effect): directions swapped, September dates.
src_airports = ['SOF', 'IST', 'OTP']
dst_airports = ['OAK', 'SFO', 'SJC']
start = '2022-09-01'
end = '2022-09-30'

# One 'YYYY-MM-DD' string per day from `start` to `end`, both inclusive.
dpt_dates = generate_dates(start, end)

In [None]:
# run in partially parallel way (10x reduction in time)
# Results are also appended to a timestamped file under SAVE_PATH as a side effect.
final_results = get_all_combinations_partially_parallel(src_airports, dst_airports, dpt_dates, SAVE_PATH)
# Cheapest itineraries first.
sorted_results = sorted(final_results, key = lambda i: i['cost'])
# 117 combos take 5m 2s
for s in sorted_results:
    print(f"{s['src']} - {s['dst']}, {s['depart_date']}, {s['cost']}, {s['duration']}")


In [None]:
# Best itinerary per departure date (after the airport surcharges applied by
# create_flight_calendar) for the results fetched above.
calendar = create_flight_calendar(sorted_results)
calendar  # bare expression: shown as the cell output

In [None]:
# old way: run sequentially
# The code below is wrapped in a string literal so the cell does not execute it.
'''
results = get_all_combinations(src_airports, dst_airports, dpt_dates, save_path=SAVE_PATH)
sorted_results = sorted(results, key = lambda i: i['cost'])
# 435 combos take 2h 11m 47 s
'''

## read results

In [None]:
# List the itinerary files saved so far (shown as the cell output).
os.listdir('output/itineraries')

In [None]:
# Load one previously saved run; read_output returns its entries sorted by cost.
filename = 'output/itineraries/itinerary_20220617212601.txt'
sorted_list = read_output(filename)
for s in sorted_list:
    print(f"{s['src']} - {s['dst']}, {s['depart_date']}, {s['cost']}, {s['duration']}")



In [None]:
# Best itinerary per departure date for the run loaded from file.
# NOTE: rebinds the global `calendar` set by the earlier calendar cell.
calendar = create_flight_calendar(sorted_list)
calendar  # bare expression: shown as the cell output

## two-way

In [None]:
# search space: list of source, destination airports, departure and return times

src_airports = ['OAK', 'SFO', 'SJC']
dst_airports = ['SOF', 'IST', 'OTP']
start = '2022-06-02'
end = '2022-06-14'
ret_start = '2022-07-15'
ret_end = '2022-08-01'

# One 'YYYY-MM-DD' string per day in each window, both ends inclusive.
dpt_dates = generate_dates(start, end)
ret_dates = generate_dates(ret_start, ret_end)


In [None]:
# TODO: implement skiplagged two-way search