# Testing aviationstack's api

In [1]:
import os
import requests
import sqlite3
import json
from pathlib import Path
from dotenv import load_dotenv

In [8]:
# Read the API key from the .env file one directory above the notebook.
env_path = Path("..") / ".env"
load_dotenv(env_path)
av_api_key = os.environ.get("AVIATION_API_KEY", "")

# Both endpoints used in this notebook hang off the same base URL.
av_api_url = "http://api.aviationstack.com/v1/"
flight_api_url = f"{av_api_url}flights"
airline_api_url = f"{av_api_url}airlines"

# Default query: filter by airline name only.
params = dict(
    access_key=av_api_key,
    airline_name="Malaysia Airlines",
    # flight_date="2023-08-22",
)

## Retrieving flights for Malaysia Airlines

In [4]:
# basic plan must use http, not https
# Query for the airlines endpoint: only the API key and a limit of 100 records.
params = {"access_key": av_api_key, "limit": 100}

In [5]:
# Call the airlines endpoint with the current params and show the decoded JSON.
api_result = requests.get(airline_api_url, params=params)
airline_response = api_result.json()
airline_response

{'pagination': {'offset': 0, 'limit': 100, 'count': 100, 'total': 13130},
 'data': [{'id': '1',
   'fleet_average_age': '10.9',
   'airline_id': '1',
   'callsign': 'AMERICAN',
   'hub_code': 'DFW',
   'iata_code': 'AA',
   'icao_code': 'AAL',
   'country_iso2': 'US',
   'date_founded': '1934',
   'iata_prefix_accounting': '1',
   'airline_name': 'American Airlines',
   'country_name': 'United States',
   'fleet_size': '963',
   'status': 'active',
   'type': 'scheduled'},
  {'id': '2',
   'fleet_average_age': '17',
   'airline_id': '2',
   'callsign': 'DELTA',
   'hub_code': 'ATL',
   'iata_code': 'DL',
   'icao_code': 'DAL',
   'country_iso2': 'US',
   'date_founded': '1928',
   'iata_prefix_accounting': '6',
   'airline_name': 'Delta Air Lines',
   'country_name': 'United States',
   'fleet_size': '823',
   'status': 'active',
   'type': 'scheduled,division'},
  {'id': '3',
   'fleet_average_age': '13.8',
   'airline_id': '3',
   'callsign': 'UNITED',
   'hub_code': 'ORD',
   'iata_

In [6]:
# basic plan must use http, not https
# Restrict the query to a single airline, selected by name.
params = {"access_key": av_api_key, "airline_name": "Malaysia Airlines"}

In [9]:
# Call the flights endpoint with the current params and decode the JSON body.
api_result = requests.get(flight_api_url, params=params)
api_response = api_result.json()

In [8]:
# Top-level keys of the decoded flights response.
api_response.keys()

dict_keys(['pagination', 'data'])

In [10]:
# Paging metadata of the response (limit, offset, count, total).
api_response["pagination"]

{'limit': 100, 'offset': 0, 'count': 11, 'total': 11}

In [11]:
# Flight records contained in the response.
api_response["data"]

[{'flight_date': '2023-10-13',
  'flight_status': 'active',
  'departure': {'airport': 'Melbourne - Tullamarine Airport',
   'timezone': 'Australia/Melbourne',
   'iata': 'MEL',
   'icao': 'YMML',
   'terminal': '2',
   'gate': '6',
   'delay': 25,
   'scheduled': '2023-10-13T00:30:00+00:00',
   'estimated': '2023-10-13T00:30:00+00:00',
   'actual': None,
   'estimated_runway': None,
   'actual_runway': None},
  'arrival': {'airport': 'Kuala Lumpur International Airport (klia)',
   'timezone': 'Asia/Kuala_Lumpur',
   'iata': 'KUL',
   'icao': 'WMKK',
   'terminal': '1',
   'gate': None,
   'baggage': None,
   'delay': None,
   'scheduled': '2023-10-13T06:00:00+00:00',
   'estimated': '2023-10-13T06:00:00+00:00',
   'actual': None,
   'estimated_runway': None,
   'actual_runway': None},
  'airline': {'name': 'Malaysia Airlines', 'iata': 'MH', 'icao': 'MAS'},
  'flight': {'number': '128',
   'iata': 'MH128',
   'icao': 'MAS128',
   'codeshared': None},
  'aircraft': None,
  'live': None}

In [23]:
# No airline filter here: ask for any flight with an arrival delay of at least 30
# (minutes, per the aviationstack documentation of `min_delay_arr`).
params = {
    "access_key": av_api_key,
    # "airline_name": "American Airlines",
    "min_delay_arr": 30,
    # "flight_date": "2023-08-22",
}
# Use the shared endpoint constant rather than repeating the URL literal, so this
# cell stays consistent with the other flight requests in the notebook.
api_result = requests.get(url=flight_api_url, params=params)
api_response = api_result.json()
api_response

{'pagination': {'limit': 100, 'offset': 0, 'count': 0, 'total': 0}, 'data': []}

### Filter for delays

In [12]:
# Filter delayed arrivals for one airline on one specific flight date.
params = dict(
    access_key=av_api_key,
    airline_name="Malaysia Airlines",
    min_delay_arr=30,
    flight_date="2023-08-22",
)
api_result = requests.get(flight_api_url, params=params)
api_response = api_result.json()

In [13]:
# Show the raw payload returned for the date-filtered request.
api_response

{'error': {'code': 'function_access_restricted',
  'message': 'Your current subscription plan does not support this API function.'}}

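The basic plan apparently cannot filter by date: the request above, which sets `flight_date`, is rejected with `function_access_restricted`.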

In [20]:
# Delayed arrivals for Malaysia Airlines, without the date filter.
params = {
    "access_key": av_api_key,
    "airline_name": "Malaysia Airlines",
    "min_delay_arr": 30,
    # "flight_date": "2023-08-22",
}
# Use the shared endpoint constant rather than repeating the URL literal, so this
# cell stays consistent with the other flight requests in the notebook.
api_result = requests.get(url=flight_api_url, params=params)
api_response = api_result.json()

In [21]:
# Paging metadata for the delay-filtered request.
api_response["pagination"]

{'limit': 100, 'offset': 0, 'count': 0, 'total': 0}

In [12]:
# Flight records contained in the response.
# NOTE(review): this cell's saved execution count (12) is lower than that of the
# request cell above it (20), so the stored output may come from an earlier
# response — re-run top to bottom to confirm.
api_response["data"]

[{'flight_date': '2023-08-29',
  'flight_status': 'scheduled',
  'departure': {'airport': 'Doha International',
   'timezone': 'Asia/Qatar',
   'iata': 'DOH',
   'icao': 'OTHH',
   'terminal': None,
   'gate': 'C37',
   'delay': 25,
   'scheduled': '2023-08-29T02:05:00+00:00',
   'estimated': '2023-08-29T02:05:00+00:00',
   'actual': '2023-08-29T03:10:00+00:00',
   'estimated_runway': '2023-08-29T03:10:00+00:00',
   'actual_runway': '2023-08-29T03:10:00+00:00'},
  'arrival': {'airport': 'Cape Town International',
   'timezone': 'Africa/Johannesburg',
   'iata': 'CPT',
   'icao': 'FACT',
   'terminal': 'B',
   'gate': 'A5',
   'baggage': '1.4',
   'delay': 31,
   'scheduled': '2023-08-29T10:50:00+00:00',
   'estimated': '2023-08-29T10:50:00+00:00',
   'actual': None,
   'estimated_runway': None,
   'actual_runway': None},
  'airline': {'name': 'Malaysia Airlines', 'iata': 'MH', 'icao': 'MAS'},
  'flight': {'number': '9305',
   'iata': 'MH9305',
   'icao': 'MAS9305',
   'codeshared': {'a

In [11]:
# Save the current response under ../tests/data as a sample payload.
with open("../tests/data/sample_flight_response.json", "w") as sample_file:
    json.dump(api_response, sample_file)

In [24]:
# List the field names of the first flight record, one per line.
for field_name in api_response["data"][0].keys():
    print(field_name)

flight_date
flight_status
departure
arrival
airline
flight
aircraft
live


In [29]:
# Airline entry (name, IATA and ICAO codes) of the first flight record.
api_response["data"][0]["airline"]

{'name': 'Lion Air', 'iata': 'JT', 'icao': 'LNI'}

In [27]:
# Keep only Malaysia Airlines flights.  flight["airline"] is a dict, so testing
# `"malaysia" in flight["airline"]` only compares against its keys
# ("name", "iata", "icao") and never matches.  Match the lower-cased airline
# name instead; the name can be None for some records, hence the `or ""`.
mh = [
    flight
    for flight in api_response["data"]
    if "malaysia" in ((flight.get("airline") or {}).get("name") or "").lower()
]
mh

[]

In [30]:
# Distinct airline names present in the current response.
airlines = {flight["airline"]["name"] for flight in api_response["data"]}
airlines

{'Air Astana',
 'Air Canada',
 'Air China LTD',
 'Air India',
 'Air New Zealand',
 'Airblue',
 'Alaska Airlines',
 'Alliance Airlines',
 'Aurora',
 'Bamboo Airways',
 'Bangkok Airways',
 'China Eastern Airlines',
 'China Postal Airlines',
 'Emirates',
 'Etihad Airways',
 'Finnair',
 'FlexFlight',
 'Hawaiian Airlines',
 'Jet Linx Aviation',
 'Jetstar',
 'Kuwait Airways',
 'LATAM Airlines',
 'Lion Air',
 'Longhao Airlines',
 'Myanmar Airways International',
 None,
 'Philippine Airlines',
 'Qantas',
 'Qatar Airways',
 'S7 Airlines',
 'SF Airlines',
 'Sichuan Airlines',
 'Singapore Airlines',
 'SriLankan Airlines',
 'Sriwijaya Air',
 'Virgin Australia',
 'Yakutia'}

In [33]:
"Malaysia Airlines" in airlines

False

## Pagination limit

The free tier returns at most 100 records per request, so additional requests with an increasing `offset` are needed to retrieve the rest.

The first request retrieves the first 100 records with an offset of 0; the second retrieves the next 100 with an offset of 100, and so on until the total is reached.

For example, given a total of 563, 6 requests will be made, the last one retrieving the remaining 63 flight records.

In [15]:
from time import sleep
from datetime import date

In [16]:
def get_flight_api(
    offset: int = 0,
    limit: int = 100,
    airline: str = "Malaysia Airlines",
    min_delay: int = 1,
    flight_api_url="http://api.aviationstack.com/v1/flights",
) -> dict:
    """Request one page from the flights endpoint and return the decoded JSON.

    Args:
        offset: Sent as the ``offset`` query parameter.
        limit: Sent as the ``limit`` query parameter.
        airline: Sent as the ``airline_name`` query parameter.
        min_delay: Sent as the ``min_delay_arr`` query parameter.
        flight_api_url: URL the GET request is issued against.

    Returns:
        The response body parsed from JSON.
    """
    query = dict(
        access_key=av_api_key,  # notebook-level key loaded from .env
        offset=offset,
        limit=limit,
        airline_name=airline,
        min_delay_arr=min_delay,
    )
    # A 5 second timeout keeps a stalled connection from hanging the notebook.
    response = requests.get(flight_api_url, params=query, timeout=5)
    return response.json()


def write_local_json(
    api_response: dict,
    offset: int = 0,
    limit: int = 100,
    output_dir="../data/responses",
):
    """Write one API response to a dated JSON file and return its path.

    The file is named ``flight-<today>-<offset>-<offset+limit>.json`` and is
    placed in ``output_dir``.

    Args:
        api_response: JSON-serialisable payload to store.
        offset: Offset of the page; first number in the file name.
        limit: Page size; ``offset + limit`` is the second number in the file name.
        output_dir: Directory to write into. Created, including parents, if it
            does not exist yet.

    Returns:
        ``pathlib.Path`` of the file that was written.
    """
    local_json_path = Path(output_dir) / (
        f"flight-{date.today()}-{offset}-{offset + limit}.json"
    )
    # On a fresh checkout the responses directory may be missing; without this
    # open() raises FileNotFoundError.
    local_json_path.parent.mkdir(parents=True, exist_ok=True)
    with open(local_json_path, "w") as json_file:
        json.dump(api_response, json_file)
    print(f"saved to {local_json_path}")
    return local_json_path


def get_all_delays(
    limit: int = 100,
    airline: str = "Malaysia Airlines",
    min_delay: int = 1,
    flight_api_url="http://api.aviationstack.com/v1/flights",
):
    """Download every page of delayed flights and save each page as JSON.

    Pages are requested with an increasing offset until the number of
    retrieved records reaches the ``total`` reported by the first response.
    Every page is written to disk with ``write_local_json``.

    Args:
        limit: Page size requested from the API and used in the file names.
        airline: Airline name forwarded to ``get_flight_api``.
        min_delay: Minimum arrival delay forwarded to ``get_flight_api``.
        flight_api_url: Endpoint forwarded to ``get_flight_api``.

    Returns:
        The number of flight records retrieved, i.e. the sum of the ``count``
        values reported by the pages.

    Raises:
        KeyError: If a response has no ``"pagination"`` entry, as is the case
            for an API error payload.
    """

    def fetch_page(offset: int) -> dict:
        # Forward every filter so the caller's arguments take effect.
        page = get_flight_api(
            offset=offset,
            limit=limit,
            airline=airline,
            min_delay=min_delay,
            flight_api_url=flight_api_url,
        )
        write_local_json(page, offset=offset, limit=limit)
        return page

    print(f"retrieving 0th to {limit}th records")
    first = fetch_page(0)
    total = first["pagination"]["total"]
    # Count what the first page really contained; it may hold fewer than `limit`.
    retrieved = first["pagination"]["count"]
    print(f"total: {total}")
    while retrieved < total:
        sleep(0.5)  # short pause between consecutive requests
        print(f"retrieving {retrieved}th to {retrieved + limit}th")
        page = fetch_page(retrieved)
        count = page["pagination"]["count"]
        if count == 0:
            # An empty page means no progress is possible; stop rather than
            # loop forever if the reported total is never reached.
            break
        retrieved += count
    return retrieved

In [17]:
# Run the paginated download with its default arguments and report the
# record count it returns.
num_records = get_all_delays()
print(f"{num_records} flights were late")

retrieving 0th to 100th records
saved to ../data/responses/flight-2023-09-16-0-100.json
total: 714
retrieving 100th to 200th
saved to ../data/responses/flight-2023-09-16-100-200.json
retrieving 200th to 300th
saved to ../data/responses/flight-2023-09-16-200-300.json
retrieving 300th to 400th
saved to ../data/responses/flight-2023-09-16-300-400.json
retrieving 400th to 500th
saved to ../data/responses/flight-2023-09-16-400-500.json
retrieving 500th to 600th
saved to ../data/responses/flight-2023-09-16-500-600.json
retrieving 600th to 700th
saved to ../data/responses/flight-2023-09-16-600-700.json
retrieving 700th to 800th
saved to ../data/responses/flight-2023-09-16-700-800.json
714 flights were late


## Automatic retries with Session

A `Session` object persists certain parameters (such as headers and cookies) across requests and reuses the underlying connection, which improves performance when making several requests to the same host. It also allows for [automatic retries](https://requests.readthedocs.io/en/latest/user/advanced/#example-automatic-retries) through `urllib3`.

In [3]:
from urllib3.util import Retry
from requests import Session
from requests.adapters import HTTPAdapter

In [5]:
s = Session()

retries = Retry(
    total=3,
    backoff_factor=0.1,
    status_forcelist=[502, 503, 504],
    # The request below is a GET. With only "POST" allowed, urllib3 would never
    # retry it on the status codes listed above.
    allowed_methods={"GET"},
)
# mount registers this specific transport adapter to this url prefix
s.mount(av_api_url, HTTPAdapter(max_retries=retries))
params = {
    "access_key": av_api_key,
    "airline_name": "Malaysia Airlines",
    "min_delay_arr": 1,
    # "flight_date": "2023-08-22",
}
# Same 5 second timeout as get_flight_api, so a stalled connection cannot hang the cell.
res = s.get(url=flight_api_url, params=params, timeout=5)

In [7]:
# Decode and display the JSON body of the response obtained through the session.
res.json()

{'pagination': {'limit': 100, 'offset': 0, 'count': 100, 'total': 618},
 'data': [{'flight_date': '2023-09-25',
   'flight_status': 'landed',
   'departure': {'airport': 'Kuala Lumpur International Airport (klia)',
    'timezone': 'Asia/Kuala_Lumpur',
    'iata': 'KUL',
    'icao': 'WMKK',
    'terminal': '1',
    'gate': 'H4',
    'delay': 38,
    'scheduled': '2023-09-25T10:25:00+00:00',
    'estimated': '2023-09-25T10:25:00+00:00',
    'actual': '2023-09-25T11:02:00+00:00',
    'estimated_runway': '2023-09-25T11:02:00+00:00',
    'actual_runway': '2023-09-25T11:02:00+00:00'},
   'arrival': {'airport': 'Singapore Changi',
    'timezone': 'Asia/Singapore',
    'iata': 'SIN',
    'icao': 'WSSS',
    'terminal': '3',
    'gate': None,
    'baggage': '47',
    'delay': 15,
    'scheduled': '2023-09-25T11:35:00+00:00',
    'estimated': '2023-09-25T11:35:00+00:00',
    'actual': '2023-09-25T11:41:00+00:00',
    'estimated_runway': '2023-09-25T11:41:00+00:00',
    'actual_runway': '2023-09-