# Putting it all together

1. Call the aviationstack API
2. Save the JSON response to a SQLite database
3. Extract data from the response
4. Tweet the data

## Calling `aviationstack` API

In [4]:
import os
import requests
import sqlite3
import json
from pathlib import Path
from dotenv import load_dotenv
from datetime import date
from time import sleep

In [7]:
# Today's date as an ISO "YYYY-MM-DD" string; the same value is embedded in
# the response file names written further down.
date.today().isoformat()

'2023-09-16'

In [None]:
# Read the aviationstack access key from the project-level .env file.
env_path = Path("../.env")
load_dotenv(env_path)
av_api_key = os.getenv("AVIATION_API_KEY", "")
if not av_api_key:
    # Fail here with a clear message instead of sending an empty key and
    # tripping over an error payload in a later cell.
    raise RuntimeError(f"AVIATION_API_KEY is not set (looked in {env_path})")

# NOTE(review): this is plain HTTP, so the access key is sent unencrypted;
# switch to https if the subscription plan allows it.
av_api_url = "http://api.aviationstack.com/v1/"
flight_api_url = av_api_url + "flights"
params = {
    "access_key": av_api_key,
    "airline_name": "Malaysia Airlines",
    "min_delay_arr": 1,
    # "flight_date": "2023-08-22",
}
# Same 5 s timeout as get_flight_api, so a stalled connection cannot hang the
# kernel indefinitely.
api_result = requests.get(flight_api_url, params=params, timeout=5)
api_response = api_result.json()

In [10]:
# Save the API response locally, named after today's date.
local_json_path = Path(f"../data/responses/avstack-{date.today()}.json")
# Create the output folder on first run; open() would otherwise raise
# FileNotFoundError on a fresh checkout.
local_json_path.parent.mkdir(parents=True, exist_ok=True)
with open(local_json_path, "w", encoding="utf-8") as j:
    json.dump(api_response, j)

In [11]:
# "total" field of the response's pagination section; get_all_delays below
# uses the same field to decide when every page has been fetched.
api_response["pagination"]["total"]

593

In [12]:
def get_flight_api(
    offset: int = 0,
    limit: int = 100,
    airline: str = "Malaysia Airlines",
    min_delay: int = 1,
    flight_api_url="http://api.aviationstack.com/v1/flights",
) -> dict:
    """
    Request one page of flights and return the decoded JSON body.

    Args:
        offset: value sent as the ``offset`` query parameter.
        limit: value sent as the ``limit`` query parameter.
        airline: value sent as ``airline_name``.
        min_delay: value sent as ``min_delay_arr``.
        flight_api_url: endpoint to query.

    Returns:
        Whatever ``.json()`` decodes from the HTTP response.

    The access key is read from the notebook-level ``av_api_key`` variable
    (loaded from .env), so that cell must have been run first. The request
    is abandoned after a 5 second timeout.
    """
    query = dict(
        access_key=av_api_key,  # notebook global, loaded from .env
        offset=offset,
        limit=limit,
        airline_name=airline,
        min_delay_arr=min_delay,
    )
    response = requests.get(flight_api_url, params=query, timeout=5)
    return response.json()


def write_local_json(api_response: dict, offset: int = 0, limit: int = 100):
    """
    Dump one API response page to ``../data/responses`` as JSON.

    The file is named ``flight-<today>-<offset>-<offset+limit>.json`` so that
    each page of a paginated download gets its own file.

    Args:
        api_response: JSON-serialisable response to store.
        offset: offset the page was requested with.
        limit: page size the page was requested with.

    Returns:
        The (relative) ``Path`` of the file that was written.
    """
    local_json_path = Path(
        f"../data/responses/flight-{date.today()}-{offset}-{offset + limit}.json"
    )
    # Create the output folder on first use; open() would otherwise raise
    # FileNotFoundError on a fresh checkout.
    local_json_path.parent.mkdir(parents=True, exist_ok=True)
    with open(local_json_path, "w", encoding="utf-8") as j:
        json.dump(api_response, j)
    print(f"saved to {local_json_path}")
    return local_json_path


def get_all_delays(
    limit: int = 100,
    airline: str = "Malaysia Airlines",
    min_delay: int = 1,
    flight_api_url="http://api.aviationstack.com/v1/flights",
):
    """
    Download every page of delayed flights and save each page as JSON.

    Pages are requested with ``get_flight_api`` and stored with
    ``write_local_json`` until the number of retrieved records reaches the
    ``pagination.total`` reported by the first response.

    Args:
        limit: page size used for every request and for the file names.
        airline: airline name forwarded to the API.
        min_delay: minimum arrival delay forwarded to the API.
        flight_api_url: endpoint forwarded to ``get_flight_api``.

    Returns:
        Number of records retrieved (sum of ``pagination.count`` over pages).

    Raises:
        KeyError: if a response has no ``pagination`` section (for example
            an error payload).
    """

    def fetch(offset: int) -> dict:
        # Forward *all* caller-supplied settings; they used to be dropped.
        return get_flight_api(
            offset=offset,
            limit=limit,
            airline=airline,
            min_delay=min_delay,
            flight_api_url=flight_api_url,
        )

    print(f"retrieving 0th to {limit}th records")
    first = fetch(0)
    write_local_json(first, offset=0, limit=limit)
    total = first["pagination"]["total"]
    # Count what was actually returned instead of assuming a full page.
    page_count = first["pagination"]["count"]
    retrieved = page_count
    print(f"total: {total}")
    # Stop on an empty page as well, otherwise `retrieved` would never grow
    # and the loop would spin forever.
    while page_count > 0 and retrieved < total:
        sleep(0.5)  # brief pause between consecutive requests
        print(f"retrieving {retrieved}th to {retrieved + limit}th")
        response = fetch(retrieved)
        page_count = response["pagination"]["count"]
        if page_count > 0:
            write_local_json(response, offset=retrieved, limit=limit)
            retrieved += page_count
    return retrieved

## Insert response into sqlite

1. Flatten the JSON response.
1. Create a table if it doesn't already exist.
    - primary keys:
      - `flight__iata` 
      - `departure__iata`
      - `departure__scheduled`
      - `arrival__iata`
      - `arrival__scheduled`
    - Build the schema from the union of the keys across all entries
1. Upsert entries into the database
    

In [15]:
from collections.abc import MutableMapping


def json_flatten(data: dict, parent_key="", sep="_"):
    """
    Collapse a nested mapping into a single-level dict.

    Keys of nested mappings are joined to their parent's key with ``sep``
    (``{"a": {"b": 1}}`` becomes ``{"a_b": 1}``). Values that are not
    mappings, lists included, are copied unchanged. A nested mapping that is
    empty contributes no keys.

    Args:
        data: mapping to flatten.
        parent_key: prefix for every produced key; used by the recursion.
        sep: separator placed between parent and child key.

    Returns:
        A new flat dict.
    """
    flat = {}
    for name, value in data.items():
        path = sep.join((parent_key, name)) if parent_key else name
        if not isinstance(value, MutableMapping):
            flat[path] = value
            continue
        # descend into the nested mapping, carrying the path built so far
        flat.update(json_flatten(value, parent_key=path, sep=sep))
    return flat


def issubstring(text: str, checklist, sep="__") -> bool:
    """
    Tell whether ``text + sep`` occurs inside any string of ``checklist``.

    ``find_json_schema`` uses this to spot overlapped keys, i.e. a key that
    also shows up as the parent part of a longer flattened key.

    NOTE(review): this is a substring match, not a prefix match, so it also
    fires when ``text + sep`` appears in the middle of another key. Confirm
    that is intended.
    """
    needle = text + sep
    return any(needle in candidate for candidate in checklist)


def find_json_schema(entries: list[dict]) -> list:
    """
    Collect the column names needed to hold every flattened entry.

    The result is the union of all keys across ``entries``, minus keys that
    ``issubstring`` reports as overlapped by a longer key (for example a
    ``live`` key next to ``live__altitude``).

    Fields are returned in first-seen order. They used to come out in set
    iteration order, which changes from run to run because string hashing
    is randomised.

    Args:
        entries: flattened records, e.g. produced by ``json_flatten``.

    Returns:
        List of unique field names; empty when ``entries`` is empty.
    """
    # A dict works as an insertion-ordered set of field names.
    fields = {}
    for entry in entries:
        for key in entry:
            fields.setdefault(key, None)

    return [field for field in fields if not issubstring(field, fields)]

Find schema by comparing keys from all entries

In [32]:
# Read back every page saved for the 2023-09-16 download and flatten each
# flight record. glob() yields files in arbitrary order, so sort the paths to
# get the same order of entries on every run.
json_paths = sorted(Path("../data/responses").glob("flight-2023-09-16-*.json"))
entries = []
for json_file in json_paths:
    with open(json_file, encoding="utf-8") as j:
        flight_page = json.load(j)
    # Flight records sit under "data"; nested keys are joined with "__".
    json_flat = [json_flatten(nested, sep="__") for nested in flight_page["data"]]
    entries.extend(json_flat)

schema = find_json_schema(entries)

In [33]:
# Show the number of columns, then the column names on the next line.
print(f"num cols: {len(schema)}", schema, sep="\n")

num cols: 51
['live__altitude', 'departure__scheduled', 'flight__number', 'aircraft__icao24', 'departure__terminal', 'live__is_ground', 'live__longitude', 'live__speed_vertical', 'departure__gate', 'arrival__estimated', 'arrival__delay', 'flight__codeshared__airline_iata', 'arrival__icao', 'flight_status', 'arrival__actual_runway', 'flight__codeshared__flight_icao', 'flight__codeshared__flight_number', 'departure__delay', 'arrival__airport', 'departure__estimated_runway', 'flight__codeshared__airline_icao', 'departure__estimated', 'live__speed_horizontal', 'arrival__estimated_runway', 'arrival__terminal', 'flight__codeshared__flight_iata', 'airline__icao', 'departure__airport', 'airline__name', 'arrival__gate', 'flight_date', 'departure__icao', 'departure__actual', 'flight__iata', 'live__latitude', 'flight__icao', 'live__direction', 'live__updated', 'arrival__timezone', 'departure__actual_runway', 'aircraft__icao', 'arrival__scheduled', 'arrival__baggage', 'departure__iata', 'departure