# MVP - JSON in sqlite

Instead of shoehorning the JSON responses into a traditional relational schema, store them as-is and use SQLite's JSON functions to extract our delay data.

1. call aviationstack API
2. save json response to sqlite3 db
3. extract data from response
4. tweet data

In [1]:
import json
import logging
import os
import sqlite3
from datetime import datetime, timezone
from pathlib import Path
from sys import stdout
from time import sleep
from typing import Optional, Union

import requests
from dotenv import load_dotenv
from requests import HTTPError, Session
from requests.adapters import HTTPAdapter
from requests.exceptions import ReadTimeout
from urllib3.util import Retry

In [2]:
# Route log records to stdout so they appear in the notebook cell output.
# No ``level`` is passed, so the root logger keeps its default WARNING
# threshold and only warnings/errors are actually emitted.
logging.basicConfig(
    handlers=[logging.StreamHandler(stdout)],
    datefmt="%Y/%m/%d %H:%M:%S",
    format="%(asctime)s [%(levelname)s] %(funcName)s: %(message)s",
)

# Module-level logger shared by the functions defined in this notebook.
logger = logging.getLogger(__name__)

In [3]:
# Read environment variables from the .env file (path is relative to the
# current working directory).
env_path = Path("../.env")
load_dotenv(dotenv_path=env_path)

# Base endpoint of the aviationstack API and the flights endpoint under it.
AV_API_URL = "http://api.aviationstack.com/v1/"
FLIGHT_API_URL = f"{AV_API_URL}flights"

# API key; falls back to an empty string when the variable is not set.
AV_API_KEY = os.environ.get("AVIATION_API_KEY", "")

In [15]:
def write_local_json(
    api_response: dict,
    json_dir: Path,
    str_date: Optional[str] = None,
    offset: int = 0,
    limit: int = 100,
):
    """
    Save one flight API response as a JSON file and return the file's path.

    The file is written to ``json_dir`` and named
    ``flight-{str_date}-{offset}-{offset+limit}.json``. ``json_dir`` and any
    missing parent directories are created when they do not exist yet.

    Args:
        api_response: JSON-serialisable payload to store.
        json_dir: Directory that receives the file.
        str_date: Date label used in the file name. When omitted, the current
            UTC date at call time is used.
        offset: Pagination offset of this response, used in the file name.
        limit: Page size of the request, used in the file name.

    Returns:
        Path of the file that was written.
    """
    if str_date is None:
        # Resolve the date on every call: a default computed in the signature
        # is evaluated once at definition time and goes stale afterwards.
        str_date = str(datetime.now(tz=timezone.utc).date())
    json_dir = Path(json_dir)
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    json_dir.mkdir(parents=True, exist_ok=True)
    local_json_path = json_dir / f"flight-{str_date}-{offset}-{offset+limit}.json"
    logger.info(f"saving to {local_json_path}")
    with open(local_json_path, "w", encoding="utf-8") as j:
        json.dump(api_response, j)
    # Logged after the file is closed, i.e. once the content is flushed.
    logger.debug(f"saved to {local_json_path}")
    return local_json_path

In [20]:
def get_all_delays(
    json_dir: Union[str, Path],
    limit: int = 100,
    airline: str = "Malaysia Airlines",
    min_delay: int = 1,
    str_date: Optional[str] = None,
):
    """
    Page through the flights endpoint and collect every delayed-flight response.

    Requests are sent to ``FLIGHT_API_URL`` with ``AV_API_KEY`` until the number
    of retrieved records reaches the ``pagination.total`` reported by the first
    page. Each page is appended to the returned list and also written to disk
    through ``write_local_json``.

    Retrieval is best-effort: on an HTTP error status, a read timeout, a
    response without pagination data, or an empty page, the error is logged and
    the pages collected so far are returned.

    Args:
        json_dir: Directory in which each page is saved as a JSON file.
        limit: Page size sent as the ``limit`` query parameter.
        airline: Value of the ``airline_name`` query parameter.
        min_delay: Value of the ``min_delay_arr`` query parameter.
        str_date: Date label for the saved file names. When omitted, the
            current UTC date at call time is used.

    Returns:
        List of decoded JSON responses, one per successfully retrieved page.
    """
    json_dir = Path(json_dir)
    if str_date is None:
        # Resolved per call; a signature-level default would be frozen at
        # definition time and keep yesterday's date in a long-lived kernel.
        str_date = str(datetime.now(tz=timezone.utc).date())

    adapter = HTTPAdapter(
        max_retries=Retry(
            total=3,
            backoff_factor=0.1,
            status_forcelist=[500, 502, 503, 504],
        )
    )
    responses = []
    retrieved = total = 0
    # The context manager closes the session's connections on every exit path.
    with Session() as sesh:
        sesh.mount(AV_API_URL, adapter)
        while not total or retrieved < total:
            sleep(0.5)  # pause between consecutive requests
            logger.info(f"retrieving {retrieved}th to {retrieved + limit}th")
            params = {
                "access_key": AV_API_KEY,  # retrieved from .env, global scope
                "offset": retrieved,
                "limit": limit,
                "airline_name": airline,
                "min_delay_arr": min_delay,
            }
            try:
                response = sesh.get(
                    url=FLIGHT_API_URL,
                    params=params,
                    timeout=30.0,
                )
                response.raise_for_status()
            except HTTPError as exc:
                logger.error(f"HTTP Error: \n{exc}")
                # Stop here: the body of a failed request must not be parsed
                # or counted as a page.
                break
            except ReadTimeout as e:
                logger.error(
                    f"Timeout retrieving {retrieved}th to {retrieved + limit}th:\n{e}"
                )
                # No usable response exists for this page.
                break

            logger.debug(f"retrieved {retrieved}th to {retrieved + limit}th")
            payload = response.json()
            pagination = payload.get("pagination") if isinstance(payload, dict) else None
            if not pagination:
                logger.error(
                    f"No pagination info in response for offset {retrieved}; exiting"
                )
                break

            # save response
            responses.append(payload)
            write_local_json(
                payload, json_dir=json_dir, str_date=str_date, offset=retrieved
            )
            page_count = pagination["count"]
            retrieved += page_count
            if not total:
                total = responses[0]["pagination"]["total"]
                logger.info(f"Total records count: {total}")
                if total == 0:
                    # prevent infinite loop
                    logger.error("Zero records retrieved; exiting")
                    break
            if page_count == 0:
                # An empty page would never advance the offset; stop instead
                # of requesting the same page forever.
                logger.error(
                    f"Empty page at offset {retrieved} before reaching total {total}; exiting"
                )
                break
    return responses

## 1. Fetch the responses

In [21]:
# Retrieve every page with the function's default filters; each page is also
# saved as a JSON file under ../data/responses/.
responses = get_all_delays(json_dir=Path("../data/responses/"))

2023/10/15 23:21:11 [ERROR] get_all_delays: Zero records retrieved; exiting


In [None]:
def create_json_table(
    db_conn: sqlite3.Connection,
    tbl_name: str = "import_flight_records",
):
    """
    Create sqlite db and table if not already exists
    """
    create_sql = f"""
    CREATE TABLE {tblname_json}(
        {colname_json} JSON,
        flight_num TEXT GENERATED ALWAYS AS (JSON_EXTRACT({colname_json}, '$.flight.iata')) VIRTUAL,
        start TEXT GENERATED ALWAYS AS (JSON_EXTRACT({colname_json}, '$.departure.iata')) VIRTUAL,
        dest TEXT GENERATED ALWAYS AS (JSON_EXTRACT({colname_json}, '$.arrival.iata')) VIRTUAL,
        ts_takeoff TEXT GENERATED ALWAYS AS (JSON_EXTRACT({colname_json}, '$.arrival.iata')) VIRTUAL
    );
    """