# sqlite3

## Basics - creating and writing to db

`sqlite3.connect` implicitly creates the `.db` file if it does not already exist.

In [4]:
import sqlite3

# Relative path: it is resolved against the kernel's current working directory.
con = sqlite3.connect("../tests/data/tutorial.db")

From the `con` object we create a cursor, which we use to execute our statements.

In [5]:
# Every statement in the cells below is executed through this one cursor.
cur = con.cursor()

In [8]:
# Column names of the `flights` table; no column types are declared.
column_names = [
    "flight_date",
    "flight_status",
    "departure",
    "arrival",
    "airline",
    "flight",
    "aircraft",
    "live",
]
# `cols` stays a comma-separated string: the INSERT cell further down derives
# its placeholder count from it.
cols = ", ".join(column_names)
# IF NOT EXISTS keeps this cell re-runnable against an existing tutorial.db;
# a plain CREATE TABLE raises sqlite3.OperationalError when the table is
# already there.
create_table = f"CREATE TABLE IF NOT EXISTS flights({cols})"
cur.execute(create_table)

<sqlite3.Cursor at 0x7fd030284840>

In [9]:
# sqlite_master is SQLite's schema table; the name of the table created above
# should be listed in it.
res = cur.execute("SELECT name FROM sqlite_master")
res.fetchone()

('flights',)

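When inserting `VALUES` with `?` placeholders, each row must be a sequence (e.g. a tuple or list) whose number of items corresponds exactly to the number of columns.

Alternatively, we can supply a `dict` together with named placeholders (`:name`), in which case the keys are matched to the placeholder names.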

In [17]:
# Two sample rows. Each tuple carries one value per column of `flights`, in
# column order; departure/arrival are stored as plain strings and the last
# three columns are left as NULL.
data = [
    ("2000-01-01", "active", "{'airport': 'KLIA'}", "{'airport': 'Seeb'}",
     "Malaysian Airlines", None, None, None),
    ("2023-08-23", "scheduled", "{'airport': 'KIX'}", "{'airport': 'HND'}",
     "Malaysian AIrlines", None, None, None),
]
# One "?" per column, e.g. "?, ?, ?" for three columns.
column_count = len(cols.split(","))
placeholders = ", ".join(["?"] * column_count)
insert_sql = f"INSERT INTO flights VALUES({placeholders})"
cur.executemany(insert_sql, data)
# Persist the inserted rows.
con.commit()

In [19]:
# Print date and departure of every stored flight, earliest date first.
ordered_rows = cur.execute(
    "SELECT flight_date, departure FROM flights order by flight_date"
).fetchall()
for row in ordered_rows:
    print(row)

('2000-01-01', "{'airport': 'KLIA'}")
('2023-08-23', "{'airport': 'KIX'}")


In [42]:
# Start from a clean slate. IF EXISTS makes this safe on a fresh database,
# where a plain DROP TABLE would raise because flights_json has not been
# created yet.
cur.execute("DROP TABLE IF EXISTS flights_json")

<sqlite3.Cursor at 0x7fd030284840>

In [43]:
# Single-column table; the column `flights_response` is declared with the
# type name `json` and receives one JSON string per row in the cells below.
cur.execute("CREATE TABLE flights_json(flights_response json)")

<sqlite3.Cursor at 0x7fd030284840>

In [44]:
# List every name recorded in the schema table to confirm both tables exist.
res = cur.execute("SELECT name FROM sqlite_master")
res.fetchall()

[('flights',), ('flights_json',)]

## Import json into sqlite3

We use the sample response JSON to populate our sample db. SQLite does have built-in JSON functions, but it still stores JSON as ordinary text; its `json()` function only validates the input and returns a minified copy, with unnecessary whitespace removed to save storage.

In [24]:
import json

In [25]:
# Load the sample API response into a dict. The encoding is given explicitly
# so the file is not decoded with a platform-dependent default, and json.load
# parses straight from the file object.
with open("../tests/data/sample_flight_response.json", "r", encoding="utf-8") as j:
    response = json.load(j)

# Show the top-level keys of the response.
print(response.keys())

dict_keys(['pagination', 'data'])


In [48]:
# dumping into a json str, and putting it into a list
cur.execute(
    "INSERT INTO flights_json VALUES( ? )",
    [json.dumps(response)],
)
con.commit()

In [49]:
# convert dict to str, then put into a tuple
# result is a list of tuple, where each tuple is one json str
flights = [(json.dumps(flight),) for flight in response["data"]]
cur.executemany("INSERT INTO flights_json VALUES( ? )", flights)
con.commit()

In [50]:
# The first row inserted into flights_json is the whole API response; the rows
# after it are the individual flights. ORDER BY rowid pins the result to
# insertion order, which the positional [1:] skip relies on. Without an
# ORDER BY, SQLite leaves the row order undefined.
res = cur.execute("SELECT * FROM flights_json ORDER BY rowid")
read_flights = res.fetchall()[1:]
read_flights[0]

('{"flight_date": "2023-08-29", "flight_status": "scheduled", "departure": {"airport": "Doha International", "timezone": "Asia/Qatar", "iata": "DOH", "icao": "OTHH", "terminal": null, "gate": "C37", "delay": 25, "scheduled": "2023-08-29T02:05:00+00:00", "estimated": "2023-08-29T02:05:00+00:00", "actual": "2023-08-29T03:10:00+00:00", "estimated_runway": "2023-08-29T03:10:00+00:00", "actual_runway": "2023-08-29T03:10:00+00:00"}, "arrival": {"airport": "Cape Town International", "timezone": "Africa/Johannesburg", "iata": "CPT", "icao": "FACT", "terminal": "B", "gate": "A5", "baggage": "1.4", "delay": 31, "scheduled": "2023-08-29T10:50:00+00:00", "estimated": "2023-08-29T10:50:00+00:00", "actual": null, "estimated_runway": null, "actual_runway": null}, "airline": {"name": "Malaysia Airlines", "iata": "MH", "icao": "MAS"}, "flight": {"number": "9305", "iata": "MH9305", "icao": "MAS9305", "codeshared": {"airline_name": "qatar airways", "airline_iata": "qr", "airline_icao": "qtr", "flight_num

The first `[0]` retrieves the first row;
each row is a tuple holding one value per column.
We index with `[0]` again to retrieve the first (and only) column, which contains our JSON.

In [52]:
# Take the first flight row, then decode its only column back into a dict.
first_row = read_flights[0]
read_flights_dict = json.loads(first_row[0])
read_flights_dict

{'flight_date': '2023-08-29',
 'flight_status': 'scheduled',
 'departure': {'airport': 'Doha International',
  'timezone': 'Asia/Qatar',
  'iata': 'DOH',
  'icao': 'OTHH',
  'terminal': None,
  'gate': 'C37',
  'delay': 25,
  'scheduled': '2023-08-29T02:05:00+00:00',
  'estimated': '2023-08-29T02:05:00+00:00',
  'actual': '2023-08-29T03:10:00+00:00',
  'estimated_runway': '2023-08-29T03:10:00+00:00',
  'actual_runway': '2023-08-29T03:10:00+00:00'},
 'arrival': {'airport': 'Cape Town International',
  'timezone': 'Africa/Johannesburg',
  'iata': 'CPT',
  'icao': 'FACT',
  'terminal': 'B',
  'gate': 'A5',
  'baggage': '1.4',
  'delay': 31,
  'scheduled': '2023-08-29T10:50:00+00:00',
  'estimated': '2023-08-29T10:50:00+00:00',
  'actual': None,
  'estimated_runway': None,
  'actual_runway': None},
 'airline': {'name': 'Malaysia Airlines', 'iata': 'MH', 'icao': 'MAS'},
 'flight': {'number': '9305',
  'iata': 'MH9305',
  'icao': 'MAS9305',
  'codeshared': {'airline_name': 'qatar airways',
 

In [53]:
# Nested JSON objects are decoded to ordinary dicts, so plain key access works.
read_flights_dict["departure"]

{'airport': 'Doha International',
 'timezone': 'Asia/Qatar',
 'iata': 'DOH',
 'icao': 'OTHH',
 'terminal': None,
 'gate': 'C37',
 'delay': 25,
 'scheduled': '2023-08-29T02:05:00+00:00',
 'estimated': '2023-08-29T02:05:00+00:00',
 'actual': '2023-08-29T03:10:00+00:00',
 'estimated_runway': '2023-08-29T03:10:00+00:00',
 'actual_runway': '2023-08-29T03:10:00+00:00'}

In [54]:
# Close the cursor first, then the connection it was created from.
cur.close()
con.close()