In [1]:
import boto3
import json
import pandas as pd
import numpy as np
import time

import matplotlib.pyplot as plt
# Matplotlib 3.6 deprecated the bare 'seaborn' style name in favour of
# 'seaborn-v0_8', and later releases dropped the old name. Prefer the new
# name when this Matplotlib ships it; otherwise keep the legacy name so
# older installations still work.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

  plt.style.use('seaborn')


In [2]:
def _price_to_int(raw):
    """Keep the first space-separated token, drop '$' and ',', cast to int."""
    first_token = raw.split(" ")[0]
    return int(first_token.replace("$", "").replace(",", ""))


def _score_to_float(raw):
    """Cast the raw score to float."""
    return float(raw)


def _third_token(raw):
    """Return the third space-separated token of the raw ticket-count text."""
    return raw.split(" ")[2]


def _flatten_section(raw):
    """Drop the 'Section ' / 'Row ' labels and append a ', NA' filler.

    The filler guarantees a second comma-separated field exists even when
    the raw text has no comma, so the later row lookup always has an index 1.
    """
    unlabeled = raw.replace("Section ", "").replace("Row ", "")
    return unlabeled + ", NA"


def _second_field(raw):
    """Return the second ', '-separated field."""
    return raw.split(", ")[1]


def _first_field(raw):
    """Return the first ', '-separated field."""
    return raw.split(", ")[0]


# Ordered column transforms: each step reads `from_col`, passes every value
# through `fn`, and stores the result in `update_col`. Order matters for the
# last three steps: "section" is flattened first, "row" is then read out of the
# flattened text, and only afterwards is "section" cut down to its first field.
preprocessing = [
    dict(update_col="price", from_col="price", fn=_price_to_int),
    dict(update_col="score", from_col="score", fn=_score_to_float),
    dict(update_col="num_tickets", from_col="num_tickets", fn=_third_token),
    dict(update_col="section", from_col="section", fn=_flatten_section),
    dict(update_col="row", from_col="section", fn=_second_field),
    dict(update_col="section", from_col="section", fn=_first_field),
]

# Name of the S3 bucket that the cells below read from.
bucket_name = 'seatgeek-tickets'

# NOTE(review): looks like the key of a single snapshot object; it is not
# referenced by any of the visible cells -- confirm whether it is still needed.
table = '2023-01-16-09-26-43-941432.json'

# boto3 resource handle used for all bucket/object access in this notebook.
s3 = boto3.resource("s3")

In [3]:
# Enumerate every object stored in the bucket and keep only the key names.
bucket = s3.Bucket(bucket_name)
s3_objs = bucket.objects.all()
obj_keys = [summary.key for summary in s3_objs]

In [5]:
# Download every listed object, parse its body as JSON, and index the parsed
# payload by the object key with its ".json" extension removed.
data = {}
for key in obj_keys:
    obj = s3.Object(bucket_name, key)
    json_obj = json.loads(obj.get()['Body'].read())
    # Strip only a trailing ".json". str.replace would also delete ".json"
    # from the middle of a key and silently change the resulting name.
    key_stem = key[:-len(".json")] if key.endswith(".json") else key
    data[key_stem] = json_obj

In [18]:
# Build one DataFrame per event from every list-shaped snapshot, tag each with
# the event title, game timestamp and snapshot key, then stack them together.
frames = []
for snapshot_key, snapshot in data.items():
    # Snapshots that are not lists are skipped.
    if not isinstance(snapshot, list):
        continue
    for event in snapshot:
        # An event whose 'data' field is a plain string is skipped.
        if isinstance(event['data'], str):
            continue
        frame = pd.DataFrame(event['data'])
        frame['title'] = event['title']
        frame['game_ts'] = event['datetime_utc']
        frame['ingestion_ts'] = snapshot_key
        frames.append(frame)
dfs = pd.concat(frames)

In [19]:

# Run the preprocessing steps in order; each one derives `update_col` from
# `from_col` by applying the step's `fn` to every value.
# Note: this rewrites columns of `dfs` in place.
for step in preprocessing:
    source_values = dfs[step['from_col']]
    dfs[step['update_col']] = source_values.apply(step['fn'])

In [21]:
# Preview the first ten rows of the preprocessed frame.
dfs.head(n=10)

Unnamed: 0,price,score,deal,num_tickets,section,title,game_ts,aws_ts,row
0,39,9.9,Amazing Deal,1-4,108,Orlando Magic at Philadelphia 76ers,2023-01-31T00:00:00,2023-01-28-22-29-35-160253,13
1,38,9.9,Amazing Deal,2,108,Orlando Magic at Philadelphia 76ers,2023-01-31T00:00:00,2023-01-28-22-29-35-160253,15
2,52,9.8,Amazing Deal,1,108,Orlando Magic at Philadelphia 76ers,2023-01-31T00:00:00,2023-01-28-22-29-35-160253,7
3,37,9.8,Amazing Deal,1-4,108,Orlando Magic at Philadelphia 76ers,2023-01-31T00:00:00,2023-01-28-22-29-35-160253,18
4,30,9.8,Amazing Deal,2,120,Orlando Magic at Philadelphia 76ers,2023-01-31T00:00:00,2023-01-28-22-29-35-160253,24
5,19,9.8,Amazing Deal,2,204 A,Orlando Magic at Philadelphia 76ers,2023-01-31T00:00:00,2023-01-28-22-29-35-160253,8
6,18,9.8,Amazing Deal,1-3,210,Orlando Magic at Philadelphia 76ers,2023-01-31T00:00:00,2023-01-28-22-29-35-160253,9
7,51,9.7,Amazing Deal,2,108,Orlando Magic at Philadelphia 76ers,2023-01-31T00:00:00,2023-01-28-22-29-35-160253,9
8,34,9.7,Amazing Deal,2,107,Orlando Magic at Philadelphia 76ers,2023-01-31T00:00:00,2023-01-28-22-29-35-160253,23
9,20,9.7,Amazing Deal,1,206,Orlando Magic at Philadelphia 76ers,2023-01-31T00:00:00,2023-01-28-22-29-35-160253,2
