In [1]:
import boto3
import json
import pandas as pd
import numpy as np
import time

import matplotlib.pyplot as plt
plt.style.use('seaborn')

  plt.style.use('seaborn')


In [2]:
preprocessing = [
    {
        "update_col": "price", 
        "from_col": "price",
        "fn": lambda x: int(x.split(" ")[0].replace("$", "").replace(",", "")),
    },
    {
        "update_col": "score", 
        "from_col": "score",
        "fn": lambda x: float(x),
    },
    {
        "update_col": "num_tickets", 
        "from_col": "num_tickets",
        "fn": lambda x: x.split(" ")[2],
    },
    {
        "update_col": "section", 
        "from_col": "section",
        "fn": lambda x: x.replace("Section ", "").replace("Row ", "") + ", NA",
    },
    {
        "update_col": "row", 
        "from_col": "section",
        "fn": lambda x: x.split(", ")[1],
    },
    {
        "update_col": "section", 
        "from_col": "section",
        "fn": lambda x: x.split(", ")[0],
    },
]

bucket_name = "seatgeek-tickets"
table = "2023-01-16-09-26-43-941432.json"
s3 = boto3.resource('s3')

In [3]:
# get all data from s3
s3_objs = s3.Bucket(bucket_name).objects.all()
obj_keys = [obj.key for obj in s3_objs][1:]

# load data
data = {}
for key in obj_keys:
    obj = s3.Object(bucket_name, key)
    json_obj = json.loads(obj.get()['Body'].read())
    key_stem = key.replace(".json", "")
    data[key_stem] = json_obj

# concatenate data
dfs = []
for datetime, dataset in data.items():
    for game, table in dataset.items():
        df = pd.DataFrame(table)
        df['game'] = game
        df['datetime'] = datetime
        dfs.append(df)
dfs = pd.concat(dfs)

# data preprocessing
for p in preprocessing:
    dfs[p['update_col']] = dfs[p['from_col']].apply(p['fn'])

In [4]:
section_row = dfs['section'] + dfs['row']

count = {}
for comb in section_row:
    if comb in count:
        count[comb] += 1
    else:
        count[comb] = 1

maxv = 0
maxk = ""
for k, v in count.items():
    if v > maxv:
        maxv = v
        maxk = k
print(maxv, maxk)

77 10113


In [5]:

for section in dfs['section'].unique():
    for row in dfs['row'].unique():
        for game, game_group in dfs[(dfs['section'] == section) * (dfs['row'] == '15')].groupby('game'):
            for date, date_group in game_group.groupby('datetime')['price']:
                if len(date_group) > 1:
                    pass
                    # print(section, row, game, date, date_group.mean())

In [6]:
dfs.datetime.unique()

array(['2023-01-15-16-53-33-938440', '2023-01-16-16-40-57-005388',
       '2023-01-17-19-13-29-247989', '2023-01-18-17-55-57-133107',
       '2023-01-20-08-57-31-228391', '2023-01-21-13-17-38-634514',
       '2023-01-22-12-36-25-160723'], dtype=object)

In [10]:
deals = dfs[dfs['datetime'] ==  '2023-01-22-12-36-25-160723'].sort_values(by='score', ascending=False)
deals[(deals['price'] > 100) * (deals['price'] < 225)].head(50)

Unnamed: 0,price,score,deal,num_tickets,section,game,datetime,row
1,103,10.0,Amazing Deal,1,107,New York Knicks,2023-01-22-12-36-25-160723,13
0,209,10.0,Amazing Deal,2,Luxury Suite 47,New York Knicks,2023-01-22-12-36-25-160723,
1,205,9.8,Amazing Deal,1-3,101,Miami Heat(1),2023-01-22-12-36-25-160723,19
4,146,9.8,Amazing Deal,5,111,Miami Heat(1),2023-01-22-12-36-25-160723,12
2,135,9.8,Amazing Deal,1-3,Luxury Suite 73,Miami Heat,2023-01-22-12-36-25-160723,
1,189,9.8,Amazing Deal,1-3,Luxury Suite 47,Miami Heat,2023-01-22-12-36-25-160723,
0,189,9.8,Amazing Deal,12,Luxury Suite 47,Miami Heat,2023-01-22-12-36-25-160723,
3,202,9.7,Amazing Deal,2,103,New York Knicks,2023-01-22-12-36-25-160723,14
10,191,9.7,Amazing Deal,6,111,Miami Heat(1),2023-01-22-12-36-25-160723,2
8,191,9.7,Amazing Deal,5,111,Miami Heat(1),2023-01-22-12-36-25-160723,1
