In [None]:
import datetime
import json
import os

import numpy as np
import pandas as pd
import requests

### List of cities and Ticketmaster codes
# The codes for Denver, San Francisco Bay and Portland are the ones listed in
# `dmaId` below; every other code appears in `marketId`.
City_List = {
    'Denver': 264,
    'San Francisco Bay': 382,
    'Portland': 362,
    'Los Angeles': 27,
    'Las Vegas': 14,
    'Phoenix': 36,
    'Seattle': 42,
    'Austin': 40,
    'Houston': 22,
    'Dallas': 5,
    'Chicago': 3,
    'Nashville': 31,
    'Atlanta': 10,
    'Boston': 11,
    'New York': 35,
    'Washington DC': 47,
    'Miami': 15,
}

### Ticketmaster API info
# `{apikey}` is a str.format placeholder: base_url.format(apikey=api_key)
base_url = 'https://app.ticketmaster.com/discovery/v2/events.json?countryCode=US&apikey={apikey}'
# SECURITY: prefer supplying the key through the TICKETMASTER_API_KEY environment
# variable. The literal fallback is kept only so existing runs keep working; a key
# committed to a notebook should be rotated and the fallback removed.
api_key = os.environ.get('TICKETMASTER_API_KEY', 'OhKdHqBZOOuGCrWIcjlhzoxmnjUoaGWL')
dmaId = [382,362,264]
marketId =  [42,27,14,36,40,22,5,3,31,10,11,35,47,15]



### Define Functions

Define functions for pulling event data from the Ticketmaster API and for formatting the resulting dataframe.

In [None]:
# Ticketmaster DMA ids and market ids that the queries below iterate over
dma_list = [382, 362, 264]
mkt_list = [
    42, 27, 14, 36, 40, 22, 5,
    3, 31, 10, 11, 35, 47, 15,
]

# Function to get the number of pages of ticketmaster data
def get_number_of_TM_pages(dma=None,market=None,source='ticketmaster,frontgate'):
    """Return how many result pages Ticketmaster reports for a music-event query.

    Parameters
    ----------
    dma : optional
        Sent as the ``dmaId`` query parameter.
    market : optional
        Sent as the ``marketId`` query parameter.
    source : str
        Sent as the ``source`` query parameter.

    Returns
    -------
    The ``page.totalPages`` value of the JSON response (for a page size of 200).

    Raises
    ------
    requests.HTTPError
        If the API answers with a non-success status code.
    KeyError
        If a successful response carries no ``page.totalPages`` entry.
    """
    url = 'https://app.ticketmaster.com/discovery/v2/events.json?countryCode=US'
    # requests leaves out parameters whose value is None, so an unset dma/market
    # simply is not sent.
    payload = {'dmaId': dma,
               'marketId': market,
               'source': source,
               'classificationName': 'music',
               'size': '200',
               # SECURITY: prefer the TICKETMASTER_API_KEY environment variable; the
               # literal is only a backward-compatible fallback and should be rotated.
               'apikey': os.environ.get('TICKETMASTER_API_KEY', 'OhKdHqBZOOuGCrWIcjlhzoxmnjUoaGWL')}
    # A timeout keeps a stalled connection from hanging the notebook forever.
    r = requests.get(url, params=payload, verify=True, timeout=30)
    # Surface HTTP failures directly rather than as a KeyError on 'page' below.
    r.raise_for_status()
    return r.json()['page']['totalPages']

#Get TicketMaster data, return a dataframe
def getTicketMasterData(dma=None,market=None,page=None, source='ticketmaster,frontgate'):
    """Fetch one page of music events from the Ticketmaster API as a DataFrame.

    Parameters
    ----------
    dma : optional
        Sent as the ``dmaId`` query parameter.
    market : optional
        Sent as the ``marketId`` query parameter.
    page : optional
        Sent as the ``page`` query parameter.
    source : str
        Sent as the ``source`` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per event with the ``TM_*`` columns built below. Fields missing
        from an event are stored as ``None`` (or an empty list for the artist and
        presale columns) instead of raising. A response without
        ``_embedded.events`` -- including an API error payload -- produces an
        empty DataFrame.
    """
    url = 'https://app.ticketmaster.com/discovery/v2/events.json?countryCode=US'
    # requests leaves out parameters whose value is None.
    payload = {'dmaId': dma,
               'marketId': market,
               'source': source,
               'classificationName' : 'music',
               'sort': 'date,name,asc',
               'size': '200',
               'page': page,
               # SECURITY: prefer the TICKETMASTER_API_KEY environment variable; the
               # literal is only a backward-compatible fallback and should be rotated.
               'apikey': os.environ.get('TICKETMASTER_API_KEY', 'OhKdHqBZOOuGCrWIcjlhzoxmnjUoaGWL')}
    # A timeout keeps a stalled connection from hanging the notebook forever.
    r = requests.get(url, params=payload, verify=True, timeout=30)
    json_response = r.json()

    # `x.get(key) or default` also covers keys that are present but null.
    events = (json_response.get('_embedded') or {}).get('events') or []
    event_info = []
    for event in events:
        embedded = event.get('_embedded') or {}
        venues = embedded.get('venues') or []
        # Only the first venue is reported; an event without venues yields None fields.
        venue = venues[0] if venues else {}
        dates = event.get('dates') or {}
        sales = event.get('sales') or {}
        presales = sales.get('presales') or []
        event_info.append({
            'TM_id': event.get('id'),
            'TM_name': event.get('name'),
            'TM_artist': [attraction.get('name') for attraction in embedded.get('attractions') or []],
            'TM_venue': venue.get('name'),
            'TM_venue_city': (venue.get('city') or {}).get('name'),
            'TM_venue_state': (venue.get('state') or {}).get('stateCode'),
            'TM_description': event.get('description'),
            'TM_more_info': event.get('additionalInfo'),
            'TM_start_date': (dates.get('start') or {}).get('dateTime'),
            'TM_timezone': dates.get('timezone'),
            'TM_span_multiple_days': dates.get('spanMultipleDays'),
            'TM_presale_date_start': [presale.get('startDateTime') for presale in presales],
            'TM_presale_date_end': [presale.get('endDateTime') for presale in presales],
            'TM_sale_date_start': (sales.get('public') or {}).get('startDateTime'),
            'TM_FV_prices': event.get('priceRanges'),
            'TM_promoter': (event.get('promoter') or {}).get('name'),
            'TM_genre': event.get('classifications'),
            'TM_place': event.get('place'),
        })
    return pd.DataFrame(event_info)

#Convert datetime information from UTC to local time
def convert_times(df,times_list,tz_col):
    """Convert UTC datetime columns to each row's own timezone.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; it is not modified.
    times_list : list of str
        Names of the columns holding UTC datetimes (strings or datetimes).
        Values that cannot be parsed become ``NaT``.
    tz_col : str
        Name of the column holding each row's timezone name.

    Returns
    -------
    pandas.DataFrame
        A new frame whose rows are grouped by timezone (in order of first
        appearance), followed by the rows without a timezone, which stay in UTC.
        Because different rows can carry different timezones, the converted
        columns may end up with object dtype after concatenation.
    """
    if df.empty:
        # pd.concat raises on an empty list, so short-circuit.
        return df.copy()

    def _to_utc(frame):
        # utc=True handles both naive strings and ones that already carry an
        # offset such as a trailing 'Z'; tz_localize('UTC') rejects the latter.
        for col in times_list:
            frame[col] = pd.to_datetime(frame[col], errors='coerce', utc=True)
        return frame

    df_list = []
    #Loop through each timezone
    for tz in df[tz_col].dropna().unique():
        # Copy the slice so the assignments below never write into a view of `df`.
        df_local = _to_utc(df.loc[df[tz_col] == tz].copy())
        for col in times_list:
            df_local[col] = df_local[col].dt.tz_convert(tz)
        df_list.append(df_local)

    # `== tz` never matches a null timezone, so keep those rows explicitly.
    missing_tz = df[tz_col].isna()
    if missing_tz.any():
        df_list.append(_to_utc(df.loc[missing_tz].copy()))

    return pd.concat(df_list, axis=0)

#Explode out columns with nested information
def explode(df,col,index):
    """Expand the first dict stored in ``df[col]`` into separate columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; it is not modified.
    col : str
        Column whose cells hold a list of dicts (only the first dict is used)
        or a single dict. Any other cell (None, NaN, empty list) contributes
        no values.
    index : str
        Column used as the join key between ``df`` and the expanded values.

    Returns
    -------
    pandas.DataFrame
        ``df`` without ``col`` plus one column per dict key, with the same
        number of rows as ``df``.

    Raises
    ------
    ValueError
        Raised by ``DataFrame.join`` when a dict key matches an existing column
        name of ``df``.
    """
    def _first_record(value):
        # Normalise a cell to a dict; anything unusable becomes {} so it adds
        # no stray columns.
        if isinstance(value, dict):
            return value
        if isinstance(value, (list, tuple)) and value and isinstance(value[0], dict):
            return value[0]
        return {}

    records = [_first_record(value) for value in df[col]]
    expanded = pd.DataFrame(records, index=pd.Index(df[index], name=index))
    # Keep one lookup row per key; duplicate keys would multiply rows in the join.
    expanded = expanded[~expanded.index.duplicated(keep='first')]
    return df.join(expanded, on=index).drop(col, axis=1)

## Query Ticketmaster face value data
### Pull the data and create the data frame
For each city/market, get the number of pages of data, pull all of the data, and then concatenate the dataframes.

In [None]:
fv_df_list = []

# The Discovery API numbers pages from 0, and its documented deep-paging limit is
# size * page < 1000. getTicketMasterData requests 200 events per page, so only
# pages 0-4 can be retrieved for a single query.
TM_MAX_PAGES = 5

#Create dictionaries of dma/mkt numbers, and number of pages of data
#Get number of pages for face value dma items
dma_dict = {dma: get_number_of_TM_pages(dma=dma) for dma in dma_list}

#Get DMA data
for dma, n_pages in dma_dict.items():
    for page in range(min(n_pages, TM_MAX_PAGES)):
        fv_df_list.append(getTicketMasterData(dma=dma, page=page))

#Get number of pages for face value market items
mkt_dict = {mkt: get_number_of_TM_pages(market=mkt) for mkt in mkt_list}

#Get market data
for mkt, n_pages in mkt_dict.items():
    for page in range(min(n_pages, TM_MAX_PAGES)):
        fv_df_list.append(getTicketMasterData(market=mkt, page=page))

#Combine all dataframes for full data on events and face_value prices
# ignore_index gives every row a unique label instead of repeating 0..199 per page.
fv_df = pd.concat(fv_df_list, axis=0, ignore_index=True)

fv_df.head()

### Convert datetime columns to datetime objects in  proper timezones
Currently the four columns with datetime information are a mess. Some are of type dict, some are nested in lists, and all are in the UTC timezone. Looking at the datetime columns that hold lists of dates, the datetimes within each list are identical, so we can unnest them by simply taking the first element of each list.

The 3 things we need to accomplish:
    1. Convert all datetime columns to dtype string, and unnest 'TM_presale_date_end' and 'TM_presale_date_start' columns
    2. Convert all columns to datetime series localized to UTC (Done in the convert_times function)
    3. Filter rows by timezone, and loop over each datetime column to convert objects to their proper timezone (Done in the convert_times function)

In [None]:
# 1. Convert all datetime columns to dtype string, and unnest objects
def _first_or_scalar(value):
    """Return the first element of a list (None when empty); pass anything else through."""
    if isinstance(value, list):
        return value[0] if value else None
    return value

# Unlike `.str[0]`, this is safe to re-run: an already-unnested string is left
# untouched instead of being cut down to its first character.
for _col in ['TM_presale_date_end', 'TM_presale_date_start']:
    fv_df[_col] = fv_df[_col].map(_first_or_scalar)
for _col in ['TM_start_date', 'TM_sale_date_start']:
    fv_df[_col] = fv_df[_col].astype(str)


In [None]:
# Steps 2 and 3 both happen inside convert_times: each listed column is parsed as
# a UTC datetime and then shifted into the timezone named in the row's TM_timezone.
col_list = [
    'TM_presale_date_end',
    'TM_presale_date_start',
    'TM_start_date',
    'TM_sale_date_start',
]
fv_df = convert_times(fv_df, col_list, 'TM_timezone')

### Explode nested rows
Unnest the information in the prices and genre columns.

In [None]:
# Spread the first price dictionary of each event into its own columns
fv_df = explode(df=fv_df, col='TM_FV_prices', index='TM_id')

In [None]:
#Unnest genre info and clean up data in exploded columns
# NOTE(review): the exploded price columns are expected to include a 'type' key,
# which would clash with the classification 'type' column added here and make
# the join fail -- confirm against the API payload. The rename is a no-op when
# no such column exists.
fv_df = explode(fv_df.rename(columns={'type': 'price_type'}), 'TM_genre', 'TM_id')
# Each classification column holds a dict; keep only its 'name'. `fv_df[col]` is
# required here: `fv_df.col` would look up a column literally called "col".
# 'genre' is included so the genre column itself is unwrapped like its siblings.
for col in ['segment','genre','subGenre','subType','type']:
    if col in fv_df.columns:
        # Non-dict cells (e.g. NaN for events without classifications) pass through.
        fv_df[col] = fv_df[col].map(lambda x: x.get('name') if isinstance(x, dict) else x)
fv_df.head()

## Query TicketMaster resale data and merge with face value data
Next we will query TicketMaster's resale ticket data and then merge it with the face value dataframe

In [None]:
#Query resale data from the Ticketmaster API

rv_df_list = []

# The Discovery API numbers pages from 0, and its documented deep-paging limit is
# size * page < 1000. getTicketMasterData requests 200 events per page, so only
# pages 0-4 can be retrieved for a single query.
TM_MAX_PAGES = 5

#Create dictionaries of dma/mkt numbers, and number of pages of data
#Get number of pages for resale value dma items
dma_rv_dict = {dma: get_number_of_TM_pages(dma=dma, source='tmr') for dma in dma_list}

#Get DMA data
# Page counts come from the resale ('tmr') query itself, and results go into
# rv_df_list so that face-value rows never leak into the resale frame.
for dma, n_pages in dma_rv_dict.items():
    for page in range(min(n_pages, TM_MAX_PAGES)):
        rv_df_list.append(getTicketMasterData(dma=dma, page=page, source='tmr'))

#Get number of pages for resale value market items
mkt_rv_dict = {mkt: get_number_of_TM_pages(market=mkt, source='tmr') for mkt in mkt_list}

#Get market data
for mkt, n_pages in mkt_rv_dict.items():
    for page in range(min(n_pages, TM_MAX_PAGES)):
        rv_df_list.append(getTicketMasterData(market=mkt, page=page, source='tmr'))

#Combine all dataframes for full data on events and resale value prices
rv_df = pd.concat(rv_df_list, axis=0, ignore_index=True)
rv_df.head()

### Extract price information and merge it with face value data in a new dataframe

In [None]:
# Explode out resale price information
rv_df = explode(rv_df,'TM_FV_prices','TM_id')
rv_df = rv_df.rename(columns={'min': 'tmr_min', 'max': 'tmr_max'})
# Build a one-row-per-event lookup indexed by TM_id. `join(on='TM_id')` matches
# the left column against the right frame's *index*, so the resale frame must be
# indexed by event id rather than by row position. reindex tolerates a resale
# pull that returned no price columns at all.
rv_prices = (
    rv_df.reindex(columns=['TM_id', 'tmr_min', 'tmr_max'])
         .drop_duplicates(subset='TM_id')
         .set_index('TM_id')
)
# Create a new dataframe combining face value and resale value information, joined on ticketmaster event ID
joined_df = fv_df.join(rv_prices, on='TM_id', how='outer')
joined_df.head()