# Data Management: Barclays Center Events Dataset

## This notebook:
1. Parse event info of 2016 from Barclays Center's Facebook page
2. Returns a csv with events ('events_2016.csv')
3. Manage time variables and subset events beginning after 17:00
4. Returns a csv with subset events ('events_2016_subset.csv')

## Parse events data

In [None]:
import os
import requests
import facebook

import numpy as np
import pandas as pd
import matplotlib.pylab as plt

In [None]:
# define a function that parse event info from Facebook
# sample code retrieved from https://gist.github.com/mylsb/10294040
def some_action(event,results):
    """ Here you might want to do something with each post. E.g. grab the
    post's message (post['message']) or the post's picture (post['picture']).
    In this implementation we just print the post's created time.
    """
    #print(event['start_time'])
    date = event['start_time'].split('-')
    if date[0] == '2016':
        results.append([event['id'], event['name'], event['start_time']])
        #print(event['id'])

In [None]:
# set up parameters for parsing
results = []

# API token: https://developers.facebook.com/tools/explorer/
access_token = os.getenv('$FBAPIKEY')

# Barclays Center Facebook id
user = 'barclayscenter'

graph = facebook.GraphAPI(access_token)
profile = graph.get_object(user)
events = graph.get_connections(profile['id'], 'events')

In [None]:
# run code
while True:
    try:
        # Perform some action on each post in the collection we receive from
        # Facebook.
        [some_action(event, results) for event in events['data']]
        # Attempt to make a request to the next page of data, if it exists.
        events = requests.get(events['paging']['next']).json()
    except KeyError:
        # When there are no more pages (['paging']['next']), break from the
        # loop and end the script.
        break

In [None]:
# examine and clean up dataframe
df = pd.DataFrame(results)
df.columns = ['EventID','Name','Start_Time']
df.head()

In [None]:
len(df)

In [None]:
df.to_csv('events_2016.csv')

## Manually assign event types outside of notebook
events_2016.csv -> events_2016_modified.csv
## Add event end time variable

In [17]:
event = pd.read_csv('events_2016_modified.csv')
event.drop('Unnamed: 0', axis=1, inplace=True)
event.head()

Unnamed: 0,EventID,Event_type,Name,Start_Time,Start_Hour,End_Time
0,1520000000000000.0,basketball,Harlem Globetrotters,2016-01-02T14:00:00-0500,,2016-01-02T16:00:00-0500
1,1490000000000000.0,basketball,Harlem Globetrotters,2016-01-02T19:00:00-0500,,2016-01-02T21:00:00-0500
2,201000000000000.0,basketball,Brooklyn Nets vs. Boston Celtics,2016-01-04T19:30:00-0500,,2016-01-04T21:30:00-0500
3,1080000000000000.0,basketball,Brooklyn Nets vs. Toronto Raptors,2016-01-06T19:30:00-0500,,2016-01-06T21:30:00-0500
4,1390000000000000.0,basketball,Brooklyn Nets vs. Orlando Magic,2016-01-08T19:30:00-0500,,2016-01-08T21:30:00-0500


In [13]:
# remove unreasonable timezone element
event['Start_Time'] = [str(i)[:-5] for i in event['Start_Time']]
event['End_Time'] = [str(i)[:-5] for i in event['End_Time']]

# extract hour element from time
event['Start_Time'] = pd.to_datetime(event['Start_Time'], format='%Y-%m-%dT%H:%M:%S')
event['Start_Hour'] = event['Start_Time'].dt.hour

event['End_Time'] = pd.to_datetime(event['End_Time'], format='%Y-%m-%dT%H:%M:%S')
event['End_Hour'] = event['End_Time'].dt.hour
event.head()

Unnamed: 0,EventID,Event_type,Name,Start_Time,Start_Hour,End_Time,End_Hour
0,1520000000000000.0,basketball,Harlem Globetrotters,2016-01-02 14:00:00,14,2016-01-02 16:00:00,16
1,1490000000000000.0,basketball,Harlem Globetrotters,2016-01-02 19:00:00,19,2016-01-02 21:00:00,21
2,201000000000000.0,basketball,Brooklyn Nets vs. Boston Celtics,2016-01-04 19:30:00,19,2016-01-04 21:30:00,21
3,1080000000000000.0,basketball,Brooklyn Nets vs. Toronto Raptors,2016-01-06 19:30:00,19,2016-01-06 21:30:00,21
4,1390000000000000.0,basketball,Brooklyn Nets vs. Orlando Magic,2016-01-08 19:30:00,19,2016-01-08 21:30:00,21


In [14]:
# subset events that began after 17:00
event_evening = event[event['Start_Hour'] >= 17.0]
event_evening.head()

Unnamed: 0,EventID,Event_type,Name,Start_Time,Start_Hour,End_Time,End_Hour
1,1490000000000000.0,basketball,Harlem Globetrotters,2016-01-02 19:00:00,19,2016-01-02 21:00:00,21
2,201000000000000.0,basketball,Brooklyn Nets vs. Boston Celtics,2016-01-04 19:30:00,19,2016-01-04 21:30:00,21
3,1080000000000000.0,basketball,Brooklyn Nets vs. Toronto Raptors,2016-01-06 19:30:00,19,2016-01-06 21:30:00,21
4,1390000000000000.0,basketball,Brooklyn Nets vs. Orlando Magic,2016-01-08 19:30:00,19,2016-01-08 21:30:00,21
5,1230000000000000.0,basketball,Brooklyn Nets vs. San Antonio Spurs,2016-01-11 19:30:00,19,2016-01-11 21:30:00,21


In [15]:
# create a dummy indicator for weekday
event_evening['weekday'] = [i not in [5,6] for i in event_evening['Start_Time'].dt.weekday.values]
event_evening

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


Unnamed: 0,EventID,Event_type,Name,Start_Time,Start_Hour,End_Time,End_Hour,weekday
1,1.490000e+15,basketball,Harlem Globetrotters,2016-01-02 19:00:00,19,2016-01-02 21:00:00,21,False
2,2.010000e+14,basketball,Brooklyn Nets vs. Boston Celtics,2016-01-04 19:30:00,19,2016-01-04 21:30:00,21,True
3,1.080000e+15,basketball,Brooklyn Nets vs. Toronto Raptors,2016-01-06 19:30:00,19,2016-01-06 21:30:00,21,True
4,1.390000e+15,basketball,Brooklyn Nets vs. Orlando Magic,2016-01-08 19:30:00,19,2016-01-08 21:30:00,21,True
5,1.230000e+15,basketball,Brooklyn Nets vs. San Antonio Spurs,2016-01-11 19:30:00,19,2016-01-11 21:30:00,21,True
6,9.200000e+14,basketball,Brooklyn Nets vs. New York Knicks,2016-01-13 19:30:00,19,2016-01-13 21:30:00,21,True
7,1.110000e+14,basketball,Brooklyn Nets vs. Portland Trail Blazers,2016-01-15 19:30:00,19,2016-01-15 21:30:00,21,True
8,1.720000e+15,boxing,Heavyweight Championship: Deontay Wilder v. Ar...,2016-01-16 18:00:00,18,2016-01-16 19:00:00,19,False
11,1.650000e+15,basketball,Brooklyn Nets vs. Cleveland Cavaliers,2016-01-20 19:30:00,19,2016-01-20 21:30:00,21,True
12,1.660000e+15,concert,Kygo,2016-01-21 20:00:00,20,2016-01-21 23:00:00,23,True


In [16]:
event_evening.to_csv('events_2016_subset.csv')