In [44]:
from pymongo import MongoClient
import pandas as pd
import requests
from bs4 import BeautifulSoup
from pprint import pprint

In [45]:
client = MongoClient()
db = client['meetup']

In [46]:
API_KEY = 'xxx'
BASE_URL = 'https://api.meetup.com'

In [47]:
# get a listing of all meetup events hosted by DE-ProD-SF

def get_events(urlname):
    
    events_url = BASE_URL + '/' + urlname + '/events'
    params = {'sign':'true','key': API_KEY, 'status': 'past'}
    response = requests.get(events_url, params = params)
    events = response.json()
    
    return events

urlname = 'DE-ProD-SF'
get_events(urlname)

[{u'created': 1455863910000,
  u'description': u"<p>Exercise your critical thinking with a data eng challenge problem! Hear how others would solve the problem, what trade-offs they consider, and voice your opinion / ask questions in a non-threatening atmosphere. Like a journal club, but minus reading boring papers, and plus an exciting problem :-)</p> <p>I'll send out the challenge problem by email to the RSVP'd group 2 days before the event.</p> <p>By the way, employers rank whiteboarding &amp; critical thinking skills as one of their top priorities in hiring data engineers. Practice makes perfect!</p> <p>See you soon,<br/>Aaron</p> ",
  u'group': {u'created': 1441761756000,
   u'id': 18913207,
   u'join_mode': u'open',
   u'lat': 37.779998779296875,
   u'lon': -122.41999816894531,
   u'name': u'Data Engineering Professional Development San Francisco',
   u'urlname': u'DE-ProD-SF',
   u'who': u'Professional Developers'},
  u'id': u'228968660',
  u'link': u'http://www.meetup.com/DE-Pro

In [48]:
#query all event ids and descriptions

def query_events():
    
    event_id = []
    event_desc_raw= []
    event_time = []
    for event in get_events(urlname):
        event_id.append(event['id'])
        event_desc_raw.append(event['description'])
        event_time.append(event['time'])
 
    return event_id, event_desc_raw, event_time

# events_ids = [''.join(event['id']) for event in events]
# print events_ids

# events_descriptions = [event['description'] for event in events]
# print events_descriptions

event_id, event_desc_raw, event_time = query_events()

event_description = []
for i in xrange(len(event_id)):
    soup = BeautifulSoup(event_desc_raw[i], 'html.parser')
    event_description.append(soup.get_text())

In [50]:
df_events= pd.DataFrame([event_id, event_description, event_time]).T
df_events.columns = ['event_id', 'event_description', 'event_time']
df_events

Unnamed: 0,event_id,event_description,event_time
0,228968660,Exercise your critical thinking with a data en...,1456455600000
1,228648803,"*Note, expedite your check in at Galvanize; re...",1459301400000


In [51]:
#get all rsvps for each event

member_id = []
event_id = []

for i in query_events()[0]:
    rsvps_url = BASE_URL + '/' + urlname +'/events/' + i + '/rsvps'
    params = {'sign':'true','key': API_KEY}
    response = requests.get(rsvps_url, params = params)
    rsvps = response.json()
    
    for rsvp in rsvps:
        member_id.append(rsvp['member']['id'])
        event_id.append(rsvp['event']['id'])
               
df_rsvps = pd.DataFrame([member_id, event_id]).T
df_rsvps.columns = ['member_id', 'event_id']
df_rsvps['rsvp'] = 1
df_rsvps

Unnamed: 0,member_id,event_id,rsvp
0,87429312,228968660,1
1,149624662,228968660,1
2,129647262,228968660,1
3,132426572,228968660,1
4,67449572,228968660,1
5,111657512,228968660,1
6,198433955,228968660,1
7,11847383,228968660,1
8,10048695,228968660,1
9,14704591,228968660,1


In [52]:
# get all members from a group 
members_url = BASE_URL + '/2/members' 
params = {'sign':'true','key': API_KEY, 'group_id': '18913207'}
response = requests.get(members_url, params = params)
members_all = response.json()
# pprint (members_all)

In [53]:
members = members_all['results']
member_id = [member['id'] for member in members]
join_time = [member['joined'] for member in members]

df_members = pd.DataFrame([member_id,join_time]).T
df_members.columns = ['member_id', 'join_time']
df_members

Unnamed: 0,member_id,join_time
0,194699547,1459391446000
1,87429312,1441761756000
2,81401992,1459377236000
3,185745656,1456036929000
4,11885543,1459228941000
5,55930862,1445727497000
6,195886534,1459373753000
7,192607432,1442034315000
8,183554416,1459380534000
9,183993413,1450205662000


In [54]:
df_members['key'] = 2
df_events['key'] = 2
df_1 = pd.merge(df_members, df_events, on = 'key')
df_1.drop('key', axis = 1, inplace = True)
df_1

Unnamed: 0,member_id,join_time,event_id,event_description,event_time
0,194699547,1459391446000,228968660,Exercise your critical thinking with a data en...,1456455600000
1,194699547,1459391446000,228648803,"*Note, expedite your check in at Galvanize; re...",1459301400000
2,87429312,1441761756000,228968660,Exercise your critical thinking with a data en...,1456455600000
3,87429312,1441761756000,228648803,"*Note, expedite your check in at Galvanize; re...",1459301400000
4,81401992,1459377236000,228968660,Exercise your critical thinking with a data en...,1456455600000
5,81401992,1459377236000,228648803,"*Note, expedite your check in at Galvanize; re...",1459301400000
6,185745656,1456036929000,228968660,Exercise your critical thinking with a data en...,1456455600000
7,185745656,1456036929000,228648803,"*Note, expedite your check in at Galvanize; re...",1459301400000
8,11885543,1459228941000,228968660,Exercise your critical thinking with a data en...,1456455600000
9,11885543,1459228941000,228648803,"*Note, expedite your check in at Galvanize; re...",1459301400000


In [56]:
df_2 = df_1[(df_1['join_time'] < df_1['event_time'])]
df_2
df_2.drop(['join_time','event_time'], axis = 1, inplace = True)
df_2

A value is trying to be set on a copy of a slice from a DataFrame

See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


Unnamed: 0,member_id,event_id,event_description
2,87429312,228968660,Exercise your critical thinking with a data en...
3,87429312,228648803,"*Note, expedite your check in at Galvanize; re..."
6,185745656,228968660,Exercise your critical thinking with a data en...
7,185745656,228648803,"*Note, expedite your check in at Galvanize; re..."
9,11885543,228648803,"*Note, expedite your check in at Galvanize; re..."
10,55930862,228968660,Exercise your critical thinking with a data en...
11,55930862,228648803,"*Note, expedite your check in at Galvanize; re..."
14,192607432,228968660,Exercise your critical thinking with a data en...
15,192607432,228648803,"*Note, expedite your check in at Galvanize; re..."
18,183993413,228968660,Exercise your critical thinking with a data en...


In [63]:
df_3 = pd.merge(df_2, df_rsvps, how ='left', on = ['member_id','event_id'])
df_3.fillna(value = 0, inplace = True)
df_3

# df_3.to_json('data.json')

Unnamed: 0,member_id,event_id,event_description,rsvp
0,87429312,228968660,Exercise your critical thinking with a data en...,1
1,87429312,228648803,"*Note, expedite your check in at Galvanize; re...",1
2,185745656,228968660,Exercise your critical thinking with a data en...,0
3,185745656,228648803,"*Note, expedite your check in at Galvanize; re...",0
4,11885543,228648803,"*Note, expedite your check in at Galvanize; re...",0
5,55930862,228968660,Exercise your critical thinking with a data en...,0
6,55930862,228648803,"*Note, expedite your check in at Galvanize; re...",0
7,192607432,228968660,Exercise your critical thinking with a data en...,0
8,192607432,228648803,"*Note, expedite your check in at Galvanize; re...",0
9,183993413,228968660,Exercise your critical thinking with a data en...,0


In [None]:
# df_2['rsvp_2'] = ((df_2['rsvp'].notnull()) | (df_2['join_time'] < df_2['event_time'])).astype(int)
# df_2['rsvp_2'] = ((df_2['rsvp'].notnull()) | (df_2['join_time'] < event_time)).astype(int)
# df_2['rsvp_2'] = ((df_2.rsvp == 0) & (df_2.join_time < df_2.event_time)).astype(int)
# np.sort(df_members.member_id.astype(int))