In [194]:
import pandas as pd
import numpy as np

In [195]:
all_events = ['thing{}'.format(i) for i in range(1,5)]
all_dates  = ['date{}'.format(i) for i in range(1,5)]

form_data = [
    {
            'person':   'person1',
            'dates':  ("date1", "date4"),
            'events': ("thing1", "thing3"),
            'notes':  '',
    },
    {
            'person':  'person2',
            'dates':  ("date1", "date2"),
            'events': ("thing1", "thing2"),
            'notes':  '',
    },
    {
            'person': 'person3',
            'dates':  ("date1", "date2", "date3"),
            'events':  ("thing1", "thing2", "thing3", "thing4"),
            'notes':  '',
    },
]


df = pd.DataFrame.from_records(form_data)
all_people = [ p['person'] for p in form_data ]
df

Unnamed: 0,person,dates,events,notes
0,person1,"(date1, date4)","(thing1, thing3)",
1,person2,"(date1, date2)","(thing1, thing2)",
2,person3,"(date1, date2, date3)","(thing1, thing2, thing3, thing4)",


In [196]:

date_tuples = []
for d in form_data:
    person = d['person']
    dates = d['dates']
    date_tuples += [(person, date) for date in dates]
availability = pd.DataFrame(date_tuples, columns=('person','date')).set_index('person')
availability

Unnamed: 0_level_0,date
person,Unnamed: 1_level_1
person1,date1
person1,date4
person2,date1
person2,date2
person3,date1
person3,date2
person3,date3


person, dates tuples
person, events tuples
join those on person, group by event,date

use antijoin to remove options as events are scheduled

look up pd.pivot, pivot_values

np book array where rows sum to 1
prob too fancy: dynamic programming matrix

In [197]:
event_tuples = []
for d in form_data:
    person = d['person']
    events = d['events']
    event_tuples += [(person, event) for event in events]
interest = pd.DataFrame(event_tuples, columns=('person','event')).set_index('person')
interest

Unnamed: 0_level_0,event
person,Unnamed: 1_level_1
person1,thing1
person1,thing3
person2,thing1
person2,thing2
person3,thing1
person3,thing2
person3,thing3
person3,thing4


In [198]:
prefs = availability.join(interest).reset_index()
prefs


Unnamed: 0,person,date,event
0,person1,date1,thing1
1,person1,date1,thing3
2,person1,date4,thing1
3,person1,date4,thing3
4,person2,date1,thing1
5,person2,date1,thing2
6,person2,date2,thing1
7,person2,date2,thing2
8,person3,date1,thing1
9,person3,date1,thing2


# Scoring algorithm
## Per-attendee component
- Higher the fewer other dates on which this attendee could attend this event
- Higher the fewer other events in which this attendee is interested

## Per-date component
- Higher the greater the % of people interested in this event are available on this date
- Higher the fewer other events can run on this date

## Per-event component 
- Higher the fewer other dates on which this event could run


1. For each event+date pair, calculate a score factoring in all of the above
 1. Select top-scoring event+date
 2. Remove ^^^
 3. Repeat

In [199]:
def people_for_event(event):
    return set(prefs[prefs['event'] == event]['person'])

def people_for_event_date(event, date):
    return set(prefs[(prefs['date'] == date) & (prefs['event'] == event)]['person'])

def dates_for_event_person(event, person):
    return set(prefs[(prefs['event'] == event) & (prefs['person'] == person)]['date'])

def events_for_person(person):
    return set(prefs[prefs['person'] == person]['event'])

def dates_for_person(person):
    return set(prefs[prefs['person'] == person]['date'])

def dates_for_event(event):
    """
    Note that this does not return dates an event could run but in which 0 people expressed interest
    """
    return set(prefs[prefs['event'] == event]['date'])

def score(event, date):
    # Higher the fewer other dates on which this attendee could attend this event


    # Higher the fewer other events in which this attendee is interested
    pass

people_for_event_date('thing1','date1')
dates_for_event_person('thing1','person1')
events_for_person('person3')
dates_for_event('thing1')


{'date1', 'date2', 'date3', 'date4'}

In [207]:
scores = pd.DataFrame()
scores['other_dates_score'] = prefs.apply(lambda p: 1.0 / len(dates_for_event_person(p['event'], p['person'])), axis=1) 
scores['other_events_score'] = prefs.apply(lambda p: 1.0 / len(events_for_person(p['person'])), axis=1) 
scores


Unnamed: 0,other_dates_score,other_events_score
0,0.5,0.5
1,0.5,0.5
2,0.5,0.5
3,0.5,0.5
4,0.5,0.5
5,0.5,0.5
6,0.5,0.5
7,0.5,0.5
8,0.333333,0.25
9,0.333333,0.25


In [212]:
"""
Higher the fewer other dates on which this attendee could attend this event
Higher the fewer other events in which this attendee is interested
"""

scores['other_dates_score'] = prefs.apply(lambda p: 1.0 / len(dates_for_event_person(p['event'], p['person'])), axis=1) 
scores['other_events_score'] = prefs.apply(lambda p: 1.0 / len(events_for_person(p['person'])), axis=1) 
scores


Unnamed: 0,other_dates_score,other_events_score
0,0.5,0.5
1,0.5,0.5
2,0.5,0.5
3,0.5,0.5
4,0.5,0.5
5,0.5,0.5
6,0.5,0.5
7,0.5,0.5
8,0.333333,0.25
9,0.333333,0.25


In [213]:
"""
Higher the greater the % of people interested in this event are available on this date
Higher the fewer other events can run on this date
"""
scores['other_interest_score'] = prefs.apply(lambda p: len(people_for_event_date(p['event'], p['date'])) / len(people_for_event(p['event'])), axis=1)
scores['date_competition_score'] = prefs.apply(lambda p: 1/len(dates_for_event(p['event'])), axis=1)
scores

Unnamed: 0,other_dates_score,other_events_score,other_interest_score,date_competition_score
0,0.5,0.5,1.0,0.25
1,0.5,0.5,1.0,0.25
2,0.5,0.5,0.333333,0.25
3,0.5,0.5,0.5,0.25
4,0.5,0.5,1.0,0.25
5,0.5,0.5,1.0,0.333333
6,0.5,0.5,0.666667,0.25
7,0.5,0.5,1.0,0.333333
8,0.333333,0.25,1.0,0.25
9,0.333333,0.25,1.0,0.333333


In [214]:
scores = scores.join(prefs).groupby(['date','event']).mean()
scores

Unnamed: 0_level_0,Unnamed: 1_level_0,other_dates_score,other_events_score,other_interest_score,date_competition_score
date,event,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
date1,thing1,0.444444,0.416667,1.0,0.25
date1,thing2,0.416667,0.375,1.0,0.333333
date1,thing3,0.416667,0.375,1.0,0.25
date1,thing4,0.333333,0.25,1.0,0.333333
date2,thing1,0.416667,0.375,0.666667,0.25
date2,thing2,0.416667,0.375,1.0,0.333333
date2,thing3,0.333333,0.25,0.5,0.25
date2,thing4,0.333333,0.25,1.0,0.333333
date3,thing1,0.333333,0.25,0.333333,0.25
date3,thing2,0.333333,0.25,0.5,0.333333


In [215]:
scores = scores.mean(axis=1).sort_values()
scores

date   event 
date3  thing1    0.291667
date2  thing3    0.333333
date3  thing3    0.333333
       thing2    0.354167
date4  thing1    0.395833
date2  thing1    0.427083
date4  thing3    0.437500
date1  thing4    0.479167
date2  thing4    0.479167
date3  thing4    0.479167
date1  thing3    0.510417
       thing1    0.527778
       thing2    0.531250
date2  thing2    0.531250
dtype: float64

In [216]:
(date, event, score) = scores.index[-1] + (scores[scores.index[-1]],)
print((date, event, score))

('date2', 'thing2', 0.53125)


In [189]:
prefs[(prefs.event != event) & (prefs.date != date)]

Unnamed: 0,person,date,event,other_dates_score,other_events_score,other_interest_score,date_competition_score
0,person1,date1,thing1,0.5,0.5,1.0,0.25
1,person1,date1,thing3,0.5,0.5,1.0,0.25
2,person1,date4,thing1,0.5,0.5,0.333333,0.25
3,person1,date4,thing3,0.5,0.5,0.5,0.25
4,person2,date1,thing1,0.5,0.5,1.0,0.25
8,person3,date1,thing1,0.333333,0.25,1.0,0.25
10,person3,date1,thing3,0.333333,0.25,1.0,0.25
11,person3,date1,thing4,0.333333,0.25,1.0,0.333333
16,person3,date3,thing1,0.333333,0.25,0.333333,0.25
18,person3,date3,thing3,0.333333,0.25,0.5,0.25


In [94]:
prefs[(prefs['person'] == 'person1') & (prefs['event'] == 'thing1')].groupby(['person','date','event']).apply(lambda r: (r['other_dates_score'] + r['other_events_score']) / 2)

person   date   event    
person1  date1  thing1  0    0.5
         date4  thing1  2    0.5
dtype: float64

In [23]:
#prefs.pivot(index='date', columns='person', values='event')
prefs.reset_index()

Unnamed: 0,person,date,event
0,person1,date1,thing1
1,person1,date1,thing3
2,person1,date4,thing1
3,person1,date4,thing3
4,person2,date1,thing1
5,person2,date1,thing2
6,person2,date2,thing1
7,person2,date2,thing2
8,person3,date1,thing1
9,person3,date1,thing2


In [21]:
help(prefs.pivot)

Help on method pivot in module pandas.core.frame:

pivot(index=None, columns=None, values=None) -> 'DataFrame' method of pandas.core.frame.DataFrame instance
    Return reshaped DataFrame organized by given index / column values.
    
    Reshape data (produce a "pivot" table) based on column values. Uses
    unique values from specified `index` / `columns` to form axes of the
    resulting DataFrame. This function does not support data
    aggregation, multiple values will result in a MultiIndex in the
    columns. See the :ref:`User Guide <reshaping>` for more on reshaping.
    
    Parameters
    ----------
    index : str or object, optional
        Column to use to make new frame's index. If None, uses
        existing index.
    columns : str or object
        Column to use to make new frame's columns.
    values : str, object or a list of the previous, optional
        Column(s) to use for populating new frame's values. If not
        specified, all remaining columns will be use

## aggregation

In [147]:
options = prefs.groupby(['event','date']).size().sort_values()
options

event   date 
thing1  date3    1
        date4    1
thing2  date3    1
thing3  date2    1
        date3    1
        date4    1
thing4  date1    1
        date2    1
        date3    1
thing1  date2    2
thing2  date1    2
        date2    2
thing3  date1    2
thing1  date1    3
dtype: int64

In [144]:
def num_attendees_for(event, date):
    return options.loc[(event, date)]
num_attendees_for("thing1", "date2")

2

## scoring

In [149]:
def score(x):
    attendees = [] # todo
    num_attendees = len(attendees)
    return num_attendees
        
    
    

prefs.groupby(['event','date']).apply(score).sort_values()

event   date 
thing1  date1    0
        date2    0
        date3    0
        date4    0
thing2  date1    0
        date2    0
        date3    0
thing3  date1    0
        date2    0
        date3    0
        date4    0
thing4  date1    0
        date2    0
        date3    0
dtype: int64

In [114]:
[a.index for td,a in prefs.groupby(['event','date'])]

[Index(['person1', 'person2', 'person3'], dtype='object', name='person'),
 Index(['person2', 'person3'], dtype='object', name='person'),
 Index(['person3'], dtype='object', name='person'),
 Index(['person1'], dtype='object', name='person'),
 Index(['person2', 'person3'], dtype='object', name='person'),
 Index(['person2', 'person3'], dtype='object', name='person'),
 Index(['person3'], dtype='object', name='person'),
 Index(['person1', 'person3'], dtype='object', name='person'),
 Index(['person3'], dtype='object', name='person'),
 Index(['person3'], dtype='object', name='person'),
 Index(['person1'], dtype='object', name='person'),
 Index(['person3'], dtype='object', name='person'),
 Index(['person3'], dtype='object', name='person'),
 Index(['person3'], dtype='object', name='person')]

In [98]:
pd.DataFrame(options)

Unnamed: 0_level_0,Unnamed: 1_level_0,0
event,date,Unnamed: 2_level_1
thing1,date3,1
thing1,date4,1
thing2,date3,1
thing3,date2,1
thing3,date3,1
thing3,date4,1
thing4,date1,1
thing4,date2,1
thing4,date3,1
thing1,date2,2


In [39]:
options = prefs.groupby(['event','date'])
odf = pd.DataFrame(options, columns=['eventat','attendees'])
odf

Unnamed: 0,eventat,attendees
0,"(thing1, date1)",date event person ...
1,"(thing1, date2)",date event person ...
2,"(thing1, date3)",date event person ...
3,"(thing1, date4)",date event person ...
4,"(thing2, date1)",date event person ...
5,"(thing2, date2)",date event person ...
6,"(thing2, date3)",date event person ...
7,"(thing3, date1)",date event person ...
8,"(thing3, date2)",date event person ...
9,"(thing3, date3)",date event person ...


In [138]:
t1d1 = odf[odf['eventat'] == ('thing1','date1')]['attendees']
t1d1

0              date   event
person                
...
Name: attendees, dtype: object

In [140]:
t1d1_people = list(t1d1[0].index)
t1d1_people

['person1', 'person2', 'person3']

In [57]:
x = odf[odf['eventat'] == ('thing1', 'date1')]['attendees'][0]
x

Unnamed: 0_level_0,date,event
person,Unnamed: 1_level_1,Unnamed: 2_level_1
person1,date1,thing1
person2,date1,thing1
person3,date1,thing1


In [63]:
x.keys()

Index(['date', 'event'], dtype='object')

## how to extract person from ^^^?

In [41]:
odf.append(prefs.groupby(['event','date']).size())

TypeError: Can only append a Series if ignore_index=True or if the Series has a name

In [None]:
def interest_in(df, event):
    return df['events'].apply(lambda v: event in v) 

def available_on(df, date):
    return df['dates'].apply(lambda v: date in v)

def attendees_for(df, event, date):
    return df[interest_in(df, event) & available_on(df, date)]['person']

def dates_for_event(df, event):
    res = set()
    for dates in df[interest_in(df, event)]['dates']:
        res.update(set(dates))
    return res


In [None]:
pd.MultiIndex.from_product([all_events,all_dates], names=['event', 'date'])

In [None]:
event = 'thing1'
df[interest_in(df, event)]

In [None]:
	"""
	goal: figure out which combinations of people+date+event would have the best attendance.
		
	for any "event" with interest above a threshold, show the date that works for the most people, then re-check the threshold and remove if not met.

	- group people by interest in event
	- remove events with total interest below threshold
	- for each remaining event:
		- collect people interested in event, grouped by date
		- remove event if max(group_lens) < thresh
        
    so we want:
    Series((date, event, (person1,person2,...)),...)
	"""

In [None]:
attendance_data = []
for date in all_dates:
    for event in all_events:
        available_on(df,'date1')

In [None]:
attendance = []
for event in all_events:
    for date in dates_for_event(df, event):
        attendance.append((event, date, attendees_for(df,event,date)))
        

pd.DataFrame(attendance, columns=('event','date','people'))

In [None]:
s1 = pd.Series(["a","b"])
s2 = pd.Series(["c","d","e"])
d = pd.DataFrame([s1,s2])
d

In [None]:
s1 = pd.Series((("person1","person2"),), index=["thing1","date1"])
s1

In [None]:
cols = pd.MultiIndex.from_product((all_events, all_dates))
[ [a.upper(),b] for a,b in index.values ]


In [None]:
rows = all_people
table = pd.DataFrame(np.random.randn(3, 16), index=rows, columns=cols)
table

In [None]:
df['thing1']["date1"]['person1']

In [None]:
form = df

for event, date in index.values:
    for person in form[interest_in(form,event) & available_on(form,date)]['person'].values:
        table.loc(event, date, person) = True
    
#form[interest_in(form,'thing1') & available_on(form,'date1')]['person'].values
table

#form[interest_in(form,event) & available_on(form,event)]