I have a bunch of improvements to make for the Spring 23 version of this program:

- Improve the workflow  --> DONE
- Create new rows for new opt-ins --> DONE  
- Create a better way of matching opt-ins to the master list  --> DONE
- Better group size options --> LATER
- Better naming convention --> DONE
- ~~A way of tracking people who met during the fall 2022 program if we want them not to match again~~
- Make sure everyone has a unique identifier --> DONE
- Ensure file saving convention (excel or csv) is consistent

In [1]:
from IPython.display import display, HTML
display(HTML("<style>.container { width:95% !important; }</style>"))

from matplotlib_inline import backend_inline
backend_inline.set_matplotlib_formats('retina')

import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
#from openpyxl import load_workbook

import ipywidgets as widgets
from IPython.display import display, clear_output
from ipywidgets import HBox, VBox
from IPython.display import HTML
import base64

#import os
from email.message import EmailMessage
import ssl
import smtplib

pd.set_option('display.max_rows', None)

def find_nearest(array, value):
    """Return the position of the entry in ``array`` closest to ``value``.

    ``array`` is converted with ``np.asarray``; the result is whatever
    ``argmin`` reports for the absolute differences (the first position
    when several entries are equally close).
    """
    distances = np.abs(np.asarray(array) - value)
    return distances.argmin()

def convert_cell(cell):
    """Convert the text form of an integer list back into a list of ints.

    Turns ``'[1, 2, 3, ...]'`` into ``[1, 2, 3, ...]``.

    Parameters:
        cell: the value read from a spreadsheet cell. Normally a string such
            as ``'[1, 2, 3]'`` or ``'[]'``.

    Returns:
        A new list of ints. Strings are parsed regardless of the spacing
        around the commas or brackets. A list or tuple is copied element by
        element. Anything else (for example the NaN that stands for a blank
        cell) yields an empty list.

    Raises:
        ValueError: if an element of the list is not an integer literal.
    """
    if isinstance(cell, (list, tuple)):
        # already parsed; hand back an independent copy
        return [int(item) for item in cell]
    if not isinstance(cell, str):
        # blank cells are not strings, treat them as "no entries"
        return []

    inner = cell.strip().strip('][').strip()
    if not inner:
        return []
    # int() tolerates surrounding whitespace, so '1,2' and '1, 2' both work
    return [int(item) for item in inner.split(',')]

def init(df):
    """Reset (or create) the matching-state columns of ``df`` in place.

    The columns are written in a fixed order: ``prior_matches``,
    ``current_match``, ``current_group``, ``num_prior_matches``,
    ``size_prev_match``, ``possible_matches``, ``num_possible_matches``.
    """
    row_count = len(df)

    def fresh_lists():
        # one separate empty list per row, never a shared object
        return [[] for _ in range(row_count)]

    df['prior_matches'] = fresh_lists()
    df['current_match'] = fresh_lists()
    df['current_group'] = -1
    df['num_prior_matches'] = 0
    # whether they were in a 3 or 4 group previously, or 0 if new
    df['size_prev_match'] = 0
    df['possible_matches'] = fresh_lists()
    df['num_possible_matches'] = -1

def complete_match(df):
    """Archive the current round of ``df`` in place.

    For every row the ids in ``current_match`` are appended to
    ``prior_matches``, ``num_prior_matches`` is set to the new length of
    ``prior_matches``, and ``size_prev_match`` records how many ids the
    round's ``current_match`` held. ``current_match`` is then emptied and
    ``current_group`` reset to -1.

    New list objects are built rather than extending the stored ones.
    Two frames can hold the very same list objects (a DataFrame copy does
    not duplicate the Python objects inside its cells), and mutating in
    place would then append the same round a second time when this
    function is called on the other frame.
    """
    finished_round = df['current_match'].tolist()
    history = [
        list(prior) + list(current)
        for prior, current in zip(df['prior_matches'].tolist(), finished_round)
    ]

    df['prior_matches'] = history
    df['num_prior_matches'] = [len(entry) for entry in history]
    df['size_prev_match'] = [len(current) for current in finished_round]
    df['current_match'] = [[] for _ in range(len(df))]
    df['current_group'] = -1

def possible_matches():
    """Work out, for every row of the module-level ``data`` frame, who that
    person may still be grouped with.

    A candidate is any index label of ``data`` that is not listed in the
    row's ``prior_matches`` and whose ``department`` differs from the
    row's own. The sorted, de-duplicated candidate ids are stored in
    ``possible_matches`` as plain Python values (so that the text form of
    the list stays ``[1, 2, 3]`` when the frame is written to a file) and
    their count in ``num_possible_matches``.
    """
    all_ids = data.index.to_numpy()  # loop invariant, computed once
    candidates = []
    for prior, department in zip(data['prior_matches'], data['department']):
        not_met_before = ~np.isin(all_ids, prior)
        other_department = (data['department'] != department).to_numpy()
        candidates.append(np.unique(all_ids[not_met_before & other_department]).tolist())

    data['possible_matches'] = candidates
    data['num_possible_matches'] = [len(entry) for entry in candidates]

def perform_random_loop(df):
    """Make one randomised attempt at splitting everyone in ``df`` into groups.

    People are visited in ascending order of ``size_prev_match``, then
    ``num_possible_matches``, then a freshly drawn random ``randint``
    column. Each person who is still unplaced starts a new group, and
    the other members are drawn at random from the ids that are still
    unplaced and appear in the ``possible_matches`` of every member picked
    so far. Groups numbered up to the module-level ``number4groups`` get
    four members, later groups get three.

    Side effects on ``df``: ``current_match`` and ``current_group`` are
    cleared at the start and filled in for every group that was formed
    (each person's ``current_match`` starts with their own id), and the
    ``randint`` column is (re)written.

    Returns:
        1 when everybody was placed, 0 when the attempt ran into a dead end
        (``df`` is then left with the partial result of this attempt).
    """
    # clear any attempts to match that failed
    df['current_match'] = [[] for _ in range(len(df))]
    df['current_group'] = -1
    # random tie-breaker for the visiting order
    df['randint'] = np.random.choice(np.arange(0, len(df)), size=len(df), replace=False)

    remaining = df.index  # ids that are not in a group yet
    groupnum = 1  # a counter for the group number
    visit_order = df.sort_values(['size_prev_match',
                                  'num_possible_matches',
                                  'randint']).index
    for index in visit_order:
        if len(remaining) == 0:
            return 1
        if index not in remaining:
            continue  # already placed as a member of an earlier group

        # unplaced people this person is allowed to meet
        candidates = np.intersect1d(df.loc[index, 'possible_matches'], remaining)
        if len(candidates) <= 1:
            return 0  # fewer than two partners left: dead end

        group_size = 4 if groupnum <= number4groups else 3
        members = [index]
        while True:
            pick = df.loc[candidates].sample(1)
            picked_id = pick.index.tolist()[0]  # plain Python value
            members.append(picked_id)
            if len(members) == group_size:
                break
            # drop the pick itself and everybody the pick may not meet
            allowed = np.isin(candidates, pick['possible_matches'].iloc[0])
            candidates = candidates[(candidates != picked_id) & allowed]
            if len(candidates) == 0:
                return 0

        # every member stores the group with their own id first
        for member in members:
            others = [other for other in members if other != member]
            df.loc[member, 'current_match'].extend([member] + others)
        df.loc[members, 'current_group'] = groupnum

        remaining = remaining.difference(members)
        groupnum += 1

    # explicit result instead of falling off the end of the function
    return 1 if len(remaining) == 0 else 0

def create_match(max_attempts=1000):
    """Run the randomised matching until it succeeds or attempts run out.

    Calls ``perform_random_loop(data)`` up to ``max_attempts`` times and
    stores the last result in the module-level ``out``. The failure
    message is printed only when the final attempt really failed.

    Afterwards the group numbers from ``data['current_group']`` are written
    to ``hr_data`` under ``'group<round>'``, merged into ``hr_full_data``,
    and the rows of ``data`` are copied into ``full_data``.

    Parameters:
        max_attempts: upper bound on the number of matching attempts.
    """
    global out, hr_full_data, hr_data, full_data, data
    out = 0
    for counter in range(1, max_attempts + 1):
        out = perform_random_loop(data)
        print(counter, '\r', end='')
        if out != 0:
            break
    if out == 0:
        print('match failed, not possible')

    # use data to populate hr_data, full_data, and hr_full_data
    group_column = 'group' + str(round_widget.value)
    hr_data[group_column] = data['current_group']
    if group_column in hr_full_data.columns:
        # pd.merge joins on every shared column; a group column left over
        # from an earlier run would become part of the join key and the old
        # group numbers would be kept instead of the new ones
        hr_full_data = hr_full_data.drop(columns=group_column)
    hr_full_data = pd.merge(hr_full_data, hr_data, how='left')
    full_data.loc[data.index, :] = data[:].copy()
    
def create_download_link(df, title="Download CSV file", filename="data.csv"):
    """Display an HTML link that downloads ``df`` as a CSV file.

    The frame is serialised with its index, base64-encoded and embedded in
    a ``data:`` URL; ``title`` is the link text and ``filename`` the name
    suggested for the download.
    """
    csv_text = df.to_csv(index=True)
    payload = base64.b64encode(csv_text.encode()).decode()
    link_template = '<a download="{filename}" href="data:text/csv;base64,{payload}" target="_blank">{title}</a>'
    link = link_template.format(payload=payload, title=title, filename=filename)
    display(HTML(link))
    
    
def send_emails(df, round_number):
    """Send one introduction email to each group of the current match.

    Parameters:
        df: frame with ``current_group``, ``email`` and ``first_name``
            columns; groups are numbered from 1 to the largest
            ``current_group`` value.
        round_number: round number used in the subject and the body.

    All messages are composed first and then sent over a single SMTP
    session, so the server is contacted only when there is something to
    send. Group numbers without any member are skipped.
    """
    import os  # local import: only needed for the credential override

    print('sending emails...', end='\r')

    email_sender = 'bumeetup@bu.edu'
    # SECURITY: a password stored in the source is readable by anyone who
    # can see this file. Set BUMEETUP_EMAIL_PASSWORD to override it; the
    # literal is kept only as a fallback so existing setups keep working.
    email_password = os.environ.get('BUMEETUP_EMAIL_PASSWORD', 'SWsocial2022')

    subject = 'BU Meetup Spring Round %s'%str(round_number)

    outgoing = []
    for group_num in np.arange(1, df['current_group'].max()+1):
        group = df.loc[df['current_group'] == group_num]
        if group.empty:
            continue

        email_recipients = group['email'].tolist()
        first_names = group['first_name'].tolist()
        greeting = ', '.join(first_names[:-1] + ['and ' + first_names[-1]])

        # only groups of three in a round that aimed for four get the note
        extra_sentence = ''
        if groupsize_widget.value == 4 and len(group) == 3:
            extra_sentence = (" We were hoping to put everyone in groups of four this time, but the "
                              "numbers required some groups of three. We will do our best to put you "
                              "in a group of four next round.")

        body = \
'''Hello %s,

You all are a group for round %s of BU Meetup.%s Please figure out amongst yourselves what day, time, \
and place would work to meet up for at least 45 minutes sometime in the next 2 weeks. \
You might find it helpful to use a tool like When2Meet to share your availability: \
https://www.when2meet.com/.

Some suggestions that previous groups enjoyed: get coffee/tea, \
get lunch or dinner at a nearby restaurant, \
get a drink at the BU Pub, or go for a walk along the esplanade. \
Enjoy the opportunity to get to know each other! 

Feel free to reach out with any questions!

~BU Meetup (Eric Wellers & Will Saunders)''' %(greeting, str(round_number), extra_sentence)

        em = EmailMessage()
        em['From'] = email_sender
        em['To'] = ', '.join(email_recipients)
        em['Subject'] = subject
        em.set_content(body)
        outgoing.append((email_recipients, em))

    if outgoing:
        context = ssl.create_default_context()
        with smtplib.SMTP_SSL('smtp.gmail.com', 465, context=context) as smtp:
            smtp.login(email_sender, email_password)
            for email_recipients, em in outgoing:
                smtp.sendmail(email_sender, email_recipients, em.as_string())

    print('emails sent       ', '\r', end='')
    
def send_reminder_emails(df, round_number):
    """Send one reminder email to each group of the current match.

    Parameters:
        df: frame with ``current_group``, ``email`` and ``first_name``
            columns; groups are numbered from 1 to the largest
            ``current_group`` value.
        round_number: round number used in the subject and the body.

    All messages are composed first and then sent over a single SMTP
    session, so the server is contacted only when there is something to
    send. Group numbers without any member are skipped.
    """
    import os  # local import: only needed for the credential override

    print('sending reminder emails...', end='\r')

    email_sender = 'bumeetup@bu.edu'
    # SECURITY: a password stored in the source is readable by anyone who
    # can see this file. Set BUMEETUP_EMAIL_PASSWORD to override it; the
    # literal is kept only as a fallback so existing setups keep working.
    email_password = os.environ.get('BUMEETUP_EMAIL_PASSWORD', 'SWsocial2022')

    subject = 'Reminder: BU Meetup Round %s'%str(round_number)

    outgoing = []
    for group_num in np.arange(1, df['current_group'].max()+1):
        group = df.loc[df['current_group'] == group_num]
        if group.empty:
            continue

        email_recipients = group['email'].tolist()
        first_names = group['first_name'].tolist()
        greeting = ', '.join(first_names[:-1] + ['and ' + first_names[-1]])

        # the space before the first line continuation keeps "week." and
        # "If" from running together in the sent text
        body = \
'''Hello %s,

This is a friendly reminder that Round %s of BU Meetup ends this week. \
If you've already reached out to each other or \
met, hurray! and you may ignore this email. \
If not, now is a great time to reach out and make a plan to meet.

Feel free to reach out with any questions or concerns.

~BU Meetup (Eric & Will)''' %(greeting, str(round_number))

        em = EmailMessage()
        em['From'] = email_sender
        em['To'] = ', '.join(email_recipients)
        em['Subject'] = subject
        em.set_content(body)
        outgoing.append((email_recipients, em))

    if outgoing:
        context = ssl.create_default_context()
        with smtplib.SMTP_SSL('smtp.gmail.com', 465, context=context) as smtp:
            smtp.login(email_sender, email_password)
            for email_recipients, em in outgoing:
                smtp.sendmail(email_sender, email_recipients, em.as_string())

    print('reminder emails sent       ', '\r', end='')

# initialize jupyter widgets 
style = {'description_width': 'auto'}

# keyword arguments shared by every input widget
_input_options = dict(disabled=False, style=style)

# text boxes for the two file paths
file_widget = widgets.Text(description='Participants file:', **_input_options)
datafile_widget = widgets.Text(description='Data file:', **_input_options)

# drop-downs for the round number (1-9) and the preferred group size
round_widget = widgets.Dropdown(options=np.arange(1, 10),
                                description='Matching round:',
                                **_input_options)
groupsize_widget = widgets.Dropdown(options=[3,4],
                                    description='Group Size',
                                    **_input_options)

# one button per workflow step
(import_button,
 create_match_button,
 complete_match_button,
 download_match_button,
 download_data_button,
 send_emails_button,
 send_reminder_emails_button) = (
    widgets.Button(description=label, style=style)
    for label in ('Import',
                  'Create Match',
                  'Complete Match',
                  'Download Match',
                  'Download Data',
                  'Send Emails',
                  'Send Reminder Emails')
)

def display_widget():
    """Show every control of the matching tool, one after another."""
    controls = (
        round_widget,
        file_widget,
        datafile_widget,
        groupsize_widget,
        import_button,
        create_match_button,
        download_match_button,
        download_data_button,
        send_emails_button,
        send_reminder_emails_button,
        complete_match_button,
    )
    for control in controls:
        display(control)

# module-level state; event_import assigns the real values
data = number3groups = number4groups = None

# functions for each widget button press

def event_import(button):
    """Import the round's opt-ins and, if given, the earlier matching data.

    Fills the module-level tables:

    * ``hr_full_data`` - human-readable data for everyone who filled out
      the intake form (read from the participants file),
    * ``hr_data`` - the rows of ``hr_full_data`` opted into this round,
    * ``full_data`` - identity columns plus matching state for everyone,
    * ``data`` - the rows of ``full_data`` opted into this round; this is
      the table all of the actual processing works on,

    and computes ``number3groups`` / ``number4groups`` from the number of
    opted-in people and the preferred group size.

    Parameters:
        button: the clicked widget (unused).
    """
    global hr_full_data, hr_data, full_data, data, number3groups, number4groups
    clear_output()
    display_widget()

    identity_columns = ['full_name', 'first_name', 'BUID', 'email', 'school', 'department']
    list_columns = ['prior_matches', 'current_match', 'possible_matches']

    # if there is no datafile to import, it's round 1 pairing
    if datafile_widget.value == '':
        hr_full_data = pd.read_excel(file_widget.value, index_col=0)
        # the last column of the participants file holds the opt-in answer
        hr_data = hr_full_data[hr_full_data[hr_full_data.columns[-1]]=='Yes'].copy()
        full_data = hr_full_data[identity_columns].copy()
        data = hr_data[identity_columns].copy()

        init(full_data)
        init(data)

    # if there is a datafile, it means it's either round 1 reminder email or rounds 2+
    else:
        hr_full_data = pd.read_excel(file_widget.value, index_col=0)
        full_data = pd.read_excel(datafile_widget.value, index_col=0)

        # the list columns come back from the file as text
        for column in list_columns:
            full_data[column] = [convert_cell(cell) for cell in full_data[column]]

        # make all emails same formatting
        hr_full_data['email'] = hr_full_data['email'].str.lower().str.strip()
        full_data['email'] = full_data['email'].str.lower().str.strip()

        # select the HR data of the opt-ins
        opted_in = hr_full_data['round'+str(round_widget.value)]=='Yes'
        hr_data = hr_full_data[opted_in].copy()

        # initialize empty rows in the data file for new participants:
        # outer-join on the index, keep the six identity columns from the
        # participants file and the seven state columns from the data file
        a = pd.merge(hr_full_data, full_data, left_index=True, right_index=True, how='outer')
        a = a[a.columns[[0, 1, 2, 3, 4, 5, -7, -6, -5, -4, -3, -2, -1]]].copy()
        a.columns = full_data.columns
        for column in list_columns:
            a[column] = a[column].apply(lambda d: d if isinstance(d, list) else [])
        # assign the filled column back: an in-place fillna on a[column]
        # may act on a temporary object and leave ``a`` unchanged
        state_defaults = {'current_group': -1,
                          'num_prior_matches': 0,
                          'size_prev_match': 0,
                          'num_possible_matches': -1}
        for column, default in state_defaults.items():
            a[column] = a[column].fillna(default)
        full_data = a.copy()

        # create data array for just the opt-ins; rows that exist only in
        # the data file count as not opted in
        if not opted_in.index.equals(a.index):
            opted_in = opted_in.reindex(a.index, fill_value=False)
        data = a[opted_in].copy()

    # calculate the number of groups of 3 and 4 we will have
    n = len(data)
    if groupsize_widget.value == 3:
        # make groups of 3 and fill in the gaps with groups of 4
        number4groups = n % 3
        number3groups = n // 3 - number4groups
    elif groupsize_widget.value == 4:
        # make groups of 4 and fill in the gaps with groups of 3
        leftover = n
        number3groups = 0
        while leftover % 4 != 0:
            leftover -= 3  # keep subtracting groups of 3 until it's divisible by 4
            number3groups += 1
        number4groups = leftover // 4

    if number3groups is not None and (number3groups < 0 or number4groups < 0):
        # a negative count means no combination of 3s and 4s adds up to n
        print('warning: %i participants cannot be split into groups of 3 and 4' % n)

    print('imported participants')

def event_create_match(button):
    """Button handler: compute the possible matches, run the matching and
    report whether it succeeded (``out == 1``) or failed (``out == 0``)."""
    clear_output()
    display_widget()
    possible_matches()
    create_match()
    report = {1: 'round %i match created', 0: 'round %i match failed'}.get(out)
    if report is not None:
        print(report%round_widget.value)

def event_download_match(button):
    """Button handler: offer the human-readable full table
    (``hr_full_data``) of the selected round as a CSV download."""
    clear_output()
    display_widget()
    round_number = round_widget.value
    return create_download_link(
        hr_full_data,
        title="download round %i match"%round_number,
        filename="round_%i_match.csv"%round_number,
    )

def event_download_data(button):
    """Button handler: copy the rows of ``data`` into ``full_data`` and
    offer ``full_data`` (the matching data in its raw form) as a CSV
    download."""
    clear_output()
    display_widget()
    # bring full_data up to date with the opted-in rows first
    full_data.loc[data.index, :] = data[:].copy()
    round_number = round_widget.value
    return create_download_link(
        full_data,
        title="download round %i data"%round_number,
        filename="round_%i_data.csv"%round_number,
    )

def event_send_emails(button):
    """Button handler: email every group of the selected round."""
    clear_output()
    display_widget()
    selected_round = round_widget.value
    send_emails(data, selected_round)
    
def event_send_reminder_emails(button):
    """Button handler: send the reminder email to every group of the
    selected round."""
    clear_output()
    display_widget()
    selected_round = round_widget.value
    send_reminder_emails(data, selected_round)
    
def event_complete_match(button):
    """Button handler: archive the current round in ``data`` and then in
    ``full_data``.

    Hit this button when the emails and reminder emails are sent AND the
    match and data are downloaded.
    """
    clear_output()
    display_widget()
    for table in (data, full_data):
        complete_match(table)
    print('previous round completed')

# connecting the jupyter buttons to the actions for each button 
# (button, handler) pairs, registered in this order
for _button, _handler in (
    (import_button, event_import),
    (create_match_button, event_create_match),
    (complete_match_button, event_complete_match),
    (download_match_button, event_download_match),
    (download_data_button, event_download_data),
    (send_emails_button, event_send_emails),
    (send_reminder_emails_button, event_send_reminder_emails),
):
    _button.on_click(_handler)

In [2]:
clear_output()
display_widget()

Dropdown(description='Matching round:', options=(1, 2, 3, 4, 5, 6, 7, 8, 9), style=DescriptionStyle(descriptio…

Text(value='Spring2023_program/round_1_match.xlsx', description='Participants file:', style=TextStyle(descript…

Text(value='Spring2023_program/round_1_data.xlsx', description='Data file:', style=TextStyle(description_width…

Dropdown(description='Group Size', index=1, options=(3, 4), style=DescriptionStyle(description_width='auto'), …

Button(description='Import', style=ButtonStyle())

Button(description='Create Match', style=ButtonStyle())

Button(description='Download Match', style=ButtonStyle())

Button(description='Download Data', style=ButtonStyle())

Button(description='Send Emails', style=ButtonStyle())

Button(description='Send Reminder Emails', style=ButtonStyle())

Button(description='Complete Match', style=ButtonStyle())

imported participants


In [3]:
full_data

Unnamed: 0,full_name,first_name,BUID,email,school,department,prior_matches,current_match,current_group,num_prior_matches,size_prev_match,possible_matches,num_possible_matches
0,Kaitlyn Alimenti,Kate,U72475960,alimenti@bu.edu,Other,Other,[],[],-1,0,0,[],-1
1,Yicheng Chen,Yic,53547178,cyccool@bu.edu,Graduate School of Arts and Sciences,Economics,[],"[1, 300, 304, 290]",18,0,0,"[4, 6, 8, 18, 23, 30, 31, 38, 50, 51, 69, 75, ...",95
2,Hsin Huei Chen,Hsin Huei,20145990,hhchen@bu.edu,College of Engineering,Mechanical Engineering,[],[],-1,0,0,[],-1
3,Fatih Acun,Fatih,90491656,acun@bu.edu,College of Engineering,Computer engineering,[],[],-1,0,0,[],-1
4,Jason Rutberg,Jason,82729228,jrutberg@bu.edu,Graduate School of Arts and Sciences,Biostatistics,[],"[4, 242, 30, 174]",20,0,0,"[1, 6, 8, 18, 23, 30, 31, 38, 50, 51, 62, 69, ...",97
5,Elmurat Ashiraliev,Elmurat,49484254,aelmurat@bu.edu,Graduate School of Arts and Sciences,Religion,[],[],-1,0,0,[],-1
6,John Cerritelli,John,43542318,cerritej@bu.edu,Graduate School of Arts and Sciences,Chemistry,[],"[6, 207, 241, 322]",24,0,0,"[1, 4, 8, 18, 23, 30, 31, 38, 50, 51, 62, 69, ...",98
7,Lina Hardin,Lina,45023871,lhardin@bu.edu,School of Law,Law,[],[],-1,0,0,[],-1
8,Ali Raza,Ali,20910632,araza@bu.edu,Graduate School of Arts and Sciences,Computer Science,[],"[8, 266, 100, 116]",6,0,0,"[1, 4, 6, 18, 23, 30, 31, 38, 50, 51, 62, 69, ...",95
9,Anne shapiro,Annie,13446921,anshap@bu.edu,Graduate School of Arts and Sciences,Biostatistics,[],[],-1,0,0,[],-1


In [56]:
data[data['full_name']=='William Saunders']

Unnamed: 0,full_name,first_name,BUID,email,school,department,prior_matches,current_match,current_group,num_prior_matches,size_prev_match,possible_matches,num_possible_matches,randint
227,William Saunders,Will,90747427,wsaund@bu.edu,Graduate School of Arts and Sciences,Astronomy,[],"[227, 328, 69, 249]",23,0,0,"[1, 4, 6, 8, 18, 23, 30, 31, 38, 50, 51, 62, 6...",97,66


In [57]:
full_data[full_data['full_name']=='William Saunders']

Unnamed: 0,full_name,first_name,BUID,email,school,department,prior_matches,current_match,current_group,num_prior_matches,size_prev_match,possible_matches,num_possible_matches
227,William Saunders,Will,90747427,wsaund@bu.edu,Graduate School of Arts and Sciences,Astronomy,[],"[227, 328, 69, 249]",23,0,0,"[1, 4, 6, 8, 18, 23, 30, 31, 38, 50, 51, 62, 6...",97


In [60]:
hr_data[hr_data['full_name']=='William Saunders']

Unnamed: 0,full_name,first_name,BUID,email,school,department,round1,group1
227,William Saunders,Will,90747427,wsaund@bu.edu,Graduate School of Arts and Sciences,Astronomy,Yes,23


In [61]:
hr_full_data[hr_full_data['full_name']=='William Saunders']

Unnamed: 0,full_name,first_name,BUID,email,school,department,round1,group1
227,William Saunders,Will,90747427,wsaund@bu.edu,Graduate School of Arts and Sciences,Astronomy,Yes,23.0


# Recycling bin