Order of things to do:
1. Import 
    - Round 1: Takes in the master cleaned list and opt-in list
    - Rounds 2+: Takes in the master cleaned list, opt-in list, participants list, and full data list. 
    - Returns master and opt-in or errors (telling you what to fix)
2. Dedup
    - De-duplicates the opt-in and master lists. 
3. Sync
    - Takes in master and opt-in lists.
    - Returns participant list with new rows if necessary.
4. Create data
    - Produces or updates the full data list. 
5. Complete match  
    - Completes the previous round if round > 1     
6. Possible matches  
7. Create match   
8. Save data back to Google.
9. Send emails.
10. Send reminder emails.



- Make sure that retrieved data doesn't become corrupted. 
- Make sure that the names of spreadsheets are passed along correctly.

In [1]:
from IPython.display import display, HTML
display(HTML("<style>.container { width:95% !important; }</style>"))

from matplotlib_inline import backend_inline
backend_inline.set_matplotlib_formats('retina')

import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
pd.set_option('display.max_rows', None)
#from openpyxl import load_workbook
import pickle
import time

import ipywidgets as widgets
from IPython.display import display, clear_output
from ipywidgets import HBox, VBox
from IPython.display import HTML
import base64

#import os
from email.message import EmailMessage
import ssl
import smtplib

import gspread
#gc = gspread.oauth()
gc = gspread.oauth(
        credentials_filename='credentials.json',
        authorized_user_filename='authorized_user.json',
    )

def copy(obj):
    # true deep copy
    return pickle.loads(pickle.dumps(obj))

def find_nearest(array, value):
    array = np.asarray(array)
    idx = (np.abs(array - value)).argmin()
    return idx

def convert_cell(cell):
    # converts '[1, 2, 3, ...]' into [1, 2, 3, ...]
    if cell != '[]':
        a = cell.strip('][').split(', ')
        b = [int(i) for i in a]
        return b
    else:
        return []

def init(df):
    # reset some of the columns or create them if they don't exist 
    df['prior_matches'] = [ [] for _ in range(len(df)) ]
    df['current_match'] = [ [] for _ in range(len(df)) ]
    df['current_group'] = -1
    df['num_prior_matches'] = 0
    df['size_prev_match'] = 0 # whether were in a 3 or 4 group previously, or 0 if new
    df['possible_matches'] = [ [] for _ in range(len(df)) ]
    df['num_possible_matches'] = -1

def create_download_link(df, title="Download CSV file", filename="data.csv"): 
    # turns the pandas DataFrame into a csv to download
    csv = df.to_csv(index=True)
    b64 = base64.b64encode(csv.encode())
    payload = b64.decode()
    html = '<a download="{filename}" href="data:text/csv;base64,{payload}" target="_blank">{title}</a>'
    html = html.format(payload=payload,title=title,filename=filename)
    display(HTML(html))
    

def import_files():
    # imports the round opt-ins and the previous round matching data
    global spreadsheet, full_data, data, master, optin, number3groups, number4groups, participants_prev, participants, full_data_prev
    
    # import master sheet
    spreadsheet = gc.open(spreadsheet_widget.value)
    master_sheet = spreadsheet.worksheet('master_list')
    master = np.array(master_sheet.get_all_values())
    master = pd.DataFrame(data=master[1:,1:], index=master[1:,0].astype(int), columns=master[0,1:])
    master['email'] = master['email'].str.lower()
    master['email'] = master['email'].str.strip()
    master['email_retype'] = master['email_retype'].str.lower()
    master['email_retype'] = master['email_retype'].str.strip()
    
    # import opt-in
    optin_sheet = spreadsheet.worksheet('optin' + str(round_widget.value))
    optin = np.array(optin_sheet.get_all_values())
    optin = pd.DataFrame(data=optin[1:,:], columns=['email', 'available', 'campus', 'degree'])
    optin['email'] = optin['email'].str.lower()
    optin['email'] = optin['email'].str.strip()
    #optin['email_retype'] = optin['email_retype'].str.lower()
    #optin['email_retype'] = optin['email_retype'].str.strip()
    optin = copy(optin[optin['available'] == 'Yes'])
    
    
    # import previous round data (if round > 1)
    if round_widget.value > 1:
        full_data_prev_sheet = spreadsheet.worksheet('full_data')
        full_data_prev = np.array(full_data_prev_sheet.get_all_values())
        full_data_prev = pd.DataFrame(data=full_data_prev[1:,1:], index=full_data_prev[1:,0].astype(int), columns=full_data_prev[0,1:])
        
        participants_prev_sheet = spreadsheet.worksheet('participants')
        participants_prev = np.array(participants_prev_sheet.get_all_values())
        participants_prev = pd.DataFrame(data=participants_prev[1:,1:], index=participants_prev[1:,0].astype(int), \
                                         columns=participants_prev[0,1:])
        participants_prev['email'] = participants_prev['email'].str.lower()
        participants_prev['email'] = participants_prev['email'].str.strip()
    
    print('Imported')

def import_for_reminder():
    # imports the data saved from the round in progress only for the purposes of sending a reminder email
    global spreadsheet, full_data, data
    spreadsheet = gc.open(spreadsheet_widget.value)
    full_data_sheet = spreadsheet.worksheet('full_data')
    full_data = np.array(full_data_sheet.get_all_values())
    full_data = pd.DataFrame(data=full_data[1:,1:], index=full_data[1:,0].astype(int), columns=full_data[0,1:])
    
    full_data['prior_matches'] = \
        [convert_cell(row['prior_matches']) for index, row in full_data.iterrows()]
    full_data['current_match'] = \
        [convert_cell(row['current_match']) for index, row in full_data.iterrows()]
    full_data['possible_matches'] = \
        [convert_cell(row['possible_matches']) for index, row in full_data.iterrows()]
    full_data['email'] = full_data['email'].str.lower()
    full_data['email'] = full_data['email'].str.strip()
    
    data = copy(full_data[full_data['current_group']!='-1'])
    data['current_group'] = data['current_group'].astype(int)
    

def dedup():
    global master, optin
    # check for email errors and duplicates in the master list and remove them 
    # check for email errors and duplicates in the opt-in list and remove them
    
    error_state = 0
    print('checking emails match...')
    # check emails for master
    for index, row in master.iterrows():
        if row['email'] != row['email_retype']:
            print('Email disagreement for ID %s in Master list.' %str(index))
            error_state += 1
    
    # tell you what to do 
    if error_state >= 1:
        raise TypeError('Some emails don\'t match. Fix and rerun')
    if error_state == 0:
        print('no email errors.')

    # check duplicates for master
    print('deduping...')
    unique_emails, counts = np.unique(master['email'], return_counts=True)
    if np.any(counts > 1):
        print('Duplicates detected in Master list...fixing')
        master.drop_duplicates(subset=['email'], keep='first', inplace=True, ignore_index=True)
        #spreadsheet.worksheet('cleaned_master_list').update('A1', master.values.tolist())
        print('Master duplicates removed')
    else:
        print('No duplicates deteted in Master list')
    
    # check duplicates for optin
    unique_emails, counts = np.unique(optin['email'], return_counts=True)
    # remove the first instance of multiple opt-ins, in case someone changed from Yes to No
    if np.any(counts > 1):
        print('Duplicates detected in Optin list...fixing')
        optin.drop_duplicates(subset=['email'], keep='last', inplace=True, ignore_index=True)
        print('Optin duplicates removed')
    else:
        print('No duplicates deteted in Optin list')
    
    # remove all the Nos
    optin = optin[optin['available']=='Yes']
    optin.reset_index(drop=True, inplace=True)

    print('Finished.')
    
def sync_lists():
    '''
    
    JULY 25, 2024 : MAY NOT NEED ANYMORE. 
    
    '''
    # sync the opt-in list to the master list
    global master, optin, participants
    print('syncing the opt-in list to the master list...')
    
    # if round 1, generate the participants list from the master list
    if round_widget.value == 1:
        participants = copy(master)
    # if rounds 2+, populate the participants list with recent additions to the master list
    elif round_widget.value > 1:
        participants = copy(pd.merge(master, participants_prev, how='left'))
    
    # check if the length of the intersection between the two lists is the same as the length of optin
    # if it isn't, then that means there is an error
    if np.count_nonzero(np.in1d(participants['email'].to_numpy(), optin['email'].to_numpy())) != len(optin):
        if np.count_nonzero(np.in1d(participants['email'].to_numpy(), optin['email'].to_numpy())) < len(optin):
            print('Lengths don\'t match, likely an opt-in participant didn\'t fill out the intake form')
            print('Problematic email(s):',optin['email'][~np.in1d(optin['email'].to_numpy(), participants['email'].to_numpy())].to_numpy())
            print('Either add an entry for these emails in the master list or delete them from the opt-in list. Then run again.')
    
        elif np.count_nonzero(np.in1d(participants['email'].to_numpy(), optin['email'].to_numpy())) > len(optin):
            print('Lengths don\'t match, likely there is a duplicate in the intake form')
            print('This shouldn\'t happen, so if it does, that\'s not good. Stop and seek help.')
    
    else:
        print('Lengths match, good, proceeding')
        time.sleep(1)
        # populate the current round opt-in column from the opt-in list
        participants['optin'+str(round_widget.value)] = np.zeros(len(participants))
        for i, answer in enumerate(np.in1d(participants['email'].to_numpy(), \
                                           optin['email'].to_numpy())):
            if answer:
                participants.loc[i, 'optin'+str(round_widget.value)] = 'Yes'
            else:
                participants.loc[i, 'optin'+str(round_widget.value)] = 'No'
        
        # replace field that's entirely space (or empty) with 'No'
        participants.replace(r'^\s*$', 'No', regex=True)
                    
        if np.any(participants['optin'+str(round_widget.value)] == 0):
            # make sure they were each given 'Yes' or 'No'
            print('ERROR')
        else:
            print('Lists synced')

def complete_match(df):
    # move the current match to the end of the list of prior matches and clear the columns 
    for index, row in df.iterrows():
        df.loc[index, 'prior_matches'].extend(row['current_match'])
        df.loc[index, 'num_prior_matches'] = len(row['prior_matches'])
    df['size_prev_match'] = [len(a) for a in df['current_match'].tolist()]
    df['current_match'] = [ [] for _ in range(len(df)) ]
    df['current_group'] = -1

def create_data():
    global full_data, data, number3groups, number4groups, participants_round, participants
    
    if round_widget.value == 1:
        # create the full_data list from scratch
        full_data = copy(master)
        if len(master) != len(participants):
            print('ERROR: The lengths of the master list and participants list disagree. Fix and retry.')
        data = copy(master[participants['optin'+str(round_widget.value)]=='Yes'])
        init(full_data)
        init(data)

    if round_widget.value > 1:
        # full_data_prev imported
        # participants_prev imported and already converted to participants using the master list

        # format the entries correctly
        full_data_prev['prior_matches'] = \
            [convert_cell(row['prior_matches']) for index, row in full_data_prev.iterrows()]
        full_data_prev['current_match'] = \
            [convert_cell(row['current_match']) for index, row in full_data_prev.iterrows()]
        full_data_prev['possible_matches'] = \
            [convert_cell(row['possible_matches']) for index, row in full_data_prev.iterrows()]
        full_data_prev['email'] = full_data_prev['email'].str.lower()
        full_data_prev['email'] = full_data_prev['email'].str.strip()

        # initialize empty rows in the data file for new participants
        # this take a lot of lines
        a = pd.merge(participants, full_data_prev, left_index=True, right_index=True, how='outer')
        leftcols = [col for col in a.columns if '_x' in col] # lefthand side columns to keep 
        rightcols = a.columns[-7:] # righthand side columns to keep
        a = a[np.append(leftcols, rightcols)]
        a.columns = full_data_prev.columns
        a['prior_matches'] = a['prior_matches'].apply(lambda d: d if isinstance(d, list) else [])
        a['current_match'] = a['current_match'].apply(lambda d: d if isinstance(d, list) else [])
        a['current_group'].fillna(-1, inplace=True)
        a['num_prior_matches'].fillna(0, inplace=True)
        a['size_prev_match'].fillna(0, inplace=True)
        a['possible_matches'] = a['possible_matches'].apply(lambda d: d if isinstance(d, list) else [])
        a['num_possible_matches'].fillna(-1, inplace=True)
        full_data = copy(a)

        # create data array for just the opt-ins
        if len(full_data) != len(participants):
            print('ERROR: length of `full_data` doesn\'t equal length of `participants`.')
        data = copy(full_data[participants['optin'+str(round_widget.value)]=='Yes'])

    # calculate the number of groups of 3 and 4 we will have 
    n = len(data)
    if groupsize_widget.value == 3:
        # make groups of 3 and fill in the gaps with groups of 4
        number3groups = n // 3
        number4groups = n - (number3groups * 3)
        number3groups = n // 3 - number4groups
    elif groupsize_widget.value == 4:
        # make groups of 4 and fill in the gaps with groups of 3
        nn = n
        number3groups = 0
        while (nn / 4 - np.floor(nn / 4)) != 0:
            nn -= 3 # keep subtracting groups of 3 until it's divisible by 4
            number3groups += 1
        number4groups = nn // 4

    if round_widget.value > 1:
        complete_match(data)
        complete_match(full_data)
        print('Previous round completed')
    print('All data structures made')
    
def possible_matches(samedepartment=False):
    # determines all the possible matches for each person in the program
    # if samedepartment=False, DON'T allow 2 from same department
    # if samedepartment=True, DO allow 2 from same department (for Virtual only right now)
    data['possible_matches'] = [ [] for _ in range(len(data)) ]
    data['num_possible_matches'] = -1
    if samedepartment == False:
        for index, row in data.iterrows():
            non_previous_matches = data.index.to_numpy()[~np.in1d(data.index.to_numpy(), row['prior_matches'])]
            non_same_department = data.index.to_numpy()[~(row['department'] == data['department'])]
            data.loc[index, 'possible_matches'].extend(np.intersect1d(non_previous_matches, non_same_department))
            data.loc[index, 'num_possible_matches'] = len(data.loc[index, 'possible_matches'])
    if samedepartment == True:
        for index, row in data.iterrows():
            non_previous_matches = data.index.to_numpy()[~np.in1d(data.index.to_numpy(), row['prior_matches'])]
            data.loc[index, 'possible_matches'].extend(non_previous_matches)
            data.loc[index, 'num_possible_matches'] = len(data.loc[index, 'possible_matches'])


def perform_random_loop(df):
    # the loop that attempts to do the matching
    out = 0 # variable to determine when we've succeeded
    # clear any attemps to match that failed
    df['current_match'] = [ [] for _ in range(len(df)) ]
    df['current_group'] = -1
    # create a random column 
    df.loc[:, 'randint'] = np.random.choice(np.arange(0, len(df)), size=len(df), replace=False)
    
    groupnum = 1 # a counter for the group number
    # iterate through, starting with the most number of possible matches 
    for i, (index, row) in enumerate(df.sort_values(['size_prev_match', \
                                                     'num_possible_matches', \
                                                     'randint']).iterrows()):
        # select possible matches for person1
        if i == 0:
            p1_possible = df.loc[index, 'possible_matches']
        elif i > 0 :
            if len(remaining) == 0:
                out = 1
                return out
                break
            elif index not in remaining.index.tolist(): 
                continue
            else:
                p1_possible = np.intersect1d(remaining.loc[index, 'possible_matches'], \
                                             remaining.index.tolist())
        if len(p1_possible) <= 1:
            return out
            break
        # pick a random person2
        p2 = df.loc[p1_possible].sample(1)
        p2_possible = p2['possible_matches'].tolist()
        # take person1 possible matches and remove person2 and all of person2's not possible matches
        p1p2_possible_step1 = np.array(p1_possible)[~np.isin(p1_possible, p2.index.tolist())] # remove p2
        p1p2_possible = p1p2_possible_step1[np.isin(p1p2_possible_step1, p2_possible)]

        if len(p1p2_possible) == 0:
            return out
            break
        # pick a random person3
        p3 = df.loc[p1p2_possible].sample(1)
        p3_possible = p3['possible_matches'].tolist()

        if groupnum <= number4groups:
            # take person3 out oc p1p2_possible
            p1p2p3_possible_step1 = np.array(p1p2_possible)[~np.isin(p1p2_possible, p3.index.tolist())] 
            # keep only person3's possible matches 
            p1p2p3_possible = p1p2p3_possible_step1[np.isin(p1p2p3_possible_step1, p3_possible)]
            
            if len(p1p2p3_possible) == 0:
                return out
                break
            # pick a random person4
            p4 = df.loc[p1p2p3_possible].sample(1)

            # write the current match for all *4* group members
            df.loc[index, 'current_match'].extend([index, p2.index[0], p3.index[0], p4.index[0]])
            df.loc[p2.index[0], 'current_match'].extend([p2.index[0], index, p3.index[0], p4.index[0]])
            df.loc[p3.index[0], 'current_match'].extend([p3.index[0], index, p2.index[0], p4.index[0]])
            df.loc[p4.index[0], 'current_match'].extend([p4.index[0], index, p2.index[0], p3.index[0]])
            
            df.loc[index, 'current_group'] = groupnum
            df.loc[p2.index[0], 'current_group'] = groupnum
            df.loc[p3.index[0], 'current_group'] = groupnum
            df.loc[p4.index[0], 'current_group'] = groupnum
            
        else:
            # write the current match for all *3* group members
            df.loc[index, 'current_match'].extend([index, p2.index[0], p3.index[0]])
            df.loc[p2.index[0], 'current_match'].extend([p2.index[0], index, p3.index[0]])
            df.loc[p3.index[0], 'current_match'].extend([p3.index[0], index, p2.index[0]])
            
            df.loc[index, 'current_group'] = groupnum
            df.loc[p2.index[0], 'current_group'] = groupnum
            df.loc[p3.index[0], 'current_group'] = groupnum

        # create a new version of the overall df with the matches rows removed
        if i == 0:
            if groupnum <= number4groups:
                remaining = df.loc[df.index.difference((index, p2.index[0], p3.index[0], p4.index[0]))]
            else:
                remaining = df.loc[df.index.difference((index, p2.index[0], p3.index[0]))]
        if i > 0:
            if groupnum <= number4groups:
                remaining = remaining.loc[remaining.index.difference((index, p2.index[0], \
                                                                      p3.index[0], p4.index[0]))]
            else:
                remaining = remaining.loc[remaining.index.difference((index, p2.index[0], p3.index[0]))]

        if i == len(df) - 1:
            out = 1
            return out

        groupnum+=1

def create_match():
    # calls the matching loop up to 1000 times to create the match 
    global out, full_data, data, participants, participants_round
    counter = 0
    out = 0
    while out == 0:
        counter += 1
        out = perform_random_loop(data)  
        print(counter, '\r', end='')
        if counter >= 1000:
            print('match failed, not possible')
            break
    if out == 1:
        print('match created')
    if ((out == 0) and (checkbox_widget.value)):
        print('Allowing up to 2 law students in the same group...')
        # divide the law students into 2 random groups
        half_law = len(data[data['department'] == 'School of Law'])//2
        data.loc[data[data['department'] == 'School of Law'].sample(half_law).index, 'department'] = 'School of Law 1'
        data.loc[data[data['department'] == 'School of Law'].index, 'department'] = 'School of Law 2'
        # have to re-run the possible matches determination
        possible_matches()
        # then continue as normal
        counter = 0
        while out == 0:
            counter += 1
            out = perform_random_loop(data)
            print(counter, '\r', end='')
            if counter >= 1000:
                print('match failed again, really not possible')
                break
        if out == 1:
            print('match created with double law students')
            #print(len(data[(data['current_group'].to_numpy() == -1) & (data['school'].to_numpy() == 'School of Law')]))
            counter += 1
            
    # use data to populate participants_round, full_data, and participants
    participants_round = copy(participants[participants['optin' + str(round_widget.value)]=='Yes'])
    participants_round['group' + str(round_widget.value)] = data['current_group']
    participants = pd.merge(participants, participants_round, how='left')
    full_data.loc[data.index, :] = copy(data)   

def save_data():
    if out == 0:
        print('match failed, no data saved')
    else:
        global full_data_to_save, participants_to_save
        # create data in a format that can be saved to Google
        # add headers and index to the data

        full_data_to_save = copy(full_data.astype('str').values.tolist())
        full_data_index = full_data.index.tolist()
        for i, idx_value in enumerate(full_data_index):
            full_data_to_save[i].insert(0, idx_value)
        full_data_to_save.insert(0, ['UNIQUE_ID'] + full_data.columns.values.tolist())

        participants_to_save = copy(participants.astype('str').values.tolist())
        participants_index = participants.index.tolist()
        for i, idx_value in enumerate(participants_index):
            participants_to_save[i].insert(0, idx_value)
        participants_to_save.insert(0, ['UNIQUE_ID'] + participants.columns.values.tolist())


        try:
            spreadsheet.add_worksheet('full_data', rows=len(full_data)+2, cols=len(full_data.columns)+2)
        except:
            pass
        spreadsheet.worksheet('full_data').clear()
        spreadsheet.worksheet('full_data').update('A1', full_data_to_save)


        try:
            spreadsheet.add_worksheet('participants', rows=len(participants)+2, cols=len(participants.columns)+2)
        except:
            pass
        spreadsheet.worksheet('participants').clear()
        spreadsheet.worksheet('participants').update('A1', participants_to_save)

        print('spreadsheets updated')

def send_emails():
    if out == 0:
        print('match failed, no emails sent')
    else:
        global spreadsheet, full_data, data, number3groups, number4groups, participants_round, participants
        print('sending emails...')
        for group_num in np.arange(1, data['current_group'].max()+1):
            print(group_num, '\r', end='')
            time.sleep(2)
            group = data.loc[data['current_group'] == group_num]

            email_sender = 'bumeetup@bu.edu'
            email_password = password_widget.value

            email_recipients = group['email'].tolist()
            greeting = ', ' . join(group['first_name'].tolist()[:-1] + \
                                   ['and ' + group['first_name'].tolist()[-1]])

            # import the email from the spreadsheet and fix the formatting
            email_sheet = spreadsheet.worksheet('match_email')
            email = ''
            for l in email_sheet.get_all_values():
                if l[0] == '':
                    email += '\n\n'
                elif 'XXX' in l[0]:
                    email += l[0].replace('XXX', '%s')
                else:
                    email += l[0]

            subject = 'BU Meetup Round %s'%str(round_widget.value)
            body = email%(greeting, round_widget.value)

            em = EmailMessage()
            em['From'] = email_sender
            em['To'] = email_recipients
            em['Subject'] = subject
            em.set_content(body)

            context = ssl.create_default_context()

            with smtplib.SMTP_SSL('smtp.gmail.com', 465, context=context) as smtp:
                smtp.login(email_sender, email_password)
                smtp.sendmail(email_sender, email_recipients, em.as_string())

        print('emails sent       ', '\r', end='')

def send_reminder_emails():
    global spreadsheet, full_data, data, number3groups, number4groups, participants_round, participants
    print('sending reminder emails...')
    for group_num in np.arange(1, data['current_group'].max()+1):
        print(group_num, '\r', end='')
        time.sleep(2)
        group = data.loc[data['current_group'] == group_num]

        email_sender = 'bumeetup@bu.edu'
        email_password = password_widget.value

        email_recipients = group['email'].tolist()
        greeting = ', ' . join(group['first_name'].tolist()[:-1] + \
                               ['and ' + group['first_name'].tolist()[-1]])
        
        # import the email from the spreadsheet and fix the formatting
        email_sheet = spreadsheet.worksheet('reminder_email')
        email = ''
        for l in email_sheet.get_all_values():
            if l[0] == '':
                email += '\n\n'
            elif 'XXX' in l[0]:
                email += l[0].replace('XXX', '%s')
            else:
                email += l[0]
        
        subject = 'Reminder: BU Meetup Round %s'%str(round_widget.value)
        body = email%(greeting, round_widget.value)
        
        em = EmailMessage()
        em['From'] = email_sender
        em['To'] = email_recipients
        em['Subject'] = subject
        em.set_content(body)

        context = ssl.create_default_context()

        with smtplib.SMTP_SSL('smtp.gmail.com', 465, context=context) as smtp:
            smtp.login(email_sender, email_password)
            smtp.sendmail(email_sender, email_recipients, em.as_string())

    print('reminder emails sent       ', '\r', end='')

    
# initialize jupyter widgets 
style = {'description_width': 'initial'}
layout = widgets.Layout(width='auto', height='35px')

spreadsheet_widget = widgets.Text(
    description='Spreadsheet name:', 
    disabled=False, 
    style=style)

round_widget = widgets.Dropdown(
    options=np.arange(1, 10),
    description='Matching round:', 
    disabled=False, 
    style=style)

groupsize_widget = widgets.Dropdown(
    options=[4,3],
    description='Group Size',
    disabled=False,
    style=style)

password_widget = widgets.Text(
    description='Email password:', 
    disabled=False, 
    style=style)

checkbox_widget = widgets.Checkbox(
    value=False,
    description='Allow 2 law students in a group if necessary',
    disabled=False, 
    style=style)

match_button = widgets.Button(description='Run Match, Notify Groups, Update Data', layout=layout, display='flex', \
                              flex_flow='column', align_items='stretch')
remind_button = widgets.Button(description='Remind Groups', layout=layout, display='flex', \
                               flex_flow='column', align_items='stretch')

def display_widget():
    display(round_widget), \
    display(spreadsheet_widget), \
    display(groupsize_widget),\
    display(password_widget), \
    display(checkbox_widget), \
    display(match_button)
    display(remind_button)

# functions for each widget button press
    
def event_match(button):
    # do everything
    clear_output()
    display_widget()
    time.sleep(1)
    import_files()
    time.sleep(1)
    dedup()
    time.sleep(1)
    sync_lists()
    time.sleep(1)
    create_data()
    time.sleep(1)
    possible_matches()
    time.sleep(1)
    create_match()
    time.sleep(1)
    save_data()
    time.sleep(1)
    send_emails() 
    
def event_remind(button):
    clear_output()
    display_widget()
    time.sleep(1)
    import_for_reminder()
    time.sleep(1)
    send_reminder_emails() 
    
    
# connecting the jupyter buttons to the actions for each button 
match_button.on_click(event_match)
remind_button.on_click(event_remind)

In [2]:
# working on the file organization

In [184]:
# values for testing
master_file = "Intake_Fall23_noemails"
roundvalue = 2

In [185]:
def import_files():
    # imports the round opt-ins and the previous round matching data
    global master, intake, condensed, full_data, data, optin, number3groups, number4groups, participants_prev, participants, full_data_prev
    
    # import intake sheet from master file
    master = gc.open(master_file)
    intake = master.worksheet('intake_list')
    intake = np.array(intake.get_all_values())
    intake = pd.DataFrame(data=intake[1:,:], columns=intake[0,:])
    email_cols = [_ for _ in intake.columns if 'email' in _]
    intake[email_cols[0]] = intake[email_cols[0]].str.lower()
    intake[email_cols[0]] = intake[email_cols[0]].str.strip()
    intake[email_cols[1]] = intake[email_cols[1]].str.lower()
    intake[email_cols[1]] = intake[email_cols[1]].str.strip()
    # assign new unique ids to new participants
    need_new_unique_ids = np.arange(intake[intake['User ID'] != ''].index[-1]+1, len(intake))
    intake['User ID'] = intake.loc[intake['User ID'] != '', 'User ID'].astype(int).tolist() + [int(intake.loc[need_new_unique_ids[0]-1, 'User ID'])+i+1 for i, index in enumerate(need_new_unique_ids)]

    #########-------------------### NEED TO DEDUP THE INTAKE LIST HERE

    # create or update the condensed_responses sheet
    condensed = copy(intake.iloc[:, np.arange(0, 10)])
    condensed.columns = ['User_ID', 'Timestamp', 'full_name', 'first_name', 'pronouns', 'email', 'email_retype', 'campus', 'degree', 'school']
    condensed_department_columns = intake.iloc[:, np.arange(10, len(intake.columns))]
    condensed_department_column = []
    for index, row in condensed_department_columns.iterrows():
        for i, item in enumerate(row):
            if item != '':
                condensed_department_column.append(item)
                break
            if i+1 == len(row):
                condensed_department_column.append(condensed.loc[index, 'school'])
    condensed['department'] = condensed_department_column

    # import opt-in sheet
    #optin_sheet = master.worksheet('optin' + str(round_widget.value))
    optin_sheet = master.worksheet('optin' + str(roundvalue))
    optin = np.array(optin_sheet.get_all_values())
    optin = pd.DataFrame(data=optin[1:,:], columns=['email', 'available'])
    optin = copy(optin[optin['available'] == 'Yes'])
    optin['email'] = optin['email'].str.lower()
    optin['email'] = optin['email'].str.strip()

    # remove duplicates in the optin
    optin = optin.drop_duplicates(subset='email', keep='last') # keep the last in case someone changed 

    # merge the optin into condensed 
    condensed = pd.merge(condensed, optin[['email', 'available']], on='email', how='left')
    condensed['available'] = condensed['available'].fillna('No')
    condensed.rename(columns={'available': 'optin' + str(roundvalue)}, inplace=True)
    #condensed.rename(columns={'available': 'optin' + str(round_widget.value)}, inplace=True)
    
    #if round_widget.value > 1:
    if roundvalue > 1:
        all_data_prev_sheet = master.worksheet('all_data')
        all_data_prev = np.array(all_data_prev_sheet.get_all_values())
        all_data_prev = pd.DataFrame(data=all_data_prev[1:,:], columns=all_data_prev[0,:])
        
        all_participants_prev_sheet = master.worksheet('all_participants')
        all_participants_prev = np.array(all_participants_prev_sheet.get_all_values())
        all_participants_prev = pd.DataFrame(data=all_participants_prev[1:,:], columns=all_participants_prev[0,:])
        all_participants_prev['User_ID'] = [int(row['User_ID']) for index, row in all_participants_prev.iterrows()]
        
        all_participants_prev_mini = pd.concat([all_participants_prev[['User_ID']], all_participants_prev.iloc[:, 11:]], axis=1)
        all_participants = pd.merge(condensed, all_participants_prev_mini, on=['User_ID'], how='left')
        all_participants.insert(len(all_participants.columns)-1, 'optin' + str(round_widget.value), all_participants.pop('optin' + str(round_widget.value)))

In [188]:
import_files()

In [193]:
def create_data(program):
    # program must be one of the following: 'Doctoral', 'Masters', 'Med', 'Virtual'
    global all_data, all_participants, all_data_prev, all_participants_prev, full_data, data, number3groups, number4groups, participants_round, participants
    
    #if round_widget.value == 1:
    if roundvalue == 1:
        # create the full_data list from scratch
        if program == 'Doctoral':
            participants = copy(condensed[np.logical_and((condensed['campus']=='Charles River Campus').to_numpy(), condensed['degree'].str.contains('Doctoral').to_numpy())])
            full_data = copy(participants)
        elif program == 'Masters':
            participants = copy(condensed[np.logical_and((condensed['campus']=='Charles River Campus').tolist(), condensed['degree'].str.contains('Masters').tolist())])
            full_data = copy(participants)
        elif program == 'Med':
            participants = copy(condensed[(condensed['campus']=='BU Medical Campus').tolist()])
            full_data = copy(participants)
        elif program == 'Virtual':
            participants = copy(condensed[(condensed['campus']=='Virtual').tolist()])
            full_data = copy(participants)
        full_data.drop(columns='optin1', inplace=True)
        data = copy(full_data[(participants['optin'+str(round_widget.value)]=='Yes').to_numpy()])
        init(full_data)
        init(data)

    #if round_widget.value > 1:
    if roundvalue > 1:
        #if program == 'Doctoral':
        #    full_data_prev = copy(participants)
        
        # full_data_prev imported
        # participants_prev imported and already converted to participants using the master list <---- THIS DOESN'T SEEM RIGHT
        
        '''
        # format the entries correctly
        all_data_prev['prior_matches'] = \
            [convert_cell(row['prior_matches']) for index, row in all_data_prev.iterrows()]
        all_data_prev['current_match'] = \
            [convert_cell(row['current_match']) for index, row in all_data_prev.iterrows()]
        all_data_prev['possible_matches'] = \
            [convert_cell(row['possible_matches']) for index, row in all_data_prev.iterrows()]
        all_data_prev['email'] = all_data_prev['email'].str.lower()
        all_data_prev['email'] = all_data_prev['email'].str.strip()
        '''

        # initialize empty rows in the data file for new participants
        # this take a lot of lines
        a = pd.merge(all_participants, all_data_prev, left_index=True, right_index=True, how='outer')
        leftcols = [col for col in a.columns if '_x' in col] # lefthand side columns to keep 
        rightcols = a.columns[-7:] # righthand side columns to keep
        a = a[np.append(leftcols, rightcols)]
        a.columns = all_data_prev.columns
        a['prior_matches'] = a['prior_matches'].apply(lambda d: d if isinstance(d, list) else [])
        a['current_match'] = a['current_match'].apply(lambda d: d if isinstance(d, list) else [])
        a['current_group'] = a['current_group'].fillna(-1)
        a['num_prior_matches'] = a['num_prior_matches'].fillna(0)
        a['size_prev_match'] = a['size_prev_match'].fillna(0)
        a['possible_matches'] = a['possible_matches'].apply(lambda d: d if isinstance(d, list) else [])
        a['num_possible_matches'] = a['num_possible_matches'].fillna(-1)
        all_data = copy(a)

        # create data array for just the opt-ins
        #if len(all_data) != len(participants):
        #    print('ERROR: length of `all_data` doesn\'t equal length of `participants`.')
        #data = copy(all_data[participants['optin'+str(round_widget.value)]=='Yes'])
        
        complete_match(all_data)
        # _prev is from the end of the previous match (without new signups added)
        all_data_prev = copy(all_data)
        # add in the new signups that we already have in the condensed list
        all_data = pd.merge(condensed.loc[:, condensed.columns[:-1]], all_data, how='left')
        
        # prep the new columns as needed
        prior_match_col = np.where(all_data.columns == 'prior_matches')[0][0]
        current_match_col = np.where(all_data.columns == 'current_match')[0][0]
        possible_match_col = np.where(all_data.columns == 'possible_matches')[0][0]
        current_group_col = np.where(all_data.columns == 'current_group')[0][0]
        num_possible_matches_col = np.where(all_data.columns == 'num_possible_matches')[0][0]
        num_prior_matches_col = np.where(all_data.columns == 'num_prior_matches')[0][0]
        size_prev_match_col = np.where(all_data.columns == 'size_prev_match')[0][0]
        
        def replace_na(value, replacer):
            if value == []:
                return value
            try:
                if pd.isna(value):
                    return replacer
                return value
            except ValueError as e:
                return value
    
        cols_with_lists = [prior_match_col, current_match_col, possible_match_col]
        cols_with_negones = [current_group_col, num_possible_matches_col]
        cols_with_zeros = [num_prior_matches_col, size_prev_match_col]
        all_data.iloc[:, cols_with_lists] = all_data.iloc[:, cols_with_lists].map(replace_na, replacer=[])
        all_data.iloc[:, cols_with_negones] = all_data.iloc[:, cols_with_negones].map(replace_na, replacer=-1)
        all_data.iloc[:, cols_with_zeros] = all_data.iloc[:, cols_with_zeros].map(replace_na, replacer=0)

        # update all_participants
        all_participants = pd.merge(all_participants, condensed, how='right')
        
        
        # pull out the right program
        if program == 'Doctoral':
            full_data = copy(all_data[np.logical_and((all_data['campus']=='Charles River Campus').to_numpy(), all_data['degree'].str.contains('Doctoral').to_numpy())])
            participants = copy(all_participants[np.logical_and((all_participants['campus']=='Charles River Campus').to_numpy(), all_participants['degree'].str.contains('Doctoral').to_numpy())])
        elif program == 'Masters':
            full_data = copy(all_data[np.logical_and((all_data['campus']=='Charles River Campus').to_numpy(), all_data['degree'].str.contains('Masters').to_numpy())])
            participants = copy(all_participants[np.logical_and((all_participants['campus']=='Charles River Campus').to_numpy(), all_participants['degree'].str.contains('Masters').to_numpy())])
        elif program == 'Med':
            full_data = copy(all_data[(all_data['campus']=='BU Medical Campus').tolist()])
            participants = copy(participants[(participants['campus']=='BU Medical Campus').tolist()])
        elif program == 'Virtual':
            full_data = copy(all_data[(all_data['campus']=='Virtual').tolist()])
            participants = copy(participants[(participants['campus']=='Virtual').tolist()])
        data = copy(full_data[(participants['optin'+str(round_widget.value)]=='Yes').to_numpy()])
        

    # calculate the number of groups of 3 and 4 we will have 
    n = len(data)
    if groupsize_widget.value == 3:
        # make groups of 3 and fill in the gaps with groups of 4
        number3groups = n // 3
        number4groups = n - (number3groups * 3)
        number3groups = n // 3 - number4groups
    elif groupsize_widget.value == 4:
        # make groups of 4 and fill in the gaps with groups of 3
        nn = n
        number3groups = 0
        while (nn / 4 - np.floor(nn / 4)) != 0:
            nn -= 3 # keep subtracting groups of 3 until it's divisible by 4
            number3groups += 1
        number4groups = nn // 4


        print('Previous round completed')
    print('All data structures made')

THINGS IN PROGRESS:

Need to create all_participants for rounds 2+

Everything is still using indices when it should be using be using the User_IDs

In [102]:
clear_output()
display_widget()

Dropdown(description='Matching round:', index=1, options=(1, 2, 3, 4, 5, 6, 7, 8, 9), style=DescriptionStyle(d…

Text(value='', description='Spreadsheet name:', style=TextStyle(description_width='initial'))

Dropdown(description='Group Size', options=(4, 3), style=DescriptionStyle(description_width='initial'), value=…

Text(value='', description='Email password:', style=TextStyle(description_width='initial'))

Checkbox(value=False, description='Allow 2 law students in a group if necessary', style=CheckboxStyle(descript…

Button(description='Run Match, Notify Groups, Update Data', layout=Layout(height='35px', width='auto'), style=…

Button(description='Remind Groups', layout=Layout(height='35px', width='auto'), style=ButtonStyle())

Don't forget to move the opt-in number checker to the new opt-in merging location.

In [194]:
create_data(program='Doctoral')

Previous round completed
All data structures made


In [200]:
possible_matches()

In [201]:
create_match()

1 match created


In [202]:
a = copy(full_data)
aa = copy(participants)

In [203]:
create_data(program='Masters')

Previous round completed
All data structures made


In [206]:
possible_matches()

In [207]:
create_match()

1 match created


In [208]:
b = copy(full_data)
bb = copy(participants)

In [209]:
create_data(program='Med')

ValueError: Item wrong length 0 instead of 287.

In [79]:
possible_matches()

In [80]:
create_match()

1 match created


In [81]:
c = copy(full_data)
cc = copy(participants)

In [82]:
create_data(program='Virtual')

Previous round completed
All data structures made


In [83]:
possible_matches(samedepartment=True)

In [84]:
create_match()

1 match created


In [85]:
d = copy(full_data)
dd = copy(participants)

In [86]:
all_data = pd.concat([a, b, c, d]).sort_values('User_ID')
all_participants = pd.concat([aa, bb, cc, dd]).sort_values('User_ID')

In [87]:
def save_data():
    if out == 0:
        print('match failed, no data saved')
    else:
        global all_data_to_save, all_participants_to_save
        # create data in a format that can be saved to Google
        # add headers and index to the data

        intake_to_save = copy(intake.astype('str').values.tolist())
        intake_index = intake.index.tolist()
        #for i, idx_value in enumerate(intake_index):
        #    intake_to_save[i].insert(0, idx_value)
        intake_to_save.insert(0, intake.columns.values.tolist())
        
        condensed_to_save = copy(condensed.loc[:, :condensed.columns[-2]].astype('str').values.tolist())
        condensed_index = condensed.index.tolist()
        #for i, idx_value in enumerate(condensed_index):
        #    condensed_to_save[i].insert(0, idx_value)
        condensed_to_save.insert(0, condensed.loc[:, :condensed.columns[-2]].columns.values.tolist())
        
        all_data_to_save = copy(all_data.astype('str').values.tolist())
        all_data_index = all_data.index.tolist()
        #for i, idx_value in enumerate(all_data_index):
        #    all_data_to_save[i].insert(0, idx_value)
        all_data_to_save.insert(0, all_data.columns.values.tolist())

        all_participants_to_save = copy(all_participants.astype('str').values.tolist())
        all_participants_index = all_participants.index.tolist()
        #for i, idx_value in enumerate(all_participants_index):
        #    all_participants_to_save[i].insert(0, idx_value)
        all_participants_to_save.insert(0, all_participants.columns.values.tolist())

        try:
            master.add_worksheet('intake_list', rows=len(intake)+2, cols=len(intake.columns)+2)
        except:
            pass
        master.worksheet('intake_list').clear()
        master.worksheet('intake_list').update(range_name='A1', values=intake_to_save)   

        
        try:
            master.add_worksheet('condensed_responses', rows=len(condensed)+2, cols=len(condensed.columns)+2)
        except:
            pass
        master.worksheet('condensed_responses').clear()
        master.worksheet('condensed_responses').update(range_name='A1', values=condensed_to_save)   
        
        try:
            master.add_worksheet('all_data', rows=len(all_data)+2, cols=len(all_data.columns)+2)
        except:
            pass
        master.worksheet('all_data').clear()
        master.worksheet('all_data').update(range_name='A1', values=all_data_to_save)


        try:
            master.add_worksheet('all_participants', rows=len(all_participants)+2, cols=len(all_participants.columns)+2)
        except:
            pass
        master.worksheet('all_participants').clear()
        master.worksheet('all_participants').update(range_name='A1', values=all_participants_to_save)     

        print('spreadsheets updated')

In [88]:
save_data()

  master.worksheet('intake_list').update(range_name='A1', values=intake_to_save)
  master.worksheet('condensed_responses').update(range_name='A1', values=condensed_to_save)
  master.worksheet('all_data').update(range_name='A1', values=all_data_to_save)
  master.worksheet('all_participants').update(range_name='A1', values=all_participants_to_save)


spreadsheets updated
