# REU Student Application Assembly

The goal of this notebook is to assemble application materials into a package for reviewers.

In [None]:
import numpy as np
import pandas as pd

import os, sys
import glob
import datetime

from mailmerge import MailMerge
from shutil import copy2
from docx2pdf import convert
from PyPDF2 import PdfFileMerger, PdfFileReader, PdfFileWriter
from pikepdf import Pdf

%load_ext autoreload
%autoreload 1

### Update these values every year

In [None]:
# CSV with application info (one row per submitted application)
df_app_orig = pd.read_csv('../REU_2021_final.csv')

# CSV with rec letter info (one row per submitted recommendation)
df_letter = pd.read_csv('../REU_Letters_final.csv')

# Row labels of df_app_orig to drop; these were chosen by manually inspecting
# the applications, so the list has to be rebuilt for each year's CSV.
drop_list = [124, 59, 65, 116, 17]

### Define dataframes

In [None]:
# Define a full name column ("First Last") for use later. The separating space
# matches the "{Q1} {Q2}" format used for student names in the rest of the
# notebook and stops different first/last splits from colliding
# (e.g. "Ann"+"Alee" vs "Anna"+"Lee") during duplicate detection.
df_app_orig['full_name'] = df_app_orig['Q1'] + ' ' + df_app_orig['Q2']

# Remove unfinished applications, students who will graduate before the
# program, and non-citizens.
# .copy() makes the result an independent frame: it is modified later
# (rows are dropped from it), and doing that on a filtered slice of
# df_app_orig may trigger pandas' SettingWithCopyWarning.
keep_mask = (
    (df_app_orig['Finished'] == 'True')
    & (df_app_orig['Q5'] == 'Yes')
    & (df_app_orig['Q12'] == 'No')
)
df_app_slice = df_app_orig[keep_mask].copy()

# Find the duplicates: keep=False flags every row whose full name occurs more
# than once, not just the repeats.
df_duplicate = df_app_slice[df_app_slice.duplicated(subset=['full_name'], keep=False)]

# Print duplicates to a csv file, sorted so entries with the same name are adjacent
df_duplicate.sort_values(by=['full_name']).to_csv('../duplicates.csv')

In [None]:
# Drop the manually rejected applications listed in drop_list.
# The result is reassigned rather than dropped in place: df_app_slice comes
# from a boolean filter of df_app_orig, and an in-place drop on such a frame
# may trigger pandas' SettingWithCopyWarning.
# Labels that are not present (e.g. when this cell is re-run) are reported
# instead of raising KeyError, so the cell can be executed more than once.
not_present = [idx for idx in drop_list if idx not in df_app_slice.index]
if not_present:
    print(f'Indices not present in df_app_slice (skipped): {not_present}')
df_app_slice = df_app_slice.drop(index=drop_list, errors='ignore')

In [None]:
# Blank out NaNs so missing answers show up empty in the application, then
# renumber the rows from 0; the old row labels are kept as an 'index' column.
df_app = df_app_slice.replace(np.nan, '', regex=True).reset_index()

# Write the final applicant list (for Google docs) without the first 19
# columns of the frame.
leading_columns = df_app.columns[np.arange(19)]
df_app.drop(columns=leading_columns).to_csv('../final_list.csv')

### Make Student Directories and Create an Application docx for each student

In [None]:
# Timestamp for this run's error log. The later cells rebuild the same file
# name from dt_now, so every problem of a session lands in one file.
dt_now = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

f_error = f'../errors-{dt_now}.txt'

# Survey columns merged into the application template. The merge-field name
# is the column name with any '.' removed (column 'Q23.1' fills field 'Q231').
merge_columns = [
    'Q34_2', 'Q25_3', 'Q26', 'Q23.1', 'Q30', 'Q31', 'Q38',
    'Q34_3', 'Q63', 'Q2', 'Q36', 'Q40', 'Q33', 'Q25_1',
    'Q11', 'Q37', 'Q24', 'Q19', 'Q27', 'Q34_5', 'Q28',
    'Q23', 'Q29', 'Q39', 'Q34_4', 'Q32', 'Q22', 'Q21',
    'Q6', 'Q1', 'Q25_2', 'Q43', 'Q34_1', 'Q7', 'Q20',
]

# Make sure the docx output folder exists before anything is written to it
os.makedirs('../docx', exist_ok=True)

# The with-block closes the log even if the loop is interrupted
with open(f_error, "a") as f_err:
    for i in df_app.index.tolist():
        # Format student name (outside the try so the handler can always use it)
        name = f"{df_app['Q1'][i]} {df_app['Q2'][i]}"

        try:
            print(f'Starting {name}')

            # Define student directory
            student_dir = f"../student_folders/{df_app['Q2'][i]}_{df_app['Q1'][i]}"

            # Make the directory; exist_ok lets the cell be re-run without
            # every student failing because the folder is already there
            os.makedirs(student_dir, exist_ok=True)

            # MailMerge to create pretty application
            with MailMerge('../application_template.docx') as document:
                f_docx = f"../docx/{df_app['Q2'][i]}_{df_app['Q1'][i]}_application.docx"
                fields = {
                    column.replace('.', ''): str(df_app[column][i])
                    for column in merge_columns
                }
                document.merge(**fields)

                # Write pretty application
                document.write(f_docx)

        except Exception:
            # Log and move on to the next student. Exception (not a bare
            # except) so that interrupting the kernel still stops the loop.
            print(f'Problem with {name}')
            f_err.write(f'Problem with {name}\n \n')
            error = sys.exc_info()
            print(error)
            f_err.write(f'{error} \n \n')

### Convert Application docx files to pdf

In [None]:
# Convert the application docx files written to ../docx into PDFs.
# NOTE(review): keep_active=True presumably leaves the Word application running
# after the conversion — confirm against the docx2pdf documentation.
convert('../docx', keep_active=True)

### Move Application PDFs to student folders

In [None]:
# Append to this session's error log (dt_now is set in the cell that creates
# the student directories).
f_error = f'../errors-{dt_now}.txt'

# The with-block closes the log even if the loop is interrupted
with open(f_error, "a") as f_err:
    for i in df_app.index.tolist():
        # Format student name (outside the try so the handler can always use it)
        name = f"{df_app['Q1'][i]} {df_app['Q2'][i]}"

        try:
            print(f'Starting {name}')

            # Define student directory
            student_dir = f'../student_folders/{df_app.Q2[i]}_{df_app.Q1[i]}'

            # Copy PDF of application to student directory
            f_pdf = f'../docx/{df_app.Q2[i]}_{df_app.Q1[i]}_application.pdf'
            copy2(f_pdf, student_dir)

        except Exception:
            # Log and move on to the next student. Exception (not a bare
            # except) so that interrupting the kernel still stops the loop.
            print(f'Problem with {name}')
            # Separator after the name keeps the log readable and matches the
            # format written by the directory-creation cell
            f_err.write(f'Problem with {name}\n \n')
            error = sys.exc_info()
            print(error)
            f_err.write(f'{error} \n \n')

### Copy transcripts to student folders

In [None]:
# Append to this session's error log (dt_now is set in the cell that creates
# the student directories).
f_error = f'../errors-{dt_now}.txt'

# The with-block closes the log even if the loop is interrupted
with open(f_error, "a") as f_err:
    for i in df_app.index.tolist():
        # Format student name (outside the try so the handler can always use it)
        name = f"{df_app['Q1'][i]} {df_app['Q2'][i]}"

        try:
            print(f'Starting {name}')

            # Define student directory
            student_dir = f'../student_folders/{df_app.Q2[i]}_{df_app.Q1[i]}'

            # Transcript uploads are matched by the application's ResponseId
            # appearing anywhere in the file name
            id_num = df_app['ResponseId'][i]
            transcripts = glob.glob(f'../Q45/*{id_num}*')

            if not transcripts:
                # Record the gap instead of passing over it silently
                message = f'No transcript found for {name} (ResponseId {id_num})'
                print(message)
                f_err.write(f'{message}\n \n')

            # Copy transcripts to student dir. The 'zzzz_' prefix makes them
            # sort after the student's other files when names are sorted.
            for transcript in transcripts:
                tran_name = os.path.basename(transcript)
                copy2(transcript, f'{student_dir}/zzzz_{tran_name}')

        except Exception:
            # Log and move on to the next student. Exception (not a bare
            # except) so that interrupting the kernel still stops the loop.
            print(f'Problem with {name}')
            f_err.write(f'Problem with {name}\n \n')
            error = sys.exc_info()
            print(error)
            f_err.write(f'{error} \n \n')

### Create sheets with recommendation info

In [None]:
# Replace NaNs with empty strings so that missing answers are left blank on
# the rec sheets instead of being written out as the text 'nan'
df_letter = df_letter.replace(np.nan, '', regex=True)

In [None]:
# Append to this session's error log (dt_now is set in the cell that creates
# the student directories).
f_error = f'../errors-{dt_now}.txt'

# Letter-survey columns merged into the rec sheet template; the merge-field
# names in the template are identical to the column names.
rec_columns = ['Q1', 'Q2', 'Q3', 'Q12', 'Q13', 'Q14']

# Make sure the rec sheet output folder exists before anything is written to it
os.makedirs('../docx/rec', exist_ok=True)

with open(f_error, "a") as f_err:
    for i in df_app.index.tolist():
        # Format student name
        name = f"{df_app['Q1'][i]} {df_app['Q2'][i]}"

        # Like the other per-student loops, log a failure and carry on so one
        # bad row does not stop the sheets for every remaining student
        try:
            print(f'Starting {name}')

            # Letters for this student: rows whose Q13 equals the student's
            # "First Last" name exactly
            df_name = df_letter[df_letter['Q13'] == name]

            # One numbered rec sheet per letter, counting from 1
            for n, ind in enumerate(df_name.index.tolist(), start=1):
                # MailMerge to create pretty rec sheets
                with MailMerge('../rec_template.docx') as document:
                    f_docx = f"../docx/rec/{df_app['Q2'][i]}_{df_app['Q1'][i]}_rec_{n}.docx"
                    fields = {
                        column: str(df_letter[column][ind])
                        for column in rec_columns
                    }
                    document.merge(**fields)

                    # Write pretty rec sheet
                    document.write(f_docx)

        except Exception:
            print(f'Problem with {name}')
            f_err.write(f'Problem with {name}\n \n')
            error = sys.exc_info()
            print(error)
            f_err.write(f'{error} \n \n')

### Convert rec sheets from docx to pdf

In [None]:
# Convert the rec sheet docx files written to ../docx/rec into PDFs.
# NOTE(review): keep_active=True presumably leaves the Word application running
# after the conversion — confirm against the docx2pdf documentation.
convert('../docx/rec', keep_active=True)

### Move rec sheets to student folders

In [None]:
# Copy rec sheets to student directories

# Append to this session's error log (dt_now is set in the cell that creates
# the student directories).
f_error = f'../errors-{dt_now}.txt'

# The with-block closes the log even if the loop is interrupted
with open(f_error, "a") as f_err:
    for i in df_app.index.tolist():
        # Format student name (outside the try so the handler can always use it)
        name = f"{df_app['Q1'][i]} {df_app['Q2'][i]}"

        try:
            print(f'Starting {name}')

            # Define student directory
            student_dir = f'../student_folders/{df_app.Q2[i]}_{df_app.Q1[i]}'

            # Copy PDF of rec sheets to student directory. The 'zz_' prefix
            # makes them sort after the application PDF when names are sorted.
            rec_pattern = f'../docx/rec/{df_app.Q2[i]}_{df_app.Q1[i]}_rec_*.pdf'
            for rec_path in glob.glob(rec_pattern):
                rec_sheet = os.path.basename(rec_path)
                copy2(rec_path, f'{student_dir}/zz_{rec_sheet}')

        except Exception:
            # Log and move on to the next student. Exception (not a bare
            # except) so that interrupting the kernel still stops the loop.
            print(f'Problem with {name}')
            f_err.write(f'Problem with {name}\n \n')
            error = sys.exc_info()
            print(error)
            f_err.write(f'{error} \n \n')

### Move recommendation letters to student folders

In [None]:
# Append to this session's error log (dt_now is set in the cell that creates
# the student directories).
f_error = f'../errors-{dt_now}.txt'

# The with-block closes the log even if the loop is interrupted
with open(f_error, "a") as f_err:
    for i in df_app.index.tolist():
        # Format student name (outside the try so the handler can always use it)
        name = f"{df_app['Q1'][i]} {df_app['Q2'][i]}"

        try:
            print(f'Starting {name}')

            # Define student directory
            student_dir = f'../student_folders/{df_app.Q2[i]}_{df_app.Q1[i]}'

            # Letters for this student: rows whose Q13 equals the student's
            # "First Last" name exactly
            df_name = df_letter[df_letter['Q13'] == name]

            # Copy rec letters to student dir; each uploaded letter is found
            # by the letter's ResponseId appearing in the file name
            for response_id in df_name['ResponseId'].tolist():
                matches = glob.glob(f'../Q15/*{response_id}*')

                if not matches:
                    # A missing upload should not stop the student's other
                    # letters from being copied: record it and continue
                    message = f'No rec letter file found for {name} (ResponseId {response_id})'
                    print(message)
                    f_err.write(f'{message}\n \n')
                    continue

                # Use the first matching file. The 'zzz_' prefix makes the
                # letter sort after the rec sheets and before the transcripts.
                rec_letter = matches[0]
                rec_name = os.path.basename(rec_letter)
                copy2(rec_letter, f'{student_dir}/zzz_{rec_name}')

        except Exception:
            # Log and move on to the next student. Exception (not a bare
            # except) so that interrupting the kernel still stops the loop.
            print(f'Problem with {name}')
            f_err.write(f'Problem with {name}\n \n')
            error = sys.exc_info()
            print(error)
            f_err.write(f'{error} \n \n')

### Save a merged PDF

In [None]:
# io is only needed by this cell (in-memory buffer for decrypted PDFs)
import io

# Append to this session's error log (dt_now is set in the cell that creates
# the student directories).
f_error = f'../errors-{dt_now}.txt'

# The with-block closes the log even if the loop is interrupted
with open(f_error, "a") as f_err:
    for i in df_app.index.tolist():
        # Format student name (outside the try so the handler can always use it)
        name = f"{df_app['Q1'][i]} {df_app['Q2'][i]}"

        try:
            print(f'Starting {name}')

            # Define student directory
            student_dir = f'../student_folders/{df_app.Q2[i]}_{df_app.Q1[i]}'
            merged_name = f'{df_app.Q2[i]}_{df_app.Q1[i]}_merged.pdf'

            # Collect the PDFs to merge, in sorted file-name order.
            # - set(): where globbing ignores case, '*.pdf' and '*.PDF' return
            #   the same files, which would put every document in twice.
            # - the merged file of an earlier run is left out so that
            #   re-running this cell does not merge it into itself.
            candidates = set(glob.glob(f'{student_dir}/*.pdf'))
            candidates.update(glob.glob(f'{student_dir}/*.PDF'))
            pdfs = sorted(
                path for path in candidates
                if os.path.basename(path) != merged_name
            )

            if not pdfs:
                continue

            merger = PdfFileMerger(strict=False)
            try:
                for pdf in pdfs:
                    if PdfFileReader(pdf).isEncrypted:
                        # Re-save the file with pikepdf to get a decrypted
                        # copy and merge that instead. Each copy lives in its
                        # own in-memory buffer: writing them all to one fixed
                        # file name would overwrite a copy the merger may
                        # still be reading when a student has more than one
                        # encrypted PDF.
                        decrypted = io.BytesIO()
                        with Pdf.open(pdf) as pdffile:
                            pdffile.save(decrypted)
                        decrypted.seek(0)
                        merger.append(decrypted, import_bookmarks=False)
                    else:
                        merger.append(pdf, import_bookmarks=False)

                merger.write(f'{student_dir}/{merged_name}')
            finally:
                # Release the input files even when a PDF cannot be merged
                merger.close()

        except Exception:
            # Log and move on to the next student. Exception (not a bare
            # except) so that interrupting the kernel still stops the loop.
            print(f'Problem with {name}')
            f_err.write(f'Problem with {name}\n \n')
            error = sys.exc_info()
            print(error)
            f_err.write(f'{error} \n \n')